import os
import json
import urllib.error
import urllib.request
import dashscope
from openai import OpenAI
class AliyunModelClient:
def __init__(self, api_key: str, region: str = "beijing"):
"""
初始化阿里云百炼大模型客户端
:param api_key: 你的阿里云百炼 API Key
:param region: 地域,默认为北京 (beijing),也支持 intl(新加坡), us(美国) 等
"""
self.api_key = api_key
dashscope.api_key = api_key
# 根据地域配置 base_url
base_urls = {
"beijing": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"intl": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
"us": "https://dashscope-us.aliyuncs.com/compatible-mode/v1"
}
rerank_urls = {
"beijing": "https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank",
"intl": "https://dashscope-intl.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank",
"us": "https://dashscope-us.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank"
}
self.base_url = base_urls.get(region, base_urls["beijing"])
self.rerank_url = rerank_urls.get(region, rerank_urls["beijing"])
# 初始化 OpenAI 兼容客户端 (用于 LLM, VL, Embedding)
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
def chat(self, prompt: str, model: str = "qwen-plus", system_prompt: str = "你是一个乐于助人的AI助手。"):
"""1. 问答模型 (LLM)"""
try:
completion = self.client.chat.completions.create(
model=model,
messages=[
{'role': 'system', 'content': system_prompt},
{'role': 'user', 'content': prompt}
]
)
return completion.choices[0].message.content
except Exception as e:
return f"LLM 调用出错: {e}"
def vision_chat(self, prompt: str, image_url: str, model: str = "qwen3-vl-plus"):
"""2. 视觉语言模型 (VL)"""
try:
completion = self.client.chat.completions.create(
model=model,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": image_url}}
]
}
]
)
return completion.choices[0].message.content
except Exception as e:
return f"VL 调用出错: {e}"
def get_embedding(self, texts: list, model: str = "text-embedding-v4"):
"""3a. Embedding 文本向量化"""
try:
# 如果是单条字符串,转为列表
if isinstance(texts, str):
texts = [texts]
response = self.client.embeddings.create(model=model, input=texts)
# 返回向量列表
return [item.embedding for item in response.data]
except Exception as e:
return f"Embedding 调用出错: {e}"
def get_rerank(self, query: str, documents: list, model: str = "gte-rerank-v2", top_n: int = None):
"""3b. Rerank 文本重排序"""
try:
if isinstance(documents, str):
documents = [documents]
payload = {
"model": model,
"input": {
"query": query,
"documents": documents
},
"parameters": {
"return_documents": True
}
}
if top_n is not None:
payload["parameters"]["top_n"] = top_n
data = json.dumps(payload).encode("utf-8")
request = urllib.request.Request(
self.rerank_url,
data=data,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
},
method="POST"
)
with urllib.request.urlopen(request, timeout=60) as response:
result = json.loads(response.read().decode("utf-8"))
return result["output"]["results"]
except urllib.error.HTTPError as e:
error_body = e.read().decode("utf-8", errors="replace")
return f"Rerank 调用出错: HTTP {e.code} - {error_body}"
except Exception as e:
return f"Rerank 调用出错: {e}"
def asr_transcribe(self, audio_url: str, model: str = "paraformer-v2"):
"""4. 语音转文字模型 (ASR - 异步任务)"""
try:
# 提交异步转写任务
task_response = dashscope.audio.asr.Transcription.async_call(
model=model,
file_urls=[audio_url],
language_hints=['zh', 'en'] # 针对中英混杂优化
)
# 轮询等待任务完成
transcription_response = dashscope.audio.asr.Transcription.wait(task=task_response.output.task_id)
if transcription_response.status_code == 200:
# wait() 返回的是结果文件 URL,转写文本需要继续下载结果 JSON 后读取。
results = transcription_response.output.results
texts = []
failed_results = []
for result in results:
if result.get("subtask_status") != "SUCCEEDED":
failed_results.append(result)
continue
transcription_url = result.get("transcription_url")
if not transcription_url:
failed_results.append({
"file_url": result.get("file_url"),
"message": "缺少 transcription_url"
})
continue
with urllib.request.urlopen(transcription_url, timeout=60) as response:
transcription_result = json.loads(response.read().decode("utf-8"))
transcripts = transcription_result.get("transcripts", [])
texts.extend(
transcript.get("text", "")
for transcript in transcripts
if transcript.get("text")
)
if texts:
return " ".join(texts)
return f"ASR 未获取到转写文本,任务结果: {failed_results or results}"
else:
return f"ASR 识别失败: {transcription_response.message}"
except Exception as e:
return f"ASR 提交任务出错: {e}"
# ================= 测试调用示例 =================
if __name__ == "__main__":
MY_API_KEY = os.getenv("DASHSCOPE_API_KEY")
if not MY_API_KEY:
raise RuntimeError("请先设置环境变量 DASHSCOPE_API_KEY,例如:$env:DASHSCOPE_API_KEY='sk-...'")
# 初始化客户端
ai = AliyunModelClient(api_key=MY_API_KEY, region="beijing")
print("---------- 1. 问答模型测试 ----------")
print(ai.chat("用一句话解释什么是量子计算?"))
print("\n---------- 2. 视觉模型测试 ----------")
print(ai.vision_chat("这张图片里有什么?", "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"))
print("\n---------- 3a. Embedding 模型测试 ----------")
vectors = ai.get_embedding(["今天天气真好", "机器学习入门"])
if isinstance(vectors, list):
print(f"成功获取向量,维度: {len(vectors[0])}")
print("\n---------- 3b. Rerank 模型测试 ----------")
print(ai.get_rerank("如何学习Python", ["Python基础语法", "Java开发指南", "Python爬虫实战"], top_n=3))
print("\n---------- 4. ASR 语音转文字测试 ----------")
# 替换为一个真实的音频文件公网 URL
audio_url = "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world.wav"
print(ai.asr_transcribe(audio_url))