# ================= 必须放在文件的第一行 =================
import os
import sys
# Force-override the OpenAI-compatible endpoint settings in the process
# environment. Per the original author's note, this is done so that the
# LangChain/OpenAI libraries read the intended address; it runs before the
# evalscope import further down in this file.
# NOTE(review): "xxx" looks like a placeholder API key -- confirm that a real
# key is supplied out-of-band and never committed to version control.
os.environ.update(
    {
        "OPENAI_BASE_URL": "https://api.siliconflow.cn/v1",
        # Same endpoint under the legacy variable name, kept for compatibility.
        "OPENAI_API_BASE": "https://api.siliconflow.cn/v1",
        "OPENAI_API_KEY": "xxx",
    }
)
# Debug output confirming that this block actually executed.
print(f"[DEBUG] Force set OPENAI_BASE_URL to: {os.environ.get('OPENAI_BASE_URL')}")
# =====================================================
# 下面是原有的导入
from evalscope.run import run_task
# Task configuration handed to evalscope's ``run_task``: RAGEval backend,
# RAGAS tool, test-set generation from a local .docx document.
# NOTE(review): the original comment said the LLM was "Qwen2.5" and that the
# embeddings were switched back to a local model; the configured LLM below is
# Qwen/Qwen3-8B -- confirm which one is intended.

# LLM used to generate the test set.
_generator_llm = {
    # Per the original note, the model name must exist on SiliconFlow.
    # An alternative mentioned by the author: Qwen/Qwen3.5-4B
    "model_name": "Qwen/Qwen3-8B",
    # Per the original note, this api_base may be ignored in favour of the
    # OPENAI_* environment variables set at the top of the file, but
    # specifying it is harmless. Some libraries require the base URL
    # without the /chat/completions suffix.
    "api_base": "https://api.siliconflow.cn/v1",
    # NOTE(review): looks like a placeholder key -- confirm before running.
    "api_key": "sk-xxx",
}

# Embedding model identifier.
_embeddings = {
    "model_name_or_path": "AI-ModelScope/m3e-base",
}

_testset_generation = {
    "docs": [r"./testdata/test.docx"],
    # NOTE(review): the original comment advised running 5 samples first and
    # enlarging afterwards; the value is currently 100.
    "test_size": 100,
    "output_file": "zskoutput1s/testset.json",
    "knowledge_graph": "zskoutput1s/knowledge_graph.json",
    "generator_llm": _generator_llm,
    "embeddings": _embeddings,
    "language": "chinese",
}

generate_testset_task_cfg = {
    "eval_backend": "RAGEval",
    "eval_config": {
        "tool": "RAGAS",
        "testset_generation": _testset_generation,
    },
}
def main() -> int:
    """Run ``run_task`` with ``generate_testset_task_cfg`` and report the outcome.

    Returns:
        0 if ``run_task`` returned without raising, 1 if it raised an
        ``Exception`` (the message and full traceback are printed first).
    """
    try:
        run_task(task_cfg=generate_testset_task_cfg)
    except Exception as e:
        # Top-level boundary: report the failure with full details for
        # troubleshooting, then signal it through the return value so the
        # process does not exit with status 0 after a failed run.
        print(f"\n❌ 任务失败: {e}")
        import traceback

        traceback.print_exc()
        return 1
    else:
        # Kept outside the ``try`` body so that an error raised by the print
        # itself is not misreported as a task failure.
        print("\n✅ 任务成功完成!")
        return 0


if __name__ == "__main__":
    # Propagate success/failure to the shell via the exit status.
    sys.exit(main())
生成的文件示例

依赖包示例:
pip list
Package Version
------------------------- ------------
absl-py 2.4.0
accelerate 1.13.0
addict 2.4.0
aiofiles 25.1.0
aiohappyeyeballs 2.6.1
aiohttp 3.13.3
aiosignal 1.4.0
aliyun-python-sdk-core 2.16.0
aliyun-python-sdk-kms 2.16.5
annotated-doc 0.0.4
annotated-types 0.7.0
antlr4-python3-runtime 4.9.3
anyio 4.13.0
appdirs 1.4.4
async-timeout 4.0.3
attrs 26.1.0
backoff 2.2.1
beartype 0.18.5
beautifulsoup4 4.14.3
blinker 1.9.0
braceexpand 0.1.7
certifi 2026.2.25
cffi 2.0.0
charset-normalizer 3.4.6
click 8.3.1
colorama 0.4.6
coloredlogs 15.0.1
colorlog 6.10.1
contourpy 1.3.2
cpm-kernels 1.0.11
crcmod 1.7
cryptography 46.0.5
cycler 0.12.1
dacite 1.9.2
dataclasses-json 0.6.7
datasets 4.8.4
decord 0.6.0
Deprecated 1.3.1
diffusers 0.37.0
dill 0.3.8
diskcache 5.6.3
distro 1.9.0
docstring_parser 0.17.0
dotenv 0.9.9
editdistance 0.8.1
effdet 0.4.1
einops 0.8.2
emoji 2.15.0
et_xmlfile 2.0.0
eval_type_backport 0.3.1
evalscope 1.5.0
evaluate 0.4.6
exceptiongroup 1.3.1
fastapi 0.135.1
ffmpy 1.0.0
filelock 3.25.1
filetype 1.2.0
fire 0.7.1
Flask 3.1.3
flatbuffers 25.12.19
fonttools 4.62.0
frozenlist 1.8.0
fsspec 2025.3.0
ftfy 6.3.1
func_timeout 4.3.5
fuzzywuzzy 0.18.0
google-api-core 2.30.0
google-auth 2.49.1
google-cloud-vision 3.12.1
googleapis-common-protos 1.73.0
gradio 5.4.0
gradio_client 1.4.2
greenlet 3.3.2
grpcio 1.78.0
grpcio-status 1.78.0
h11 0.16.0
h5py 3.16.0
hf-xet 1.4.2
html5lib 1.1
httpcore 1.0.9
httpx 0.28.1
httpx-sse 0.4.3
huggingface_hub 0.36.2
human-eval 1.0.3
humanfriendly 10.0
idna 3.11
ImageIO 2.37.3
immutabledict 4.3.1
importlib_metadata 8.7.1
iniconfig 2.3.0
instructor 1.14.5
iopath 0.1.10
itsdangerous 2.2.0
jieba 0.42.1
Jinja2 3.1.6
jiter 0.11.1
jmespath 0.10.0
joblib 1.5.3
json5 0.13.0
jsonlines 4.0.0
jsonpatch 1.33
jsonpointer 3.1.1
jsonschema 4.26.0
jsonschema-specifications 2025.9.1
kiwisolver 1.5.0
langchain 1.2.13
langchain-classic 1.0.3
langchain-community 0.4.1
langchain-core 1.2.22
langchain-ollama 1.0.1
langchain-openai 1.1.12
langchain-text-splitters 1.1.1
langchain-unstructured 1.0.1
langdetect 1.0.9
langgraph 1.1.3
langgraph-checkpoint 4.0.1
langgraph-prebuilt 1.0.8
langgraph-sdk 0.3.12
langsmith 0.7.22
latex2sympy2_extended 1.11.0
Levenshtein 0.27.3
llvmlite 0.46.0
lxml 6.0.2
Markdown 3.10.2
markdown-it-py 4.0.0
MarkupSafe 2.1.5
marshmallow 3.26.2
matplotlib 3.10.8
mdurl 0.1.2
ml_dtypes 0.5.4
mmengine-lite 0.10.7
modelscope 1.35.1
more-itertools 10.8.0
mpmath 1.3.0
ms-opencompass 0.1.6
ms-vlmeval 0.0.17
mteb 1.38.20
multidict 6.7.1
multiprocess 0.70.16
mypy_extensions 1.1.0
nest-asyncio 1.6.0
networkx 3.4.2
nltk 3.9.3
numba 0.64.0
numpy 2.2.6
nvidia-ml-py 13.590.48
olefile 0.47
ollama 0.6.1
omegaconf 2.3.0
onnx 1.20.1
onnxruntime 1.19.2
open_clip_torch 3.3.0
openai 2.30.0
OpenCC 1.2.0
opencv-python 4.13.0.92
openpyxl 3.1.5
orjson 3.11.7
ormsgpack 1.12.2
oss2 2.19.1
overrides 7.7.0
packaging 26.0
pandas 2.3.3
pdf2image 1.17.0
pdfminer.six 20260107
peft 0.18.1
pi_heif 1.3.0
pikepdf 10.5.1
pillow 11.3.0
pip 26.0.1
platformdirs 4.9.4
playwright 1.58.0
plotly 5.24.1
pluggy 1.6.0
polars 1.38.1
polars-runtime-32 1.38.1
portalocker 3.2.0
prettytable 3.17.0
propcache 0.4.1
proto-plus 1.27.1
protobuf 6.33.5
psutil 7.2.2
pyarrow 23.0.1
pyasn1 0.6.3
pyasn1_modules 0.4.2
pycocotools 2.0.11
pycparser 3.0
pycryptodome 3.23.0
pydantic 2.12.5
pydantic_core 2.41.5
pydantic-settings 2.13.1
pydub 0.25.1
pyecharts 2.1.0
pyee 13.0.1
Pygments 2.19.2
pylatexenc 2.10
pyparsing 3.3.2
pypdf 6.9.2
pypdfium2 5.6.0
pypinyin 0.55.0
pyreadline3 3.5.4
pysbd 0.3.4
pytest 9.0.2
pytest-base-url 2.1.0
pytest-playwright 0.7.2
python-dateutil 2.9.0.post0
python-docx 1.2.0
python-dotenv 1.2.2
python-iso639 2026.1.31
python-Levenshtein 0.27.3
python-magic 0.4.27
python-multipart 0.0.12
python-oxmsg 0.0.2
python-slugify 8.0.4
pytrec_eval-terrier 0.5.10
pytz 2026.1.post1
pywin32 311
PyYAML 6.0.3
ragas 0.4.3
ragflow-sdk 0.22.1
rank-bm25 0.2.2
RapidFuzz 3.14.3
referencing 0.37.0
regex 2026.2.28
requests 2.33.0
requests-toolbelt 1.0.0
rich 13.9.4
rouge 1.0.1
rouge-chinese 1.0.3
rouge_score 0.1.2
rpds-py 0.30.0
ruff 0.15.5
sacrebleu 2.6.0
safehttpx 0.1.7
safetensors 0.7.0
scikit-learn 1.7.2
scikit-network 0.33.5
scipy 1.15.3
seaborn 0.13.2
semantic-version 2.10.0
sentence-transformers 5.3.0
sentencepiece 0.2.1
setuptools 65.5.0
shellingham 1.5.4
simplejson 3.20.2
six 1.17.0
sniffio 1.3.1
sortedcontainers 2.4.0
soupsieve 2.8.3
SQLAlchemy 2.0.48
sse-starlette 3.3.2
starlette 0.52.1
sty 1.0.6
swanlab 0.7.11
sympy 1.14.0
tabulate 0.10.0
tenacity 9.1.4
termcolor 3.3.0
text-unidecode 1.3
threadpoolctl 3.6.0
tiktoken 0.12.0
timeout-decorator 0.5.0
timm 1.0.25
tokenizers 0.22.2
tomli 2.4.0
tomlkit 0.12.0
torch 2.10.0
torchvision 0.25.0
tqdm 4.67.3
transformers 4.57.6
typer 0.24.1
typing_extensions 4.15.0
typing-inspect 0.9.0
typing-inspection 0.4.2
tzdata 2025.3
unstructured 0.18.32
unstructured-client 0.42.12
unstructured_inference 1.2.0
unstructured.pytesseract 0.3.15
urllib3 2.6.3
uuid_utils 0.14.1
uvicorn 0.41.0
validators 0.35.0
wcwidth 0.6.0
webdataset 1.0.2
webencodings 0.5.1
websockets 12.0
Werkzeug 3.1.6
word2number 1.1
wrapt 2.1.2
xlsxwriter 3.2.9
xxhash 3.6.0
yapf 0.43.0
yarl 1.23.0
zhconv 1.4.3
zipp 3.23.0
zstandard 0.25.0