在 AutoDL 上安装 ModelScope OCR 模型 dots_ocr 的笔记心得

环境准备

bash 复制代码
cd autodl-tmp

root@autodl-container-14e54cad63-9b7ce591:~/autodl-tmp/dots_ocr# git remote -v
origin	https://gitcode.com/Mingcai_Xiong/dots.ocr-copy-github.git (fetch)
origin	https://gitcode.com/Mingcai_Xiong/dots.ocr-copy-github.git (push)

安装uv 和 vllm==0.9.1

bash 复制代码
wget http://qiniu.dywlkj.com/uv_for_linux_x86/install.sh -O - | sh

uv venv 
 source .venv/bin/activate

# 使用阿里云镜像源
export UV_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/
uv pip install vllm==0.9.1  # 安装其他包时，将 vllm==0.9.1 替换为对应的包名

# 或清华大学镜像源
export UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple/

下载模型（保存到 weights/DotsOCR，供后续 vllm 加载）

bash 复制代码
python3 tools/download_model.py -t modelscope 

vllm 启动 dots_ocr 模型

bash 复制代码
# 进入项目根目录后，检查模型权重文件是否已下载到 weights/DotsOCR/
~/autodl-tmp/dots_ocr

(dots_ocr) root@autodl-container-14e54cad63-9b7ce591:~/autodl-tmp/dots_ocr# ls weights/DotsOCR/
 NOTICE               configuration_dots.py              model.safetensors.index.json   tokenizer.json
 README.md           'dots.ocr LICENSE AGREEMENT'        modeling_dots_ocr.py           tokenizer_config.json
 __pycache__          generation_config.json             modeling_dots_ocr_vllm.py      vocab.json
 chat_template.json   merges.txt                         modeling_dots_vision.py
 config.json          model-00001-of-00002.safetensors   preprocessor_config.json


# launch vllm server
export hf_model_path=./weights/DotsOCR 

export PYTHONPATH=$(dirname "$hf_model_path"):$PYTHONPATH

 VLLM_USE_MODELSCOPE=true CUDA_VISIBLE_DEVICES=0 vllm serve ${hf_model_path} --tensor-parallel-size 1 --gpu-memory-utilization 0.95  --chat-template-content-format string --served-model-name model --trust-remote-code

启动操作界面

bash 复制代码
PYTHONPATH=$PWD python demo/demo_gradio.py 6006 

我使用的虚拟环境中各依赖包的版本号

复制代码
# root @ autodl-container-14e54cad63-9b7ce591 in ~/autodl-tmp/dots_ocr on git:master x .venv [20:00:24] 
$ uv pip freeze > requirements.txt

accelerate==1.10.1
aiofiles==24.1.0
aiohappyeyeballs==2.6.1
aiohttp==3.12.15
aiosignal==1.4.0
airportsdata==20250909
annotated-types==0.7.0
anyio==4.11.0
astor==0.8.1
attrs==25.3.0
av==15.1.0
blake3==1.0.7
brotli==1.1.0
cachetools==6.2.0
certifi==2025.8.3
charset-normalizer==3.4.3
click==8.3.0
cloudpickle==3.1.1
compressed-tensors==0.10.1
cupy-cuda12x==13.6.0
defusedxml==0.7.1
depyf==0.18.0
dill==0.4.0
diskcache==5.6.3
distro==1.9.0
dnspython==2.8.0
einops==0.8.1
email-validator==2.3.0
fastapi==0.118.0
fastapi-cli==0.0.13
fastapi-cloud-cli==0.3.0
fastrlock==0.8.3
ffmpy==0.6.1
filelock==3.19.1
flash-attn==2.8.3
frozenlist==1.7.0
fsspec==2025.9.0
gguf==0.17.1
googleapis-common-protos==1.70.0
gradio==5.49.0
gradio-client==1.13.3
groovy==0.1.2
grpcio==1.75.1
h11==0.16.0
hf-xet==1.1.10
httpcore==1.0.9
httpie==3.2.4
httptools==0.6.4
httpx==0.28.1
huggingface-hub==0.35.3
idna==3.10
importlib-metadata==8.7.0
interegular==0.3.3
jinja2==3.1.6
jiter==0.11.0
jsonschema==4.25.1
jsonschema-specifications==2025.9.1
lark==1.2.2
llguidance==0.7.30
llvmlite==0.44.0
lm-format-enforcer==0.10.12
markdown-it-py==4.0.0
markupsafe==3.0.3
mdurl==0.1.2
mistral-common==1.8.5
modelscope==1.30.0
mpmath==1.3.0
msgpack==1.1.1
msgspec==0.19.0
multidict==6.6.4
nest-asyncio==1.6.0
networkx==3.5
ninja==1.13.0
numba==0.61.2
numpy==2.2.6
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-cupti-cu12==12.6.80
nvidia-cuda-nvrtc-cu12==12.6.77
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.5.1.17
nvidia-cufft-cu12==11.3.0.4
nvidia-cufile-cu12==1.11.1.6
nvidia-curand-cu12==10.3.7.77
nvidia-cusolver-cu12==11.7.1.2
nvidia-cusparse-cu12==12.5.4.2
nvidia-cusparselt-cu12==0.6.3
nvidia-nccl-cu12==2.26.2
nvidia-nvjitlink-cu12==12.6.85
nvidia-nvtx-cu12==12.6.77
openai==2.1.0
opencv-python-headless==4.12.0.88
opentelemetry-api==1.37.0
opentelemetry-exporter-otlp==1.37.0
opentelemetry-exporter-otlp-proto-common==1.37.0
opentelemetry-exporter-otlp-proto-grpc==1.37.0
opentelemetry-exporter-otlp-proto-http==1.37.0
opentelemetry-proto==1.37.0
opentelemetry-sdk==1.37.0
opentelemetry-semantic-conventions==0.58b0
opentelemetry-semantic-conventions-ai==0.4.13
orjson==3.11.3
outlines==0.1.11
outlines-core==0.1.26
packaging==25.0
pandas==2.3.3
partial-json-parser==0.2.1.1.post6
pillow==11.3.0
pip==25.2
prometheus-client==0.23.1
prometheus-fastapi-instrumentator==7.1.0
propcache==0.3.2
protobuf==6.32.1
psutil==7.1.0
py-cpuinfo==9.0.0
pycountry==24.6.1
pydantic==2.11.9
pydantic-core==2.33.2
pydantic-extra-types==2.10.5
pydub==0.25.1
pygments==2.19.2
pymupdf==1.26.4
pysocks==1.7.1
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
python-json-logger==3.3.0
python-multipart==0.0.20
pytz==2025.2
pyyaml==6.0.3
pyzmq==27.1.0
qwen-vl-utils==0.0.14
ray==2.49.2
referencing==0.36.2
regex==2025.9.18
requests==2.32.5
requests-toolbelt==1.0.0
rich==14.1.0
rich-toolkit==0.15.1
rignore==0.7.0
rpds-py==0.27.1
ruff==0.13.3
safehttpx==0.1.6
safetensors==0.6.2
scipy==1.16.2
semantic-version==2.10.0
sentencepiece==0.2.1
sentry-sdk==2.39.0
setuptools==79.0.1
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
starlette==0.48.0
sympy==1.14.0
tiktoken==0.11.0
tokenizers==0.21.4
tomlkit==0.13.3
torch==2.7.0
torchaudio==2.7.0
torchvision==0.22.0
tqdm==4.67.1
transformers==4.51.3
triton==3.3.0
typer==0.19.2
typing-extensions==4.15.0
typing-inspection==0.4.2
tzdata==2025.2
urllib3==2.5.0
uvicorn==0.37.0
uvloop==0.21.0
vllm==0.9.1
watchfiles==1.1.0
websockets==15.0.1
xformers==0.0.30
xgrammar==0.1.19
yarl==1.20.1
zipp==3.23.0
相关推荐
岑梓铭2 分钟前
《考研408数据结构》第六章(5.1+5.2+5.3树、二叉树、线索二叉树)复习笔记
数据结构·笔记·考研·408·1024程序员节
动能小子ohhh25 分钟前
Langchain从零开始到应用落地案例[AI智能助手]【3】---使用Paddle-OCR识别优化可识别图片进行解析回答
人工智能·python·pycharm·langchain·ocr·paddle·1024程序员节
摇滚侠1 小时前
全面掌握 PostgreSQL 关系型数据库,PostgreSQL 介绍,笔记02
数据库·笔记·postgresql
摇滚侠1 小时前
Spring Boot3零基础教程,生命周期监听,自定义监听器,笔记59
java·开发语言·spring boot·笔记
张人玉2 小时前
WPF 控件速查 PDF 笔记(可直接落地版)
笔记·microsoft·wpf
摇滚侠2 小时前
Spring Boot3零基础教程,事件驱动开发,设计登录成功后增加积分记录信息功能,笔记61
java·spring boot·笔记·后端
Elias不吃糖2 小时前
Qt TCP 网络通信详解(笔记)
笔记·qt·tcp/ip
charlie1145141912 小时前
2D 计算机图形学基础速建——1
笔记·学习·教程·计算机图形学·基础
im_AMBER2 小时前
React 07
前端·笔记·学习·react.js·前端框架
9ilk2 小时前
【仿RabbitMQ的发布订阅式消息队列】--- 介绍
linux·笔记·分布式·后端·rabbitmq