1. 创建项目目录
bash
# Create the project directory (-p: no error if it already exists) and enter it
mkdir -p ~/markitdown-service && cd ~/markitdown-service
2. 编写 app.py(HTTP 服务主程序)
python
from fastapi import FastAPI, UploadFile, File, HTTPException
from markitdown import MarkItDown
import tempfile
import os
from pathlib import Path
# FastAPI application object; the uvicorn command in the Dockerfile loads it as "app:app".
app = FastAPI(title="MarkItDown Converter")
# Module-level MarkItDown converter, created once and reused by the request handler.
md = MarkItDown()
@app.post("/convert")
async def convert_to_markdown(file: UploadFile = File(...)):
    """Convert an uploaded document to Markdown.

    The upload is written to a temporary file (keeping the original file
    extension as the suffix), passed to ``md.convert`` by path, and the
    temporary file is removed afterwards.

    Args:
        file: The multipart-uploaded file. Its ``content_type`` must be one of
            the MIME types listed in ``supported``.

    Returns:
        A dict with the original ``filename`` and the converted ``markdown`` text.

    Raises:
        HTTPException: 400 if the content type is not supported,
            500 if the conversion itself fails.
    """
    # Supported MIME types
    supported = [
        "application/pdf",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "text/html",
        "text/plain",
        "text/csv",
        "application/json",
        "application/xml",
        "text/xml",
    ]
    if file.content_type not in supported:
        raise HTTPException(status_code=400, detail=f"Unsupported file type: {file.content_type}")

    # The filename may be missing; fall back to an empty suffix instead of
    # letting Path(None) raise a TypeError.
    suffix = Path(file.filename or "").suffix

    tmp_path = None
    try:
        # delete=False because the converter is given the path after the handle
        # is closed; the finally clause below is responsible for removing it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        # NOTE(review): md.convert is called synchronously inside an async
        # handler; consider offloading it to a worker thread if conversions
        # turn out to be slow.
        try:
            result = md.convert(tmp_path)
            markdown_content = result.text_content
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # Remove the temp file on every path, including a failed read/write.
        if tmp_path is not None:
            os.unlink(tmp_path)

    return {"filename": file.filename, "markdown": markdown_content}
3. 编写 Dockerfile
dockerfile
FROM python:3.11-slim

# Use a dedicated working directory so app.py is not copied into the image root
# and uvicorn resolves "app:app" from a known location.
WORKDIR /app

# Switch the apt sources to the Aliyun mirror (speeds up system package installs).
# Tries debian.sources first and falls back to sources.list if that sed fails.
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources || \
    sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list

# Install ffmpeg (optional; silences a markitdown warning).
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Install the Python dependencies (Tsinghua PyPI mirror for faster downloads).
RUN pip install --no-cache-dir 'markitdown[all]' fastapi uvicorn python-multipart -i https://pypi.tuna.tsinghua.edu.cn/simple

COPY app.py .

# Port that uvicorn listens on (see CMD below).
EXPOSE 8000

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
4. 构建 Docker 镜像
bash
# Build the image from the Dockerfile in the current directory
docker build --tag markitdown-api .
提示:如果之前有旧容器,先停止删除:
bash
# Stop and remove the old container
docker stop markitdown-api && docker rm markitdown-api
# Remove the old image
docker rmi markitdown-api
5. 运行容器
bash
# Start the container in the background and publish port 8000 on the host
docker run --detach --publish 8000:8000 --name markitdown-api markitdown-api
6. 验证服务是否正常
bash
# Show the container logs
docker logs markitdown-api
# Local test (requires a test file, e.g. test.pdf)
curl --form "file=@/path/to/test.pdf" http://localhost:8000/convert
日志中应出现 `Application startup complete.`;curl 应返回 JSON,其中 `filename` 为上传的文件名,`markdown` 字段为转换后的 Markdown 内容。