一、创建Dockerfile文件(不需要加后缀名,此步骤需要把你的requirements.txt里的内容给到AI结合上面的代码,让他帮你输出最新的Dockerfile文件。【要求使用 GPU 进行推理和使用单阶段】)
# syntax=docker/dockerfile:1.7
# ---- Build stage ----
# Installs the Python dependencies into a virtualenv at /opt/venv; the runtime
# stage copies that directory, so build-only packages stay out of the final image.
FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 AS builder

# pip is pointed at the Tsinghua mirror; bytecode and pip caches are disabled
# to keep layers small.
ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
    PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install the build toolchain and create the unprivileged user (UID/GID 1000).
# Any pre-existing UID/GID 1000 account is removed first. Each best-effort
# removal is wrapped in its own subshell so that its "|| true" cannot swallow a
# failure of apt-get (in the original chain, "a && b || true && c" let the build
# continue even when package installation had failed).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.12-dev python3.12-venv python3-pip gcc default-libmysqlclient-dev pkg-config \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* \
    && (userdel -r "$(id -nu 1000 2>/dev/null)" 2>/dev/null || true) \
    && (groupdel "$(getent group 1000 | cut -d: -f1)" 2>/dev/null || true) \
    && groupadd --gid 1000 appgroup \
    && useradd --uid 1000 --gid appgroup --create-home appuser \
    && mkdir -p /opt/venv \
    && chown -R appuser:appgroup /opt

# Create the virtualenv as appuser so its files are owned by that user.
USER appuser
RUN python3.12 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

WORKDIR /app
COPY --chown=appuser:appgroup requirements.txt .

# Install the dependencies into the virtualenv, then force-upgrade redis and
# pydub to releases compatible with Python 3.12.
RUN pip install --no-cache-dir --no-compile -r requirements.txt && \
    pip install --no-cache-dir --no-compile --upgrade "redis>=5.0.0" "pydub>=0.25.1"
# ---- Runtime stage ----
FROM nvidia/cuda:12.8.0-runtime-ubuntu24.04

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONFAULTHANDLER=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

# Install the runtime libraries and create the unprivileged user (UID/GID 1000).
# Any pre-existing UID/GID 1000 account is removed first. Each best-effort
# removal is wrapped in its own subshell so that its "|| true" cannot swallow a
# failure of apt-get or update-alternatives earlier in the chain.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.12 python3.12-venv libsndfile1 ffmpeg libgomp1 libmysqlclient21 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && (userdel -r "$(id -nu 1000 2>/dev/null)" 2>/dev/null || true) \
    && (groupdel "$(getent group 1000 | cut -d: -f1)" 2>/dev/null || true) \
    && groupadd --gid 1000 appgroup \
    && useradd --uid 1000 --gid appgroup --shell /bin/bash --create-home appuser

# Copy the virtualenv produced by the build stage, then the application code.
COPY --from=builder --chown=appuser:appgroup /opt/venv /opt/venv
COPY --chown=appuser:appgroup . /app

# Put the virtualenv first on PATH. PYTHONPATH no longer appends an undefined
# $PYTHONPATH, which previously left a dangling empty entry at the end.
ENV PATH="/opt/venv/bin:$PATH" \
    PYTHONPATH="/opt/venv/lib/python3.12/site-packages:/app"

WORKDIR /app

# Fail the build early if uvicorn is not importable (runs as root, before USER).
RUN python -c "import uvicorn; print(f'uvicorn {uvicorn.__version__} OK')"

# Consume the build args passed by build.sh and record them as OCI labels.
# Declared late so a changing BUILD_DATE does not invalidate the layers above.
ARG BUILD_DATE=unknown
ARG VERSION=latest
LABEL org.opencontainers.image.created="${BUILD_DATE}" \
      org.opencontainers.image.version="${VERSION}"

USER appuser
EXPOSE 1112

HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:1112/health', timeout=5)" || exit 1

CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "1112", "--proxy-headers"]
二、创建.dockerignore文件(此步骤需要根据上面的Dockerfile文件进行AI生成)
# NOTE: .dockerignore patterns are matched relative to the build-context root
# (unlike .gitignore). A bare "*.log" or "__pycache__/" only matches at the top
# level; a "**/" prefix is needed to match at any depth. The prefix is applied
# below to caches, editor files, secrets, logs, temp files and OS files.
# Data, model, docs and build patterns are intentionally left root-only so
# nested files an application may need at runtime are not silently dropped.

# Python caches and packaging artifacts
**/__pycache__/
**/*.py[cod]
**/*$py.class
*.so
.Python
**/*.egg-info/
.eggs/
*.egg
pip-wheel-metadata/
*.manifest
*.spec

# Virtual environments
venv/
.venv/
env/
ENV/
env.bak/
venv.bak/

# IDEs and editors
**/.vscode/
**/.idea/
**/*.swp
**/*.swo
**/*~
.project
.pydevproject
.settings/

# Version control (.gitignore and .gitattributes are files, not directories)
.git/
.gitignore
.gitattributes
.gitmodules
.svn/
.hg/

# Environment files and secrets (must never end up in the image, at any depth)
**/.env
**/.env.*
**/.envrc
**/*.local
**/secrets/
**/*.key
**/*.pem

# Tests and development tooling
tests/
test/
testing/
**/.pytest_cache/
**/.coverage
htmlcov/
.tox/
.nox/
**/.mypy_cache/
.dmypy.json
*.cover
**/.hypothesis/

# Documentation (README and CHANGELOG are kept for reference)
docs/_build/
*.md
!README.md
!CHANGELOG.md

# Docker files themselves
Dockerfile*
docker-compose*.yml
.docker/
.dockerignore

# Build artifacts
build/
dist/
*.tar.gz
*.zip
*.tgz

# Logs
logs/
log/
**/*.log
**/*.log.*

# Temporary files
tmp/
temp/
temp_*/
**/*.tmp
**/*.bak
**/*.backup

# Data (when large or mounted through a volume); root-level only
data/
datasets/
*.csv
*.tsv
*.json
*.jsonl
*.parquet
*.hdf5
*.h5
*.pkl
*.pickle

# Model files (usually large; download inside the container instead of copying); root-level only
*.pt
*.pth
*.ckpt
*.safetensors
*.onnx
*.pb
models/
checkpoints/
weights/

# Local development tool configuration
.pre-commit-config.yaml
.flake8
.black
.isort.cfg
mypy.ini
bandit.yaml

# OS-generated files
**/.DS_Store
**/Thumbs.db
**/desktop.ini
三、创建build.sh脚本文件(此步骤需要根据上面的.dockerignore文件进行AI生成)
#!/bin/bash
# Build helper for the GPU inference image: checks the host, runs docker build,
# verifies the result and optionally pushes it.
# Environment overrides: REGISTRY (registry prefix), VERSION (default tag).
set -euo pipefail

# ---- Configuration ----
IMAGE_NAME="new2026_data"
REGISTRY="${REGISTRY:-}"

# Print the registry prefix for an image reference.
# Arguments: $1 - registry host/path, with or without a trailing slash (may be empty)
# Outputs:   "<registry>/" on stdout, or nothing when $1 is empty
registry_prefix() {
  local registry=${1:-}
  if [[ -n "$registry" ]]; then
    # Strip one trailing slash (if any) and add exactly one back, so both
    # "reg.io" and "reg.io/" yield "reg.io/".
    printf '%s/' "${registry%/}"
  fi
}

# Without normalisation, REGISTRY="reg.io" would produce "reg.ionew2026_data".
FULL_IMAGE_NAME="$(registry_prefix "$REGISTRY")${IMAGE_NAME}"
VERSION="${VERSION:-latest}"
BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# ANSI colour escapes used by the log_* helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Logging helpers. Each takes one message argument ($1, optional) and prints it
# with a coloured level prefix. Backslash escapes in the colour variables and in
# the message are interpreted (%b), as with the previous `echo -e`.
# Warnings and errors go to stderr so they remain visible when stdout is
# captured or redirected; info and debug stay on stdout.
# "${…:-}" keeps the helpers usable under `set -u` when called without an argument.
log_info()  { printf '%b[INFO]%b %b\n'  "${GREEN:-}"  "${NC:-}" "${1:-}"; }
log_warn()  { printf '%b[WARN]%b %b\n'  "${YELLOW:-}" "${NC:-}" "${1:-}" >&2; }
log_error() { printf '%b[ERROR]%b %b\n' "${RED:-}"    "${NC:-}" "${1:-}" >&2; }
log_debug() { printf '%b[DEBUG]%b %b\n' "${BLUE:-}"   "${NC:-}" "${1:-}"; }
# Print the command-line help text to stdout.
# Globals: VERSION (read) - shown as the default tag.
usage() {
  printf '%s\n' \
    "用法: $0 [选项]" \
    "-t, --tag TAG 指定标签 (默认: ${VERSION})" \
    "--no-cache 禁用缓存" \
    "--push 构建后推送到仓库" \
    "-h, --help 显示帮助"
}
#######################################
# Report whether the host looks ready for GPU containers.
# Advisory only: it logs findings and never aborts the script.
# Globals:   none
# Arguments: none
# Outputs:   status lines via log_info/log_warn, install hints on stdout
#######################################
check_nvidia_runtime() {
  local docker_info gpu_info

  # Capture the output first instead of piping into `grep -q`: under
  # `set -o pipefail`, grep exiting early can make the pipeline report a
  # failure (SIGPIPE in the writer) and produce a false "not detected".
  docker_info=$(docker info 2>/dev/null) || docker_info=""
  if grep -q "nvidia" <<<"$docker_info"; then
    log_info "NVIDIA Container Toolkit 已配置"
  else
    log_warn "未检测到 NVIDIA Container Toolkit"
    log_warn "请执行以下命令安装:"
    echo " sudo apt-get install -y nvidia-container-toolkit"
    echo " sudo nvidia-ctk runtime configure --runtime=docker"
    echo " sudo systemctl restart docker"
    echo ""
  fi

  # Check whether nvidia-smi is available on the host.
  if command -v nvidia-smi &>/dev/null; then
    # Declaration is separate from assignment so a failing nvidia-smi is
    # observable; a failure or empty answer is reported as "Unknown".
    gpu_info=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null) || gpu_info=""
    gpu_info=${gpu_info%%$'\n'*} # keep only the first GPU name
    log_info "检测到 GPU: ${gpu_info:-Unknown}"
  else
    log_warn "宿主机未安装 nvidia-smi,容器内将无法使用 GPU"
  fi
}
#######################################
# Check free disk space (>= 20 GB recommended) and total memory (>= 4 GB
# recommended) before building.
# Low disk space asks for confirmation and exits 1 unless the answer is y/Y;
# low memory only produces a warning.
# Globals:   none
# Arguments: none
# Outputs:   status lines via log_info/log_warn/log_error
# Returns:   0 to continue; exits the script with 1 when the user declines
#######################################
check_resources() {
  local docker_root="/var/lib/docker"
  local available_gb mem_gb reply

  log_info "检查系统资源..."

  # Fall back to the root filesystem when Docker's data directory is absent.
  if [[ ! -d "$docker_root" ]]; then
    docker_root="/"
  fi

  # -P forces one line per filesystem; without it a long device name can wrap
  # onto a second line and shift the "Available" column out of NR==2.
  available_gb=$(df -P -BG "$docker_root" 2>/dev/null \
    | awk 'NR==2 {sub(/G$/, "", $4); print $4}') || available_gb=0
  # Anything that is not a plain integer is treated as 0.
  [[ "$available_gb" =~ ^[0-9]+$ ]] || available_gb=0

  if ((available_gb < 20)); then
    log_warn "磁盘空间不足: ${available_gb}GB < 20GB 推荐值"
    log_warn "建议清理: docker system prune -af"
    # `read` fails at EOF (e.g. CI without a TTY); treat that as "no" rather
    # than letting `set -e` kill the script without any explanation.
    reply=""
    read -p "是否继续? [y/N] " -n 1 -r reply || reply=""
    echo
    if [[ ! "$reply" =~ ^[Yy]$ ]]; then
      log_error "未确认继续,构建已中止"
      exit 1
    fi
  else
    log_info "磁盘空间充足: ${available_gb}GB"
  fi

  # Total memory in whole GB as reported by `free -g`.
  mem_gb=$(free -g 2>/dev/null | awk '/^Mem:/{print $2}') || mem_gb=0
  [[ "$mem_gb" =~ ^[0-9]+$ ]] || mem_gb=0

  if ((mem_gb < 4)); then
    log_warn "内存不足: ${mem_gb}GB < 4GB 推荐值"
    log_warn "建议关闭其他程序或增加交换空间"
  else
    log_info "内存充足: ${mem_gb}GB"
  fi
}
#######################################
# Smoke-test a freshly built image.
# Runs an import check for uvicorn inside the image, tries `nvidia-smi` inside
# the container when the host has nvidia-smi, then reports the image size.
# Globals:   FULL_IMAGE_NAME (read)
# Arguments: $1 - image tag to verify
# Outputs:   status lines via log_info/log_warn/log_error
# Returns:   0 on success; exits the script with 1 when the import check fails
#######################################
verify_build() {
  local image_tag=$1
  local image_ref="${FULL_IMAGE_NAME}:${image_tag}"
  local import_output image_size

  log_info "验证镜像: ${image_ref}"

  # Check that uvicorn is importable (the image already has the venv on PATH).
  # stderr is captured rather than discarded so that a failure shows the
  # actual traceback instead of only a generic error line.
  if import_output=$(docker run --rm "${image_ref}" \
    python -c "import uvicorn; print('uvicorn OK')" 2>&1); then
    printf '%s\n' "$import_output"
    log_info "✓ Python 模块检查通过"
  else
    log_error "✗ Python 模块检查失败"
    if [[ -n "$import_output" ]]; then
      printf '%s\n' "$import_output" >&2
    fi
    log_error "尝试调试: docker run --rm -it ${image_ref} /bin/bash"
    exit 1
  fi

  # GPU check is only attempted when the host itself has nvidia-smi; a failure
  # here is a warning, not an error.
  if command -v nvidia-smi &>/dev/null; then
    if docker run --rm --gpus all "${image_ref}" nvidia-smi >/dev/null 2>&1; then
      log_info "✓ GPU 支持检查通过"
    else
      log_warn "✗ GPU 支持检查失败(可能是 NVIDIA Container Toolkit 未配置)"
    fi
  fi

  # The size report is informational; a failing `docker images` must not
  # abort an otherwise successful build.
  image_size=$(docker images --format "{{.Size}}" "${image_ref}" 2>/dev/null) || image_size=""
  log_info "镜像大小: ${image_size:-unknown}"
}
#######################################
# Build the image, verify it and optionally push it.
# Globals:   FULL_IMAGE_NAME, BUILD_DATE, VERSION, REGISTRY (read);
#            DOCKER_BUILDKIT (exported)
# Arguments: $1 - extra flag(s) for `docker build`, e.g. "--no-cache" (may be empty)
#            $2 - image tag (default: $VERSION)
#            $3 - "true" to push after a successful build (default: false)
# Outputs:   progress via log_info/log_warn; the docker command line on stderr
# Returns:   non-zero (via set -e in the caller) when docker build/push fails
#######################################
build() {
  local no_cache_flag=${1:-}
  local tag=${2:-$VERSION}
  local should_push=${3:-false}
  local image_ref="${FULL_IMAGE_NAME}:${tag}"
  local latest_ref="${FULL_IMAGE_NAME}:latest"
  local -a extra_flags=() build_args=()

  log_info "开始构建镜像: ${image_ref}"
  log_info "构建时间: ${BUILD_DATE}"

  # Remove a stale cache directory to avoid conflicts.
  rm -rf /tmp/.buildx-cache-new 2>/dev/null || true
  export DOCKER_BUILDKIT=1

  # Split the flag string into words without exposing it to glob expansion
  # (an unquoted ${no_cache_flag} would be both word-split and globbed).
  read -r -a extra_flags <<<"$no_cache_flag"

  # The ${arr[@]+…} form keeps an empty array safe under `set -u` on old bash.
  build_args=(${extra_flags[@]+"${extra_flags[@]}"} -t "$image_ref")
  # Also tag as :latest, unless the requested tag already is "latest".
  if [[ "$tag" != "latest" ]]; then
    build_args+=(-t "$latest_ref")
  fi
  build_args+=(
    --build-arg "BUILD_DATE=${BUILD_DATE}"
    --build-arg "VERSION=${tag}"
    -f Dockerfile
    .
  )

  # Show the exact command being run (replaces the set -x / set +x bracket).
  {
    printf '+ docker build'
    printf ' %q' "${build_args[@]}"
    printf '\n'
  } >&2
  docker build "${build_args[@]}"

  log_info "本地镜像构建完成"

  # Verify the build.
  verify_build "${tag}"

  # Push when requested. A push without a registry is reported instead of
  # being skipped silently.
  if [[ "$should_push" == true ]]; then
    if [[ -n "${REGISTRY:-}" ]]; then
      log_info "推送到仓库: ${image_ref}"
      docker push "$image_ref"
      if [[ "$tag" != "latest" ]]; then
        docker push "$latest_ref"
      fi
    else
      log_warn "已指定 --push 但未设置 REGISTRY,跳过推送"
    fi
  fi

  log_info "构建流程完成!"
  log_info "运行命令: docker run --rm --gpus all -p 1112:1112 ${image_ref}"
  log_info ""
  log_info "调试命令(进入容器):"
  log_info " docker run --rm -it --gpus all --entrypoint /bin/bash ${image_ref}"
}
#######################################
# Entry point: parse command-line options, run the host checks, then build.
# Globals:   VERSION (read) - tag used when -t/--tag is not given
# Arguments: the script's command-line arguments
#   -t, --tag TAG   tag to build (value must be present and not start with "-")
#   --no-cache      pass --no-cache to docker build
#   --push          push after building
#   -h, --help      print usage and exit 0
# Returns:   exits 1 on an unknown option or a missing tag value
#######################################
main() {
  local cache_opt=""
  local tag_override=""
  local push_requested=false

  while (($# > 0)); do
    case "$1" in
      --no-cache)
        cache_opt="--no-cache"
        shift
        ;;
      --push)
        push_requested=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      -t | --tag)
        # The tag value must exist and must not look like another option.
        if [[ -z "${2:-}" || "$2" == -* ]]; then
          log_error "错误: $1 需要一个参数"
          usage
          exit 1
        fi
        tag_override=$2
        shift 2
        ;;
      *)
        log_error "未知选项: $1"
        usage
        exit 1
        ;;
    esac
  done

  check_nvidia_runtime
  check_resources
  build "${cache_opt}" "${tag_override:-$VERSION}" "${push_requested}"
}
# Script entry point: forward all command-line arguments to main.
main "$@"
四、通过以下命令开始执行构建镜像文件
给予权限
chmod +x build.sh
# 1. 构建生产镜像(标签使用版本号)
./build.sh --no-cache -t v4.0.0
# 2. 启动服务(后台运行)
docker-compose up -d
构建过程中会输出所有日志,最后显示“构建流程完成!”,并提示构建后运行镜像的命令和调试命令:
运行命令: docker run --rm --gpus all -p 1112:1112 new2026_data:v4.0.0
调试命令:docker run --rm -it --gpus all --entrypoint /bin/bash new2026_data:v4.0.0

五、构建成功后,将在Portainer镜像列表中看到构建成功的镜像文件

六、注意事项
1、如果你的程序涉及大模型(需要 GPU),构建镜像时脚本可能提示未检测到 NVIDIA Container Toolkit,并要求执行以下命令:
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
解决方法:
步骤1:添加 NVIDIA 官方 GPG 密钥(验证包完整性)
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
步骤2:添加 NVIDIA 软件源(适配 Ubuntu 版本:jammy=22.04,focal=20.04)
echo 'deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/ubuntu22.04/$(ARCH) /' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
步骤3:更新软件包索引
sudo apt-get update
步骤4:安装并配置 NVIDIA Container Toolkit,然后重启 Docker
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
2、如果你反复调试程序并重复构建镜像,Docker 的构建缓存、未使用的镜像和容器会越积越多,占用的硬盘空间也会越来越大,此时需要执行以下清理命令(所有提示均输入 Y 确认):
docker buildx prune && docker buildx prune -af && docker builder prune && docker builder prune -af && docker system prune && docker system prune -a