永久设置 pip 使用清华镜像源
shell
复制代码
# Persist the Tsinghua (TUNA) PyPI mirror as pip's default package index
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
Docker
dockerfile
shell
复制代码
# CUDA 11.8 development image with Python 3.10 (deadsnakes PPA), pip, ffmpeg and
# common OpenCV/GL runtime libraries, configured for the Asia/Shanghai timezone.
FROM nvidia/cuda:11.8.0-devel-ubuntu20.04

# Build-time only: stops apt/debconf (e.g. tzdata) from prompting and hanging
# the build. Declared as ARG so it is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Select the timezone before tzdata is installed so its noninteractive
# configuration keeps Asia/Shanghai.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
    echo "Asia/Shanghai" > /etc/timezone

# OS packages. `apt-get update` runs in the same layer as every install that
# depends on it, so a cached layer can never pair a stale index with a new
# install. The second update is required after adding the deadsnakes PPA.
# The apt lists are intentionally kept in the image: this is an interactive
# development container in which more packages are installed later by hand.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y \
        software-properties-common \
        tzdata \
        vim && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y \
        curl \
        ffmpeg \
        gcc \
        git \
        libgl1 \
        libglib2.0-dev \
        libsm6 \
        libxext6 \
        libxrender1 \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        python3.10-tk

# Install pip via get-pip.py (use this route when the network is fast).
# -f makes curl fail on HTTP errors instead of saving an error page as the
# script; download, run and removal share one layer so the script is not kept.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /root/get-pip.py && \
    python3.10 /root/get-pip.py && \
    rm -f /root/get-pip.py
# Alternative: install pip from the distribution packages instead
# RUN apt-get install python3-pip git -y
# RUN python3.10 -m pip install --upgrade pip

# Upgrade pip through the Aliyun mirror and make that mirror pip's default index
RUN python3.10 -m pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple/ --upgrade pip && \
    python3.10 -m pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
运行容器
shell
复制代码
# Start the container detached (-d) so this terminal stays free for the
# `docker exec` below. Host ports 9006/9007/9008 are published to container
# ports 22/7860/8000.
sudo docker run -d --name <your_container_name> --shm-size 32G -v <your_mount_dir>:<docker_inner_dir> -p 9006:22 -p 9007:7860 -p 9008:8000 --restart=always --gpus=all <image_id> /usr/sbin/init
# Open a new bash session inside the container; other commands can then be run from it
sudo docker exec -it <container_id> /bin/bash
安装conda
shell
复制代码
# Download the Anaconda installer from the Tsinghua (TUNA) mirror
wget https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2021.11-Linux-x86_64.sh
# Run the installer; downloading the script alone does not install conda
bash Anaconda3-2021.11-Linux-x86_64.sh
启动ssh
shell
复制代码
# Install the OpenSSH server; refresh the package index first so the install
# does not depend on whatever index happens to be cached
apt-get update && apt-get install -y openssh-server
# Edit /etc/ssh/sshd_config and make sure the directives listed below are enabled.
# They are lines for the config file, not shell commands, so they are shown as comments.
vim /etc/ssh/sshd_config
#   PubkeyAuthentication yes   # enable public/private key authentication
#   PermitRootLogin yes        # allow root to log in over SSH
#   Port 22                    # listen on port 22
# Set the root password
passwd
# Restart the SSH server to apply the changes
service ssh restart
# Verify that SSH login works (loopback address, from the same machine)
ssh -p 22 root@127.0.0.1
安装Git
shell
复制代码
# Install git from the distribution packages
sudo apt-get install git
# Set the global author identity recorded in commits (replace the placeholder values)
git config --global user.name "Your Name"
git config --global user.email "email@example.com"
部署fastchat
shell
复制代码
# Combine FastChat and vLLM to serve a web demo or an OpenAI-style API server.
# Each service needs its own port; the worker and the web UI must be pointed at
# the controller because it does not run on FastChat's default port.
# 1) Start the controller
python -m fastchat.serve.controller --host 0.0.0.0 --port 9360
# 2) Start a vLLM worker that publishes the model and registers with the controller
python -m fastchat.serve.vllm_worker --model-path Qwen/Qwen1.5-14B-Chat-GPTQ-Int4 --trust-remote-code \
    --tensor-parallel-size 2 --gpu_memory_utilization 0.7 --max-model-len 1024 \
    --host 0.0.0.0 --port 21002 --worker-address http://localhost:21002 \
    --controller-address http://localhost:9360
# 3) Start the Gradio web page (7860 is one of the container ports published by the docker run step)
python -m fastchat.serve.gradio_web_server --host 0.0.0.0 --port 7860 --controller-url http://localhost:9360