[Beginner's Guide] Deploying the DeepSeek-7B distilled model remotely with a PyQt GUI

Local result: (If you want to turn this into an undergraduate thesis project, I suggest polishing the interface.)

Summary: use MobaXterm to connect to an autodl server remotely, then use MobaXterm's X11 forwarding so the PyQt GUI is displayed on your own computer.

1. Download MobaXterm from the official website

MobaXterm free Xserver and tabbed SSH client for Windows. I find it more convenient for connecting to an autodl server than PyCharm Professional: PyCharm requires configuring file synchronization, while MobaXterm skips all of that. Working in MobaXterm is also easier than in the JupyterLab that autodl provides.

2. Rent a server on autodl

It usually costs about 1.6 to 2 RMB per hour. Pay attention to whether the GPU's VRAM can hold the model's parameters (I don't yet know exactly how to judge this and will fill it in once I do; a rough rule of thumb is sketched below). I rented a 4090D. I also suggest getting up early to rent a card: it's 8 p.m. now and I just failed to create a new instance, but at 8 a.m. I saw cards to spare.
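
A common back-of-the-envelope estimate (my own addition, not from the original steps): the weights alone need roughly parameter count × bytes per parameter, so a 7B model in bf16 (2 bytes each) is about 13 GB before the KV cache and activations, which is why a 24 GB card like the 4090D has headroom:

python

# Sketch: rough VRAM needed just for the weights (excludes KV cache/activations,
# which need extra headroom on top of this).
def weight_vram_gb(n_params: float, bytes_per_param: int = 2) -> float:
    return n_params * bytes_per_param / 1024**3

print(f"{weight_vram_gb(7e9):.1f} GB")  # ~13.0 GB for a 7B model in bf16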

For the image you can pick a community image; I used the V3 version, which comes with the 1.5B model preinstalled. deepseek-ai/DeepSeek-R1/DeepSeek-R1: [One-click chat, multi-dimensional research] DeepSeek-R1, currently top three on the global LLM arena leaderboard, with performance benchmarked against the official OpenAI o1. The latest model released by DeepSeek on January 20, 2025! - CG

3. Connect to the autodl server with MobaXterm

Reference: "[MobaXterm] Logging in and connecting to a server" tutorial - CSDN blog
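
In short, you create an SSH session in MobaXterm with the host, port, and root user that autodl shows in the instance's login command (the values below are placeholders), and make sure X11-Forwarding is ticked in the session settings, which step 5 relies on:

bash

# Placeholder host/port - copy the real ones from the login command
# shown on your autodl instance page; the password is shown next to it.
ssh -p 12345 root@region-1.autodl.com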

4. I downloaded the 7B model; note that the commands below place it in the autodl-tmp directory (the instance's data disk).

bash
conda activate torch_env                    # conda env from the image (adjust if yours differs)
export HF_ENDPOINT=https://hf-mirror.com    # download through the HF mirror (faster from mainland China)
huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --local-dir /root/autodl-tmp/DeepSeek-R1-Distill-Qwen-7B --resume-download
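
To confirm the download finished (my own sanity check, not from the original steps), list the directory; a 7B bf16 checkpoint should total roughly 15 GB of safetensors shards plus tokenizer and config files:

bash

ls -lh /root/autodl-tmp/DeepSeek-R1-Distill-Qwen-7B   # expect config.json, tokenizer files, *.safetensors
du -sh /root/autodl-tmp/DeepSeek-R1-Distill-Qwen-7B   # total size of the download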

Running the 7B model should differ from the 1.5B version only in the model path. Once the script is running you can chat with the model in the terminal; type exit to quit:

Filename: deepseek_multichat-7B.py

Run it in the terminal: python deepseek_multichat-7B.py

python
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import unicodedata
from typing import List

@torch.inference_mode()
def generate(
    model: AutoModelForCausalLM,
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,
    max_new_tokens: int,
    temperature: float = 1.0
) -> List[int]:
    """
    Generate response from the model with attention_mask provided.
    """
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,  # pass an explicit attention mask
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        eos_token_id=model.config.eos_token_id,
        pad_token_id=model.config.eos_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    return outputs[0].tolist()

def clean_input(user_input):
    """
    Clean user input: remove invisible characters and extra whitespace.
    """
    user_input = "".join(c for c in user_input if not unicodedata.category(c).startswith("C"))  # drop control characters
    return user_input.strip()  # strip leading/trailing whitespace

def clean_message_content(content):
    """
    Clean message content: strip surrounding whitespace and filter out invalid input.
    """
    if not content or not isinstance(content, str):
        return ""
    return content.strip()  # strip leading/trailing whitespace

def build_prompt(messages, max_history=3):
    """
    Build prompt for the model, limiting the history to the most recent messages.
    """
    template = "The following is a conversation with an AI assistant. The assistant is helpful, knowledgeable, and polite:\n"
    for msg in messages[-max_history:]:
        content = clean_message_content(msg["content"])
        if not content:  # skip empty content
            continue
        template += f"{msg['role'].capitalize()}: {content}\n"
    template += "Assistant: "
    return template.strip()  # strip the trailing space and return the prompt string

if __name__ == "__main__":
    print("Initializing DeepSeek-R1 Service...")

    # Configuration
    ckpt_path = "/root/autodl-tmp/DeepSeek-R1-Distill-Qwen-7B"  # directory containing the model
    config_path = "/root/autodl-tmp/DeepSeek-R1-Distill-Qwen-7B/config.json"  # config file path (not used below)

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(ckpt_path) 
    model = AutoModelForCausalLM.from_pretrained(
        ckpt_path,
        torch_dtype=torch.bfloat16,
    ).cuda()
    
    
    # Interactive session
    messages = []  # To maintain context
    while True:
        user_input = input("You: ").strip()  # strip leading/trailing whitespace
        user_input = clean_input(user_input)  # remove invisible characters
        if not user_input or len(user_input.strip()) == 0:  # reject empty input
            print("Invalid input. Please type something meaningful!")
            continue

        if user_input.lower() in ["exit", "quit"]:
            print("Exiting conversation. Goodbye!")
            break

        # Append user input to context
        messages.append({"role": "user", "content": user_input})

        # Limit conversation history
        messages = messages[-10:]  # keep only the 10 most recent messages

        # Build prompt and tokenize
        prompt = build_prompt(messages)
        if not isinstance(prompt, str) or len(prompt.strip()) == 0:  # make sure the prompt is non-empty
            print("Error: Prompt is empty or invalid. Skipping this turn.")
            continue

        tokenized_prompt = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
        input_ids = tokenized_prompt["input_ids"].to("cuda")
        attention_mask = tokenized_prompt["attention_mask"].to("cuda")

        # Generate response
        max_new_tokens = 500  # was 150
        temperature = 0.7

        completion_tokens = generate(model, input_ids, attention_mask, max_new_tokens, temperature)
        completion = tokenizer.decode(
            completion_tokens[len(input_ids[0]):],  # keep only the newly generated tokens after the prompt
            skip_special_tokens=True
        ).split("User:")[0].strip()

        print(f"Assistant: {completion}")

        # Append assistant response to context
        messages.append({"role": "assistant", "content": completion})
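
One thing worth knowing: this script builds the prompt by hand with build_prompt(). The distilled R1 checkpoints ship with a chat template in their tokenizer, so a cleaner variant (a sketch I haven't wired into the script; apply_chat_template is the standard transformers API) would be:

python

# Sketch: let the tokenizer's built-in chat template format the conversation
# instead of build_prompt(); messages is the same list of role/content dicts.
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant-turn marker
    return_tensors="pt",
).to("cuda")
outputs = model.generate(input_ids, max_new_tokens=500, temperature=0.7, do_sample=True)
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))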

5. Next comes the part that gave me the biggest headache: displaying the PyQt GUI.

autodl cannot render a GUI directly, and there are two ways around this. Method 1: wrap the model as an API with Flask, access that API from your own computer, and draw the GUI locally. Method 2: use X11 forwarding to run the GUI program on autodl while the window is displayed on your own machine. I used the second method. The first was suggested by a senior classmate; I haven't tried it yet, but a rough sketch of the idea follows, and I'll write it up properly once I have.
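
For completeness, method 1 would look roughly like the sketch below. I haven't tested it; the /chat route, the request format, and port 6006 (autodl's usual custom-service port) are my assumptions, and it reuses model, tokenizer, build_prompt, and generate from the script above:

python

# Sketch of method 1 (untested): expose the model as an HTTP API with Flask.
# Assumes model, tokenizer, build_prompt and generate are defined as above.
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/chat", methods=["POST"])
def chat():
    messages = request.json["messages"]  # same [{"role": ..., "content": ...}] format
    prompt = build_prompt(messages)
    tokenized = tokenizer(prompt, return_tensors="pt")
    completion_tokens = generate(
        model,
        tokenized["input_ids"].to("cuda"),
        tokenized["attention_mask"].to("cuda"),
        max_new_tokens=500,
        temperature=0.7,
    )
    reply = tokenizer.decode(
        completion_tokens[tokenized["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    ).split("User:")[0].strip()
    return jsonify({"reply": reply})

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=6006)  # assumed port; autodl exposes custom services here

A client on your own machine (PyQt or anything else) would then POST the message history to this endpoint instead of loading the model locally.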

At first I tried XLaunch and couldn't get it to work; MobaXterm needs far less configuration because its X server is built in.

If the X11-Forwarding status shows a red cross, see: "MobaXterm: connecting to a server and visualizing graphics via X11 forwarding (personal learning notes)" - CSDN blog

The most important setting should be X11UseLocalhost no. I'm using PyQt, which doesn't need localhost, so I set it to no as well; the server-side steps are sketched below.
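
For reference, the server-side changes usually boil down to the following (a sketch based on the linked post, using standard Ubuntu paths and commands; reconnect the SSH session afterwards):

bash

# On the autodl server: enable X11 forwarding in sshd, then restart it.
echo "X11Forwarding yes" >> /etc/ssh/sshd_config
echo "X11UseLocalhost no" >> /etc/ssh/sshd_config
service ssh restart

# After reconnecting with X11-Forwarding enabled in MobaXterm,
# $DISPLAY should be set automatically; a quick test:
echo $DISPLAY                            # e.g. :10.0 or localhost:10.0
apt-get install -y x11-apps && xclock    # a clock window should pop up on your desktop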

PyQt GUI code:

python
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtGui import *
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import unicodedata
from typing import List

# The model-related functions below are unchanged from the terminal script above.

@torch.inference_mode()
def generate(
    model: AutoModelForCausalLM,
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,
    max_new_tokens: int,
    temperature: float = 1.0
) -> List[int]:
    """
    Generate response from the model with attention_mask provided.
    """
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,  # pass an explicit attention mask
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        eos_token_id=model.config.eos_token_id,
        pad_token_id=model.config.eos_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    return outputs[0].tolist()

def clean_input(user_input):
    """
    Clean user input: remove invisible characters and extra whitespace.
    """
    user_input = "".join(c for c in user_input if not unicodedata.category(c).startswith("C"))  # drop control characters
    return user_input.strip()  # strip leading/trailing whitespace

def clean_message_content(content):
    """
    Clean message content: strip surrounding whitespace and filter out invalid input.
    """
    if not content or not isinstance(content, str):
        return ""
    return content.strip()  # strip leading/trailing whitespace
    
def build_prompt(messages, max_history=3):
    """
    Build prompt for the model, limiting the history to the most recent messages.
    """
    template = "The following is a conversation with an AI assistant. The assistant is helpful, knowledgeable, and polite:\n"
    for msg in messages[-max_history:]:
        content = clean_message_content(msg["content"])
        if not content:  # skip empty content
            continue
        template += f"{msg['role'].capitalize()}: {content}\n"
    template += "Assistant: "
    return template.strip()  # strip the trailing space and return the prompt string


class ChatWindow(QWidget):
    def __init__(self, model, tokenizer):
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.messages = []
        self.init_ui()
        
    def init_ui(self):
        self.setWindowTitle('DeepSeek Chat')
        self.setGeometry(300, 300, 800, 600)

        # create the UI widgets
        self.history_area = QTextEdit()
        self.history_area.setReadOnly(True)
        self.input_area = QTextEdit()
        self.input_area.setMaximumHeight(100)
        self.send_btn = QPushButton('Send')
        self.clear_btn = QPushButton('Clear History')

        # layout
        vbox = QVBoxLayout()
        vbox.addWidget(self.history_area)
        
        hbox = QHBoxLayout()
        hbox.addWidget(self.input_area)
        hbox.addWidget(self.send_btn)
        hbox.addWidget(self.clear_btn)
        
        vbox.addLayout(hbox)
        self.setLayout(vbox)

        # connect signals
        self.send_btn.clicked.connect(self.on_send)
        self.clear_btn.clicked.connect(self.on_clear)
        self.input_area.installEventFilter(self)

    def eventFilter(self, obj, event):
        if obj is self.input_area and event.type() == QEvent.KeyPress:
            if event.key() == Qt.Key_Return and event.modifiers() & Qt.ControlModifier:
                self.on_send()
                return True
        return super().eventFilter(obj, event)

    def update_history(self, role, content):
        self.history_area.append(f"<b>{role.capitalize()}:</b> {content}<br>")
        self.history_area.verticalScrollBar().setValue(
            self.history_area.verticalScrollBar().maximum()
        )

    def on_clear(self):
        self.messages = []
        self.history_area.clear()

    def on_send(self):
        user_input = self.input_area.toPlainText().strip()
        if not user_input:
            return

        # clean the input
        user_input = clean_input(user_input)
        self.input_area.clear()
        self.update_history("user", user_input)

        # append to the message history
        self.messages.append({"role": "user", "content": user_input})
        self.messages = self.messages[-10:]

        # generate the reply on a background thread
        self.worker = Worker(self.model, self.tokenizer, self.messages)
        self.worker.finished.connect(self.handle_response)
        self.worker.start()

    def handle_response(self, completion):
        self.messages.append({"role": "assistant", "content": completion})
        self.update_history("assistant", completion)

class Worker(QThread):
    finished = pyqtSignal(str)

    def __init__(self, model, tokenizer, messages):
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.messages = messages

    def run(self):
        prompt = build_prompt(self.messages)
        tokenized_prompt = self.tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
        input_ids = tokenized_prompt["input_ids"].to("cuda")
        attention_mask = tokenized_prompt["attention_mask"].to("cuda")

        max_new_tokens = 500
        temperature = 0.7

        completion_tokens = generate(self.model, input_ids, attention_mask, max_new_tokens, temperature)
        completion = self.tokenizer.decode(
            completion_tokens[len(input_ids[0]):],
            skip_special_tokens=True
        ).split("User:")[0].strip()

        self.finished.emit(completion)

if __name__ == "__main__":
    # initialize the model (same as the terminal script)
    print("Initializing DeepSeek-R1 Service...")
    ckpt_path = "/root/autodl-tmp/DeepSeek-R1-Distill-Qwen-7B"
    tokenizer = AutoTokenizer.from_pretrained(ckpt_path) 
    model = AutoModelForCausalLM.from_pretrained(
        ckpt_path,
        torch_dtype=torch.bfloat16,
    ).cuda()

    # start the GUI
    app = QApplication(sys.argv)
    window = ChatWindow(model, tokenizer)
    window.show()
    sys.exit(app.exec_())
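
Save the GUI code on the server (the filename below is my own choice; any name works) and launch it from the MobaXterm session. With X11 forwarding working, the window pops up on your local desktop:

bash

conda activate torch_env
python deepseek_chat_gui-7B.py   # filename is arbitrary; the window renders locally via X11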