Loading a model with llama-cpp-python conflicts with PyQt5.
First, code sample 1: loading the model from a standalone script:
from llama_cpp import Llama

llm = Llama(
    model_path="./models/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf",
    n_ctx=2048,
    n_gpu_layers=0,
    verbose=False
)

messages = [
    {"role": "system", "content": "You are an assistant"},
    {"role": "user", "content": "What can you do?"}
]

stream = llm.create_chat_completion(
    messages=messages,
    stream=True,
    temperature=0.7,
    max_tokens=256
)

for chunk in stream:
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)
This loads fine.
But loading the model synchronously in the PyQt5 main thread throws an error.
The failing code:
# minimal_pyqt_llama_test.py
import sys
import logging

from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QPushButton, QTextEdit

# --- Logging setup ---
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s'
)
logger = logging.getLogger(__name__)

# --- Model loading helper ---
def attempt_load_llama(model_path, n_ctx=512, n_gpu_layers=0, verbose=False):
    from llama_cpp import Llama
    return Llama(model_path=model_path, n_ctx=n_ctx, verbose=verbose, n_gpu_layers=n_gpu_layers)

# --- Main window ---
class MainWindow(QWidget):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("PyQt5 + Llama Test")
        layout = QVBoxLayout(self)

        self.log_edit = QTextEdit()
        self.log_edit.setReadOnly(True)
        layout.addWidget(self.log_edit)

        load_btn = QPushButton("Load Llama Model")
        load_btn.clicked.connect(self.on_load_clicked)
        layout.addWidget(load_btn)

        self.model_path = "./models/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf"

    def on_load_clicked(self):
        self.log_edit.append("Attempting to load model...")
        try:
            llm = attempt_load_llama(self.model_path)
            self.log_edit.append("Model loaded successfully!")
        except Exception as e:
            self.log_edit.append(f"Error: {e}")

if __name__ == "__main__":
    app = QApplication(sys.argv)
    win = MainWindow()
    win.show()
    sys.exit(app.exec_())
The error:
Attempting to load model...
Error: exception: access violation reading 0x0000000000000000
I tried a number of things:
1. Downgrading llama-cpp-python
2. Loading the model in a background PyQt5 QThread
3. GUI process ↔ multiprocessing.Queue ↔ LLM inference process
4. Removing the use_mmap=False parameter
In the end I replaced multiprocessing with subprocess + JSON communication:
"""
LLM客户端 - GUI端的接口
使用subprocess管理LLM服务进程,通过stdin/stdout进行JSON通信
"""
import logging
import subprocess
import json
import threading
from typing import List, Dict, Optional
from PyQt5.QtCore import QObject, pyqtSignal
logger = logging.getLogger("VoiceAssistant")
class LLMClient(QObject):
"""LLM客户端 - 管理服务进程和通信"""
# Qt信号
service_ready = pyqtSignal()
service_error = pyqtSignal(str)
token_received = pyqtSignal(str)
response_finished = pyqtSignal(str)
error_occurred = pyqtSignal(str)
def __init__(self, model_path: str, n_ctx: int = 2048, n_gpu_layers: int = 0):
super().__init__()
self.model_path = model_path
self.n_ctx = n_ctx
self.n_gpu_layers = n_gpu_layers
self.process: Optional[subprocess.Popen] = None
self.reader_thread: Optional[threading.Thread] = None
self._stop_flag = False
self._is_ready = False
def start_service(self) -> bool:
"""启动LLM服务进程"""
if self.process and self.process.poll() is None:
logger.warning("LLM服务已在运行")
return True
try:
logger.info("启动LLM服务进程...")
# 获取Python解释器路径
import sys
python_exe = sys.executable
# 启动服务进程
self.process = subprocess.Popen(
[python_exe, "llm_server.py", self.model_path, str(self.n_ctx), str(self.n_gpu_layers)],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1, # 行缓冲
encoding='utf-8',
errors='replace' # 替换无法编码的字符,避免代理字符错误
)
# 启动读取线程
self._stop_flag = False
self.reader_thread = threading.Thread(target=self._read_output, daemon=True)
self.reader_thread.start()
logger.info("LLM服务进程已启动")
return True
except Exception as e:
error_msg = f"启动LLM服务失败: {e}"
logger.error(error_msg, exc_info=True)
self.service_error.emit(error_msg)
return False
def _read_output(self):
"""读取服务进程的输出(在单独线程中)"""
try:
for line in self.process.stdout:
if self._stop_flag:
break
line = line.strip()
if not line:
continue
try:
msg = json.loads(line)
msg_type = msg.get("type")
if msg_type == "ready":
self._is_ready = True
logger.info("LLM服务就绪!")
self.service_ready.emit()
elif msg_type == "token":
self.token_received.emit(msg.get("content", ""))
elif msg_type == "done":
self.response_finished.emit(msg.get("full_text", ""))
elif msg_type == "error":
self.error_occurred.emit(msg.get("content", "未知错误"))
elif msg_type == "pong":
pass # 心跳响应
except json.JSONDecodeError as e:
logger.error(f"JSON解析失败: {e}, line: {line}")
except Exception as e:
logger.error(f"处理响应失败: {e}")
except Exception as e:
if not self._stop_flag:
logger.error(f"读取输出失败: {e}", exc_info=True)
self.error_occurred.emit(f"读取输出失败: {e}")
def is_ready(self) -> bool:
"""检查服务是否就绪"""
return self._is_ready and self.process and self.process.poll() is None
def send_chat_request(self, messages: List[Dict[str, str]],
temperature: float = 0.7,
max_tokens: int = 256,
enable_tts: bool = False):
"""发送聊天请求"""
if not self.is_ready():
error_msg = "LLM服务未就绪"
logger.error(error_msg)
self.error_occurred.emit(error_msg)
return
try:
request = {
"type": "chat",
"messages": messages,
"config": {
"temperature": temperature,
"max_tokens": max_tokens
},
"enable_tts": enable_tts
}
# 发送到stdin(使用ASCII编码避免代理字符问题)
try:
request_json = json.dumps(request, ensure_ascii=True)
self.process.stdin.write(request_json + "\n")
self.process.stdin.flush()
logger.info(f"已发送聊天请求: {len(messages)} 条消息")
except UnicodeEncodeError as e:
logger.error(f"编码错误: {e}, 消息内容: {messages}")
raise
except Exception as e:
error_msg = f"发送请求失败: {e}"
logger.error(error_msg, exc_info=True)
self.error_occurred.emit(error_msg)
def ping(self):
"""发送心跳检查"""
if self.process and self.process.poll() is None:
try:
self.process.stdin.write(json.dumps({"type": "ping"}) + "\n")
self.process.stdin.flush()
except Exception as e:
logger.error(f"发送心跳失败: {e}")
def stop_service(self):
"""停止LLM服务"""
logger.info("停止LLM服务...")
try:
self._stop_flag = True
# 发送退出信号
if self.process and self.process.poll() is None:
try:
self.process.stdin.write(json.dumps({"type": "quit"}) + "\n")
self.process.stdin.flush()
except:
pass
# 等待进程结束
try:
self.process.wait(timeout=3)
except subprocess.TimeoutExpired:
logger.warning("进程未及时退出,强制终止")
self.process.terminate()
try:
self.process.wait(timeout=2)
except subprocess.TimeoutExpired:
logger.warning("强制杀死进程")
self.process.kill()
# 等待读取线程
if self.reader_thread and self.reader_thread.is_alive():
self.reader_thread.join(timeout=2)
self._is_ready = False
logger.info("LLM服务已停止")
except Exception as e:
logger.error(f"停止服务失败: {e}", exc_info=True)
def __del__(self):
"""析构函数 - 确保服务被停止"""
self.stop_service()
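
The client above spawns a companion llm_server.py, which I haven't pasted here. For completeness, below is a minimal sketch of what that script could look like, reconstructed purely from the protocol the client expects (one JSON object per line on stdin/stdout; ready/token/done/error/pong replies to chat/ping/quit requests). Everything beyond those message shapes is an assumption, not the exact original script. The key point is that this process never imports PyQt5, so llama-cpp-python initializes in a clean address space:

# llm_server.py - minimal sketch of the server side, reconstructed from the
# protocol the client uses. Details beyond the message shapes are assumptions.
import sys
import json

def send(msg):
    # One JSON object per line on stdout; ensure_ascii avoids encoding issues
    sys.stdout.write(json.dumps(msg, ensure_ascii=True) + "\n")
    sys.stdout.flush()

def main():
    model_path = sys.argv[1]
    n_ctx = int(sys.argv[2])
    n_gpu_layers = int(sys.argv[3])

    # Loading here is safe: no PyQt5 in this process
    from llama_cpp import Llama
    llm = Llama(model_path=model_path, n_ctx=n_ctx,
                n_gpu_layers=n_gpu_layers, verbose=False)
    send({"type": "ready"})

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            continue
        req_type = req.get("type")
        if req_type == "quit":
            break
        elif req_type == "ping":
            send({"type": "pong"})
        elif req_type == "chat":
            # The client also sends "enable_tts"; this sketch ignores it
            try:
                config = req.get("config", {})
                stream = llm.create_chat_completion(
                    messages=req["messages"],
                    stream=True,
                    temperature=config.get("temperature", 0.7),
                    max_tokens=config.get("max_tokens", 256),
                )
                parts = []
                for chunk in stream:
                    delta = chunk["choices"][0]["delta"]
                    if "content" in delta:
                        parts.append(delta["content"])
                        send({"type": "token", "content": delta["content"]})
                send({"type": "done", "full_text": "".join(parts)})
            except Exception as e:
                send({"type": "error", "content": str(e)})

if __name__ == "__main__":
    main()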
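
And, for reference, a sketch of how the client might be wired into a window. ChatWindow, the module name llm_client, and all the wiring here are hypothetical, only meant to show the signal connections and an optional QTimer heartbeat:

# Hypothetical wiring example for LLMClient (class and module names assumed)
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QTextEdit
from PyQt5.QtCore import QTimer
from llm_client import LLMClient  # wherever the class above lives (name assumed)

class ChatWindow(QWidget):
    def __init__(self):
        super().__init__()
        layout = QVBoxLayout(self)
        self.output = QTextEdit()
        self.output.setReadOnly(True)
        layout.addWidget(self.output)

        self.client = LLMClient("./models/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf")
        # Tokens are emitted from the reader thread; Qt's default queued
        # delivery marshals them onto the GUI thread safely
        self.client.token_received.connect(self.output.insertPlainText)
        self.client.service_ready.connect(
            lambda: self.client.send_chat_request(
                [{"role": "user", "content": "What can you do?"}]))
        self.client.start_service()

        # Optional heartbeat so a dead server process gets noticed
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.client.ping)
        self.timer.start(5000)

    def closeEvent(self, event):
        self.client.stop_service()
        event.accept()

if __name__ == "__main__":
    app = QApplication(sys.argv)
    win = ChatWindow()
    win.show()
    sys.exit(app.exec_())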
I never found the root cause of the error; my feeling is that something conflicts during the low-level initialization when the model is loaded. If anyone has a better approach, please share it.