# python  (stray Markdown code-fence language tag from the source article; commented out so the file parses)
# ====================== 1. 核心状态和模型定义 ======================
import asyncio
import json
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, TypedDict

from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field
class DeviceType(str, Enum):
    """Kinds of smart-home devices the system can control."""

    LIGHT = "light"
    SOCKET = "socket"
    THERMOSTAT = "thermostat"
    CAMERA = "camera"
    CURTAIN = "curtain"
class DeviceState(str, Enum):
    """Coarse device power / brightness states."""

    ON = "on"
    OFF = "off"
    DIM = "dim"
    BRIGHT = "bright"
class SceneMode(str, Enum):
    """Named multi-device scenes that can be triggered by a single command."""

    HOME = "home"
    AWAY = "away"
    SLEEP = "sleep"
    MOVIE = "movie"
    PARTY = "party"
class VoiceCommandState(TypedDict):
    """Pipeline state for one voice command as it moves through processing."""

    # Raw input
    raw_text: str  # verbatim transcription / typed text
    wake_word_detected: bool
    timestamp: str
    # Processing results
    embedding: Optional[List[float]]  # BGE sentence embedding (768-dim elsewhere in this file)
    scene_classification: Optional[str]
    intent_result: Optional[Dict[str, Any]]
    # Device control
    target_devices: List[Dict[str, Any]]
    device_commands: List[Dict[str, Any]]
    execution_results: List[Dict[str, Any]]
    # Conversation context
    conversation_history: List[Dict[str, Any]]
    user_preferences: Dict[str, Any]
    home_context: Dict[str, Any]  # household status: who is home, time of day, etc.
# ====================== 2. 智能家居设备管理 ======================
class SmartDevice:
    """Base class for a controllable smart-home device.

    Holds identity/location plus a coarse on/off state and a free-form
    property bag. The async operations are stubs standing in for real
    IoT API calls.
    """

    def __init__(self, device_id: str, device_type: DeviceType,
                 name: str, location: str):
        self.device_id = device_id
        self.device_type = device_type
        self.name = name
        self.location = location
        self.state = DeviceState.OFF
        self.properties = {}

    async def turn_on(self):
        """Switch the device on (stub for the real IoT API call)."""
        print(f"[IoT] 设备 {self.name} 打开")
        self.state = DeviceState.ON
        return {"success": True, "device": self.name, "state": "on"}

    async def turn_off(self):
        """Switch the device off (stub for the real IoT API call)."""
        print(f"[IoT] 设备 {self.name} 关闭")
        self.state = DeviceState.OFF
        return {"success": True, "device": self.name, "state": "off"}

    async def set_property(self, prop: str, value: Any):
        """Record an arbitrary device property (brightness, temperature, ...)."""
        self.properties[prop] = value
        return {"success": True, "property": prop, "value": value}
class SmartHomeManager:
    """Registry of smart devices plus predefined multi-device scenes."""

    def __init__(self):
        self.devices: Dict[str, SmartDevice] = {}
        # Scene -> ordered list of device actions. A "all_<type>s" device id
        # targets every registered device of that type.
        self.scenes: Dict[SceneMode, List[Dict]] = {
            SceneMode.HOME: [
                {"device": "living_room_light", "action": "turn_on", "brightness": 80},
                {"device": "living_room_socket", "action": "turn_on"},
                {"device": "ac", "action": "set_temperature", "value": 23}
            ],
            SceneMode.AWAY: [
                {"device": "all_lights", "action": "turn_off"},
                {"device": "all_sockets", "action": "turn_off"},
                {"device": "security_camera", "action": "turn_on"}
            ]
        }

    def register_device(self, device: SmartDevice):
        """Register a device under its device_id."""
        self.devices[device.device_id] = device

    async def _apply_action(self, device, action: Dict) -> Dict[str, Any]:
        """Dispatch one scene action to one device.

        Fixes the original bug where any action other than turn_on/turn_off
        (e.g. the HOME scene's "set_temperature") left `result` unbound and
        raised NameError at append time.
        """
        name = action["action"]
        if name == "turn_on":
            return await device.turn_on()
        if name == "turn_off":
            return await device.turn_off()
        if name == "set_temperature":
            return await device.set_property("temperature", action.get("value"))
        return {"success": False, "device": device.name,
                "error": f"unsupported action: {name}"}

    async def execute_scene(self, scene_mode: SceneMode):
        """Run every action of a scene and collect per-action results."""
        if scene_mode not in self.scenes:
            return {"success": False, "error": f"场景 {scene_mode} 不存在"}
        results = []
        for action in self.scenes[scene_mode]:
            device_id = action["device"]
            if device_id.startswith("all_"):
                # Bulk operation over all devices of a type. Fixed: the
                # original compared "lights"/"sockets" (plural) against the
                # singular enum values "light"/"socket", so bulk actions
                # never matched any device.
                device_type = device_id[len("all_"):].rstrip("s")
                for device in self.devices.values():
                    if device.device_type.value == device_type:
                        results.append(await self._apply_action(device, action))
            else:
                # Single-device operation; unknown device ids (e.g. an
                # unregistered "security_camera") are silently skipped,
                # matching the original behavior.
                device = self.devices.get(device_id)
                if device:
                    results.append(await self._apply_action(device, action))
        return {"success": True, "scene": scene_mode, "results": results}
# ====================== 3. BGE嵌入模型包装器 ======================
class BGEEmbeddingModel:
    """Wrapper around (mocked) BGE ONNX embedding models for zh/en text."""

    def __init__(self, model_path_en: str, model_path_zh: str,
                 vocab_path_en: str, vocab_path_zh: str):
        # One tokenizer per language; real ONNX sessions would be loaded
        # here as well (mocked in this codebase).
        self.tokenizer_en = SimpleTokenizer(vocab_path_en, language="en")
        self.tokenizer_zh = SimpleTokenizer(vocab_path_zh, language="zh")

    async def embed_text(self, text: str, language: str = "auto") -> List[float]:
        """Tokenize `text` and run (mock) inference; returns a 768-dim vector."""
        print(f"[BGEModel] 开始文本嵌入,输入文本: {text}")
        if language == "auto":
            language = self.detect_language(text)
        # Anything that is not explicitly English goes through the zh path,
        # matching the original if/else.
        lang = "en" if language == "en" else "zh"
        tokenizer = self.tokenizer_en if lang == "en" else self.tokenizer_zh
        token_ids = await tokenizer.tokenize(text)
        return await self.run_inference(token_ids, language=lang)

    def detect_language(self, text: str) -> str:
        """Return "zh" when `text` contains any CJK unified ideograph, else "en"."""
        import re
        return "zh" if re.search(r'[\u4e00-\u9fff]', text) else "en"

    async def run_inference(self, tokens: List[int], language: str):
        """Mock ONNX inference: returns a random 768-dim embedding."""
        print(f"[BGEModel] 运行{language}模型推理,tokens长度: {len(tokens)}")
        import numpy as np
        return np.random.randn(768).tolist()
class SimpleTokenizer:
    """Toy tokenizer producing BERT-style id sequences: [CLS, ..., SEP].

    English input is split on whitespace and hashed per word; Chinese input
    is mapped character-by-character. The vocab file path is accepted but
    unused — no real vocabulary lookup happens.
    """

    def __init__(self, vocab_path: str, language: str):
        self.language = language
        # BERT-base vocab sizes: 30522 (en) / 21128 (zh).
        self.vocab_size = 30522 if language == "en" else 21128
        print(f"[SimpleTokenizer] 初始化,语言: {language}, 词汇表大小: {self.vocab_size}")

    async def tokenize(self, text: str) -> List[int]:
        """Return pseudo token ids for `text`, wrapped in CLS(101)/SEP(102)."""
        print(f"[SimpleTokenizer] 开始分词,输入文本: {text}")
        if self.language == "en":
            # Per-word hash bucket; a real tokenizer would consult the vocab.
            body = [hash(word) % 1000 + 1000 for word in text.lower().split()]
        else:
            # Per-character mapping for Chinese.
            body = [ord(char) % 1000 + 1000 for char in text]
        tokens = [101, *body, 102]
        print(f"[SimpleTokenizer] 分词结果token数量: {len(tokens)}")
        return tokens
# ====================== 4. 场景分类器 ======================
class SceneClassifier:
    """Coarse scene classifier: BGE embedding + cosine similarity to scene prototypes."""

    def __init__(self, bge_model: BGEEmbeddingModel):
        self.bge_model = bge_model
        self.scene_vectors = self.load_scene_vectors()

    def load_scene_vectors(self) -> Dict[str, List[float]]:
        """Return one prototype vector per scene.

        Placeholder vectors; in production these would be precomputed BGE
        embeddings of the scene descriptions. Fixed from the original,
        which returned the *same* vector for every scene, making every
        similarity score identical and the argmax arbitrary. Each scene
        now gets a distinct (still synthetic) 768-dim vector.
        """
        scenes = {
            "IOT": "智能家居控制场景",
            "MUSIC": "音乐播放场景",
            "WEATHER": "天气查询场景",
            "CONVERSATION": "对话场景",
            "SETTING": "设置场景"
        }
        return {
            scene: [0.1 * ((i + idx * 97) % 768) for i in range(768)]
            for idx, scene in enumerate(scenes)
        }

    async def classify(self, text: str) -> Dict[str, Any]:
        """Embed `text` and pick the most similar scene prototype."""
        print(f"[SceneClassifier] 场景分类开始: input='{text}'")
        # 1. Vectorize the utterance.
        embedding = await self.bge_model.embed_text(text)
        # 2. Cosine similarity against each scene prototype.
        scene_scores = {
            name: self.cosine_similarity(embedding, vector)
            for name, vector in self.scene_vectors.items()
        }
        # 3. Highest-scoring scene wins.
        best_scene = max(scene_scores, key=scene_scores.get)
        print(f"[SceneClassifier] 场景初筛完成,场景: {best_scene}, 置信度: {scene_scores[best_scene]:.4f}")
        return {
            "scene": best_scene,
            "confidence": scene_scores[best_scene],
            "all_scores": scene_scores
        }

    def cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Cosine similarity of two equal-length vectors (plain float, not numpy scalar)."""
        import numpy as np
        v1 = np.array(vec1)
        v2 = np.array(vec2)
        return float(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
# ====================== 5. LLM意图分类器 ======================
class IntentClassifier:
    """LLM intent classifier that enforces strict JSON output.

    On any LLM failure (transport error, non-JSON answer, ...) the
    keyword-based fallback_classification() produces a best-effort result
    so the pipeline never crashes here.
    """

    def __init__(self, llm_client: Any):
        self.llm = llm_client
        self.system_prompt = self.build_system_prompt()

    def build_system_prompt(self) -> str:
        """Return the (Chinese) system prompt; simplified from production logs."""
        return """你是一个专业的语音助手意图分类器。你的任务是分析用户语音命令,准确识别意图类别,并提取关键实体信息。
【重要:严格的JSON格式要求】
你只能输出一个有效的JSON对象。不能有任何自然语言、解释、操作反馈或对话内容。
你的响应必须以{开始,以}结束,必须可以被JSON解析器解析。
意图类别:
1. IOT_SCENE - 智能家居场景控制(如:"打开灯"、"我回家了"、"关闭插座")
2. CONVERSATIONAL - 对话聊天(如:"你好"、"谢谢")
3. SETTING - 设置调整(如:"调大音量")
4. QUERY - 信息查询(如:"天气怎么样")
输出格式:
{
"intent": "意图类型",
"category": "类别",
"complexity": "SIMPLE|COMPLEX|CONVERSATIONAL",
"entities": {
"action": "操作类型",
"device": "设备名称",
"scene_type": "场景类型"
},
"canExecute": true/false,
"reply": "仅当需要回复时的回复内容"
}
重要规则:
- 当用户说"我回家了"、"我要出门了"时,必须识别为IOT_SCENE
- 当可以立即执行时,canExecute=true
- 当需要更多信息时,canExecute=false,并在reply中询问
- 对话意图必须包含reply字段
"""

    async def classify_intent(self, text: str, context: Optional[Dict] = None) -> Dict[str, Any]:
        """Classify `text` via the LLM; returns the parsed JSON intent dict.

        Fixed: the original appended the conversation history *after* the
        current user message; the history now precedes the current turn so
        the model sees the messages in chronological order.
        """
        print(f"[IntentClassifier] 意图分类开始: input='{text}'")
        messages = [{"role": "system", "content": self.system_prompt}]
        if context and context.get("conversation_history"):
            messages.extend(context["conversation_history"][-3:])  # last 3 turns
        messages.append({"role": "user", "content": text})
        try:
            response = await self.llm.chat.completions.create(
                model="gpt-3.5-turbo",  # or a local model
                messages=messages,
                temperature=0.1,
                response_format={"type": "json_object"}
            )
            result_text = response.choices[0].message.content
            result = json.loads(result_text)
            print(f"[IntentClassifier] 意图分类结果: {result}")
            return result
        except Exception as e:
            # Deliberate broad catch: any LLM/parse failure degrades to the
            # rule-based classifier instead of crashing the voice pipeline.
            print(f"[IntentClassifier] 分类失败: {e}")
            return self.fallback_classification(text)

    def fallback_classification(self, text: str) -> Dict[str, Any]:
        """Keyword-based fallback when the LLM is unavailable or returns garbage.

        Precedence: scene keywords > device+action keywords > conversational
        default.
        """
        text_lower = text.lower()
        # Device keywords
        device_keywords = {
            "light": ["灯", "light", "lights"],
            "socket": ["插座", "socket", "plug"],
            "ac": ["空调", "ac", "air conditioner"]
        }
        # Action keywords (turn_on checked first, so "打开" wins over "关")
        action_keywords = {
            "turn_on": ["打开", "开启", "开", "turn on", "switch on", "start"],
            "turn_off": ["关闭", "关掉", "关", "turn off", "switch off", "stop"]
        }
        # Scene keywords
        scene_keywords = {
            "home": ["回家", "我回来了", "到家", "到家了", "i'm home", "going home"],
            "away": ["出门", "外出", "离开", "出门了", "i'm leaving", "going out"]
        }
        # Scenes take priority over single-device commands.
        for scene_type, keywords in scene_keywords.items():
            if any(keyword in text_lower for keyword in keywords):
                return {
                    "intent": "IOT_SCENE",
                    "category": "IOT",
                    "complexity": "COMPLEX",
                    "entities": {
                        "scene_type": scene_type,
                        "action": "trigger"
                    },
                    "canExecute": True
                }
        # Single-device control.
        for device, device_words in device_keywords.items():
            if any(word in text_lower for word in device_words):
                for action, action_words in action_keywords.items():
                    if any(word in text_lower for word in action_words):
                        return {
                            "intent": "IOT_DEVICE_CONTROL",
                            "category": "IOT",
                            "complexity": "SIMPLE",
                            "entities": {
                                "device": device,
                                "action": action
                            },
                            "canExecute": True
                        }
        # Default: conversational.
        return {
            "intent": "CONVERSATIONAL",
            "category": "OTHER",
            "complexity": "CONVERSATIONAL",
            "entities": {},
            "canExecute": False,
            "reply": "抱歉,我没有理解您的指令。请再说一遍。"
        }
# ====================== 6. LangGraph多智能体实现 ======================
class SmartHomeState(BaseModel):
    """Mutable pipeline state threaded through every LangGraph node."""

    # --- Input ---
    user_input: str = ""  # typed/injected text command (empty when audio is supplied)
    raw_audio: Optional[bytes] = None
    # --- Intermediate results ---
    wake_word_detected: bool = False
    transcribed_text: str = ""  # ASR output
    embedding: List[float] = Field(default_factory=list)  # BGE sentence vector
    scene_classification: Dict[str, Any] = Field(default_factory=dict)
    intent_result: Dict[str, Any] = Field(default_factory=dict)
    # --- Device control ---
    target_devices: List[str] = Field(default_factory=list)
    device_commands: List[Dict[str, Any]] = Field(default_factory=list)
    execution_results: List[Dict[str, Any]] = Field(default_factory=list)
    # --- Output ---
    response_text: str = ""
    response_audio: Optional[bytes] = None
    iot_actions: List[Dict[str, Any]] = Field(default_factory=list)
    # --- Context ---
    conversation_history: List[Dict[str, Any]] = Field(default_factory=list)
    home_context: Dict[str, Any] = Field(default_factory=dict)

    class Config:
        # Allow nodes to attach ad-hoc fields (e.g. "wake_word").
        extra = "allow"
class SmartHomeAgent:
    """Abstract base for all pipeline agents.

    Generalized: the original always constructed a private SmartHomeManager,
    so every agent operated on its own *empty* device registry (the devices
    registered by SmartHomeVoiceAssistant live on a different manager).
    An existing manager can now be injected; the default keeps the old
    behavior so all current callers are unaffected.
    """

    def __init__(self, agent_id: str, home_manager: "SmartHomeManager" = None):
        self.agent_id = agent_id
        # NOTE(review): pass a shared manager so device-control agents see
        # the registered devices — TODO wire this up in the graph builder.
        self.home_manager = home_manager if home_manager is not None else SmartHomeManager()

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Return a dict of state fields to update; subclasses must override."""
        raise NotImplementedError
class WakeWordDetector(SmartHomeAgent):
    """Detects a wake word in the incoming command."""

    def __init__(self):
        super().__init__("wake_word_detector")
        self.wake_words = ["小爱", "hey siri", "ok google", "alexa"]

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Check the available text for a known wake word.

        Fixed: the original only ran the check when raw audio was present,
        so text-only input could never be marked as awakened even when it
        contained a wake word. The check now runs on whatever text is
        available. A real deployment would detect on the audio stream
        itself rather than on text.
        """
        text = (state.user_input or "").lower()
        if text:
            if any(wake_word in text for wake_word in self.wake_words):
                print(f"[WakeWordDetector] 检测到唤醒词: {text}")
                return {"wake_word_detected": True, "wake_word": text}
        return {"wake_word_detected": False}
class SpeechRecognizer(SmartHomeAgent):
    """ASR agent: turns raw audio into text (mocked)."""

    def __init__(self):
        super().__init__("speech_recognizer")
        # A real implementation would load an ASR model (VOSK, Whisper, ...).

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Transcribe `state.raw_audio`; no-op when there is no audio."""
        if not state.raw_audio:
            return {}
        print(f"[SpeechRecognizer] 处理音频,长度: {len(state.raw_audio)}")
        # Canned transcription standing in for a real ASR model call.
        return {"transcribed_text": "打开客厅的灯"}
class SceneClassifierAgent(SmartHomeAgent):
    """Runs coarse scene classification over the recognized text."""

    def __init__(self, bge_model: BGEEmbeddingModel):
        super().__init__("scene_classifier")
        self.classifier = SceneClassifier(bge_model)

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Classify the utterance and attach its embedding to the state."""
        text = state.transcribed_text or state.user_input
        if not text:
            return {}
        classification = await self.classifier.classify(text)
        # NOTE(review): this embeds the text a second time — classify()
        # already embedded it internally; with a real model this doubles
        # inference cost. Consider having classify() return the embedding.
        vector = await self.classifier.bge_model.embed_text(text)
        return {
            "scene_classification": classification,
            "embedding": vector
        }
class IntentClassifierAgent(SmartHomeAgent):
    """Delegates the utterance to the LLM intent classifier."""

    def __init__(self, llm_client: Any):
        super().__init__("intent_classifier")
        self.classifier = IntentClassifier(llm_client)

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Classify intent using conversation history and home context."""
        text = state.transcribed_text or state.user_input
        if not text:
            return {}
        intent = await self.classifier.classify_intent(
            text,
            {
                "conversation_history": state.conversation_history,
                "home_context": state.home_context,
            },
        )
        return {"intent_result": intent}
class DeviceControlAgent(SmartHomeAgent):
    """Translates a classified intent into scene/device operations."""

    def __init__(self):
        super().__init__("device_control")
        # NOTE(review): this agent's home_manager has no registered devices —
        # the assistant registers devices on a *different* manager instance.
        # Inject a shared manager for device control to actually work.

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Execute the intent (scene trigger or single-device command)."""
        intent_result = state.intent_result
        if not intent_result:
            return {}
        intent = intent_result.get("intent")
        entities = intent_result.get("entities", {})
        actions = []
        results = []
        if intent == "IOT_SCENE":
            # Scene trigger (e.g. "home" / "away").
            scene_type = entities.get("scene_type")
            if scene_type:
                result = await self.home_manager.execute_scene(SceneMode(scene_type))
                actions.append({
                    "type": "scene",
                    "scene": scene_type,
                    "result": result
                })
                results.append(result)
        elif intent == "IOT_DEVICE_CONTROL":
            # Single-device command; match by device type value or display name.
            device = entities.get("device")
            action = entities.get("action")
            target_device = None
            for dev in self.home_manager.devices.values():
                if dev.device_type.value == device or dev.name == device:
                    target_device = dev
                    break
            if target_device and action:
                # Fixed: the original left `result` unbound (NameError) for
                # any action other than turn_on/turn_off.
                if action == "turn_on":
                    result = await target_device.turn_on()
                elif action == "turn_off":
                    result = await target_device.turn_off()
                else:
                    result = {"success": False, "device": target_device.name,
                              "error": f"unsupported action: {action}"}
                actions.append({
                    "type": "device",
                    "device": target_device.name,
                    "action": action,
                    "result": result
                })
                results.append(result)
        return {
            "iot_actions": actions,
            "execution_results": results,
            "response_text": self.generate_response(intent_result, results)
        }

    def generate_response(self, intent_result: Dict, results: List[Dict]) -> str:
        """Build a short Chinese confirmation sentence for the user."""
        intent = intent_result.get("intent")
        if intent == "IOT_SCENE":
            scene_type = intent_result.get("entities", {}).get("scene_type")
            if scene_type == "home":
                return "欢迎回家!已为您打开回家模式。"
            elif scene_type == "away":
                return "已开启离家模式,祝您出行愉快!"
        elif intent == "IOT_DEVICE_CONTROL":
            device = intent_result.get("entities", {}).get("device")
            action = intent_result.get("entities", {}).get("action")
            if device and action:
                device_names = {
                    "light": "灯",
                    "socket": "插座",
                    "ac": "空调"
                }
                action_names = {
                    "turn_on": "打开",
                    "turn_off": "关闭"
                }
                device_cn = device_names.get(device, device)
                action_cn = action_names.get(action, action)
                return f"已{action_cn}{device_cn}"
        # Conversational intents echo the LLM's reply; otherwise a generic ack.
        if intent_result.get("complexity") == "CONVERSATIONAL":
            return intent_result.get("reply", "我在听")
        return "操作已执行"
class ConversationManager(SmartHomeAgent):
    """Maintains the rolling user/assistant conversation history on the state."""

    def __init__(self, llm_client: Any):
        super().__init__("conversation_manager")
        self.llm = llm_client

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Append the latest user/assistant turns and trim history to 10 entries.

        Fixed: the original stamped every entry with a hard-coded placeholder
        timestamp ("2026-01-31 ..."); entries now carry the actual wall-clock
        time.
        """
        user_text = state.transcribed_text or state.user_input
        assistant_text = state.response_text
        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        if user_text:
            state.conversation_history.append({
                "role": "user",
                "content": user_text,
                "timestamp": now
            })
        if assistant_text:
            state.conversation_history.append({
                "role": "assistant",
                "content": assistant_text,
                "timestamp": now
            })
        # Keep only the 10 most recent entries.
        if len(state.conversation_history) > 10:
            state.conversation_history = state.conversation_history[-10:]
        return {"conversation_history": state.conversation_history}
class TTSAgent(SmartHomeAgent):
    """Text-to-speech agent (mocked)."""

    def __init__(self):
        super().__init__("tts_agent")

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Synthesize audio for the response text; no-op when there is none.

        Fixed: the original returned b"模拟音频数据" — a bytes literal with
        non-ASCII characters, which is a SyntaxError in Python 3 and made
        the whole module unimportable. The placeholder is now encoded
        explicitly.
        """
        if not state.response_text:
            return {}
        print(f"[TTSAgent] 生成语音: {state.response_text}")
        # A real implementation would call a TTS model (VITS, Tacotron, ...).
        return {"response_audio": "模拟音频数据".encode("utf-8")}
# ====================== 7. 构建LangGraph ======================
class SmartHomeGraphBuilder:
    """Wires the agent nodes into a LangGraph StateGraph."""

    def __init__(self):
        self.graph = StateGraph(SmartHomeState)

    def build_graph(self) -> StateGraph:
        """Build the voice-command processing graph.

        Fixes vs. the original:
        - MockLLM now actually mirrors the OpenAI client surface
          (`llm.chat.completions.create(...)`). The original defined `chat`
          and `completions` as coroutine *methods*, so the attribute chain
          raised AttributeError and every call silently fell back to the
          rule-based classifier.
        - The unconditional wake_word_detection -> speech_recognition edge
          is removed; it conflicted with the conditional edges added from
          the same node (both routes fired). Routing is now decided solely
          by should_process_speech.
        """
        # Agent instances (real code would use dependency injection).
        wake_word_detector = WakeWordDetector()
        speech_recognizer = SpeechRecognizer()
        bge_model = BGEEmbeddingModel(
            model_path_en="models/bge_en.onnx",
            model_path_zh="models/bge_zh.onnx",
            vocab_path_en="vocab/en_vocab.txt",
            vocab_path_zh="vocab/zh_vocab.txt"
        )
        scene_classifier = SceneClassifierAgent(bge_model)

        class MockLLM:
            """Stand-in for an OpenAI-style client; returns one canned intent."""

            class _Completions:
                async def create(self, **kwargs):
                    class MockResponse:
                        class Choice:
                            class Message:
                                content = '{"intent": "IOT_SCENE", "category": "IOT", "complexity": "COMPLEX", "entities": {"scene_type": "away", "action": "trigger"}, "canExecute": true}'
                            message = Message()
                        choices = [Choice()]
                    return MockResponse()

            class _Chat:
                def __init__(self):
                    self.completions = MockLLM._Completions()

            def __init__(self):
                self.chat = MockLLM._Chat()

        llm_client = MockLLM()
        intent_classifier = IntentClassifierAgent(llm_client)
        device_control = DeviceControlAgent()
        conversation_manager = ConversationManager(llm_client)
        tts_agent = TTSAgent()

        # Nodes
        self.graph.add_node("wake_word_detection", self.create_node_func(wake_word_detector))
        self.graph.add_node("speech_recognition", self.create_node_func(speech_recognizer))
        self.graph.add_node("scene_classification", self.create_node_func(scene_classifier))
        self.graph.add_node("intent_classification", self.create_node_func(intent_classifier))
        self.graph.add_node("device_control", self.create_node_func(device_control))
        self.graph.add_node("conversation_management", self.create_node_func(conversation_manager))
        self.graph.add_node("tts_generation", self.create_node_func(tts_agent))

        # Edges
        self.graph.set_entry_point("wake_word_detection")
        self.graph.add_conditional_edges(
            "wake_word_detection",
            self.should_process_speech,
            {
                "process": "speech_recognition",
                "skip": END
            }
        )
        self.graph.add_edge("speech_recognition", "scene_classification")
        self.graph.add_edge("scene_classification", "intent_classification")
        self.graph.add_edge("intent_classification", "device_control")
        self.graph.add_edge("device_control", "conversation_management")
        self.graph.add_edge("conversation_management", "tts_generation")
        self.graph.add_edge("tts_generation", END)
        return self.graph

    def create_node_func(self, agent: SmartHomeAgent):
        """Wrap an agent into an async node function that merges its updates."""
        async def node_func(state: SmartHomeState) -> SmartHomeState:
            result = await agent.process(state)
            for key, value in result.items():
                setattr(state, key, value)
            return state
        return node_func

    def should_process_speech(self, state: SmartHomeState) -> str:
        """Route past wake-word detection.

        Proceed when a wake word was heard, or when the command arrived as
        plain text without audio (a directly-injected text command implies
        the assistant is already active — this preserves the original
        end-to-end behavior where text commands were always processed).
        """
        if state.wake_word_detected:
            return "process"
        if state.user_input and not state.raw_audio:
            return "process"
        return "skip"
# ====================== 8. 边缘计算部署方案 ======================
class EdgeDeploymentManager:
    """Static catalog of edge-deployment model choices and device guides."""

    def __init__(self):
        self.models = {}
        self.devices = {}

    def load_models_for_edge(self):
        """Print and return the lightweight model catalog for edge devices."""
        catalog = {
            "asr": {
                "name": "whisper-tiny",
                "format": "onnx",
                "size": "75MB",
                "language": ["zh", "en"],
            },
            "embedding": {
                "name": "bge-m3-small",
                "format": "onnx",
                "size": "150MB",
                "dimension": 768,
            },
            "llm": {
                "name": "qwen-1.8b",
                "format": "onnx",
                "size": "1.8GB",
                "quantization": "int8",
            },
            "tts": {
                "name": "vits-zh",
                "format": "onnx",
                "size": "50MB",
            },
        }
        print("加载边缘计算模型...")
        for model_type, config in catalog.items():
            print(f" {model_type}: {config['name']} ({config['size']})")
        return catalog

    def optimize_for_edge(self):
        """Return the edge-side optimization targets (documentation only)."""
        return {
            "cpu_usage": "限制在80%以下",
            "memory": "使用内存映射减少RAM占用",
            "power": "动态频率调整以省电",
            "network": "本地优先,断网仍可用",
            "latency": "目标响应时间<500ms",
        }

    def deploy_to_device(self, device_type: str):
        """Return the deployment guide for a device type ({} when unknown)."""
        guides = {
            "raspberry_pi": {
                "os": "Raspbian/Raspberry Pi OS",
                "python": "3.9+",
                "dependencies": [
                    "onnxruntime",
                    "numpy",
                    "pyaudio",
                    "langgraph",
                ],
                "setup": [
                    "1. 安装操作系统",
                    "2. 安装Python依赖",
                    "3. 下载模型文件到本地",
                    "4. 配置智能家居设备连接",
                    "5. 设置开机自启动",
                ],
            },
            "android_tv": {
                "os": "Android 9.0+",
                "runtime": "Termux或Android NDK",
                "dependencies": ["onnxruntime-android"],
                "setup": [
                    "1. 安装Termux",
                    "2. 在Termux中安装Python",
                    "3. 将应用打包为APK",
                    "4. 申请必要的权限",
                ],
            },
            "smart_speaker": {
                "os": "Linux嵌入式系统",
                "sdk": "厂商SDK",
                "integration": "通过HTTP/MQTT与主系统通信",
            },
        }
        return guides.get(device_type, {})
# ====================== 9. 语音助手集成方案 ======================
class VoiceAssistantIntegration:
    """Documentation-style catalog of voice-assistant/LLM integration options."""

    def __init__(self):
        self.tts_engine = None
        self.asr_engine = None

    def integration_patterns(self):
        """Return the three supported integration patterns."""
        return {
            "pattern_1": {
                "name": "完全边缘计算",
                "description": "所有模型在设备本地运行",
                "pros": ["隐私性好", "离线可用", "低延迟"],
                "cons": ["设备要求高", "模型能力有限"],
                "适用设备": "高端智能音箱、家庭服务器",
            },
            "pattern_2": {
                "name": "边缘+云端协同",
                "description": "简单任务本地处理,复杂任务云端处理",
                "pros": ["平衡性能与成本", "可扩展性强"],
                "cons": ["需要网络", "隐私风险"],
                "适用设备": "大多数智能家居设备",
            },
            "pattern_3": {
                "name": "分层处理架构",
                "description": """
第一层:唤醒词检测(始终本地)
第二层:简单命令识别(本地LLM)
第三层:复杂对话(云端大模型)
""",
                "适用场景": "混合型智能助手",
            },
        }

    def speech_pipeline(self):
        """Return the six-stage speech processing pipeline description."""
        return [
            {
                "step": "语音采集",
                "技术": "麦克风阵列、波束成形、回声消除",
                "输出": "PCM音频流",
            },
            {
                "step": "唤醒词检测",
                "技术": "Snowboy、Porcupine、自定义模型",
                "输出": "是否唤醒",
            },
            {
                "step": "语音识别(ASR)",
                "技术": "Whisper、VOSK、DeepSpeech",
                "输出": "文本",
            },
            {
                "step": "语义理解(NLU)",
                "技术": "BGE嵌入 + LLM意图分类",
                "输出": "结构化意图",
            },
            {
                "step": "任务执行",
                "技术": "LangGraph多智能体",
                "输出": "操作结果",
            },
            {
                "step": "语音合成(TTS)",
                "技术": "VITS、Tacotron、Edge-TTS",
                "输出": "语音响应",
            },
        ]

    def llm_usage_strategies(self):
        """Return the LLM sizing/placement strategies."""
        return {
            "intent_classification": {
                "模型": "小型模型(1-3B参数)",
                "部署": "本地ONNX",
                "任务": "仅分类意图,不生成对话",
            },
            "conversation": {
                "模型": "云端大模型(70B+)",
                "部署": "API调用",
                "任务": "复杂对话、知识问答",
            },
            "hybrid": {
                "模型": "本地小模型 + 云端大模型",
                "策略": "本地处理简单请求,复杂请求转发云端",
            },
        }
# ====================== 10. 完整系统入口 ======================
class SmartHomeVoiceAssistant:
    """Top-level facade: wires devices, the LangGraph pipeline and model config."""

    def __init__(self, deployment_mode: str = "edge"):
        self.deployment_mode = deployment_mode
        self.graph_builder = SmartHomeGraphBuilder()
        self.home_manager = SmartHomeManager()
        self.edge_manager = EdgeDeploymentManager()
        self.integration = VoiceAssistantIntegration()
        # Register the demo device fleet.
        self._initialize_devices()
        # Build and compile the processing graph.
        self.graph = self.graph_builder.build_graph()
        self.compiled_graph = self.graph.compile()
        # Edge model catalog.
        self.model_config = self.edge_manager.load_models_for_edge()

    def _initialize_devices(self):
        """Register the demo devices on this assistant's manager."""
        devices = [
            SmartDevice("living_room_light", DeviceType.LIGHT, "客厅灯", "living_room"),
            SmartDevice("bedroom_light", DeviceType.LIGHT, "卧室灯", "bedroom"),
            SmartDevice("living_room_socket", DeviceType.SOCKET, "客厅插座", "living_room"),
            SmartDevice("kitchen_socket", DeviceType.SOCKET, "厨房插座", "kitchen"),
            SmartDevice("ac", DeviceType.THERMOSTAT, "空调", "living_room")
        ]
        for device in devices:
            self.home_manager.register_device(device)

    async def process_voice_command(self, audio_data: Optional[bytes] = None,
                                    text: Optional[str] = None):
        """Run one command (audio bytes OR plain text) through the graph."""
        state = SmartHomeState()
        if audio_data:
            state.raw_audio = audio_data
            state.user_input = ""  # filled in later by ASR
        elif text:
            state.user_input = text
            state.raw_audio = None
        result = await self.compiled_graph.ainvoke(state)
        self.log_command(result)
        return result

    def log_command(self, state: SmartHomeState):
        """Emit a structured JSON log line for one processed command.

        Fixed: the original logged a hard-coded placeholder timestamp; the
        actual wall-clock time (millisecond precision) is used now.
        NOTE(review): depending on the langgraph version, ainvoke may return
        a dict rather than a SmartHomeState — attribute access here assumes
        the model instance; verify against the installed langgraph.
        """
        log_entry = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
            "input": state.user_input or "[语音输入]",
            "wake_word": state.wake_word_detected,
            "scene": state.scene_classification.get("scene"),
            "intent": state.intent_result.get("intent"),
            "response": state.response_text,
            "actions": state.iot_actions
        }
        print(f"[系统日志] {json.dumps(log_entry, ensure_ascii=False, indent=2)}")

    def get_system_info(self):
        """Summarize deployment mode, devices, scenes, models and optimizations."""
        return {
            "deployment_mode": self.deployment_mode,
            "devices_count": len(self.home_manager.devices),
            "scenes_available": list(self.home_manager.scenes.keys()),
            "models_loaded": list(self.model_config.keys()),
            "optimizations": self.edge_manager.optimize_for_edge()
        }
# ====================== 11. 使用示例和部署脚本 ======================
async def main():
    """Demo entry point: build the assistant and run a batch of sample commands."""
    banner = "=" * 60
    print(banner)
    print("智能家居语音助手系统")
    print(banner)

    # 1. Create the assistant.
    assistant = SmartHomeVoiceAssistant(deployment_mode="edge")

    # 2. System configuration.
    print("\n系统配置:")
    for key, value in assistant.get_system_info().items():
        print(f" {key}: {value}")

    # 3. Integration patterns.
    print("\n集成模式:")
    for _, pattern in assistant.integration.integration_patterns().items():
        print(f"\n{pattern['name']}:")
        print(f" {pattern['description']}")

    # 4. Sample voice commands.
    print("\n测试语音命令处理...")
    sample_commands = [
        "小兴,小兴,打开客厅的灯",
        "关闭卧室灯",
        "我要出门了",
        "我回家了",
        "打开空调",
        "今天的天气怎么样"
    ]
    for command in sample_commands:
        print(f"\n> 用户: {command}")
        result = await assistant.process_voice_command(text=command)
        print(f"< 助手: {result.response_text}")
        if result.iot_actions:
            print(f" 执行操作: {result.iot_actions}")

    # 5. Deployment guide.
    print("\n" + banner)
    print("边缘计算部署指南")
    print(banner)
    pi_guide = EdgeDeploymentManager().deploy_to_device("raspberry_pi")
    print("\n树莓派部署步骤:")
    for step in pi_guide.get("setup", []):
        print(f" {step}")
if __name__ == "__main__":
    # NOTE(review): asyncio is already imported at module top; this re-import
    # is redundant but harmless.
    import asyncio
    # Run the interactive demo defined in main().
    asyncio.run(main())
    # Print a copy-paste deployment script for Raspberry Pi / ARM devices.
    # (The script body below is a runtime string and is kept verbatim.)
    print("\n" + "="*60)
    print("自动部署脚本")
    print("="*60)
    deploy_script = """#!/bin/bash
# 智能家居语音助手部署脚本
# 适用于树莓派/ARM设备
echo "安装系统依赖..."
sudo apt-get update
sudo apt-get install -y python3.9 python3-pip python3-venv
sudo apt-get install -y portaudio19-dev libasound2-dev
echo "创建Python虚拟环境..."
python3 -m venv ~/smart_home_env
source ~/smart_home_env/bin/activate
echo "安装Python包..."
pip install --upgrade pip
pip install langgraph onnxruntime pyaudio numpy
echo "创建项目目录..."
mkdir -p ~/smart_home_assistant/{models,config,logs}
echo "下载模型文件..."
# 这里应该从服务器下载预训练模型
# wget -O ~/smart_home_assistant/models/bge.onnx https://example.com/models/bge.onnx
# wget -O ~/smart_home_assistant/models/whisper.onnx https://example.com/models/whisper.onnx
echo "配置服务..."
cat > ~/smart_home_assistant/config/settings.yaml << EOF
# 智能家居配置
mqtt:
broker: "localhost"
port: 1883
username: "admin"
password: "password"
devices:
- name: "living_room_light"
type: "light"
location: "living_room"
controller: "tuya"
device_id: "xxx"
- name: "smart_socket"
type: "socket"
location: "kitchen"
controller: "tuya"
device_id: "yyy"
EOF
echo "创建启动脚本..."
cat > ~/smart_home_assistant/start.sh << 'EOS'
#!/bin/bash
source ~/smart_home_env/bin/activate
cd ~/smart_home_assistant
python main.py --mode=edge --log-level=INFO
EOS
chmod +x ~/smart_home_assistant/start.sh
echo "设置开机自启..."
sudo cat > /etc/systemd/system/smart-home.service << EOF
[Unit]
Description=Smart Home Voice Assistant
After=network.target
[Service]
Type=simple
User=$USER
WorkingDirectory=/home/$USER/smart_home_assistant
ExecStart=/bin/bash /home/$USER/smart_home_assistant/start.sh
Restart=always
[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable smart-home.service
echo "部署完成!"
echo "启动服务: sudo systemctl start smart-home.service"
echo "查看日志: sudo journalctl -u smart-home.service -f"
"""
    print(deploy_script)
# ----------------------------------------------------------------------
# 概述(原文为置于文件末尾的说明性文字,现以注释形式保留,以免破坏Python语法):
#
# 本文介绍了一个基于LangGraph的智能家居语音助手系统实现。该系统采用多智能体架构,包含以下核心模块:
# - 设备管理模块:定义了智能设备基类(SmartDevice)和家居管理器(SmartHomeManager),支持设备注册和场景控制功能。
# - 语音处理流水线:包含唤醒词检测、语音识别、场景分类、意图识别等处理节点,使用BGE嵌入模型和LLM进行语义理解。
# - LangGraph实现:通过状态图(StateGraph)将各智能体连接成处理流程,包括WakeWordDetector、SpeechRecognizer等节点类。
# - 边缘计算部署:提供了模型优化方案和针对不同设备的部署指南。
#
# 系统支持中文和英文语音指令,能够处理"打开灯"、"我回家了"等智能家居控制场景,并生成语音响应。
# 文章还包含完整的系统架构设计、类定义和使用示例。
# ----------------------------------------------------------------------