智能家居多智能体系统LangGraph实现

以下为 Python 实现代码:
# ====================== 1. 核心状态和模型定义 ======================
# Standard library
import asyncio
import json
from datetime import datetime
from enum import Enum
from typing import Dict, List, Any, Optional, Literal, TypedDict

# Third-party
from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field

class DeviceType(str, Enum):
    """Categories of smart-home devices supported by the system."""
    LIGHT = "light"
    SOCKET = "socket"
    THERMOSTAT = "thermostat"
    CAMERA = "camera"
    CURTAIN = "curtain"

class DeviceState(str, Enum):
    """Coarse device power/brightness states.

    NOTE(review): DIM/BRIGHT look like brightness levels folded into the
    same enum as the on/off power state — confirm intended semantics.
    """
    ON = "on"
    OFF = "off"
    DIM = "dim"
    BRIGHT = "bright"

class SceneMode(str, Enum):
    """Named home-automation scene presets."""
    HOME = "home"
    AWAY = "away"
    SLEEP = "sleep"
    MOVIE = "movie"
    PARTY = "party"

class VoiceCommandState(TypedDict):
    """Pipeline state for processing a single voice command."""
    # Raw input
    raw_text: str
    wake_word_detected: bool
    timestamp: str
    
    # Processing results
    embedding: Optional[List[float]]
    scene_classification: Optional[str]
    intent_result: Optional[Dict[str, Any]]
    
    # Device control
    target_devices: List[Dict[str, Any]]
    device_commands: List[Dict[str, Any]]
    execution_results: List[Dict[str, Any]]
    
    # Conversation context
    conversation_history: List[Dict[str, Any]]
    user_preferences: Dict[str, Any]
    home_context: Dict[str, Any]  # household status: who is home, time of day, etc.


# ====================== 2. 智能家居设备管理 ======================
class SmartDevice:
    """Base class for a controllable smart-home device.

    Holds identity/location metadata plus a simple power state; the
    async control methods stand in for real IoT API calls and only log.
    """

    def __init__(self, device_id: str, device_type: DeviceType,
                 name: str, location: str):
        self.device_id = device_id
        self.device_type = device_type
        self.name = name
        self.location = location
        # A freshly created device is powered off with no extra properties.
        self.state = DeviceState.OFF
        self.properties = {}

    async def turn_on(self):
        """Power the device on (simulated IoT call)."""
        self.state = DeviceState.ON
        print(f"[IoT] 设备 {self.name} 打开")
        return {"success": True, "device": self.name, "state": "on"}

    async def turn_off(self):
        """Power the device off (simulated IoT call)."""
        self.state = DeviceState.OFF
        print(f"[IoT] 设备 {self.name} 关闭")
        return {"success": True, "device": self.name, "state": "off"}

    async def set_property(self, prop: str, value: Any):
        """Record a named property value on the device."""
        self.properties[prop] = value
        return {"success": True, "property": prop, "value": value}


class SmartHomeManager:
    """Registry of smart devices plus named scene presets.

    A scene is a list of action dicts ({"device": ..., "action": ...} plus
    optional extra keys such as "value"). A device id of the form
    "all_<type>s" fans out to every registered device of that type.
    """

    def __init__(self):
        self.devices: Dict[str, SmartDevice] = {}
        self.scenes: Dict[SceneMode, List[Dict]] = {
            SceneMode.HOME: [
                {"device": "living_room_light", "action": "turn_on", "brightness": 80},
                {"device": "living_room_socket", "action": "turn_on"},
                {"device": "ac", "action": "set_temperature", "value": 23}
            ],
            SceneMode.AWAY: [
                {"device": "all_lights", "action": "turn_off"},
                {"device": "all_sockets", "action": "turn_off"},
                {"device": "security_camera", "action": "turn_on"}
            ]
        }

    def register_device(self, device: SmartDevice):
        """Register a device under its device_id (overwrites duplicates)."""
        self.devices[device.device_id] = device

    async def _apply_action(self, device: SmartDevice, action: Dict) -> Optional[Dict]:
        """Run one scene action on one device; return None for unsupported actions."""
        name = action["action"]
        if name == "turn_on":
            return await device.turn_on()
        if name == "turn_off":
            return await device.turn_off()
        if name.startswith("set_") and "value" in action:
            # e.g. {"action": "set_temperature", "value": 23} -> property "temperature"
            return await device.set_property(name[len("set_"):], action["value"])
        return None

    async def execute_scene(self, scene_mode: SceneMode):
        """Execute every action of a scene and collect per-action results.

        Bug fixes vs. the original:
        - unsupported actions (e.g. "set_temperature") appended an
          unbound/stale `result`; they are now dispatched via _apply_action
          and skipped when unsupported;
        - "all_lights"/"all_sockets" never matched DeviceType values
          ("light"/"socket") because the plural "s" was not stripped.
        """
        if scene_mode not in self.scenes:
            return {"success": False, "error": f"场景 {scene_mode} 不存在"}

        results = []
        for action in self.scenes[scene_mode]:
            device_id = action["device"]
            if device_id.startswith("all_"):
                # Fan out to all devices of the (singularized) type.
                device_type = device_id[len("all_"):].removesuffix("s")
                targets = [d for d in self.devices.values()
                           if d.device_type.value == device_type]
            else:
                device = self.devices.get(device_id)
                targets = [device] if device else []

            for device in targets:
                result = await self._apply_action(device, action)
                if result is not None:
                    results.append(result)

        return {"success": True, "scene": scene_mode, "results": results}


# ====================== 3. BGE嵌入模型包装器 ======================
class BGEEmbeddingModel:
    """BGE embedding model wrapper (ONNX inference is mocked)."""

    def __init__(self, model_path_en: str, model_path_zh: str,
                 vocab_path_en: str, vocab_path_zh: str):
        # One tokenizer per language; a real implementation would also
        # load the ONNX models given by model_path_en / model_path_zh.
        self.tokenizer_en = SimpleTokenizer(vocab_path_en, language="en")
        self.tokenizer_zh = SimpleTokenizer(vocab_path_zh, language="zh")

    async def embed_text(self, text: str, language: str = "auto") -> List[float]:
        """Tokenize *text* and run (mock) inference; auto-detects language."""
        print(f"[BGEModel] 开始文本嵌入,输入文本: {text}")

        lang = self.detect_language(text) if language == "auto" else language
        # Anything that is not explicitly English goes through the zh path.
        if lang == "en":
            tokenizer, lang = self.tokenizer_en, "en"
        else:
            tokenizer, lang = self.tokenizer_zh, "zh"

        tokens = await tokenizer.tokenize(text)
        return await self.run_inference(tokens, language=lang)

    def detect_language(self, text: str) -> str:
        """Return "zh" if any CJK unified ideograph is present, else "en"."""
        if any('\u4e00' <= ch <= '\u9fff' for ch in text):
            return "zh"
        return "en"

    async def run_inference(self, tokens: List[int], language: str):
        """Mock ONNX inference: returns a random 768-dim vector."""
        print(f"[BGEModel] 运行{language}模型推理,tokens长度: {len(tokens)}")
        import numpy as np
        return np.random.randn(768).tolist()


class SimpleTokenizer:
    """Toy tokenizer mirroring the logged implementation (no real vocab lookup)."""

    def __init__(self, vocab_path: str, language: str):
        self.language = language
        # BERT-style vocab sizes: 30522 for English, 21128 for Chinese.
        self.vocab_size = 30522 if language == "en" else 21128
        print(f"[SimpleTokenizer] 初始化,语言: {language}, 词汇表大小: {self.vocab_size}")

    async def tokenize(self, text: str) -> List[int]:
        """Return [CLS] + pseudo token ids + [SEP] for *text*."""
        print(f"[SimpleTokenizer] 开始分词,输入文本: {text}")

        if self.language == "en":
            # Word-level pseudo ids (no real vocabulary mapping).
            body = [hash(word) % 1000 + 1000 for word in text.lower().split()]
        else:
            # Character-level pseudo ids.
            body = [ord(char) % 1000 + 1000 for char in text]

        tokens = [101, *body, 102]  # 101 = CLS, 102 = SEP
        print(f"[SimpleTokenizer] 分词结果token数量: {len(tokens)}")
        return tokens


# ====================== 4. 场景分类器 ======================
class SceneClassifier:
    """Scene classifier based on BGE embeddings and cosine similarity."""

    def __init__(self, bge_model: BGEEmbeddingModel):
        self.bge_model = bge_model
        self.scene_vectors = self.load_scene_vectors()

    def load_scene_vectors(self) -> Dict[str, List[float]]:
        """Return precomputed (mock) vectors for the known scene labels."""
        scenes = {
            "IOT": "智能家居控制场景",
            "MUSIC": "音乐播放场景", 
            "WEATHER": "天气查询场景",
            "CONVERSATION": "对话场景",
            "SETTING": "设置场景"
        }
        # NOTE(review): every scene gets the identical mock vector, so all
        # similarity scores tie and max() picks the first key — fine for a
        # demo, but real scene vectors must differ per scene.
        return {scene: [0.1 * i for i in range(768)] for scene in scenes}

    async def classify(self, text: str) -> Dict[str, Any]:
        """Embed *text* and score it against each scene vector."""
        print(f"[SceneClassifier] 场景分类开始: input='{text}'")

        embedding = await self.bge_model.embed_text(text)

        # Cosine similarity against every candidate scene.
        scene_scores = {
            name: self.cosine_similarity(embedding, vector)
            for name, vector in self.scene_vectors.items()
        }

        best_scene = max(scene_scores, key=scene_scores.get)
        print(f"[SceneClassifier] 场景初筛完成,场景: {best_scene}, 置信度: {scene_scores[best_scene]:.4f}")

        return {
            "scene": best_scene,
            "confidence": scene_scores[best_scene],
            "all_scores": scene_scores
        }

    def cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Cosine similarity of two equal-length vectors."""
        import numpy as np
        a, b = np.array(vec1), np.array(vec2)
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


# ====================== 5. LLM意图分类器 ======================
class IntentClassifier:
    """LLM-backed intent classifier that demands strict JSON output.

    Falls back to keyword rules when the LLM call or JSON parse fails.
    """

    def __init__(self, llm_client: Any):
        self.llm = llm_client
        self.system_prompt = self.build_system_prompt()

    def build_system_prompt(self) -> str:
        """Return the strict-JSON system prompt (simplified version extracted from logs)."""
        return """你是一个专业的语音助手意图分类器。你的任务是分析用户语音命令,准确识别意图类别,并提取关键实体信息。

【重要:严格的JSON格式要求】
你只能输出一个有效的JSON对象。不能有任何自然语言、解释、操作反馈或对话内容。
你的响应必须以{开始,以}结束,必须可以被JSON解析器解析。

意图类别:
1. IOT_SCENE - 智能家居场景控制(如:"打开灯"、"我回家了"、"关闭插座")
2. CONVERSATIONAL - 对话聊天(如:"你好"、"谢谢")
3. SETTING - 设置调整(如:"调大音量")
4. QUERY - 信息查询(如:"天气怎么样")

输出格式:
{
  "intent": "意图类型",
  "category": "类别",
  "complexity": "SIMPLE|COMPLEX|CONVERSATIONAL",
  "entities": {
    "action": "操作类型",
    "device": "设备名称",
    "scene_type": "场景类型"
  },
  "canExecute": true/false,
  "reply": "仅当需要回复时的回复内容"
}

重要规则:
- 当用户说"我回家了"、"我要出门了"时,必须识别为IOT_SCENE
- 当可以立即执行时,canExecute=true
- 当需要更多信息时,canExecute=false,并在reply中询问
- 对话意图必须包含reply字段
"""

    async def classify_intent(self, text: str, context: Dict = None) -> Dict[str, Any]:
        """Classify *text*, using recent conversation history as context.

        Bug fix: the original appended the history AFTER the current user
        message (messages = [system, user] + history), so the model saw the
        turns out of order. History now precedes the current user turn.
        """
        print(f"[IntentClassifier] 意图分类开始: input='{text}'")

        messages = [{"role": "system", "content": self.system_prompt}]
        # Recent history first, then the current utterance.
        if context and context.get("conversation_history"):
            messages.extend(context["conversation_history"][-3:])  # last 3 turns
        messages.append({"role": "user", "content": text})

        try:
            response = await self.llm.chat.completions.create(
                model="gpt-3.5-turbo",  # or a local model
                messages=messages,
                temperature=0.1,
                response_format={"type": "json_object"}
            )

            result_text = response.choices[0].message.content
            result = json.loads(result_text)

            print(f"[IntentClassifier] 意图分类结果: {result}")
            return result

        except Exception as e:
            # Any transport/parse failure degrades to keyword rules.
            print(f"[IntentClassifier] 分类失败: {e}")
            return self.fallback_classification(text)

    def fallback_classification(self, text: str) -> Dict[str, Any]:
        """Rule-based fallback: scene keywords, then device+action, else chat."""
        text_lower = text.lower()

        # Device-name keywords (zh + en).
        device_keywords = {
            "light": ["灯", "light", "lights"],
            "socket": ["插座", "socket", "plug"],
            "ac": ["空调", "ac", "air conditioner"]
        }

        # Action keywords; turn_on is checked before turn_off.
        action_keywords = {
            "turn_on": ["打开", "开启", "开", "turn on", "switch on", "start"],
            "turn_off": ["关闭", "关掉", "关", "turn off", "switch off", "stop"]
        }

        # Scene-trigger phrases.
        scene_keywords = {
            "home": ["回家", "我回来了", "到家", "到家了", "i'm home", "going home"],
            "away": ["出门", "外出", "离开", "出门了", "i'm leaving", "going out"]
        }

        # 1) Scene triggers take priority over device commands.
        for scene_type, keywords in scene_keywords.items():
            if any(keyword in text_lower for keyword in keywords):
                return {
                    "intent": "IOT_SCENE",
                    "category": "IOT",
                    "complexity": "COMPLEX",
                    "entities": {
                        "scene_type": scene_type,
                        "action": "trigger"
                    },
                    "canExecute": True
                }

        # 2) Single-device control needs both a device and an action keyword.
        for device, device_words in device_keywords.items():
            if any(word in text_lower for word in device_words):
                for action, action_words in action_keywords.items():
                    if any(word in text_lower for word in action_words):
                        return {
                            "intent": "IOT_DEVICE_CONTROL",
                            "category": "IOT",
                            "complexity": "SIMPLE",
                            "entities": {
                                "device": device,
                                "action": action
                            },
                            "canExecute": True
                        }

        # 3) Otherwise treat the utterance as conversation.
        return {
            "intent": "CONVERSATIONAL",
            "category": "OTHER",
            "complexity": "CONVERSATIONAL",
            "entities": {},
            "canExecute": False,
            "reply": "抱歉,我没有理解您的指令。请再说一遍。"
        }


# ====================== 6. LangGraph多智能体实现 ======================
class SmartHomeState(BaseModel):
    """Shared LangGraph state flowing through the smart-home pipeline."""
    # Input
    user_input: str = ""
    raw_audio: Optional[bytes] = None
    
    # Intermediate processing results
    wake_word_detected: bool = False
    transcribed_text: str = ""
    embedding: List[float] = Field(default_factory=list)
    scene_classification: Dict[str, Any] = Field(default_factory=dict)
    intent_result: Dict[str, Any] = Field(default_factory=dict)
    
    # Device control
    target_devices: List[str] = Field(default_factory=list)
    device_commands: List[Dict[str, Any]] = Field(default_factory=list)
    execution_results: List[Dict[str, Any]] = Field(default_factory=list)
    
    # Output
    response_text: str = ""
    response_audio: Optional[bytes] = None
    iot_actions: List[Dict[str, Any]] = Field(default_factory=list)
    
    # Context
    conversation_history: List[Dict[str, Any]] = Field(default_factory=list)
    home_context: Dict[str, Any] = Field(default_factory=dict)
    
    # pydantic v1-style config: tolerate extra keys written by graph nodes.
    class Config:
        extra = "allow"


class SmartHomeAgent:
    """Abstract base for every agent node in the processing graph."""

    def __init__(self, agent_id: str):
        # NOTE(review): each agent builds its own SmartHomeManager; a real
        # system would inject one shared instance.
        self.home_manager = SmartHomeManager()
        self.agent_id = agent_id

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Consume the pipeline state and return a dict of state updates."""
        raise NotImplementedError


class WakeWordDetector(SmartHomeAgent):
    """Detects a wake word in the incoming utterance."""

    def __init__(self):
        super().__init__("wake_word_detector")
        self.wake_words = ["小爱", "hey siri", "ok google", "alexa"]

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Return the wake-word detection result for the current input."""
        if not state.raw_audio:
            return {"wake_word_detected": False}

        # Real code would run audio keyword spotting; here the text
        # (assumed already transcribed) is scanned instead.
        text = state.user_input.lower()
        for wake_word in self.wake_words:
            if wake_word in text:
                print(f"[WakeWordDetector] 检测到唤醒词: {text}")
                return {"wake_word_detected": True, "wake_word": text}

        return {"wake_word_detected": False}


class SpeechRecognizer(SmartHomeAgent):
    """Speech-to-text agent (ASR is simulated)."""

    def __init__(self):
        super().__init__("speech_recognizer")
        # A real implementation would load an ASR model here.

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Transcribe raw audio into text (mocked fixed result)."""
        if not state.raw_audio:
            return {}

        print(f"[SpeechRecognizer] 处理音频,长度: {len(state.raw_audio)}")
        # Real code would call an ASR model such as VOSK or Whisper.
        return {"transcribed_text": "打开客厅的灯"}


class SceneClassifierAgent(SmartHomeAgent):
    """Graph node wrapping SceneClassifier."""

    def __init__(self, bge_model: BGEEmbeddingModel):
        super().__init__("scene_classifier")
        self.classifier = SceneClassifier(bge_model)

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Classify the scene and expose the text embedding on the state."""
        text = state.transcribed_text or state.user_input
        if not text:
            return {}

        classification = await self.classifier.classify(text)
        # NOTE(review): this embeds the text a second time (classify() already
        # embedded it internally) — consider having classify() return its vector.
        embedding = await self.classifier.bge_model.embed_text(text)
        return {
            "scene_classification": classification,
            "embedding": embedding
        }


class IntentClassifierAgent(SmartHomeAgent):
    """Graph node wrapping the LLM intent classifier."""

    def __init__(self, llm_client: Any):
        super().__init__("intent_classifier")
        self.classifier = IntentClassifier(llm_client)

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Classify the user's intent, passing conversation context along."""
        text = state.transcribed_text or state.user_input
        if not text:
            return {}

        ctx = {
            "conversation_history": state.conversation_history,
            "home_context": state.home_context,
        }
        return {"intent_result": await self.classifier.classify_intent(text, ctx)}


class DeviceControlAgent(SmartHomeAgent):
    """Executes classified IoT intents: scene presets and single-device commands."""

    def __init__(self):
        super().__init__("device_control")

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Dispatch the intent to scene or device control and build a reply.

        Bug fixes vs. the original:
        - an unknown scene string made SceneMode(scene_type) raise ValueError;
        - `result` was referenced while unbound when the action was neither
          turn_on nor turn_off.
        """
        intent_result = state.intent_result
        if not intent_result:
            return {}

        intent = intent_result.get("intent")
        entities = intent_result.get("entities", {})

        actions = []
        results = []

        if intent == "IOT_SCENE":
            scene_type = entities.get("scene_type")
            if scene_type:
                try:
                    scene_mode = SceneMode(scene_type)
                except ValueError:
                    scene_mode = None  # unknown scene label from the LLM
                if scene_mode is not None:
                    result = await self.home_manager.execute_scene(scene_mode)
                    actions.append({
                        "type": "scene",
                        "scene": scene_type,
                        "result": result
                    })
                    results.append(result)

        elif intent == "IOT_DEVICE_CONTROL":
            device = entities.get("device")
            action = entities.get("action")

            # Match by device type value or display name.
            target_device = next(
                (dev for dev in self.home_manager.devices.values()
                 if dev.device_type.value == device or dev.name == device),
                None,
            )

            if target_device and action:
                result = None
                if action == "turn_on":
                    result = await target_device.turn_on()
                elif action == "turn_off":
                    result = await target_device.turn_off()

                if result is not None:  # skip unsupported actions cleanly
                    actions.append({
                        "type": "device",
                        "device": target_device.name,
                        "action": action,
                        "result": result
                    })
                    results.append(result)

        return {
            "iot_actions": actions,
            "execution_results": results,
            "response_text": self.generate_response(intent_result, results)
        }

    def generate_response(self, intent_result: Dict, results: List[Dict]) -> str:
        """Build a short Chinese confirmation string for the executed intent."""
        intent = intent_result.get("intent")

        if intent == "IOT_SCENE":
            scene_type = intent_result.get("entities", {}).get("scene_type")
            if scene_type == "home":
                return "欢迎回家!已为您打开回家模式。"
            elif scene_type == "away":
                return "已开启离家模式,祝您出行愉快!"

        elif intent == "IOT_DEVICE_CONTROL":
            device = intent_result.get("entities", {}).get("device")
            action = intent_result.get("entities", {}).get("action")

            if device and action:
                # Map internal identifiers to Chinese display words.
                device_names = {
                    "light": "灯",
                    "socket": "插座", 
                    "ac": "空调"
                }
                action_names = {
                    "turn_on": "打开",
                    "turn_off": "关闭"
                }

                device_cn = device_names.get(device, device)
                action_cn = action_names.get(action, action)

                return f"已{action_cn}{device_cn}"

        # Conversational intents echo the LLM-provided reply when present.
        if intent_result.get("complexity") == "CONVERSATIONAL":
            return intent_result.get("reply", "我在听")

        return "操作已执行"


class ConversationManager(SmartHomeAgent):
    """Maintains a rolling conversation history on the pipeline state."""

    def __init__(self, llm_client: Any):
        super().__init__("conversation_manager")
        self.llm = llm_client

    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """Append the latest user/assistant turns and cap history at 10 entries.

        Bug fix: timestamps were hard-coded placeholder strings
        ("2026-01-31 ..."); real wall-clock time is recorded instead.
        """
        user_text = state.transcribed_text or state.user_input
        assistant_text = state.response_text
        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if user_text:
            state.conversation_history.append({
                "role": "user",
                "content": user_text,
                "timestamp": now
            })

        if assistant_text:
            state.conversation_history.append({
                "role": "assistant",
                "content": assistant_text,
                "timestamp": now
            })

        # Keep only the 10 most recent turns.
        if len(state.conversation_history) > 10:
            state.conversation_history = state.conversation_history[-10:]

        return {"conversation_history": state.conversation_history}


class TTSAgent(SmartHomeAgent):
    """文本转语音智能体"""
    
    def __init__(self):
        super().__init__("tts_agent")
        
    async def process(self, state: SmartHomeState) -> Dict[str, Any]:
        """文本转语音"""
        if state.response_text:
            # 模拟TTS生成
            print(f"[TTSAgent] 生成语音: {state.response_text}")
            # 实际应该调用TTS模型如VITS、Tacotron等
            return {"response_audio": b"模拟音频数据"}
        return {}


# ====================== 7. 构建LangGraph ======================
class SmartHomeGraphBuilder:
    """Builds the LangGraph pipeline for voice-command processing."""

    def __init__(self):
        self.graph = StateGraph(SmartHomeState)

    def build_graph(self) -> StateGraph:
        """Wire all agents into a linear graph gated by wake-word detection.

        Bug fixes vs. the original:
        - MockLLM exposed `chat`/`completions` as coroutine methods, so
          `llm.chat.completions.create(...)` raised AttributeError and the
          mock response was never reachable; nested classes now make the
          attribute chain valid with only `create` awaitable;
        - an unconditional edge wake_word_detection -> speech_recognition was
          added alongside the conditional edges from the same node, which
          defeated the wake-word gate; only the conditional edges remain.
        """
        # Agent instances (a real system would use dependency injection).
        wake_word_detector = WakeWordDetector()
        speech_recognizer = SpeechRecognizer()

        bge_model = BGEEmbeddingModel(
            model_path_en="models/bge_en.onnx",
            model_path_zh="models/bge_zh.onnx",
            vocab_path_en="vocab/en_vocab.txt",
            vocab_path_zh="vocab/zh_vocab.txt"
        )

        scene_classifier = SceneClassifierAgent(bge_model)

        # OpenAI-style mock client: chat.completions.create must be reachable
        # as plain attributes, with only create() being awaitable.
        class MockLLM:
            class chat:
                class completions:
                    @staticmethod
                    async def create(**kwargs):
                        class MockResponse:
                            class Choice:
                                class Message:
                                    content = '{"intent": "IOT_SCENE", "category": "IOT", "complexity": "COMPLEX", "entities": {"scene_type": "away", "action": "trigger"}, "canExecute": true}'
                                message = Message()
                            choices = [Choice()]
                        return MockResponse()

        llm_client = MockLLM()
        intent_classifier = IntentClassifierAgent(llm_client)
        device_control = DeviceControlAgent()
        conversation_manager = ConversationManager(llm_client)
        tts_agent = TTSAgent()

        # Nodes
        self.graph.add_node("wake_word_detection", self.create_node_func(wake_word_detector))
        self.graph.add_node("speech_recognition", self.create_node_func(speech_recognizer))
        self.graph.add_node("scene_classification", self.create_node_func(scene_classifier))
        self.graph.add_node("intent_classification", self.create_node_func(intent_classifier))
        self.graph.add_node("device_control", self.create_node_func(device_control))
        self.graph.add_node("conversation_management", self.create_node_func(conversation_manager))
        self.graph.add_node("tts_generation", self.create_node_func(tts_agent))

        # Edges
        self.graph.set_entry_point("wake_word_detection")

        # Wake-word gate: continue only when a wake word was detected.
        self.graph.add_conditional_edges(
            "wake_word_detection",
            self.should_process_speech,
            {
                "process": "speech_recognition",
                "skip": END
            }
        )

        self.graph.add_edge("speech_recognition", "scene_classification")
        self.graph.add_edge("scene_classification", "intent_classification")
        self.graph.add_edge("intent_classification", "device_control")
        self.graph.add_edge("device_control", "conversation_management")
        self.graph.add_edge("conversation_management", "tts_generation")
        self.graph.add_edge("tts_generation", END)

        return self.graph

    def create_node_func(self, agent: SmartHomeAgent):
        """Wrap an agent into a LangGraph node that applies its state updates."""
        async def node_func(state: SmartHomeState) -> SmartHomeState:
            result = await agent.process(state)
            # Copy every returned key back onto the shared state object.
            for key, value in result.items():
                setattr(state, key, value)
            return state
        return node_func

    def should_process_speech(self, state: SmartHomeState) -> str:
        """Routing function for the wake-word gate."""
        return "process" if state.wake_word_detected else "skip"


# ====================== 8. 边缘计算部署方案 ======================
class EdgeDeploymentManager:
    """Manages model loading and deployment configs for edge devices."""

    def __init__(self):
        self.models = {}
        self.devices = {}

    def load_models_for_edge(self):
        """Return (and log) the lightweight model lineup for edge hardware."""
        model_config = {
            "asr": {
                "name": "whisper-tiny",
                "format": "onnx",
                "size": "75MB",
                "language": ["zh", "en"]
            },
            "embedding": {
                "name": "bge-m3-small",
                "format": "onnx", 
                "size": "150MB",
                "dimension": 768
            },
            "llm": {
                "name": "qwen-1.8b",
                "format": "onnx",
                "size": "1.8GB",
                "quantization": "int8"
            },
            "tts": {
                "name": "vits-zh",
                "format": "onnx",
                "size": "50MB"
            }
        }

        print("加载边缘计算模型...")
        for model_type in model_config:
            cfg = model_config[model_type]
            print(f"  {model_type}: {cfg['name']} ({cfg['size']})")

        return model_config

    def optimize_for_edge(self):
        """Return the edge-optimization policy table."""
        return {
            "cpu_usage": "限制在80%以下",
            "memory": "使用内存映射减少RAM占用",
            "power": "动态频率调整以省电",
            "network": "本地优先,断网仍可用",
            "latency": "目标响应时间<500ms"
        }

    def deploy_to_device(self, device_type: str):
        """Return the deployment guide for *device_type* ({} if unknown)."""
        deployment_guides = {
            "raspberry_pi": {
                "os": "Raspbian/Raspberry Pi OS",
                "python": "3.9+",
                "dependencies": [
                    "onnxruntime",
                    "numpy",
                    "pyaudio",
                    "langgraph"
                ],
                "setup": [
                    "1. 安装操作系统",
                    "2. 安装Python依赖",
                    "3. 下载模型文件到本地",
                    "4. 配置智能家居设备连接",
                    "5. 设置开机自启动"
                ]
            },
            "android_tv": {
                "os": "Android 9.0+",
                "runtime": "Termux或Android NDK",
                "dependencies": ["onnxruntime-android"],
                "setup": [
                    "1. 安装Termux",
                    "2. 在Termux中安装Python",
                    "3. 将应用打包为APK",
                    "4. 申请必要的权限"
                ]
            },
            "smart_speaker": {
                "os": "Linux嵌入式系统",
                "sdk": "厂商SDK",
                "integration": "通过HTTP/MQTT与主系统通信"
            }
        }
        return deployment_guides.get(device_type, {})


# ====================== 9. 语音助手集成方案 ======================
class VoiceAssistantIntegration:
    """Reference material: how the voice assistant integrates with LLMs.

    The methods below return static documentation tables (integration
    patterns, the speech pipeline, and LLM usage strategies).
    """
    
    def __init__(self):
        # Engine slots are placeholders; nothing here wires real engines.
        self.tts_engine = None
        self.asr_engine = None
        
    def integration_patterns(self):
        """Return the catalog of deployment/integration patterns."""
        patterns = {
            "pattern_1": {
                "name": "完全边缘计算",
                "description": "所有模型在设备本地运行",
                "pros": ["隐私性好", "离线可用", "低延迟"],
                "cons": ["设备要求高", "模型能力有限"],
                "适用设备": "高端智能音箱、家庭服务器"
            },
            "pattern_2": {
                "name": "边缘+云端协同",
                "description": "简单任务本地处理,复杂任务云端处理",
                "pros": ["平衡性能与成本", "可扩展性强"],
                "cons": ["需要网络", "隐私风险"],
                "适用设备": "大多数智能家居设备"
            },
            "pattern_3": {
                "name": "分层处理架构",
                "description": """
                第一层:唤醒词检测(始终本地)
                第二层:简单命令识别(本地LLM)
                第三层:复杂对话(云端大模型)
                """,
                "适用场景": "混合型智能助手"
            }
        }
        return patterns
    
    def speech_pipeline(self):
        """Return the ordered speech-processing pipeline description."""
        pipeline = [
            {
                "step": "语音采集",
                "技术": "麦克风阵列、波束成形、回声消除",
                "输出": "PCM音频流"
            },
            {
                "step": "唤醒词检测", 
                "技术": "Snowboy、Porcupine、自定义模型",
                "输出": "是否唤醒"
            },
            {
                "step": "语音识别(ASR)",
                "技术": "Whisper、VOSK、DeepSpeech",
                "输出": "文本"
            },
            {
                "step": "语义理解(NLU)",
                "技术": "BGE嵌入 + LLM意图分类",
                "输出": "结构化意图"
            },
            {
                "step": "任务执行",
                "技术": "LangGraph多智能体",
                "输出": "操作结果"
            },
            {
                "step": "语音合成(TTS)",
                "技术": "VITS、Tacotron、Edge-TTS",
                "输出": "语音响应"
            }
        ]
        return pipeline
    
    def llm_usage_strategies(self):
        """Return the LLM sizing/placement strategy table."""
        strategies = {
            "intent_classification": {
                "模型": "小型模型(1-3B参数)",
                "部署": "本地ONNX",
                "任务": "仅分类意图,不生成对话"
            },
            "conversation": {
                "模型": "云端大模型(70B+)",
                "部署": "API调用",
                "任务": "复杂对话、知识问答"
            },
            "hybrid": {
                "模型": "本地小模型 + 云端大模型",
                "策略": "本地处理简单请求,复杂请求转发云端"
            }
        }
        return strategies


# ====================== 10. 完整系统入口 ======================
class SmartHomeVoiceAssistant:
    """Top-level entry point for the smart-home voice assistant.

    Wires together the LangGraph processing graph, the device manager,
    the edge-deployment helpers and the integration descriptions, and
    exposes a single `process_voice_command` coroutine.
    """

    def __init__(self, deployment_mode: str = "edge"):
        # Stored and reported in get_system_info(); routing decisions are
        # made by the managers. NOTE(review): accepted values appear to be
        # "edge"/"cloud" — confirm against EdgeDeploymentManager.
        self.deployment_mode = deployment_mode
        self.graph_builder = SmartHomeGraphBuilder()
        self.home_manager = SmartHomeManager()
        self.edge_manager = EdgeDeploymentManager()
        self.integration = VoiceAssistantIntegration()

        # Register the demo device fleet before any command is processed.
        self._initialize_devices()

        # Build and compile the LangGraph pipeline once at startup.
        self.graph = self.graph_builder.build_graph()
        self.compiled_graph = self.graph.compile()

        # Model metadata/handles for edge inference.
        self.model_config = self.edge_manager.load_models_for_edge()

    def _initialize_devices(self):
        """Register the built-in demo devices with the home manager."""
        devices = [
            SmartDevice("living_room_light", DeviceType.LIGHT, "客厅灯", "living_room"),
            SmartDevice("bedroom_light", DeviceType.LIGHT, "卧室灯", "bedroom"),
            SmartDevice("living_room_socket", DeviceType.SOCKET, "客厅插座", "living_room"),
            SmartDevice("kitchen_socket", DeviceType.SOCKET, "厨房插座", "kitchen"),
            SmartDevice("ac", DeviceType.THERMOSTAT, "空调", "living_room")
        ]

        for device in devices:
            self.home_manager.register_device(device)

    async def process_voice_command(self, audio_data: bytes = None, text: str = None):
        """Run one voice command through the processing graph.

        Exactly one of `audio_data` (raw audio, transcribed later by the
        ASR node) or `text` (already-transcribed input) should be given.

        Raises:
            ValueError: if neither input is provided (previously the
                command silently ran with an empty state).
        """
        if audio_data is None and text is None:
            raise ValueError("process_voice_command requires audio_data or text")

        state = SmartHomeState()

        if audio_data:
            state.raw_audio = audio_data
            state.user_input = ""  # filled in by the ASR node downstream
        elif text:
            state.user_input = text
            state.raw_audio = None

        # Execute the compiled LangGraph pipeline.
        result = await self.compiled_graph.ainvoke(state)

        self.log_command(result)
        return result

    def log_command(self, state: "SmartHomeState"):
        """Print a structured log entry for a processed command.

        Tolerates partially-filled states (classification/intent may be
        None when those nodes did not run). Returns the entry dict so
        callers and tests can inspect it.
        """
        from datetime import datetime  # local import: logging-only dependency

        # BUG FIX: the timestamp was a hard-coded literal; emit the real
        # current time with millisecond precision instead.
        # BUG FIX: guard against None before calling .get(), which raised
        # AttributeError for commands that skipped classification/intent.
        log_entry = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
            "input": state.user_input or "[语音输入]",
            "wake_word": state.wake_word_detected,
            "scene": (state.scene_classification or {}).get("scene"),
            "intent": (state.intent_result or {}).get("intent"),
            "response": state.response_text,
            "actions": state.iot_actions
        }

        print(f"[系统日志] {json.dumps(log_entry, ensure_ascii=False, indent=2)}")
        return log_entry

    def get_system_info(self):
        """Summarize the current configuration for diagnostics."""
        return {
            "deployment_mode": self.deployment_mode,
            "devices_count": len(self.home_manager.devices),
            "scenes_available": list(self.home_manager.scenes.keys()),
            "models_loaded": list(self.model_config.keys()),
            "optimizations": self.edge_manager.optimize_for_edge()
        }


# ====================== 11. 使用示例和部署脚本 ======================
async def main():
    """Demo driver: build the assistant, show its config, run sample commands."""
    banner = "=" * 60
    print(banner)
    print("智能家居语音助手系统")
    print(banner)

    # 1. Create the assistant instance.
    assistant = SmartHomeVoiceAssistant(deployment_mode="edge")

    # 2. Show system information.
    print("\n系统配置:")
    for name, val in assistant.get_system_info().items():
        print(f"  {name}: {val}")

    # 3. Show the available integration patterns.
    print("\n集成模式:")
    for _pattern_id, pattern in assistant.integration.integration_patterns().items():
        print(f"\n{pattern['name']}:")
        print(f"  {pattern['description']}")

    # 4. Run a batch of sample voice commands through the graph.
    print("\n测试语音命令处理...")

    sample_commands = (
        "小兴,小兴,打开客厅的灯",
        "关闭卧室灯",
        "我要出门了",
        "我回家了",
        "打开空调",
        "今天的天气怎么样",
    )

    for cmd in sample_commands:
        print(f"\n> 用户: {cmd}")
        outcome = await assistant.process_voice_command(text=cmd)
        print(f"< 助手: {outcome.response_text}")
        if outcome.iot_actions:
            print(f"  执行操作: {outcome.iot_actions}")

    # 5. Print the edge-deployment guide for a Raspberry Pi.
    print("\n" + banner)
    print("边缘计算部署指南")
    print(banner)

    guide = EdgeDeploymentManager().deploy_to_device("raspberry_pi")

    print("\n树莓派部署步骤:")
    for step in guide.get("setup", []):
        print(f"  {step}")


if __name__ == "__main__":
    # NOTE: `import asyncio` was repeated here; it is already imported at
    # the top of the module, so the redundant import is removed.

    # Run the interactive demo.
    asyncio.run(main())

    # Print a ready-to-use deployment script for Raspberry Pi / ARM hosts.
    print("\n" + "=" * 60)
    print("自动部署脚本")
    print("=" * 60)

    # BUG FIX inside the script: `sudo cat > /etc/... << EOF` performs the
    # redirection as the *unprivileged* user and fails with EACCES; writing
    # the systemd unit now goes through `sudo tee ... > /dev/null`.
    deploy_script = """#!/bin/bash
# 智能家居语音助手部署脚本
# 适用于树莓派/ARM设备

echo "安装系统依赖..."
sudo apt-get update
sudo apt-get install -y python3.9 python3-pip python3-venv
sudo apt-get install -y portaudio19-dev libasound2-dev

echo "创建Python虚拟环境..."
python3 -m venv ~/smart_home_env
source ~/smart_home_env/bin/activate

echo "安装Python包..."
pip install --upgrade pip
pip install langgraph onnxruntime pyaudio numpy

echo "创建项目目录..."
mkdir -p ~/smart_home_assistant/{models,config,logs}

echo "下载模型文件..."
# 这里应该从服务器下载预训练模型
# wget -O ~/smart_home_assistant/models/bge.onnx https://example.com/models/bge.onnx
# wget -O ~/smart_home_assistant/models/whisper.onnx https://example.com/models/whisper.onnx

echo "配置服务..."
cat > ~/smart_home_assistant/config/settings.yaml << EOF
# 智能家居配置
mqtt:
  broker: "localhost"
  port: 1883
  username: "admin"
  password: "password"

devices:
  - name: "living_room_light"
    type: "light"
    location: "living_room"
    controller: "tuya"
    device_id: "xxx"
    
  - name: "smart_socket"
    type: "socket"
    location: "kitchen"
    controller: "tuya"
    device_id: "yyy"
EOF

echo "创建启动脚本..."
cat > ~/smart_home_assistant/start.sh << 'EOS'
#!/bin/bash
source ~/smart_home_env/bin/activate
cd ~/smart_home_assistant
python main.py --mode=edge --log-level=INFO
EOS

chmod +x ~/smart_home_assistant/start.sh

echo "设置开机自启..."
sudo tee /etc/systemd/system/smart-home.service > /dev/null << EOF
[Unit]
Description=Smart Home Voice Assistant
After=network.target

[Service]
Type=simple
User=$USER
WorkingDirectory=/home/$USER/smart_home_assistant
ExecStart=/bin/bash /home/$USER/smart_home_assistant/start.sh
Restart=always

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable smart-home.service

echo "部署完成!"
echo "启动服务: sudo systemctl start smart-home.service"
echo "查看日志: sudo journalctl -u smart-home.service -f"
"""

    print(deploy_script)

本文介绍了一个基于LangGraph的智能家居语音助手系统实现。该系统采用多智能体架构,包含以下核心模块:

  1. 设备管理模块:定义了智能设备基类(SmartDevice)和家居管理器(SmartHomeManager),支持设备注册和场景控制功能。

  2. 语音处理流水线:包含唤醒词检测、语音识别、场景分类、意图识别等处理节点,使用BGE嵌入模型和LLM进行语义理解。

  3. LangGraph实现:通过状态图(StateGraph)将各智能体连接成处理流程,包括WakeWordDetector、SpeechRecognizer等节点类。

  4. 边缘计算部署:提供了模型优化方案和针对不同设备的部署指南。

系统支持中文和英文语音指令,能够处理"打开灯"、"我回家了"等智能家居控制场景,并生成语音响应。文章还包含完整的系统架构设计、类定义和使用示例。

相关推荐
阿杰学AI2 小时前
AI核心知识81——大语言模型之MaaS(简洁且通俗易懂版)
人工智能·ai·语言模型·自然语言处理·aigc·maas·模型即服务
IT·小灰灰3 小时前
基于DMXAPI与GLM-4.7-Flash构建零成本AI编程工作站:从API选型到流式生成实战
人工智能·aigc·ai编程
快降重024 小时前
剖析AIGC降重:你的论文智能“重构师”
人工智能·自然语言处理·重构·aigc·论文降重·降ai率·快降重
熵减纪元4 小时前
OpenClaw gateway start 报 401 Invalid API key?一个环境变量的坑
人工智能·aigc
147API5 小时前
60,000 星的代价:解析 OpenClaw 的架构设计与安全教训
人工智能·安全·aigc·clawdbot·moltbot·openclaw
feasibility.15 小时前
混元3D-dit-v2-mv-turbo生成3D模型初体验(ComfyUI)
人工智能·3d·aigc·三维建模·comfyui
PaperRed ai写作降重助手18 小时前
AI 论文写作工具排名(实测不踩坑)
人工智能·aigc·ai写作·论文写作·智能降重·辅助写作·降重复率
阿杰学AI20 小时前
AI核心知识80——大语言模型之Slow Thinking和Deep Reasoning(简洁且通俗易懂版)
人工智能·ai·语言模型·自然语言处理·aigc·慢思考·深度推理
SmartBrain21 小时前
OCR 模型在医疗场景的选型研究
人工智能·算法·语言模型·架构·aigc·ocr