The Context-Management Revolution: How MCP's Dynamic Chunking and Hierarchical Compression Break Through the 1M-Token Bottleneck
1. MCP Core Architecture Design
1.1 Dynamic Chunking Engine Implementation
```python
import re
from typing import Dict, List

import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans


class DynamicChunker:
    def __init__(self, model_name='paraphrase-multilingual-MiniLM-L12-v2'):
        self.embedder = SentenceTransformer(model_name)
        self.min_chunk_size = 128    # tokens
        self.max_chunk_size = 1024   # tokens
        self.semantic_threshold = 0.82
        self.cache = {}              # cache of previously computed sentence embeddings

    def chunk_text(self, text: str, tokenizer) -> List[Dict]:
        """Main entry point for dynamic chunking."""
        # Preprocess the text
        cleaned_text = self._preprocess_text(text)
        # Initial sentence split
        sentences = self._split_into_sentences(cleaned_text)
        if not sentences:
            return []
        # Compute sentence embeddings (with caching)
        sentence_embeddings = []
        for sent in sentences:
            if sent in self.cache:
                sentence_embeddings.append(self.cache[sent])
            else:
                emb = self.embedder.encode(sent)
                self.cache[sent] = emb
                sentence_embeddings.append(emb)
        sentence_embeddings = np.array(sentence_embeddings)
        # Adaptive chunking strategy
        if len(sentences) > 50:   # large texts: cluster-based chunking
            chunks = self._cluster_chunking(sentences, sentence_embeddings)
        else:                     # small texts: sliding-window chunking
            chunks = self._sliding_window_chunking(sentences, sentence_embeddings, tokenizer)
        # Post-processing and validation
        final_chunks = []
        for chunk in chunks:
            chunk_text = ' '.join(chunk['sentences'])
            token_count = len(tokenizer.tokenize(chunk_text))
            if token_count > self.max_chunk_size and len(chunk['sentences']) > 1:
                # Recursively split oversized chunks (a single oversized sentence
                # is kept as-is to avoid infinite recursion)
                sub_chunks = self.chunk_text(chunk_text, tokenizer)
                final_chunks.extend(sub_chunks)
            else:
                final_chunks.append({
                    'text': chunk_text,
                    'embeddings': chunk['embedding'],
                    'tokens': token_count,
                    'type': 'text'
                })
        return final_chunks

    def _preprocess_text(self, text: str) -> str:
        """Text preprocessing."""
        # Collapse extra whitespace and strip stray characters
        text = re.sub(r'\s+', ' ', text).strip()
        # Additional preprocessing rules...
        return text

    def _split_into_sentences(self, text: str) -> List[str]:
        """Improved sentence splitting."""
        sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
        return sentences

    def _sliding_window_chunking(self, sentences: List[str],
                                 embeddings: np.ndarray,
                                 tokenizer) -> List[Dict]:
        """Sliding-window chunking algorithm."""
        chunks = []
        current_chunk = []
        current_embeddings = []
        current_size = 0
        for sent, emb in zip(sentences, embeddings):
            sent_size = len(tokenizer.tokenize(sent))
            if not current_chunk:
                # Start a new chunk
                current_chunk.append(sent)
                current_embeddings.append(emb)
                current_size += sent_size
                continue
            # Cosine similarity between the sentence and the current chunk centroid
            avg_embedding = np.mean(current_embeddings, axis=0)
            similarity = np.dot(emb, avg_embedding) / (
                np.linalg.norm(emb) * np.linalg.norm(avg_embedding))
            # Decide whether to extend the current chunk
            if (similarity > self.semantic_threshold and
                    current_size + sent_size <= self.max_chunk_size):
                current_chunk.append(sent)
                current_embeddings.append(emb)
                current_size += sent_size
            else:
                # Save the current chunk
                chunks.append({
                    'sentences': current_chunk.copy(),
                    'embedding': avg_embedding
                })
                # Start a new chunk
                current_chunk = [sent]
                current_embeddings = [emb]
                current_size = sent_size
        # Append the final chunk
        if current_chunk:
            chunks.append({
                'sentences': current_chunk,
                'embedding': np.mean(current_embeddings, axis=0)
            })
        return chunks

    def _cluster_chunking(self, sentences: List[str],
                          embeddings: np.ndarray) -> List[Dict]:
        """Cluster-based chunking strategy."""
        # Determine the number of clusters dynamically
        n_clusters = max(2, min(10, len(sentences) // 20))
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        clusters = kmeans.fit_predict(embeddings)
        # Build chunks from the clusters
        chunks = []
        for cluster_id in range(n_clusters):
            cluster_indices = np.where(clusters == cluster_id)[0]
            cluster_sents = [sentences[i] for i in cluster_indices]
            cluster_embs = embeddings[cluster_indices]
            chunks.append({
                'sentences': cluster_sents,
                'embedding': np.mean(cluster_embs, axis=0)
            })
        return chunks
```
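For reference, here is a minimal usage sketch of the chunker. It assumes a Hugging Face tokenizer supplies the token counts; `bert-base-uncased` is an arbitrary stand-in, and any tokenizer exposing a `tokenize` method would do.

```python
from transformers import AutoTokenizer

# Minimal usage sketch (assumes the DynamicChunker class above is in scope
# and that bert-base-uncased is an acceptable stand-in tokenizer).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
chunker = DynamicChunker()

document = (
    "MCP splits long documents into semantically coherent chunks. "
    "Each chunk is embedded once and cached. "
    "Oversized chunks are split again before they are stored."
)
chunks = chunker.chunk_text(document, tokenizer)
for chunk in chunks:
    print(chunk['tokens'], chunk['text'][:60])
```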
1.2 Hierarchical Compression System Implementation
```python
import re
import zlib
from typing import List, Union

import msgpack
import torch
from transformers import AutoModel, AutoTokenizer


class HierarchicalCompressor:
    COMPRESSION_LEVELS = {
        'raw': 0,
        'light': 1,
        'medium': 2,
        'aggressive': 3,
        'extreme': 4
    }

    def __init__(self, model_name='bert-base-uncased'):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.compression_algos = {
            0: self._raw_compress,
            1: self._light_compress,
            2: self._medium_compress,
            3: self._aggressive_compress,
            4: self._extreme_compress
        }
        self.decompression_algos = {
            0: self._raw_decompress,
            1: self._light_decompress,
            2: self._medium_decompress,
            3: self._aggressive_decompress,
            4: self._extreme_decompress
        }

    def compress(self, text: str, level: Union[str, int] = 'medium') -> bytes:
        """Entry point for hierarchical compression."""
        level = self._resolve_level(level)
        return self.compression_algos[level](text)

    def decompress(self, data: bytes, level: Union[str, int]) -> str:
        """Entry point for hierarchical decompression."""
        level = self._resolve_level(level)
        return self.decompression_algos[level](data)

    def _resolve_level(self, level: Union[str, int]) -> int:
        """Resolve a compression level given as a name or an integer."""
        if isinstance(level, str):
            return self.COMPRESSION_LEVELS[level.lower()]
        return level

    def _raw_compress(self, text: str) -> bytes:
        """Raw storage (no compression)."""
        return text.encode('utf-8')

    def _light_compress(self, text: str) -> bytes:
        """Light compression."""
        return zlib.compress(text.encode('utf-8'), level=1)

    def _medium_compress(self, text: str) -> bytes:
        """Medium compression."""
        # Extract key information
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Semantic representation taken from the [CLS] token
        cls_embedding = outputs.last_hidden_state[:, 0, :].numpy()
        # Extract entities and keywords
        entities = self._extract_entities(text)
        keywords = self._extract_keywords(text)
        # Efficient serialization with msgpack
        return msgpack.packb({
            'embedding': cls_embedding.tolist(),
            'entities': entities,
            'keywords': keywords,
            'length': len(text),
            'type': 'medium'
        })

    def _aggressive_compress(self, text: str) -> bytes:
        """Aggressive compression."""
        # Keep only a summary and the key entities
        summary = self._generate_summary(text, ratio=0.3)
        entities = self._extract_entities(text)
        return msgpack.packb({
            'summary': summary,
            'entities': entities,
            'length': len(text),
            'type': 'aggressive'
        })

    def _extreme_compress(self, text: str) -> bytes:
        """Extreme compression."""
        # Keep only the most essential information
        extreme_summary = self._generate_summary(text, ratio=0.1)
        return extreme_summary.encode('utf-8')

    # Corresponding decompression methods
    def _raw_decompress(self, data: bytes) -> str:
        return data.decode('utf-8')

    def _light_decompress(self, data: bytes) -> str:
        return zlib.decompress(data).decode('utf-8')

    def _medium_decompress(self, data: bytes) -> str:
        obj = msgpack.unpackb(data)
        # Embedding-based text reconstruction could be added here
        return f"[MEDIUM] Keywords: {', '.join(obj['keywords'])} | Entities: {', '.join(obj['entities'])}"

    def _aggressive_decompress(self, data: bytes) -> str:
        obj = msgpack.unpackb(data)
        return f"[AGGRESSIVE] Summary: {obj['summary']}"

    def _extreme_decompress(self, data: bytes) -> str:
        return f"[EXTREME] {data.decode('utf-8')}"

    def _extract_entities(self, text: str) -> List[str]:
        """Entity extraction (simplified)."""
        # A real implementation should use an NER model
        return list(set(re.findall(r'\b[A-Z][a-z]+\b', text)))[:5]

    def _extract_keywords(self, text: str) -> List[str]:
        """Keyword extraction."""
        words = re.findall(r'\b\w{4,}\b', text.lower())
        word_counts = {}
        for word in words:
            if word not in {'that', 'this', 'with', 'which'}:
                word_counts[word] = word_counts.get(word, 0) + 1
        return sorted(word_counts, key=word_counts.get, reverse=True)[:10]

    def _generate_summary(self, text: str, ratio=0.3) -> str:
        """Summary generation (simplified)."""
        sentences = re.split(r'(?<=[.!?])\s+', text)
        return ' '.join(sentences[:max(1, int(len(sentences) * ratio))])
```
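To illustrate the trade-off between the five levels, the sketch below compresses the same passage at every level and prints the payload sizes. This is only an illustrative sketch with made-up sample text; note that the medium level runs a BERT forward pass, so the first call is comparatively slow, and the lossy levels reconstruct a surrogate rather than the original text.

```python
# Illustrative sketch: compare payload sizes across the five levels.
compressor = HierarchicalCompressor()
passage = "Alice met Bob in Paris. They discussed the MCP protocol design. " * 8

for name, level in HierarchicalCompressor.COMPRESSION_LEVELS.items():
    payload = compressor.compress(passage, level)
    print(f"{name:>10}: {len(payload):5d} bytes")

# Lossy levels return a surrogate, not the original text:
print(compressor.decompress(compressor.compress(passage, 'aggressive'), 'aggressive'))
```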
2. MCP Memory Management System
2.1 Smart Memory Unit Design
```python
import hashlib
import time
from dataclasses import dataclass
from typing import Optional


@dataclass
class MemoryUnit:
    content_hash: str
    compressed_data: bytes
    compression_level: int
    metadata: dict
    last_accessed: Optional[float] = None
    access_count: int = 0

    def __post_init__(self):
        if self.last_accessed is None:
            self.last_accessed = time.time()

    @classmethod
    def create(cls, content: str, compressor: HierarchicalCompressor,
               importance: float = 0.5) -> 'MemoryUnit':
        """Factory method for creating a memory unit."""
        # Determine the compression level
        level = cls._determine_compression_level(importance)
        # Hash the content
        content_hash = hashlib.sha256(content.encode()).hexdigest()
        # Compress the content
        compressed = compressor.compress(content, level)
        # Build the metadata
        metadata = {
            'created': time.time(),
            'importance': max(0.0, min(1.0, importance)),
            'original_length': len(content),
            'compressed_size': len(compressed),
            'compression_ratio': len(compressed) / len(content) if content else 0
        }
        return cls(
            content_hash=content_hash,
            compressed_data=compressed,
            compression_level=level,
            metadata=metadata
        )

    @staticmethod
    def _determine_compression_level(importance: float) -> int:
        """Map importance to a compression level."""
        if importance > 0.8: return 0    # raw
        elif importance > 0.6: return 1  # light
        elif importance > 0.4: return 2  # medium
        elif importance > 0.2: return 3  # aggressive
        else: return 4                   # extreme

    def get_content(self, compressor: HierarchicalCompressor) -> str:
        """Return the decompressed content and update the access statistics."""
        self.access_count += 1
        self.last_accessed = time.time()
        return compressor.decompress(self.compressed_data, self.compression_level)

    def get_info(self) -> dict:
        """Return a summary of the unit."""
        return {
            'hash': self.content_hash[:8],
            'size': self.metadata['compressed_size'],
            'ratio': round(self.metadata['compression_ratio'], 2),
            'importance': self.metadata['importance'],
            'access_count': self.access_count,
            'last_accessed': time.ctime(self.last_accessed)
        }
```
2.2 Memory Pool Implementation
```python
class MemoryPool:
    def __init__(self, max_size_mb: int = 1024, chunker=None, compressor=None):
        self.max_size = max_size_mb * 1024 * 1024  # convert to bytes
        self.current_size = 0
        self.memory_units = {}
        self.chunker = chunker or DynamicChunker()
        self.compressor = compressor or HierarchicalCompressor()
        self.tokenizer = self.compressor.tokenizer
        # Index structures
        self.semantic_index = {}   # {embedding_hash: unit_hash}
        self.temporal_index = []   # [(timestamp, unit_hash)]

    def add_content(self, content: str, importance: float = 0.5) -> List[str]:
        """Add content to the memory pool."""
        # Dynamic chunking
        chunks = self.chunker.chunk_text(content, self.tokenizer)
        if not chunks:
            return []
        added_hashes = []
        for chunk in chunks:
            # Create a memory unit
            unit = MemoryUnit.create(chunk['text'], self.compressor, importance)
            # Skip duplicates
            if unit.content_hash in self.memory_units:
                continue
            # Enforce the memory limit
            self._ensure_capacity(unit.metadata['compressed_size'])
            # Add to the pool
            self.memory_units[unit.content_hash] = unit
            self.current_size += unit.metadata['compressed_size']
            added_hashes.append(unit.content_hash)
            # Update the indexes
            self._update_indexes(unit, chunk.get('embeddings'))
        return added_hashes

    def _ensure_capacity(self, required_size: int):
        """Make sure there is enough room for a new unit."""
        while self.current_size + required_size > self.max_size and self.memory_units:
            # Find the least valuable unit (lowest importance, then least
            # accessed, then least recently used)
            to_remove = min(
                self.memory_units.values(),
                key=lambda u: (
                    u.metadata['importance'],
                    u.access_count,
                    u.last_accessed
                )
            )
            # Evict it
            self._remove_unit(to_remove.content_hash)

    def _remove_unit(self, unit_hash: str):
        """Remove a memory unit."""
        if unit_hash not in self.memory_units:
            return
        unit = self.memory_units[unit_hash]
        self.current_size -= unit.metadata['compressed_size']
        del self.memory_units[unit_hash]
        # Clean up the indexes
        self._clean_indexes(unit_hash)

    def _update_indexes(self, unit: MemoryUnit, embedding: np.ndarray = None):
        """Update the index structures."""
        # Temporal index
        self.temporal_index.append((unit.metadata['created'], unit.content_hash))
        # Semantic index
        if embedding is not None:
            emb_hash = hashlib.sha256(embedding.tobytes()).hexdigest()
            self.semantic_index[emb_hash] = unit.content_hash

    def _clean_indexes(self, unit_hash: str):
        """Clean up the indexes."""
        # Temporal index
        self.temporal_index = [(t, h) for t, h in self.temporal_index if h != unit_hash]
        # Semantic index
        for emb_hash, h in list(self.semantic_index.items()):
            if h == unit_hash:
                del self.semantic_index[emb_hash]

    def retrieve_by_semantics(self, query: str, top_k: int = 5) -> List[dict]:
        """Semantic retrieval."""
        # Embed the query with the same model used by medium-level compression,
        # so it is directly comparable with the stored [CLS] embeddings.
        inputs = self.compressor.tokenizer(query, return_tensors='pt',
                                           truncation=True, max_length=512)
        with torch.no_grad():
            query_embed = self.compressor.model(
                **inputs).last_hidden_state[:, 0, :].numpy().squeeze()
        # Score every memory unit against the query
        scores = []
        for unit in self.memory_units.values():
            # Only the medium compression level stores an embedding
            if unit.compression_level == 2:
                try:
                    data = msgpack.unpackb(unit.compressed_data)
                    if 'embedding' in data:
                        emb = np.array(data['embedding']).squeeze()
                        similarity = float(np.dot(query_embed, emb) / (
                            np.linalg.norm(query_embed) * np.linalg.norm(emb)))
                        scores.append((similarity, unit))
                except Exception:
                    continue
        # Return the top_k most similar units
        scores.sort(key=lambda s: s[0], reverse=True)
        return [dict(unit.get_info(), score=round(sim, 4))
                for sim, unit in scores[:top_k]]
```
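Finally, a brief end-to-end sketch of the memory pool, assuming the classes above are in scope; the sample text and the importance value are placeholders chosen so the chunks land at the medium compression level, where the stored embeddings make semantic retrieval possible.

```python
# End-to-end sketch: ingest a document, then query the pool semantically.
pool = MemoryPool(max_size_mb=64)

report = (
    "The MCP prototype was benchmarked on long transcripts. "
    "Dynamic chunking kept related sentences together. "
    "Hierarchical compression reduced the memory footprint further."
)
hashes = pool.add_content(report, importance=0.5)
print(f"stored {len(hashes)} chunks, {pool.current_size} bytes used")

for hit in pool.retrieve_by_semantics("How was memory usage reduced?", top_k=3):
    print(hit)
```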