LangChain(Node.js)实际应用:Markdown 文件检索

本文代码可直接复制使用。

json 复制代码
  "dependencies": {
    "@chroma-core/default-embed": "^0.1.9",
    "@langchain/classic": "^1.0.5",
    "@langchain/community": "^1.0.4",
    "@langchain/core": "^1.0.6",
    "@langchain/langgraph": "^1.0.2",
    "@langchain/ollama": "^1.0.2",
    "@langchain/openai": "^1.1.2",
    "@langchain/textsplitters": "^1.0.1",
    "chromadb": "^3.1.6",
    "dotenv": "^17.2.3",
    "faiss-node": "^0.5.1",
    "langchain": "^1.0.6",
    "mammoth": "^1.11.0",
    "typeorm": "^0.3.27",
    "zod": "^4.1.12"
  }
ts 复制代码
import path from "path";
import { fileURLToPath } from "url";
import { ChatOpenAI } from "@langchain/openai";
/**
 * Builds a ChatOpenAI client configured from environment variables.
 *
 * Reads `API_KEY` and `BASE_URL` from `process.env`.
 *
 * @param {string} [modelName] - Model identifier; any falsy value falls back to "gpt-4o-mini".
 * @returns {ChatOpenAI} A new chat model instance.
 */
export function getModel(modelName) {
    const { API_KEY, BASE_URL } = process.env;
    const options = {
        apiKey: API_KEY,
        model: modelName || "gpt-4o-mini",
        configuration: { baseURL: BASE_URL },
    };
    return new ChatOpenAI(options);
}
/**
 * Resolves a path relative to the directory that contains this module file.
 *
 * ES modules have no `__dirname`, so the directory is derived from
 * `import.meta.url`. Requires `path` and `fileURLToPath` to be imported
 * at the top of this module.
 *
 * @param {string} fileName - File name or relative path (e.g. "../assets/foo.json").
 * @returns {string} The joined, normalized filesystem path.
 */
export function getLocalFilePath(fileName) {
    // Named `moduleDir` rather than `__dirname` to avoid suggesting the CommonJS global exists here.
    const moduleDir = path.dirname(fileURLToPath(import.meta.url));
    return path.join(moduleDir, fileName);
}
ts 复制代码
import { MarkdownTextSplitter } from "@langchain/textsplitters"
import { Document } from "@langchain/core/documents";
import fs from "fs";
import { MemoryVectorStore } from "@langchain/classic/vectorstores/memory";
import path from "path";
// 帮我创建向量
import { OpenAIEmbeddings } from "@langchain/openai";
import { getModel, getLocalFilePath } from "../model/index.js";
import { createAgent, summarizationMiddleware, tool, trimMessages,createMiddleware } from "langchain"
import { MemorySaver } from "@langchain/langgraph"
import dotenv from "dotenv";
dotenv.config();
import { z } from "zod";


// Location of the persisted markdown vectors (JSON file), resolved via getLocalFilePath.
const vectorStorePath = getLocalFilePath("../assets/vectorStore.json");

// Two model instances with the default model name: one for answering, one for summarization.
const mainModel = getModel();
const summaryModel = getModel();

// In-memory checkpointer handed to the agent.
const checkpointer = new MemorySaver();

// Embedding client settings; credentials and endpoint come from the environment.
const embeddingOptions = {
    apiKey: process.env.API_KEY,
    model: "text-embedding-3-small",
    dimensions: 512,
    configuration: {
        baseURL: process.env.BASE_URL,
    },
};
const embeddings = new OpenAIEmbeddings(embeddingOptions);

/**
 * Reads the source markdown file synchronously.
 *
 * @returns {{ makerDownContent: string, makerDownPath: string }}
 *   The file's UTF-8 text and the path it was read from.
 */
function getMakerDownPath() {
    const makerDownPath = getLocalFilePath("../assets/makerDown.md");
    const makerDownContent = fs.readFileSync(makerDownPath, "utf-8");
    return { makerDownContent, makerDownPath };
}

/**
 * Splits the source markdown file into chunks.
 *
 * The whole file is wrapped in a single Document (with its path stored as
 * `metadata.source`) and passed to a MarkdownTextSplitter configured with
 * chunkSize 200 and chunkOverlap 100.
 *
 * @returns {Promise<Document[]>} The chunks produced by the splitter.
 */
async function splitMakerDown() {
    const source = getMakerDownPath();
    const wholeFile = new Document({
        pageContent: source.makerDownContent,
        metadata: { source: source.makerDownPath },
    });
    const splitter = new MarkdownTextSplitter({ chunkSize: 200, chunkOverlap: 100 });
    return splitter.splitDocuments([wholeFile]);
}

/**
 * Restores the vector store previously written by saveVectorStore.
 *
 * The persisted embeddings are inserted directly with `addVectors`, so
 * loading does not call the embeddings API again. The original chunk
 * metadata is restored as well.
 *
 * @returns {Promise<MemoryVectorStore|null>} The restored store, or null when
 *   the file is missing, holds no usable vectors, or cannot be read/parsed.
 *   Callers treat null as "rebuild from the markdown source".
 */
async function loadVectorStore() {
    try {
        if (!fs.existsSync(vectorStorePath)) {
            return null;
        }
        // Read the persisted vectors back from disk.
        const data = JSON.parse(fs.readFileSync(vectorStorePath, "utf-8"));

        const records = Array.isArray(data?.vectors) ? data.vectors : [];
        const isUsable = records.length > 0 && records.every(
            (item) => typeof item?.content === "string" && Array.isArray(item?.embedding)
        );
        if (!isUsable) {
            // An empty or malformed file would yield a store that never returns results;
            // report "nothing loaded" so the caller re-vectorizes instead.
            return null;
        }

        const documents = records.map((item) =>
            new Document({
                pageContent: item.content,
                metadata: item.metadata ?? {},
                id: item.id,
            })
        );
        const vectors = records.map((item) => item.embedding);

        const vectorStore = new MemoryVectorStore(embeddings);

        // addVectors reuses the stored embeddings; addDocuments would re-embed every chunk.
        await vectorStore.addVectors(vectors, documents);

        return vectorStore;
    }
    catch (error) {
        console.error("加载失败:", error);
        return null;
    }
}

/**
 * Persists a vector store to `vectorStorePath` as pretty-printed JSON.
 *
 * Best effort: any failure is logged and swallowed, so the returned promise
 * always resolves.
 *
 * @param {object} vectorStore - Store exposing `memoryVectors` (and optionally `docstore`).
 * @returns {Promise<void>}
 */
async function saveVectorStore(vectorStore) {
    try {
        // NOTE(review): `docstore` looks like it is undefined for MemoryVectorStore, in which
        // case JSON.stringify omits the key — confirm whether it is needed at all.
        const { memoryVectors, docstore } = vectorStore;

        // Create the target directory on first save.
        const targetDir = path.dirname(vectorStorePath);
        const dirExists = fs.existsSync(targetDir);
        if (!dirExists) {
            fs.mkdirSync(targetDir, { recursive: true });
        }

        const serialized = JSON.stringify({ vectors: memoryVectors, docstore }, null, 2);
        fs.writeFileSync(vectorStorePath, serialized);
        console.log(`✓ 向量存储已保存到: ${vectorStorePath}`);
    } catch (error) {
        console.error("保存向量存储失败:", error);
    }
}

/**
 * Returns a ready-to-query vector store for the markdown file.
 *
 * Tries the persisted store first; when none can be loaded, splits the
 * markdown, embeds the chunks, and saves the result.
 *
 * @returns {Promise<{ vectorStore: MemoryVectorStore, retriever: object }|null>}
 *   The store plus a retriever created with `{ k: 3 }`, or null when any step throws.
 */
async function vectorizeMarkdown() {
    try {
        let store = await loadVectorStore();

        if (store) {
            console.log('已存在向量存储');
        } else {
            // Nothing persisted yet: build the store from the markdown source.
            const pieces = await splitMakerDown();
            store = await MemoryVectorStore.fromDocuments(pieces, embeddings);
            console.log('向量化完毕');

            await saveVectorStore(store);
            console.log('保存完毕');
        }

        return { vectorStore: store, retriever: store.asRetriever({ k: 3 }) };
    } catch (error) {
        console.error("向量化失败:", error);
        return null;
    }
}

/**
 * Agent tool "query_markdown": retrieves the chunks most relevant to the
 * user's question and returns their text joined by "\n---\n".
 *
 * Throws a descriptive Error when the vector store cannot be prepared,
 * because vectorizeMarkdown() returns null on failure.
 */
const queryMarkdown = tool(async ({ question }) => {
    const prepared = await vectorizeMarkdown();
    if (!prepared) {
        // Without this guard, destructuring null raises an opaque TypeError.
        throw new Error("向量存储不可用,无法查询文档");
    }
    const relevantDocs = await prepared.retriever.invoke(question);
    return relevantDocs
        .map((doc) => doc.pageContent)
        .join('\n---\n');
}, {
    name: 'query_markdown',
    description: '查询文档',
    schema: z.object({
        question: z.string().describe("用户的问题"),
    })
});

/**
 * Middleware that trims the message history before each model call.
 *
 * The custom tokenCounter returns the number of messages, not real tokens.
 * NOTE(review): `maxTokens: 2000` therefore presumably acts as a cap on
 * message count — confirm this is intended.
 */
const trimMessageHistory = createMiddleware({
    name: "TrimMessages",
    async beforeModel(state) {
        const trimOptions = {
            strategy: "last",
            maxTokens: 2000,
            startOn: "human",
            endOn: ["human", "tool"],
            // Counts one unit per message.
            tokenCounter: (msgs) => msgs.length,
        };
        const messages = await trimMessages(state.messages, trimOptions);
        return { messages };
    },
});


// System prompt for the document assistant (kept verbatim, including its line breaks).
const agentSystemPrompt = `你是一个文档查看助手,
    你可以根据用户的问题查看文档内容。
    请你根据文档的内容回答用户的问题。如果文档中没有相关内容,
    请回答"文档中没有相关内容"。`;

// Summarization middleware; listed before the trimming middleware below.
const historySummarizer = summarizationMiddleware({
    model: summaryModel,
    trigger: { tokens: 1000 },
    keep: { messages: 25 },
});

// The agent: main model + markdown query tool + checkpointer + history middleware.
const agent = createAgent({
    model: mainModel,
    tools: [queryMarkdown],
    systemPrompt: agentSystemPrompt,
    checkpointer,
    middleware: [historySummarizer, trimMessageHistory],
});

// Used as the thread_id passed to the agent on every invocation.
const userId = "user-123";

/**
 * Sends one user message to the agent and returns the content of the last
 * message in the result.
 *
 * @param {string} userInput - The user's message text.
 * @returns {Promise<*>} `content` of the final message returned by the agent.
 */
async function runChat(userInput) {
    const input = { messages: [{ role: "user", content: userInput }] };
    const config = { configurable: { thread_id: userId } };
    const { messages } = await agent.invoke(input, config);
    return messages.at(-1).content;
}

// Demo run: ask a sample question and print the agent's answer.
console.log(await runChat("vue3那年发布,并且介绍一下vue3的新特性"));
相关推荐
梦帮科技11 小时前
Node.js配置生成器CLI工具开发实战
前端·人工智能·windows·前端框架·node.js·json
kjkdd11 小时前
6.1 核心组件(Agent)
python·ai·语言模型·langchain·ai编程
Misnice13 小时前
Webpack、Vite、Rsbuild区别
前端·webpack·node.js
渣渣苏15 小时前
Langchain实战快速入门
人工智能·python·langchain
小天呐16 小时前
01—langchain 架构
langchain
香芋Yu19 小时前
【LangChain1.0】第九篇 Agent 架构设计
langchain·agent·架构设计
kjkdd20 小时前
5. LangChain设计理念和发展历程
python·语言模型·langchain·ai编程
ASKED_20191 天前
Langchain学习笔记一 -基础模块以及架构概览
笔记·学习·langchain
zhengfei6111 天前
【AI平台】- 基于大模型的知识库与知识图谱智能体开发平台
vue.js·语言模型·langchain·知识图谱·多分类
玄同7652 天前
LangChain 1.0 模型接口:多厂商集成与统一调用
开发语言·人工智能·python·langchain·知识图谱·rag·智能体