langchain(node.js) 实际应用==》md文件检索

本文代码可直接copy 就可使用

json 复制代码
  "dependencies": {
    "@chroma-core/default-embed": "^0.1.9",
    "@langchain/classic": "^1.0.5",
    "@langchain/community": "^1.0.4",
    "@langchain/core": "^1.0.6",
    "@langchain/langgraph": "^1.0.2",
    "@langchain/ollama": "^1.0.2",
    "@langchain/openai": "^1.1.2",
    "@langchain/textsplitters": "^1.0.1",
    "chromadb": "^3.1.6",
    "dotenv": "^17.2.3",
    "faiss-node": "^0.5.1",
    "langchain": "^1.0.6",
    "mammoth": "^1.11.0",
    "typeorm": "^0.3.27",
    "zod": "^4.1.12"
  }
ts 复制代码
import { ChatOpenAI } from "@langchain/openai";
export function getModel(modelName) {
    return new ChatOpenAI({
        apiKey: process.env.API_KEY,
        model: modelName || "gpt-4o-mini",
        configuration: {
            baseURL: process.env.BASE_URL,
        },
    });
}
export function getLocalFilePath(fileName) {
    const __dirname = path.dirname(fileURLToPath(import.meta.url));
    return path.join(__dirname, fileName);
}
ts 复制代码
import { MarkdownTextSplitter } from "@langchain/textsplitters"
import { Document } from "@langchain/core/documents";
import fs from "fs";
import { MemoryVectorStore } from "@langchain/classic/vectorstores/memory";
import path from "path";
// 帮我创建向量
import { OpenAIEmbeddings } from "@langchain/openai";
import { getModel, getLocalFilePath } from "../model/index.js";
import { createAgent, summarizationMiddleware, tool, trimMessages,createMiddleware } from "langchain"
import { MemorySaver } from "@langchain/langgraph"
import dotenv from "dotenv";
dotenv.config();
import { z } from "zod";


// markdown 向量位置
const vectorStorePath = getLocalFilePath("../assets/vectorStore.json");

const mainModel = getModel();
const summaryModel = getModel();
const checkpointer = new MemorySaver();


const embeddings = new OpenAIEmbeddings({
    apiKey: process.env.API_KEY,
    model: "text-embedding-3-small",
    dimensions: 512,
    configuration: {
        baseURL: process.env.BASE_URL,
    }
});

function getMakerDownPath() {
    const makerDownPath = getLocalFilePath("../assets/makerDown.md");
    return {
        makerDownContent: fs.readFileSync(makerDownPath, "utf-8"),
        makerDownPath,
    }
}

async function splitMakerDown() {
    const { makerDownContent, makerDownPath } = getMakerDownPath();
    const document = new Document({
        pageContent: makerDownContent,
        metadata: { source: makerDownPath },
    });
    const markdownSplitter = new MarkdownTextSplitter({
        chunkSize: 200,
        chunkOverlap: 100,
    });
    const chunks = await markdownSplitter.splitDocuments([document]);
    return chunks;
}

async function loadVectorStore() {
    try {
        if (!fs.existsSync(vectorStorePath)) {
            return null;
        }
        // 将向量从本地拿回来
        const data = JSON.parse(fs.readFileSync(vectorStorePath, "utf-8"));

        const documents = data.vectors.map((item) =>
            new Document({
                pageContent: item.content,
                metadata: { embedding: item.embedding }, // 将向量存储在元数据中
            })
        );

        const vectorStore = new MemoryVectorStore(embeddings);

        await vectorStore.addDocuments(documents)

        return vectorStore;
    }
    catch (error) {
        console.error("加载失败:", error);
        return null;
    }
}

async function saveVectorStore(vectorStore) {
    try {
        const indexData = {
            vectors: vectorStore.memoryVectors,  // 向量数据
            docstore: vectorStore.docstore       // 文档存储
        };

        const dir = path.dirname(vectorStorePath);
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
        }

        fs.writeFileSync(vectorStorePath, JSON.stringify(indexData, null, 2));
        console.log(`✓ 向量存储已保存到: ${vectorStorePath}`);
    } catch (error) {
        console.error("保存向量存储失败:", error);
    }

}

async function vectorizeMarkdown() {
    try {
        let vectorStore = await loadVectorStore();

        if (vectorStore) {
            console.log('已存在向量存储');
            const retriever = vectorStore.asRetriever({ k: 3 });
            return { vectorStore, retriever };
        }

        const chunks = await splitMakerDown();

        vectorStore = await MemoryVectorStore.fromDocuments(
            chunks,
            embeddings,
        );
        console.log('向量化完毕');

        await saveVectorStore(vectorStore);
        console.log('保存完毕');
        const retriever = vectorStore.asRetriever({ k: 3 });
        return { vectorStore, retriever };
    } catch (error) {
        console.error("向量化失败:", error);
        return null;
    }
}

const queryMarkdown = tool(async ({ question }) => {
    const { retriever } = await vectorizeMarkdown();
    const relevantDocs = await retriever.invoke(question);
    return relevantDocs
        .map(doc => doc.pageContent)
        .join('\n---\n');
}, {
    name: 'query_markdown',
    description: '查询文档',
    schema: z.object({
        question: z.string().describe("用户的问题"),
    })
});

const trimMessageHistory = createMiddleware({
    name: "TrimMessages",
    beforeModel: async (state) => {
        // 在模型调用前修剪消息
        const trimmed = await trimMessages(state.messages, {
            strategy: "last",
            maxTokens: 2000,
            startOn: "human",
            endOn: ["human", "tool"],
            tokenCounter: (msgs) => msgs.length,  // 自定义 token 计数器
        });
        return { messages: trimmed };
    },
});


const agent = createAgent({
    model: mainModel,
    tools: [queryMarkdown],
    systemPrompt: `你是一个文档查看助手,
    你可以根据用户的问题查看文档内容。
    请你根据文档的内容回答用户的问题。如果文档中没有相关内容,
    请回答"文档中没有相关内容"。`,
    checkpointer,
    middleware: [
        // 先对消息进行总结
        summarizationMiddleware({
            model: summaryModel,
            trigger: { tokens: 1000 },
            keep: { messages: 25 },
        }),
        trimMessageHistory
    ],
});

const userId = "user-123";

async function runChat(userInput) {
    const result = await agent.invoke(
        { messages: [{ role: "user", content: userInput }] },
        { configurable: { thread_id: userId } }
    );
    const lastMessage = result.messages[result.messages.length - 1];
    return lastMessage.content;
}

const response = await runChat("vue3那年发布,并且介绍一下vue3的新特性");
console.log(response);
相关推荐
Mr_li8 小时前
NestJS 集成 TypeORM 的最优解
node.js·nestjs
UIUV10 小时前
node:child_process spawn 模块学习笔记
javascript·后端·node.js
是一碗螺丝粉14 小时前
5分钟上手LangChain.js:用DeepSeek给你的App加上AI能力
前端·人工智能·langchain
是一碗螺丝粉14 小时前
LangChain 核心组件深度解析:模型与提示词模板
前端·langchain·aigc
大模型真好玩1 天前
大模型训练全流程实战指南工具篇(七)——EasyDataset文档处理流程
人工智能·langchain·deepseek
前端付豪1 天前
Nest 项目小实践之注册登陆
前端·node.js·nestjs
天蓝色的鱼鱼1 天前
Node.js 中间层退潮:从“前端救星”到“成本噩梦”
前端·架构·node.js
codingWhat2 天前
uniapp 多地区、多平台、多环境打包方案
前端·架构·node.js
小p2 天前
nodejs学习: 服务器资源CPU、内存、硬盘
node.js
Mr_li2 天前
手摸手,教你如何优雅的书写 NestJS 服务配置
node.js·nestjs