WPS自定义公式，相似度匹配

VLOOKUP 公式的模糊匹配使用起来一言难尽，于是借助豆包做了这个相似度匹配公式；经测试，1500 对 1500 的匹配大约 3 分钟完成。使用方法：打开空白表格，将代码复制到宏编辑器，保存为 .xlam 文件，再加载到 WPS 中，之后就可以像普通公式一样使用。
复制代码
/**
 * [Custom worksheet function] Multi-granularity similarity lookup
 * (works across sheets, supports a threshold, reports the best score on a miss).
 *
 * Usage:
 *   =mlooklup(value, column)        -> best match, no threshold
 *   =mlooklup(value, column, 0.8)   -> returned only when the score is >= 80%
 *   =mlooklup(value, column, 80%)   -> same as above
 *   =mlooklup(value, column, 1.2)   -> any threshold above 1: exact matches only
 *   =mlooklup(value, column, 0)     -> no threshold, best match
 *
 * The column identified by `matchRange.Column` on `matchRange.Worksheet` is
 * scanned from row 2 (row 1 is treated as a header) down to the last
 * non-empty row. Each non-empty cell is normalized and scored against the
 * normalized lookup value; the first cell with the highest score wins.
 *
 * The returned match is the cell's own (trimmed) text, not the normalized
 * form used for scoring, so the result can be fed back into exact lookups.
 *
 * @param {Range} target - Cell holding the lookup value (its `Value2` is read).
 * @param {Range} matchRange - Range whose worksheet and column are searched.
 * @param {number|string} [threshold=null] - Minimum score; a number, or a
 *   string containing "%" which is divided by 100. Unparseable values count as 0.
 * @returns {string} The best matching cell text, or a "no match" message that
 *   includes the highest score found.
 */
function mlooklup(target, matchRange, threshold = null) {
    // Direction constant passed to Range.End (xlUp in the Excel object model).
    const XL_UP = -4162;

    // Use the worksheet of the selected range, not the active sheet, so
    // cross-sheet references work.
    const lookupSheet = matchRange.Worksheet;
    const matchColNum = matchRange.Column;
    // Last non-empty row of the column; fall back to 1 so the loop is a no-op.
    const maxRow = lookupSheet.Cells(lookupSheet.Rows.Count, matchColNum).End(XL_UP).Row || 1;

    const targetValue = (target?.Value2 ?? "").toString().trim();
    if (!targetValue) return "目标值不能为空";

    // Convert full-width characters first so that full-width "ＩＩ" is also
    // seen by the Roman-numeral replacement.
    const tClean = replaceIIandI(cleanText(targetValue));

    let bestItem = null;   // original text of the best matching cell
    let bestScore = 0;     // highest combined similarity so far (0..1)

    for (let row = 2; row <= maxRow; row++) {
        try {
            const cellValue = (lookupSheet.Cells(row, matchColNum).Value2 ?? "").toString().trim();
            if (!cellValue) continue;

            const cClean = replaceIIandI(cleanText(cellValue));

            // Identical normalized strings are a 100% match by definition;
            // this keeps very short strings from being scored below 1.
            const sim = tClean === cClean ? 1 : getMultiSimilarity(tClean, cClean);

            if (sim > bestScore) {
                bestScore = sim;
                // Keep the cell's own text, not the normalized variant.
                bestItem = cellValue;
            }
            // Nothing can beat a perfect score, so stop scanning.
            if (bestScore === 1) break;
        } catch (e) {
            // A single bad row must not abort the whole lookup; log and move on.
            console.log(`【行${row}】异常：${e.message}`);
        }
    }

    // Normalize the threshold: "80%" -> 0.8, anything unparseable -> 0.
    let finalThreshold = 0;
    if (threshold !== null && threshold !== undefined && threshold !== "") {
        let val = threshold;
        if (typeof val === "string" && val.includes("%")) {
            val = parseFloat(val.replace("%", "")) / 100;
        }
        finalThreshold = Number(val) || 0;
    }

    const bestPercent = (bestScore * 100).toFixed(2);
    const noMatch = `无匹配，最高匹配度 ${bestPercent}%`;

    if (finalThreshold > 1) {
        // Threshold above 1: only a perfect score is accepted.
        return bestScore === 1 ? bestItem : noMatch;
    }
    if (finalThreshold <= 0) {
        // No threshold: return whatever scored best, if anything did.
        return bestItem || noMatch;
    }
    // Threshold within (0, 1]: the best score must reach it.
    return bestScore >= finalThreshold ? bestItem : noMatch;
}

/**
 * Normalizes text for comparison: full-width ASCII variants are converted to
 * their half-width form, the result is lower-cased, and leading/trailing
 * whitespace is removed.
 *
 * Only the full-width ASCII variants U+FF01..U+FF5E (and the ideographic
 * space U+3000) are converted. Other characters in the U+FF00..U+FFFF area
 * (half-width katakana, "￥", U+FFFD, ...) have no ASCII counterpart at a
 * fixed offset and are left untouched.
 *
 * @param {string} str - Raw string.
 * @returns {string} Normalized string (half-width, lower case, trimmed).
 */
function cleanText(str) {
    // Distance between a full-width ASCII variant and its ASCII code point.
    const FULLWIDTH_OFFSET = 0xfee0;
    const halfWidth = str
        .replace(/[\uff01-\uff5e]/g, ch => String.fromCharCode(ch.charCodeAt(0) - FULLWIDTH_OFFSET))
        .replace(/\u3000/g, " ");
    return halfWidth.toLowerCase().trim();
}

/**
 * Replaces Roman-numeral letters with digits: every "II" becomes "2", then
 * every remaining "I" becomes "1". Both passes ignore case and apply to any
 * occurrence of the letter, not only to standalone numerals.
 *
 * @param {string} str - Text to normalize.
 * @returns {string} Text with "II"/"I" (any case) replaced by "2"/"1".
 */
function replaceIIandI(str) {
    // Pairs must be handled first, otherwise "II" would turn into "11".
    const pairsReplaced = str.replace(/II/gi, "2");
    const singlesReplaced = pairsReplaced.replace(/I/gi, "1");
    return singlesReplaced;
}

/**
 * Combined similarity of two normalized strings: the unweighted mean of the
 * character-set, 2-gram, 3-gram and edit-distance similarities.
 *
 * @param {string} t - Normalized lookup text.
 * @param {string} c - Normalized candidate text.
 * @returns {number} Mean of the four component scores.
 */
function getMultiSimilarity(t, c) {
    const componentScores = [
        charSimilarity(t, c),           // single characters
        ngramSimilarity(t, c, 2),       // character pairs
        ngramSimilarity(t, c, 3),       // character triples
        editDistanceSimilarity(t, c),   // edit distance
    ];

    let total = 0;
    for (const score of componentScores) {
        total += score;
    }
    // Equal weights for all four components.
    return total / 4;
}

/**
 * Jaccard similarity of the sets of characters (UTF-16 code units) in the
 * two strings: |A ∩ B| / |A ∪ B|. Returns 0 when both strings are empty.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity in the range 0..1.
 */
function charSimilarity(a, b) {
    const left = new Set(a.split(''));
    const right = new Set(b.split(''));

    let shared = 0;
    for (const ch of left) {
        if (right.has(ch)) shared += 1;
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const combined = left.size + right.size - shared;
    if (combined === 0) return 0;
    return shared / combined;
}

/**
 * Jaccard similarity of the sets of contiguous n-character substrings
 * (n-grams) of the two strings; used with n = 2 and n = 3.
 *
 * Identical strings always score 1, including strings shorter than `n`
 * that have no n-grams at all. Otherwise, when neither string yields any
 * n-gram, the score is 0.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @param {number} n - Length of each n-gram.
 * @returns {number} Similarity in the range 0..1.
 */
function ngramSimilarity(a, b, n) {
    // Equal strings are a perfect match even when they are too short to
    // contain a single n-gram (both sets empty would otherwise give 0).
    if (a === b) return 1;

    // Collect the distinct n-character substrings of s.
    const toGramSet = s => {
        const grams = new Set();
        for (let i = 0; i + n <= s.length; i++) {
            grams.add(s.slice(i, i + n));
        }
        return grams;
    };

    const gramsA = toGramSet(a);
    const gramsB = toGramSet(b);

    let shared = 0;
    for (const gram of gramsA) {
        if (gramsB.has(gram)) shared += 1;
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const combined = gramsA.size + gramsB.size - shared;
    return combined === 0 ? 0 : shared / combined;
}

/**
 * Similarity derived from the Levenshtein edit distance:
 * 1 - distance / max(length of a, length of b).
 * Two empty strings are considered identical and score 1.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity in the range 0..1.
 */
function editDistanceSimilarity(a, b) {
    const rows = a.length;
    const cols = b.length;
    const longest = Math.max(rows, cols);
    if (longest === 0) return 1;

    // Only the previous row of the DP table is needed to compute the next
    // one. Row 0 is the cost of building each prefix of b from nothing.
    let previous = Array.from({ length: cols + 1 }, (_, j) => j);

    for (let i = 1; i <= rows; i++) {
        // Column 0 is the cost of deleting the first i characters of a.
        const current = [i];
        for (let j = 1; j <= cols; j++) {
            const substituteCost = a[i - 1] === b[j - 1] ? 0 : 1;
            const viaDelete = previous[j] + 1;
            const viaInsert = current[j - 1] + 1;
            const viaSubstitute = previous[j - 1] + substituteCost;
            current.push(Math.min(viaDelete, viaInsert, viaSubstitute));
        }
        previous = current;
    }

    // Scale the distance into a 0..1 similarity.
    return 1 - previous[cols] / longest;
}