前端获取表格数据

1、使用xlsx插件

复制代码

npm i xlsx

2、创建excelParser.ts文件

复制代码

import * as XLSX from "xlsx";

/**
 * File formats this module accepts. Each value is the lower-case file
 * extension without the leading dot.
 *
 * The object is frozen so that an accidental assignment somewhere else in the
 * application cannot silently change the values the parser compares against.
 */
export const SUPPORTED_FORMATS = Object.freeze({
  XLS: "xls",
  XLSX: "xlsx",
  CSV: "csv",
});

/**
 * MIME type string for each supported format, keyed by the format's
 * SUPPORTED_FORMATS value ("xls", "xlsx", "csv").
 *
 * Frozen so the shared lookup table cannot be modified by accident.
 */
export const FILE_MIME_TYPES = Object.freeze({
  [SUPPORTED_FORMATS.XLS]: "application/vnd.ms-excel",
  [SUPPORTED_FORMATS.XLSX]:
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  [SUPPORTED_FORMATS.CSV]: "text/csv",
});

/**
 * Work out which supported format a file is.
 *
 * The file name's extension is checked first (case-insensitively); the MIME
 * type is consulted only when no extension matches.
 *
 * @param {{ name?: string, type?: string }} file - File-like object. A missing
 *   or non-string `name` / `type` is treated as an empty string, so Blob-like
 *   or hand-built objects yield `null` instead of throwing.
 * @returns {string|null} One of the SUPPORTED_FORMATS values, or `null` when
 *   neither the extension nor the MIME type is recognised.
 */
export function detectFileType(file) {
  const source = file || {};
  const fileName =
    typeof source.name === "string" ? source.name.toLowerCase() : "";
  const mimeType = typeof source.type === "string" ? source.type : "";

  if (fileName.endsWith(".xls")) return SUPPORTED_FORMATS.XLS;
  if (fileName.endsWith(".xlsx")) return SUPPORTED_FORMATS.XLSX;
  if (fileName.endsWith(".csv")) return SUPPORTED_FORMATS.CSV;

  // Fall back to the MIME type. "spreadsheet" is tested before "excel" so the
  // OOXML type maps to XLSX and the legacy "vnd.ms-excel" type maps to XLS.
  if (mimeType.includes("spreadsheet")) return SUPPORTED_FORMATS.XLSX;
  if (mimeType.includes("excel")) return SUPPORTED_FORMATS.XLS;
  if (mimeType.includes("csv")) return SUPPORTED_FORMATS.CSV;

  return null;
}

/**
 * Read a spreadsheet file (XLS, XLSX or CSV) with a FileReader and summarise
 * its contents.
 *
 * Every format is read as an ArrayBuffer. CSV is deliberately not read as
 * text: the raw bytes are handed to parseCSVFile so the character encoding can
 * be detected there instead of being assumed by the reader.
 *
 * @param {File} file - The file to parse.
 * @returns {Promise<object>} Resolves with the value returned by
 *   extractWorkbookInfo. Rejects with an Error when the format is not
 *   supported, when the file cannot be read, or when parsing throws.
 */
export function parseTableFile(file) {
  return new Promise((resolve, reject) => {
    const fileType = detectFileType(file);

    if (!fileType) {
      reject(new Error("不支持的文件格式"));
      return;
    }

    const reader = new FileReader();

    reader.onload = function () {
      try {
        const buffer = reader.result;
        const workbook =
          fileType === SUPPORTED_FORMATS.CSV
            ? parseCSVFile(buffer, file.name)
            : XLSX.read(new Uint8Array(buffer), { type: "array" });

        resolve(extractWorkbookInfo(workbook, fileType));
      } catch (error) {
        reject(error);
      }
    };

    // Reject with the reader's error object rather than the raw event, so
    // callers receive something with a `message` like every other rejection.
    reader.onerror = function () {
      reject(reader.error || new Error("文件读取失败"));
    };

    reader.readAsArrayBuffer(file);
  });
}

/**
 * Split delimiter-separated text into rows of cell strings, honouring
 * double-quoted fields.
 *
 * - Inside quotes the delimiter and line breaks are ordinary characters and
 *   `""` stands for one literal quote.
 * - Unquoted cells are trimmed; quoted cells keep their content verbatim.
 * - Lines that contain nothing but whitespace are skipped.
 *
 * @param {string} text - Decoded file content.
 * @param {string} delimiter - Single-character field separator.
 * @returns {string[][]} One array of cells per non-blank line.
 */
function splitDelimitedRows(text, delimiter) {
  const rows = [];
  let row = [];
  let cell = "";
  let inQuotes = false; // currently between an opening and a closing quote
  let cellWasQuoted = false; // the current cell began with a quote
  let rowHasContent = false; // the current line holds more than whitespace

  const endCell = () => {
    row.push(cellWasQuoted ? cell : cell.trim());
    cell = "";
    cellWasQuoted = false;
  };
  const endRow = () => {
    endCell();
    if (rowHasContent) rows.push(row);
    row = [];
    rowHasContent = false;
  };

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    const isBlank = ch.trim() === "";

    if (inQuotes) {
      if (ch !== '"') {
        cell += ch;
      } else if (text[i + 1] === '"') {
        // Escaped quote: keep one and skip its partner.
        cell += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === delimiter) {
      // A whitespace delimiter (tab) alone does not make a line non-blank.
      if (!isBlank) rowHasContent = true;
      endCell();
    } else if (ch === "\n") {
      endRow();
    } else if (ch === '"' && !cellWasQuoted && cell.trim() === "") {
      // Opening quote; whitespace collected before it is discarded.
      cell = "";
      cellWasQuoted = true;
      inQuotes = true;
      rowHasContent = true;
    } else if (!(cellWasQuoted && isBlank)) {
      // Whitespace after a closing quote (including the "\r" of CRLF) is
      // dropped; everything else is part of the cell.
      cell += ch;
      if (!isBlank) rowHasContent = true;
    }
  }

  // Flush the final line when the text does not end with a newline.
  endRow();
  return rows;
}

/**
 * Build a workbook from the raw bytes of a CSV file.
 *
 * The bytes are decoded with decodeCSVWithUint8Array, the delimiter is chosen
 * by detectCSVDelimiter, and the text is split with a quote-aware parser so
 * that quoted fields containing the delimiter, line breaks or escaped quotes
 * stay in one cell. All cells are stored as strings in a single sheet named
 * "default".
 *
 * If any of that throws, the bytes are decoded as UTF-8 and handed to
 * XLSX.read as a last resort.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw file content.
 * @param {string} fileName - Name of the source file. Currently unused; kept
 *   so existing callers do not need to change.
 * @returns {object} The workbook object produced by the XLSX library.
 * @throws Rethrows the fallback's error when both strategies fail.
 */
function parseCSVFile(arrayBuffer, fileName) {
  try {
    const text = decodeCSVWithUint8Array(new Uint8Array(arrayBuffer));
    const delimiter = detectCSVDelimiter(text);

    const worksheet = XLSX.utils.aoa_to_sheet(
      splitDelimitedRows(text, delimiter)
    );

    const workbook = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(workbook, worksheet, "default");
    return workbook;
  } catch (error) {
    console.error("XLSX 直接解析失败:", error);

    // Fallback: decode as UTF-8 text and let the library parse it.
    try {
      const text = new TextDecoder("utf-8").decode(arrayBuffer);
      return XLSX.read(text, { type: "string" });
    } catch (fallbackError) {
      console.error("所有解析方法都失败:", fallbackError);
      throw fallbackError;
    }
  }
}

/**
 * Decode the raw bytes of a CSV file into text, guessing the encoding.
 *
 * Order of attempts:
 *   1. Strict UTF-8 (`fatal: true`). Well-formed UTF-8 is accepted at once.
 *      This must come first: a GB18030 decoder does not reject UTF-8 bytes, it
 *      turns them into other Chinese characters, which the isValidChineseText
 *      heuristic can then mistake for a correct result.
 *   2. GB18030, accepted only when isValidChineseText approves the result.
 *      The "gbk" and "gb2312" labels are not tried separately because the
 *      Encoding Standard maps both to the same decoder as GB18030.
 *   3. Lenient UTF-8 as the final default.
 *
 * @param {Uint8Array} uint8Array - Raw file bytes.
 * @returns {string} The decoded text.
 */
function decodeCSVWithUint8Array(uint8Array) {
  try {
    const text = new TextDecoder("utf-8", { fatal: true }).decode(uint8Array);
    console.log(`解码成功，使用编码: utf-8`);
    return text;
  } catch (error) {
    // Expected for non-UTF-8 files: fall through to the legacy encoding.
  }

  const legacyEncoding = "gb18030";
  try {
    const text = decodeUint8Array(uint8Array, legacyEncoding);
    if (isValidChineseText(text)) {
      console.log(`解码成功，使用编码: ${legacyEncoding}`);
      return text;
    }
  } catch (error) {
    console.warn(`编码 ${legacyEncoding} 失败:`, error);
  }

  // Nothing matched; decode leniently as UTF-8.
  return decodeUint8Array(uint8Array, "utf-8");
}

/**
 * Heuristic check that decoded text looks like a correctly decoded,
 * comma-separated file with Chinese content.
 *
 * The text passes only when all of the following hold:
 *   - it is a string of at least 10 characters with at least 2 non-blank lines;
 *   - the first line contains a character in U+4E00..U+9FA5;
 *   - at least 3 of the (up to 9) lines after the header have a comma-separated
 *     field count within 1 of the header's;
 *   - the text contains at least 5 characters in U+4E00..U+9FA5;
 *   - it contains no U+FFFD replacement character and none of the
 *     "ï¿½" / "Ã?" / "â?" sequences.
 *
 * A summary of the measurements is written to the console.
 *
 * @param {string} text - Decoded text to judge.
 * @returns {boolean} `true` when every check passes.
 */
function isValidChineseText(text) {
  const unusable = !text || typeof text !== "string" || text.length < 10;
  if (unusable) {
    console.log("文本过短或无效");
    return false;
  }

  // Drop a leading byte-order mark before measuring anything.
  const body = text.replace(/^\uFEFF/, "");

  const rows = [];
  for (const rawLine of body.split("\n")) {
    const trimmed = rawLine.trim();
    if (trimmed.length > 0) {
      rows.push(trimmed);
    }
  }

  if (rows.length < 2) {
    console.log("CSV 行数不足");
    return false;
  }

  const countFields = (line) => line.split(",").length;

  // The first line is treated as the header.
  const header = rows[0];
  const headerFieldCount = countFields(header);
  const headerIsChinese = /[\u4e00-\u9fa5]/.test(header);

  // Only rows 1..9 are sampled; a difference of one field is tolerated.
  const matchingRows = rows
    .slice(1, 10)
    .filter((line) => Math.abs(countFields(line) - headerFieldCount) <= 1)
    .length;

  const hanMatches = body.match(/[\u4e00-\u9fa5]/g) || [];
  const hanTotal = hanMatches.length;
  const hasReplacementChar = /[\uFFFD]/.test(body);
  const hasGarbledSequence = /ï¿½|Ã[^ ]|â[^ ]/.test(body);

  console.log(`📋 CSV 文件验证:
    🏷️  表头字段: ${headerFieldCount} 个
    🇨🇳 表头含中文: ${headerIsChinese}
    📊 数据行一致性: ${matchingRows}/${Math.min(rows.length - 1, 10)}
    🔤 中文字符数: ${hanTotal}
    🚫 包含乱码: ${hasReplacementChar}
    ⚠️  编码问题: ${hasGarbledSequence}`);

  const checks = [
    headerIsChinese,
    matchingRows >= 3,
    hanTotal >= 5,
    !hasReplacementChar,
    !hasGarbledSequence,
  ];
  return checks.every(Boolean);
}

/**
 * Decode bytes with a TextDecoder for the given encoding label.
 *
 * @param {Uint8Array} uint8Array - Bytes to decode.
 * @param {string} encoding - Encoding label passed to the TextDecoder
 *   constructor.
 * @returns {string} The decoded text.
 * @throws {Error} When constructing the decoder or decoding throws. The
 *   original error is attached as `cause` so the real reason is not lost.
 */
function decodeUint8Array(uint8Array, encoding) {
  try {
    return new TextDecoder(encoding).decode(uint8Array);
  } catch (error) {
    throw new Error(`TextDecoder 不支持编码: ${encoding}`, { cause: error });
  }
}

/**
 * Guess the field delimiter of CSV text from its first line.
 *
 * Counts how often each candidate (comma, semicolon, tab, pipe) occurs in the
 * first line outside double-quoted sections, so a delimiter-like character
 * inside a quoted header cell does not skew the result. The most frequent
 * candidate wins; ties go to the earlier candidate, and a comma is returned
 * when none occurs.
 *
 * @param {string} csvText - Decoded CSV content.
 * @returns {string} The chosen delimiter character.
 */
function detectCSVDelimiter(csvText) {
  const firstLine = csvText.split("\n")[0];
  const candidates = [",", ";", "\t", "|"];
  const counts = new Map(candidates.map((candidate) => [candidate, 0]));

  // Toggling on every quote also handles the `""` escape: two toggles cancel.
  let inQuotes = false;
  for (const ch of firstLine) {
    if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && counts.has(ch)) {
      counts.set(ch, counts.get(ch) + 1);
    }
  }

  let bestDelimiter = ",";
  let maxCount = 0;
  for (const candidate of candidates) {
    const count = counts.get(candidate);
    if (count > maxCount) {
      maxCount = count;
      bestDelimiter = candidate;
    }
  }

  return bestDelimiter;
}

/**
 * Summarise a workbook: one entry per sheet plus a few workbook-level fields.
 *
 * NOTE(review): `fileName` is filled with the first sheet's name, not with the
 * name of the uploaded file — confirm whether consumers rely on that.
 *
 * @param {object} workbook - Workbook exposing `SheetNames` (array of names)
 *   and `Sheets` (worksheets keyed by name).
 * @param {string} fileType - Format identifier, copied into the result and
 *   passed on to extractSheetInfo.
 * @returns {{fileType: string, fileName: string, sheetCount: number,
 *   sheetNames: string[], sheets: object[]}} Workbook summary; `sheets` holds
 *   the extractSheetInfo result for each sheet, in `SheetNames` order.
 */
function extractWorkbookInfo(workbook, fileType) {
  const sheetNames = workbook.SheetNames;

  const sheets = sheetNames.map((name) =>
    extractSheetInfo(workbook.Sheets[name], name, fileType)
  );

  return {
    fileType,
    fileName: sheetNames[0],
    sheetCount: sheetNames.length,
    sheetNames,
    sheets,
  };
}

/**
 * Summarise one worksheet as a header row, data rows and basic dimensions.
 *
 * The sheet is converted to an array of rows (`header: 1`) with empty cells
 * filled by "" and values formatted as text (`raw: false`). Rows whose cells
 * are all null, undefined or "" are dropped; the first remaining row becomes
 * `headers` and the rest become `data`.
 *
 * @param {object} worksheet - Worksheet object; its "!ref" entry, when
 *   present, is decoded into `range`.
 * @param {string} sheetName - Name reported as `name`.
 * @param {string} fileType - Format identifier, copied into the result.
 * @returns {object} `{ name, fileType, headers, data, rowCount, columnCount,
 *   dataRowCount, range }`. `rowCount` counts non-empty rows including the
 *   header, `columnCount` is the header length, and `range` holds 1-based
 *   start/end row and column numbers or is `null` when there is no "!ref".
 */
function extractSheetInfo(worksheet, sheetName, fileType) {
  const isFilled = (cell) => cell !== null && cell !== undefined && cell !== "";

  const rows = XLSX.utils
    .sheet_to_json(worksheet, { header: 1, defval: "", raw: false })
    .filter((row) => row.some(isFilled));

  const headers = rows.length > 0 ? rows[0] : [];
  const data = rows.length > 1 ? rows.slice(1) : [];

  // decode_range yields zero-based indices; report them one-based.
  let range = null;
  const decoded = worksheet["!ref"]
    ? XLSX.utils.decode_range(worksheet["!ref"])
    : null;
  if (decoded) {
    range = {
      startRow: decoded.s.r + 1,
      endRow: decoded.e.r + 1,
      startCol: decoded.s.c + 1,
      endCol: decoded.e.c + 1,
    };
  }

  return {
    name: sheetName,
    fileType,
    headers,
    data,
    rowCount: rows.length,
    columnCount: headers.length,
    dataRowCount: data.length,
    range,
  };
}

3、获取文件信息

复制代码

import { parseTableFile } from "@/utils/excelParser";
// Usage example. `file` is presumably the File object chosen by the user
// (e.g. from an <input type="file">) — it is not defined in this snippet.
// The promise rejects when the format is unsupported or parsing fails, so the
// call should sit inside a try/catch in real code.
const workbookInfo = await parseTableFile(file);

再处理想要的信息就可以了