主要用于解析 Excel 文件,提取文本内容和嵌入式图片,并将图片上传到服务器后生成可访问的 URL。
依赖库
-
JSZip
:用于解压 Excel 文件 -
XLSX
:用于解析 Excel 内容
性能考虑
-
图片上传采用分批并发处理(已完成)
-
大文件可能需要优化内存使用(未完成)
兼容性
-
支持 Excel 中的 DISPIMG 公式
-
处理多种图片格式(PNG、JPG 等)
扩展建议
-
可添加进度提示,特别是处理大文件时
-
可增加图片压缩选项,减少上传数据量
-
可支持更多 Excel 特殊公式和格式
-
可添加结果缓存功能,避免重复处理相同文件
为什么使用 JSZip 库呢?因为 Excel 文件本质上是 ZIP 压缩包。从 Excel 2007 开始,Microsoft 采用了新的文件格式,称为开放的 XML 文件格式(.xlsx)。另外,如果 Excel 中有多张相同的图片(内容完全一致),Excel 可能会优化存储,只保留一份图片文件。这是 Excel 的内部优化机制,目的是减少文件体积。
css
A[上传Excel文件] --> B[解析文本内容]
A --> C[提取图片元数据]
A --> D[提取图片二进制数据]
B --> E[合并文本和图片数据]
C --> E
D --> F[上传图片到服务器]
F --> E
E --> G[生成最终结果]
完整代码
js
// Module-level state. Neither binding is read or written by any function in
// the visible part of this file; extractImagesFromExcel declares its own local
// `jsonResults`, which shadows the one declared here.
// NOTE(review): these look like leftovers — confirm nothing outside this
// listing depends on them before removing.
var parseResult = {};
var jsonResults = [];
/**
 * Entry point: reads an uploaded Excel file, uploads its embedded cell images
 * and returns the sheet rows with every DISPIMG cell replaced by the uploaded
 * image URL.
 *
 * @param {File|Blob} file - Any object exposing an async `arrayBuffer()`.
 * @returns {Promise<Array<object>>} Row records that follow the skipped
 *   leading rows; always an array (empty when there is nothing to return).
 * @throws Rethrows whatever error interrupted the pipeline, after alerting.
 */
const parseExcelFile = async (file) => {
  // The first two rows are dropped from the result (presumably title/header
  // rows of the expected template — confirm against the real spreadsheets).
  const LEADING_ROWS_TO_SKIP = 2;
  try {
    const arrayBuffer = await file.arrayBuffer();
    // XLSX handles the cell text.
    const workbook = XLSX.read(arrayBuffer, { type: "array" });
    const textData = extractTextData(workbook);
    // JSZip handles the embedded image binaries.
    const images = await extractImagesFromExcel(arrayBuffer);
    const allImageInfo = await uploadAllImg(images);
    const resData = getImageUrl(textData, allImageInfo);
    // Always hand back an array so callers do not have to special-case
    // `undefined` for empty workbooks.
    return resData?.length > 0 ? resData.slice(LEADING_ROWS_TO_SKIP) : [];
  } catch (error) {
    alert("解析文件失败,请检查文件格式");
    throw error;
  }
};
/**
 * Converts a URL-like string into a Blob by fetching it and reading the
 * response body as a Blob.
 *
 * @param {string} base64Data - Passed straight to `fetch`; presumably a
 *   `data:` URL given the function name.
 * @returns {Promise<Blob>} The fetched body.
 */
const base64ToBlobWithFetch = async (base64Data) =>
  fetch(base64Data).then((fetched) => fetched.blob());
/**
 * Decodes a base64 data URL (`data:<mime>;base64,<payload>`) into a Blob.
 *
 * @param {string} base64String - Data URL; the part before the first comma
 *   supplies the MIME type, the part after it the base64 payload.
 * @returns {Blob} Blob holding the decoded bytes, typed with the MIME type.
 */
const convertBase64ToBlob = (base64String) => {
  const [header, payload] = base64String.split(",");
  // atob yields one character per byte (char codes 0-255).
  const binary = atob(payload);
  const mimeString = header.split(":")[1].split(";")[0];
  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
  return new Blob([bytes.buffer], { type: mimeString });
};
/**
 * Uploads one image to the server as multipart form data.
 *
 * @param {string} base64String - Data URL of the image to upload.
 * @returns {Promise<object>} The parsed JSON body of the response.
 * @throws {Error} When the server answers with a non-2xx status, or when the
 *   request or the JSON parsing fails.
 */
const uploadImage = async (base64String) => {
  const formData = new FormData();
  // The file name is fixed regardless of the actual image type.
  formData.append("files", convertBase64ToBlob(base64String), "image.png");

  const response = await fetch(`/接口`, {
    method: "POST",
    body: formData,
  });
  // fetch only rejects on network failure, so HTTP errors must be checked
  // explicitly before the body is treated as a successful result.
  if (!response.ok) {
    throw new Error(`Image upload failed with HTTP status ${response.status}`);
  }
  return response.json();
};
/**
 * Uploads images in batches: images inside a batch are uploaded concurrently,
 * batches run one after another with a short pause in between.
 *
 * @param {Array<object>|null|undefined} images - Records carrying a `dataUrl`.
 * @param {number} [batchSize=5] - Images per batch; values below 1 (or NaN)
 *   are treated as 1.
 * @returns {Promise<Array<object>>} The successfully uploaded records, in
 *   input order, each extended with `file_url`. Failed uploads are logged and
 *   left out.
 */
const uploadAllImg = async (images, batchSize = 5) => {
  const PAUSE_BETWEEN_BATCHES_MS = 500;
  const allImages = images || [];
  if (allImages.length === 0) {
    return [];
  }
  // A size of 0 or less would keep the loop from ever advancing.
  const size = batchSize >= 1 ? Math.floor(batchSize) : 1;
  const results = [];

  for (let start = 0; start < allImages.length; start += size) {
    const batch = allImages.slice(start, start + size);
    // Each task resolves to its record (or null on failure) so that the
    // output order follows the input order instead of completion order.
    const settled = await Promise.all(
      batch.map(async (data) => {
        try {
          const result = await uploadImage(data?.dataUrl);
          return { ...data, file_url: result?.file_url || "" };
        } catch (error) {
          console.error(`上传图片失败:`, error);
          return null;
        }
      })
    );
    results.push(...settled.filter((record) => record !== null));

    const hasMoreBatches = start + size < allImages.length;
    if (hasMoreBatches) {
      await new Promise((resolve) =>
        setTimeout(resolve, PAUSE_BETWEEN_BATCHES_MS)
      );
    }
  }
  return results;
};
/**
 * Scans every sheet of a workbook for cells whose string value is a DISPIMG
 * formula and records where each one sits.
 *
 * @param {object} workbook - Workbook exposing `SheetNames` and `Sheets`.
 * @returns {Array<object>} One record per matching cell: sheet name, cell
 *   address, image id, image index (1 when the formula omits it), the formula
 *   text and zero-based row/column.
 */
const extractImageMetadata = (workbook) => {
  const DISPIMG_PATTERN = /=DISPIMG\(["']([^"']+)["'],?\s*(\d+)?\)/;
  const found = [];

  for (const sheetName of workbook.SheetNames) {
    const sheet = workbook.Sheets[sheetName];
    for (const cellAddress in sheet) {
      // Keys starting with "!" are sheet-level properties, not cells.
      if (cellAddress.startsWith("!")) continue;

      const text = sheet[cellAddress]?.v;
      if (typeof text !== "string" || text === "") continue;

      const hit = text.match(DISPIMG_PATTERN);
      if (hit === null) continue;

      const [, imageId, rawIndex = "1"] = hit;
      found.push({
        sheetName,
        cellAddress,
        imageId,
        imageIndex: Number.parseInt(rawIndex, 10),
        formula: text,
        row: getRowFromAddress(cellAddress),
        col: getColFromAddress(cellAddress),
      });
    }
  }
  return found;
};
/**
 * Extracts the zero-based row index from a cell address such as "B12".
 *
 * @param {string} address - Cell address.
 * @returns {number} First run of digits minus one; 0 when there are no digits.
 */
const getRowFromAddress = (address) => {
  const digits = address.match(/\d+/);
  if (digits === null) {
    return 0;
  }
  // Addresses are 1-based, the result is 0-based.
  return Number.parseInt(digits[0], 10) - 1;
};
/**
 * Extracts the zero-based column index from a cell address such as "AB7".
 *
 * @param {string} address - Cell address with upper-case column letters.
 * @returns {number} Column index (A -> 0, Z -> 25, AA -> 26); 0 when the
 *   address contains no upper-case letters.
 */
const getColFromAddress = (address) => {
  const letters = address.match(/[A-Z]+/);
  if (letters === null) {
    return 0;
  }
  // Column letters form a bijective base-26 number where "A" is 1.
  const oneBased = [...letters[0]].reduce(
    (acc, ch) => acc * 26 + (ch.charCodeAt(0) - 64),
    0
  );
  return oneBased - 1;
};
/**
 * Flattens every sheet of the workbook into a single list of row records.
 *
 * @param {object} workbook - Workbook exposing `SheetNames` and `Sheets`.
 * @returns {Array<{sheetName: string, rowIndex: number, data: Array, images: Array}>}
 *   One record per row, in sheet order; `images` starts out empty.
 */
const extractTextData = (workbook) =>
  workbook.SheetNames.flatMap((sheetName) => {
    const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {
      header: 1, // rows come back as plain arrays
      defval: "", // value used for empty cells
    });
    return rows.map((row, rowIndex) => ({
      sheetName,
      rowIndex,
      data: row,
      images: [],
    }));
  });
/**
 * Extracts the cell-embedded images of an .xlsx archive together with the
 * ids that DISPIMG formulas use to reference them.
 *
 * Two archive entries are combined:
 *   - xl/cellimages.xml            maps relationship ids to DISPIMG ids
 *   - xl/_rels/cellimages.xml.rels maps relationship ids to image file names
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the .xlsx file.
 * @returns {Promise<Array<object>>} Merged image records; `dataUrl` is set on
 *   every record whose image file could be read. Returns an empty array when
 *   either XML part is missing or when anything fails.
 */
const extractImagesFromExcel = async (arrayBuffer) => {
  try {
    const zip = new JSZip();
    await zip.loadAsync(arrayBuffer);

    const cellImagesData = await parseXmlFromZip(
      zip,
      "xl/cellimages.xml",
      parseCellImagesXml
    );
    const relationshipsData = await parseXmlFromZip(
      zip,
      "xl/_rels/cellimages.xml.rels",
      parseRelationshipsXml
    );
    if (!cellImagesData || !relationshipsData) {
      return [];
    }

    const imageEntries = prepareAndMergeData(cellImagesData, relationshipsData);
    if (!imageEntries) {
      return [];
    }

    // Images are read one at a time rather than all at once.
    for (const entry of imageEntries) {
      const target = String(entry?.name);
      // Relationship targets are relative to the xl/ folder unless they start
      // with "/", in which case they are already rooted at the archive.
      const path = target.startsWith("/") ? target.slice(1) : `xl/${target}`;
      const file = zip.file(path);
      if (!file) {
        continue;
      }
      try {
        const imageData = await file.async("arraybuffer");
        const blob = new Blob([imageData], { type: getMimeType(path) });
        entry.dataUrl = await blobToDataUrl(blob);
      } catch (error) {
        console.warn(`处理图片失败:`, error);
      }
    }
    return imageEntries;
  } catch (error) {
    console.error("提取图片失败:", error);
    return [];
  }
};
/**
 * Reads one XML entry from the archive, parses it and hands the document to
 * `parseFunction`.
 *
 * @param {object} zip - Archive object exposing `file(path)`.
 * @param {string} filePath - Path of the XML entry inside the archive.
 * @param {(doc: Document) => *} parseFunction - Converts the parsed document.
 * @returns {Promise<*|null>} Whatever `parseFunction` returns, or null when
 *   the entry is missing, the XML is malformed, or processing throws.
 */
async function parseXmlFromZip(zip, filePath, parseFunction) {
  const file = zip.file(filePath);
  if (!file) {
    console.error(`XML file not found at ${filePath}`);
    return null;
  }
  try {
    const xmlText = await file.async("text");
    const xmlDoc = new DOMParser().parseFromString(xmlText, "text/xml");
    // DOMParser does not throw on malformed XML; it returns a document that
    // contains a <parsererror> element instead, so that has to be checked.
    const parseError = xmlDoc.getElementsByTagName("parsererror")[0];
    if (parseError) {
      console.error(`Malformed XML in ${filePath}:`, parseError.textContent);
      return null;
    }
    return parseFunction(xmlDoc);
  } catch (error) {
    console.error(`Error processing ${filePath}:`, error);
    return null;
  }
}
/**
 * Combines the two parsed XML tables into the final image records.
 *
 * The inputs are forwarded as-is: they are already plain data, so the earlier
 * serialize-then-parse round trip added cost without changing the outcome.
 *
 * @param {Array<{rid: string, id: string}>} cellImagesData - Relationship id
 *   to DISPIMG id pairs.
 * @param {Array<{id: string, name: string}>} relationshipsData - Relationship
 *   id to image file name pairs.
 * @returns {Array<object>} Result of `mergeImageData` for the two tables.
 */
function prepareAndMergeData(cellImagesData, relationshipsData) {
  return mergeImageData(cellImagesData, relationshipsData);
}
/**
 * Attaches each image to the text row it belongs to, using the image's
 * `metadata` (sheet name and row index) as the lookup key.
 *
 * Mutates the matching rows by pushing onto their `images` arrays.
 *
 * @param {Array<object>} textData - Row records with `sheetName`, `rowIndex`
 *   and an `images` array.
 * @param {Array<object>} images - Image records; those without `metadata`
 *   are ignored.
 * @returns {Array<object>} The same `textData` array that was passed in.
 */
const linkTextDataWithImages = (textData, images) => {
  for (const image of images) {
    const { metadata } = image;
    if (!metadata) continue;

    const owner = textData.find(
      (row) =>
        row.sheetName === metadata.sheetName && row.rowIndex === metadata.row
    );
    if (owner) {
      owner.images.push(image);
    }
  }
  return textData;
};
/**
 * Tells whether a file name ends in one of the known image extensions
 * (case-insensitive).
 *
 * @param {string} filename - File name or path.
 * @returns {boolean} True for .png, .jpg, .jpeg, .gif, .bmp, .tiff and .svg.
 */
const isImageFile = (filename) => {
  const dotAt = filename.lastIndexOf(".");
  const suffix = filename.toLowerCase().substring(dotAt);
  switch (suffix) {
    case ".png":
    case ".jpg":
    case ".jpeg":
    case ".gif":
    case ".bmp":
    case ".tiff":
    case ".svg":
      return true;
    default:
      return false;
  }
};
/**
 * Maps a file name to an image MIME type based on its extension
 * (case-insensitive).
 *
 * @param {string} filename - File name or path.
 * @returns {string} The matching MIME type, or "image/png" when the name has
 *   no extension or the extension is not recognised.
 */
const getMimeType = (filename) => {
  // A Map is used so that names such as "constructor" cannot resolve to
  // properties inherited from Object.prototype.
  const mimeTypes = new Map([
    [".png", "image/png"],
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".gif", "image/gif"],
    [".bmp", "image/bmp"],
    [".tif", "image/tiff"],
    [".tiff", "image/tiff"],
    [".webp", "image/webp"],
    [".svg", "image/svg+xml"],
  ]);
  const lowered = filename.toLowerCase();
  const dotAt = lowered.lastIndexOf(".");
  const ext = dotAt === -1 ? "" : lowered.slice(dotAt);
  return mimeTypes.get(ext) ?? "image/png";
};
/**
 * Reads a Blob through a FileReader and resolves with its data URL.
 *
 * @param {Blob} blob - Blob to encode.
 * @returns {Promise<string>} Resolves with `reader.result` once loading
 *   finishes; rejects with whatever the reader passes to `onerror`.
 */
const blobToDataUrl = (blob) =>
  new Promise((resolve, reject) => {
    const fileReader = new FileReader();
    Object.assign(fileReader, {
      onerror: reject,
      onload: () => {
        resolve(fileReader.result);
      },
    });
    fileReader.readAsDataURL(blob);
  });
/**
 * Converts a relationships XML document into a list of
 * { id, name } pairs.
 *
 * @param {Document} xmlDoc - Parsed relationships document.
 * @returns {Array<{id: string|null, name: string|null}>} For every
 *   `Relationship` element: its `Id` attribute as `id` and its `Target`
 *   attribute (the image file name) as `name`.
 */
const parseRelationshipsXml = (xmlDoc) =>
  Array.from(xmlDoc.getElementsByTagName("Relationship"), (node) => ({
    id: node.getAttribute("Id"),
    name: node.getAttribute("Target"),
  }));
/**
 * Converts the cellImages XML document into a list of { rid, id } pairs,
 * where `rid` is the relationship id of the embedded picture and `id` is the
 * name that DISPIMG formulas refer to.
 *
 * @param {Document} xmlDoc - Parsed cellimages.xml document.
 * @returns {Array<{rid: string|null|undefined, id: string}>} One entry per
 *   `cellImage` element; `id` falls back to `img_<index>` when the element
 *   carries no name. Elements whose parsing throws are skipped.
 */
const parseCellImagesXml = (xmlDoc) => {
  // Reads a namespace URI from the root element's xmlns declarations,
  // preferring the given prefix and falling back to the default namespace.
  const detectNamespace = (doc, preferredPrefix = "") => {
    const root = doc.documentElement;
    const ns =
      root.getAttribute(`xmlns:${preferredPrefix}`) ||
      root.getAttribute("xmlns");
    return ns || "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
  };

  const mainNS = detectNamespace(xmlDoc);
  const relNS = detectNamespace(xmlDoc, "r");
  const wpsNS = "http://www.wps.cn/officeDocument/2017/etCustomData";

  // Query each candidate namespace once. When the document's default
  // namespace is the WPS one, both candidates are identical and querying
  // twice would report every image twice.
  const namespaces = [...new Set([wpsNS, mainNS])];
  const cellImages = namespaces.flatMap((ns) =>
    Array.from(xmlDoc.getElementsByTagNameNS(ns, "cellImage"))
  );

  return cellImages
    .map((cellImage, i) => {
      try {
        // Several selector/attribute spellings are tried because prefix
        // handling differs between documents.
        const blip = cellImage.querySelector(
          "[r\\:embed], [embed], a\\:blip, blip"
        );
        const rid =
          blip?.getAttributeNS(relNS, "embed") ||
          blip?.getAttribute("r:embed") ||
          blip?.getAttribute("embed");
        const cNvPr = cellImage.querySelector("xdr\\:cNvPr, cNvPr");
        const id = cNvPr?.getAttribute("name") || `img_${i}`;
        return { rid, id };
      } catch (error) {
        console.error(`解析 cellImage 出错:`, error);
        return null;
      }
    })
    .filter(Boolean);
};
/**
 * Joins the cell-image table with the relationships table on the
 * relationship id.
 *
 * @param {Array<{id: string, rid: string}>} jsonCellImages - DISPIMG id and
 *   relationship id of each cell image.
 * @param {Array<{id: string, name: string}>} jsonRelationships - Relationship
 *   id and image file name of each relationship.
 * @returns {Array<{id: string, rid: string, name: string}>} One record per
 *   cell image that has a relationship with a non-empty file name; images
 *   without one are dropped.
 */
const mergeImageData = (jsonCellImages, jsonRelationships) => {
  // Index the relationships once so each image is resolved with a single
  // lookup. The first relationship wins when an id occurs more than once.
  const nameByRelationshipId = new Map();
  for (const rel of jsonRelationships) {
    if (!nameByRelationshipId.has(rel.id)) {
      nameByRelationshipId.set(rel.id, rel.name);
    }
  }

  return jsonCellImages
    .map((cellImage) => ({
      id: cellImage.id,
      rid: cellImage.rid,
      name: nameByRelationshipId.has(cellImage.rid)
        ? nameByRelationshipId.get(cellImage.rid)
        : null,
    }))
    .filter((item) => item.name);
};
/**
 * Replaces every DISPIMG formula in the row data with the URL of the
 * corresponding uploaded image.
 *
 * @param {Array<object>} textData - Row records whose `data` is an array of
 *   cell values.
 * @param {Array<{id: string, file_url: string}>} allImageInfo - Uploaded
 *   image records keyed by DISPIMG id.
 * @returns {Array<object>} Copies of the row records with a new `data` array:
 *   DISPIMG cells become the image URL ("" when no uploaded image matches);
 *   every other cell is passed through unchanged.
 */
const getImageUrl = (textData, allImageInfo) => {
  const DISPIMG_PATTERN = /=DISPIMG\(["']([^"']+)["'],?\s*(\d+)?\)/;

  // Index the uploads once; the first record wins for a repeated id.
  const urlById = new Map();
  for (const info of allImageInfo) {
    if (!urlById.has(info.id)) {
      urlById.set(info.id, info.file_url);
    }
  }

  return textData.map((row) => ({
    ...row,
    data: row?.data?.map((cell) => {
      // Cells can hold numbers, booleans or null, which have no `includes`;
      // only strings can contain a formula.
      if (typeof cell !== "string" || !cell.includes("=DISPIMG")) {
        return cell;
      }
      const id = cell.match(DISPIMG_PATTERN)?.[1];
      return urlById.get(id) || "";
    }),
  }));
};
ps:代码可能比较糟糕,文档也没什么排版,目前网上这样的实现方法前端不是很多,做个记录贴,希望有一样需求的小伙伴可以看到,能有所帮助就是存在最好的意义。