使用 POI 读取 Excel 浮动图片与嵌入式图片(同时支持 WPS 和 Office)
使用 POI 读取 Excel 中的图片,可同时读取浮动图片、WPS 嵌入式图片和 Office 嵌入式图片。
无需先将文件复制为 ZIP 再解压读取。
一、依赖
gradle
//guava
implementation group: 'com.google.guava', name: 'guava', version: '33.2.0-jre'
//对应 poi 4.1.2
implementation group: 'com.alibaba', name: 'easyexcel', version: '3.3.4'
二、代码
java
import com.google.common.collect.HashBasedTable;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.usermodel.*;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* 从 excel 中读取图片
*
* @version 1.0.0
*/
public class ExcelImgParseUtils {
private static final Logger log = LoggerFactory.getLogger(ExcelImgParseUtils.class);
/**
* 读取Excel图片(增强版),支持以下场景:
* 1. 浮动图片(传统方式-此种方式对用户插入图片时的操作要求较高,很容易导致图片顶点不在指定单元格)
* 2. Office嵌入单元格图片(通过RichData机制)
* 3. WPS嵌入单元格图片(通过DISPIMG公式识别)
*
* @param workbook excel 文件(xlsx)
* @param sheet excel sheet,若多个sheet,可以考虑循环调用此方法,或将 sheet 改为列表
* @param table 存储图片数据{@code new HashBasedTable<Integer, Integer, byte[]>}
* value=table.get(rowIndex,columnIndex)
* {@link HashBasedTable}
* @param imgColumnIndexList 存储图片列索引
*/
public static void readImage(XSSFWorkbook workbook, XSSFSheet sheet, HashBasedTable<Integer, Integer, byte[]> table, List<Integer> imgColumnIndexList) {
log.info("开始读取Excel图片...");
// 第一步:读取浮动图片(传统方式)
List<XSSFPictureData> pictures = workbook.getAllPictures();
log.info("工作簿中总图片数: {}", pictures != null ? pictures.size() : 0);
if (pictures != null && !pictures.isEmpty()) {
XSSFDrawing drawing = sheet.getDrawingPatriarch();
if (drawing != null) {
for (XSSFShape shape : drawing) {
if (shape instanceof XSSFPicture pic) {
XSSFPictureData picData = pic.getPictureData();
XSSFClientAnchor anchor = pic.getClientAnchor();
int row1 = anchor.getRow1();
int col1 = anchor.getCol1();
table.put(row1, col1, picData.getData());
log.info("读取到浮动图片: 行={}, 列={}, 大小={} bytes", row1, col1, picData.getData().length);
}
}
} else {
log.info("Drawing对象为null,可能是Office/WPS嵌入单元格图片");
}
}
// 第二步:读取Office新式嵌入图片(通过RichData机制)
try {
parseOfficeRichDataImages(workbook, sheet, table, imgColumnIndexList);
} catch (Exception e) {
log.warn("读取Office RichData嵌入图片失败: {}", e.getMessage());
}
// 第三步:处理WPS嵌入单元格图片
// 这种情况下图片存在于workbook.getAllPictures()中,但没有位置信息
if (pictures != null && !pictures.isEmpty()) {
log.info("检测到图片,尝试处理嵌入单元格图片");
// 尝试读取WPS的 cellimages.xml 来建立精确映射
Map<String, byte[]> imageIdToDataMap = parseCellImages(workbook);
if (!imageIdToDataMap.isEmpty()) {
log.info("成功解析cellimages.xml,找到 {} 个图片ID映射", imageIdToDataMap.size());
// 扫描所有单元格,查找DISPIMG公式
for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
if (rowIndex == 0) continue; // 跳过表头
Row row = sheet.getRow(rowIndex);
//遍历图片单元格
for (Integer columnIndex : imgColumnIndexList) {
Cell cell = row != null ? row.getCell(columnIndex) : null;
if (cell != null && cell.getCellType() == CellType.FORMULA) {
String formula = cell.getCellFormula();
if (formula != null && formula.contains("DISPIMG")) {
String imageId = extractImageIdFromFormula(formula);
byte[] imageData = imageIdToDataMap.get(imageId);
if (imageData != null && imageData.length > 0) {
table.put(rowIndex, 0, imageData);
log.info("精确匹配WPS图片: 行={}, ID={}, 大小={} bytes",
rowIndex, imageId, imageData.length);
} else {
log.warn("未找到ID={}对应的图片数据", imageId);
}
}
}
}
}
} else {
log.warn("未找到cellimages.xml");
}
}
log.info("图片读取完成,共读取 {} 张图片", table.size());
}
/**
* 解析Office新式RichData嵌入图片(Office 365/2022+)
* 映射链路:单元格(vm) -> metadata.xml(v) -> rdrichvalue.xml(LocalImageIdentifier) -> richValueRel.xml.rels(rId) -> media/image.png
*
* @param workbook 工作簿
* @param sheet 工作表
* @param table 图片存储表
*/
public static void parseOfficeRichDataImages(XSSFWorkbook workbook, XSSFSheet sheet, HashBasedTable<Integer, Integer, byte[]> table, List<Integer> imgColumnIndexList) {
try {
OPCPackage pkg = workbook.getPackage();
// 步骤1:检查是否存在richData目录
PackagePart metadataPart;
PackagePart rdrichvaluePart;
PackagePart richValueRelPart;
try {
metadataPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/metadata.xml"));
rdrichvaluePart = pkg.getPart(PackagingURIHelper.createPartName("/xl/richData/rdrichvalue.xml"));
richValueRelPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/richData/richValueRel.xml"));
} catch (Exception e) {
log.debug("未找到RichData相关文件,跳过RichData解析: {}", e.getMessage());
return;
}
if (metadataPart == null || rdrichvaluePart == null || richValueRelPart == null) {
log.debug("缺少RichData必要文件,跳过解析");
return;
}
log.info("检测到Office RichData机制,开始解析嵌入图片...");
// 步骤2:解析richValueRel.xml.rels,建立rId到图片路径的映射
java.util.Map<String, String> rIdToImagePathMap = new java.util.HashMap<>();
try {
PackageRelationshipCollection rels = richValueRelPart.getRelationships();
for (PackageRelationship rel : rels) {
if (rel.getRelationshipType().contains("image")) {
// rId1 -> ../media/image1.png
rIdToImagePathMap.put(rel.getId(), rel.getTargetURI().toString());
log.debug("RichData rId映射: {} -> {}", rel.getId(), rel.getTargetURI());
}
}
} catch (Exception e) {
log.warn("解析richValueRel.xml.rels失败: {}", e.getMessage());
return;
}
if (rIdToImagePathMap.isEmpty()) {
log.debug("richValueRel.xml.rels中没有图片关系");
return;
}
// 步骤3:解析rdrichvalue.xml,建立RichValue索引到LocalImageIdentifier的映射
java.util.Map<Integer, Integer> rvIndexToImageIdMap = new java.util.HashMap<>();
try (java.io.InputStream is = rdrichvaluePart.getInputStream()) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(is);
NodeList rvList = doc.getElementsByTagName("rv");
log.info("找到 {} 个RichValue定义", rvList.getLength());
for (int i = 0; i < rvList.getLength(); i++) {
Element rvElement = (Element) rvList.item(i);
NodeList vList = rvElement.getElementsByTagName("v");
if (vList.getLength() >= 1) {
// 第一个<v>元素是LocalImageIdentifier,值为rId的索引(0=rId1, 1=rId2...)
String localImageIdStr = vList.item(0).getTextContent().trim();
try {
int localImageId = Integer.parseInt(localImageIdStr);
rvIndexToImageIdMap.put(i, localImageId);
log.debug("RichValue[{}] -> LocalImageIdentifier={} (rId{})", i, localImageId, localImageId + 1);
} catch (NumberFormatException e) {
log.warn("LocalImageIdentifier解析失败: {}", localImageIdStr);
}
}
}
} catch (Exception e) {
log.warn("解析rdrichvalue.xml失败: {}", e.getMessage());
return;
}
// 步骤4:解析metadata.xml,建立ValueMetadata索引到RichValue索引的映射
java.util.Map<Integer, Integer> vmIndexToRvIndexMap = new java.util.HashMap<>();
try (java.io.InputStream is = metadataPart.getInputStream()) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(is);
NodeList bkList = doc.getElementsByTagName("bk");
if (bkList.getLength() > 0) {
// 查找valueMetadata下的bk元素
org.w3c.dom.Node valueMetadataNode;
NodeList metadataNodes = doc.getElementsByTagName("valueMetadata");
if (metadataNodes.getLength() > 0) {
valueMetadataNode = metadataNodes.item(0);
NodeList vmBkList = ((Element) valueMetadataNode).getElementsByTagName("bk");
log.info("找到 {} 个ValueMetadata定义", vmBkList.getLength());
for (int i = 0; i < vmBkList.getLength(); i++) {
Element bkElement = (Element) vmBkList.item(i);
NodeList rcList = bkElement.getElementsByTagName("rc");
if (rcList.getLength() > 0) {
Element rcElement = (Element) rcList.item(0);
String vAttr = rcElement.getAttribute("v");
if (!vAttr.isEmpty()) {
try {
int rvIndex = Integer.parseInt(vAttr);
vmIndexToRvIndexMap.put(i, rvIndex);
log.debug("ValueMetadata[{}] -> RichValue[{}]", i, rvIndex);
} catch (NumberFormatException e) {
log.warn("ValueMetadata v属性解析失败: {}", vAttr);
}
}
}
}
}
}
} catch (Exception e) {
log.warn("解析metadata.xml失败: {}", e.getMessage());
return;
}
// 步骤5:扫描工作表,查找vm属性的单元格
int imageCount = 0;
for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
XSSFRow row = sheet.getRow(rowIndex);
if (row == null) continue;
for (Integer colIndex : imgColumnIndexList) {
XSSFCell cell = row.getCell(colIndex);
if (cell == null) continue;
// 检查是否有vm属性(通过底层CTCell获取)
CTCell ctCell = cell.getCTCell();
if (ctCell != null && ctCell.isSetVm()) {
long vmValue = ctCell.getVm();
int vmIndex = (int) vmValue - 1; // vm从1开始,数组索引从0开始
log.debug("单元格[{},{}] vm={}", rowIndex, colIndex, vmValue);
// 查找完整的映射链路
Integer rvIndex = vmIndexToRvIndexMap.get(vmIndex);
if (rvIndex != null) {
Integer localImageId = rvIndexToImageIdMap.get(rvIndex);
if (localImageId != null) {
String rId = "rId" + (localImageId + 1); // 0->rId1, 1->rId2
String imagePath = rIdToImagePathMap.get(rId);
if (imagePath != null) {
// 读取图片数据
try {
// 处理相对路径 ../media/image1.png
String absolutePath = imagePath;
if (imagePath.startsWith("../")) {
absolutePath = "/xl/" + imagePath.substring(3);
} else if (!imagePath.startsWith("/")) {
absolutePath = "/xl/richData/" + imagePath;
}
PackagePartName partName =
PackagingURIHelper.createPartName(absolutePath);
PackagePart imagePart = pkg.getPart(partName);
if (imagePart != null) {
byte[] imageData = IOUtils.toByteArray(imagePart.getInputStream());
if (imageData != null && imageData.length > 0) {
table.put(rowIndex, colIndex, imageData);
imageCount++;
log.info("读取到RichData嵌入图片: 行={}, 列={}, 大小={} bytes, 映射链: vm[{}]->rv[{}]->localId[{}]->{}->{}",
rowIndex, colIndex, imageData.length, vmIndex, rvIndex, localImageId, rId, absolutePath);
}
} else {
log.warn("未找到图片文件: {}", absolutePath);
}
} catch (Exception e) {
log.warn("读取RichData图片失败 (行={}, 列={}): {}", rowIndex, colIndex, e.getMessage());
}
} else {
log.warn("未找到rId={}对应的图片路径", rId);
}
} else {
log.warn("未找到RichValue[{}]对应的LocalImageIdentifier", rvIndex);
}
} else {
log.warn("未找到ValueMetadata[{}]对应的RichValue索引", vmIndex);
}
}
}
}
log.info("RichData解析完成,共读取 {} 张嵌入图片", imageCount);
} catch (Exception e) {
log.error("解析Office RichData图片异常", e);
}
}
/**
* 解析WPS的 cellimages.xml,建立图片ID到图片数据的映射
* <p>
* 文件位于excel /xl/cellimages.xml
*
* @return Map<ImageID, ImageData>
*/
public static java.util.Map<String, byte[]> parseCellImages(XSSFWorkbook workbook) {
java.util.Map<String, byte[]> imageMap = new java.util.HashMap<>();
try {
// 获取workbook的底层Package
OPCPackage pkg = workbook.getPackage();
//1、 尝试获取cellimages.xml文件
PackagePart cellImagesPart;
try {
cellImagesPart = pkg.getPart(
PackagingURIHelper.createPartName("/xl/cellimages.xml")
);
} catch (Exception e) {
log.debug("未找到/xl/cellimages.xml: {}", e.getMessage());
return imageMap;
}
if (cellImagesPart == null) {
return imageMap;
}
//2、 获取cellimages.xml.rels文件来建立rId到图片路径的映射
java.util.Map<String, String> rIdToPathMap = new java.util.HashMap<>();
PackageRelationshipCollection rels = cellImagesPart.getRelationships();
for (PackageRelationship rel : rels) {
if (rel.getRelationshipType().contains("image")) {
rIdToPathMap.put(rel.getId(), rel.getTargetURI().toString());
log.debug("rId映射: {} -> {}", rel.getId(), rel.getTargetURI());
}
}
// 3、 解析cellimages.xml内容
try (java.io.InputStream is = cellImagesPart.getInputStream()) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(is);
// 查找所有cellImage元素
NodeList cellImages = doc.getElementsByTagNameNS("*", "cellImage");
log.info("找到 {} 个cellImage元素", cellImages.getLength());
for (int i = 0; i < cellImages.getLength(); i++) {
Element cellImage = (Element) cellImages.item(i);
// 提取图片ID (在cNvPr的name属性中)
NodeList cNvPrList = cellImage.getElementsByTagNameNS("*", "cNvPr");
if (cNvPrList.getLength() > 0) {
Element cNvPr = (Element) cNvPrList.item(0);
String imageId = cNvPr.getAttribute("name");
// 提取rId (在blip的embed属性中)
NodeList blipList = cellImage.getElementsByTagNameNS("*", "blip");
if (blipList.getLength() > 0) {
Element blip = (Element) blipList.item(0);
String rId = blip.getAttributeNS(
"http://schemas.openxmlformats.org/officeDocument/2006/relationships",
"embed"
);
// 通过rId找到图片路径
String imagePath = rIdToPathMap.get(rId);
if (imagePath != null) {
// 读取图片数据
try {
// Relationship返回的已经是完整的相对路径
PackagePartName partName =
PackagingURIHelper.createPartName(
imagePath
);
PackagePart imagePart = pkg.getPart(partName);
if (imagePart != null) {
byte[] imageData = IOUtils.toByteArray(
imagePart.getInputStream()
);
imageMap.put(imageId, imageData);
log.info("成功加载图片: ID={}, 大小={} bytes", imageId, imageData.length);
}
} catch (Exception e) {
log.warn("读取图片失败: ID={}, 路径={}, 错误: {}",
imageId, imagePath, e.getMessage());
}
}
}
}
}
}
} catch (Exception e) {
log.warn("解析cellimages.xml失败: {}", e.getMessage(), e);
}
return imageMap;
}
/**
* 从DISPIMG公式中提取图片ID
* <p>
* 支持格式:_xlfn.DISPIMG("ID_XXX",1) 或 DISPIMG("ID_XXX")
*
* @param formula wps图片公式
* @return 图片ID
*/
public static String extractImageIdFromFormula(String formula) {
if (formula == null) {
return "";
}
// 移除前缀 = 和 _xlfn.
formula = formula.trim();
if (formula.startsWith("=")) {
formula = formula.substring(1).trim();
}
if (formula.startsWith("_xlfn.")) {
formula = formula.substring(6).trim();
}
// 提取括号内的内容
int start = formula.indexOf("(");
int end = formula.indexOf(")");
if (start > 0 && end > start) {
String content = formula.substring(start + 1, end).trim();
// 提取第一个参数(ID)
int commaPos = content.indexOf(",");
if (commaPos > 0) {
content = content.substring(0, commaPos).trim();
}
// 移除引号
content = content.replaceAll("[\"']", "");
return content;
}
return formula;
}
}
三、测试
java
@Test
public void testReadImage() throws Exception {
    // Path of the .xlsx file to read; fill it in before running.
    String excelPath = "";
    try (XSSFWorkbook workbook = new XSSFWorkbook(excelPath)) {
        // Sheet that contains the pictures.
        XSSFSheet sheet = workbook.getSheetAt(0);
        // Output: (row index, column index) -> image bytes.
        HashBasedTable<Integer, Integer, byte[]> table = HashBasedTable.create();
        // Columns that may hold in-cell pictures.
        ArrayList<Integer> imgColIndexList = Lists.newArrayList(0);
        ExcelImgParseUtils.readImage(workbook, sheet, table, imgColIndexList);
        System.out.println("图片数量:" + table.size());
        // Print the image size of every image column, row by row in ascending order.
        table.rowKeySet().stream().sorted().forEach(rowIndex -> {
            for (Integer colIndex : imgColIndexList) {
                byte[] bytes = table.get(rowIndex, colIndex);
                int size = bytes == null ? 0 : bytes.length;
                System.out.println("行:" + rowIndex + "列:" + colIndex + "图片数据大小:" + size);
            }
        });
    }
}