POI读取 excel 嵌入式图片(支持wps 和 office)

POI读取 excel 浮动图片、excel 嵌入式图片(支持wps 和 office)

使用 POI 读取 Excel 中的图片,可同时读取浮动图片、WPS 嵌入式图片和 Office 嵌入式图片。

无需先将文件复制为 ZIP 再解压读取。

一、依赖

gradle 复制代码
	  //guava
      implementation group: 'com.google.guava', name: 'guava', version: '33.2.0-jre'
      //对应 poi 4.1.2
      implementation group: 'com.alibaba', name: 'easyexcel', version: '3.3.4'

二、代码

java 复制代码
import com.google.common.collect.HashBasedTable;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.usermodel.*;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * 从 excel 中读取图片
 *
 * @version 1.0.0
 */
/**
 * Reads pictures out of an {@code .xlsx} workbook without unzipping it by hand.
 * <p>
 * Three storage schemes are handled:
 * <ol>
 *   <li>floating pictures anchored on the sheet drawing,</li>
 *   <li>Office "picture in cell" images stored through the RichData parts,</li>
 *   <li>WPS "picture in cell" images referenced by a {@code DISPIMG} formula and
 *       described in {@code /xl/cellimages.xml}.</li>
 * </ol>
 * All methods are static and best-effort: parsing problems are logged and the
 * affected step is skipped instead of propagating an exception to the caller.
 *
 * @version 1.0.0
 */
public class ExcelImgParseUtils {

    private static final Logger log = LoggerFactory.getLogger(ExcelImgParseUtils.class);

    /** Namespace of the {@code r:id} / {@code r:embed} relationship attributes. */
    private static final String RELATIONSHIP_NS =
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships";

    /** Parser feature that makes the parser reject any DOCTYPE declaration. */
    private static final String DISALLOW_DOCTYPE_FEATURE =
            "http://apache.org/xml/features/disallow-doctype-decl";

    /**
     * Reads every picture of one sheet into {@code table}, keyed by (rowIndex, columnIndex).
     * <p>
     * The three schemes are tried in order: floating pictures, Office RichData
     * pictures, WPS {@code DISPIMG} pictures. A later scheme overwrites an earlier
     * entry that landed on the same cell.
     * <p>
     * Floating pictures are stored at the top-left cell of their anchor and are not
     * filtered by {@code imgColumnIndexList}. The WPS scan skips row 0 (treated as the
     * header row); the RichData scan does not.
     *
     * @param workbook           the xlsx workbook
     * @param sheet              the sheet to scan; call once per sheet for multi-sheet files
     * @param table              output table, {@code value = table.get(rowIndex, columnIndex)};
     *                           see {@link HashBasedTable}
     * @param imgColumnIndexList column indexes that may hold in-cell pictures
     */
    public static void readImage(XSSFWorkbook workbook, XSSFSheet sheet, HashBasedTable<Integer, Integer, byte[]> table, List<Integer> imgColumnIndexList) {
        log.info("开始读取Excel图片...");

        List<XSSFPictureData> pictures = workbook.getAllPictures();
        boolean hasPictures = pictures != null && !pictures.isEmpty();
        log.info("工作簿中总图片数: {}", hasPictures ? pictures.size() : 0);

        // Step 1: floating pictures (classic drawing anchors).
        if (hasPictures) {
            readFloatingImages(sheet, table);
        }

        // Step 2: Office in-cell pictures (RichData parts).
        try {
            parseOfficeRichDataImages(workbook, sheet, table, imgColumnIndexList);
        } catch (Exception e) {
            // Best effort: keep going with the WPS step, but keep the stack trace in the log.
            log.warn("读取Office RichData嵌入图片失败: {}", e.getMessage(), e);
        }

        // Step 3: WPS in-cell pictures. Only attempted when the workbook reports pictures.
        if (hasPictures) {
            readWpsCellImages(workbook, sheet, table, imgColumnIndexList);
        }

        log.info("图片读取完成,共读取 {} 张图片", table.size());
    }

    /**
     * Stores each floating picture of the sheet at the top-left cell of its anchor.
     * Shapes without a client anchor or without picture data are skipped.
     */
    private static void readFloatingImages(XSSFSheet sheet, HashBasedTable<Integer, Integer, byte[]> table) {
        XSSFDrawing drawing = sheet.getDrawingPatriarch();
        if (drawing == null) {
            log.info("Drawing对象为null,可能是Office/WPS嵌入单元格图片");
            return;
        }
        for (XSSFShape shape : drawing) {
            if (!(shape instanceof XSSFPicture pic)) {
                continue;
            }
            XSSFClientAnchor anchor = pic.getClientAnchor();
            XSSFPictureData picData = pic.getPictureData();
            if (anchor == null || picData == null) {
                // Without an anchor there is no cell position; without data there is nothing to store.
                log.debug("跳过无锚点或无数据的图片形状");
                continue;
            }
            int row1 = anchor.getRow1();
            int col1 = anchor.getCol1();
            byte[] data = picData.getData();
            table.put(row1, col1, data);
            log.info("读取到浮动图片: 行={}, 列={}, 大小={} bytes", row1, col1, data.length);
        }
    }

    /**
     * Scans the image columns for {@code DISPIMG} formulas and stores the matching
     * picture from {@code cellimages.xml} at the cell that holds the formula.
     * Row 0 is skipped because it is treated as the header row.
     */
    private static void readWpsCellImages(XSSFWorkbook workbook, XSSFSheet sheet, HashBasedTable<Integer, Integer, byte[]> table, List<Integer> imgColumnIndexList) {
        log.info("检测到图片,尝试处理嵌入单元格图片");

        Map<String, byte[]> imageIdToDataMap = parseCellImages(workbook);
        if (imageIdToDataMap.isEmpty()) {
            log.warn("未找到cellimages.xml");
            return;
        }
        log.info("成功解析cellimages.xml,找到 {} 个图片ID映射", imageIdToDataMap.size());

        for (int rowIndex = 1; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
            Row row = sheet.getRow(rowIndex);
            if (row == null) {
                continue;
            }
            for (Integer columnIndex : imgColumnIndexList) {
                Cell cell = row.getCell(columnIndex);
                if (cell == null || cell.getCellType() != CellType.FORMULA) {
                    continue;
                }
                String formula = cell.getCellFormula();
                if (formula == null || !formula.contains("DISPIMG")) {
                    continue;
                }
                String imageId = extractImageIdFromFormula(formula);
                byte[] imageData = imageIdToDataMap.get(imageId);
                if (imageData != null && imageData.length > 0) {
                    // Store at the column actually being scanned (was hard-coded to column 0).
                    table.put(rowIndex, columnIndex, imageData);
                    log.info("精确匹配WPS图片: 行={}, ID={}, 大小={} bytes",
                            rowIndex, imageId, imageData.length);
                } else {
                    log.warn("未找到ID={}对应的图片数据", imageId);
                }
            }
        }
    }

    /**
     * Reads Office "picture in cell" images stored through the RichData parts.
     * <p>
     * Mapping chain: cell {@code vm} attribute -> {@code metadata.xml} ({@code rc/@v})
     * -> {@code rdrichvalue.xml} (first {@code <v>} of the {@code <rv>})
     * -> {@code richValueRel.xml} ({@code <rel r:id>}) -> relationship target -> image part.
     * <p>
     * NOTE(review): the code treats {@code rc/@v} directly as the rich-value index and
     * the first {@code <v>} as the LocalImageIdentifier; confirm against workbooks that
     * mix pictures with other rich-value kinds.
     * <p>
     * Any failure is logged and ends the step; nothing is thrown to the caller.
     *
     * @param workbook           the workbook whose package is inspected
     * @param sheet              the sheet to scan
     * @param table              output table keyed by (rowIndex, columnIndex)
     * @param imgColumnIndexList column indexes to scan for cells carrying a {@code vm} attribute
     */
    public static void parseOfficeRichDataImages(XSSFWorkbook workbook, XSSFSheet sheet, HashBasedTable<Integer, Integer, byte[]> table, List<Integer> imgColumnIndexList) {
        try {
            OPCPackage pkg = workbook.getPackage();

            // Step 1: all three RichData parts must exist, otherwise there is nothing to do.
            PackagePart metadataPart;
            PackagePart rdrichvaluePart;
            PackagePart richValueRelPart;
            try {
                metadataPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/metadata.xml"));
                rdrichvaluePart = pkg.getPart(PackagingURIHelper.createPartName("/xl/richData/rdrichvalue.xml"));
                richValueRelPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/richData/richValueRel.xml"));
            } catch (Exception e) {
                log.debug("未找到RichData相关文件,跳过RichData解析: {}", e.getMessage());
                return;
            }
            if (metadataPart == null || rdrichvaluePart == null || richValueRelPart == null) {
                log.debug("缺少RichData必要文件,跳过解析");
                return;
            }

            log.info("检测到Office RichData机制,开始解析嵌入图片...");

            // Step 2: relationship id -> image target path (from richValueRel.xml.rels).
            Map<String, String> rIdToImagePathMap;
            try {
                rIdToImagePathMap = collectImageTargets(
                        richValueRelPart.getRelationships(), "RichData rId映射: {} -> {}");
            } catch (Exception e) {
                log.warn("解析richValueRel.xml.rels失败: {}", e.getMessage(), e);
                return;
            }
            if (rIdToImagePathMap.isEmpty()) {
                log.debug("richValueRel.xml.rels中没有图片关系");
                return;
            }

            // Ordered r:id list of richValueRel.xml; LocalImageIdentifier indexes into it.
            List<String> relIdsByIndex = readRichValueRelIds(richValueRelPart);

            // Step 3: rich-value index -> LocalImageIdentifier.
            Map<Integer, Integer> rvIndexToImageIdMap;
            try {
                rvIndexToImageIdMap = readRichValues(rdrichvaluePart);
            } catch (Exception e) {
                log.warn("解析rdrichvalue.xml失败: {}", e.getMessage(), e);
                return;
            }

            // Step 4: value-metadata index -> rich-value index.
            Map<Integer, Integer> vmIndexToRvIndexMap;
            try {
                vmIndexToRvIndexMap = readValueMetadata(metadataPart);
            } catch (Exception e) {
                log.warn("解析metadata.xml失败: {}", e.getMessage(), e);
                return;
            }

            // Step 5: walk the image columns and follow the chain for every cell with a vm attribute.
            int imageCount = 0;
            for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                XSSFRow row = sheet.getRow(rowIndex);
                if (row == null) {
                    continue;
                }
                for (Integer colIndex : imgColumnIndexList) {
                    XSSFCell cell = row.getCell(colIndex);
                    if (cell == null) {
                        continue;
                    }
                    CTCell ctCell = cell.getCTCell();
                    if (ctCell == null || !ctCell.isSetVm()) {
                        continue;
                    }

                    long vmValue = ctCell.getVm();
                    int vmIndex = (int) vmValue - 1; // vm is 1-based, the lookup maps are 0-based
                    log.debug("单元格[{},{}] vm={}", rowIndex, colIndex, vmValue);

                    Integer rvIndex = vmIndexToRvIndexMap.get(vmIndex);
                    if (rvIndex == null) {
                        log.warn("未找到ValueMetadata[{}]对应的RichValue索引", vmIndex);
                        continue;
                    }
                    Integer localImageId = rvIndexToImageIdMap.get(rvIndex);
                    if (localImageId == null) {
                        log.warn("未找到RichValue[{}]对应的LocalImageIdentifier", rvIndex);
                        continue;
                    }
                    String rId = relationshipIdFor(localImageId, relIdsByIndex);
                    String imagePath = rIdToImagePathMap.get(rId);
                    if (imagePath == null) {
                        log.warn("未找到rId={}对应的图片路径", rId);
                        continue;
                    }

                    String absolutePath = toAbsolutePartPath(imagePath);
                    try {
                        PackagePart imagePart = pkg.getPart(PackagingURIHelper.createPartName(absolutePath));
                        if (imagePart == null) {
                            log.warn("未找到图片文件: {}", absolutePath);
                            continue;
                        }
                        byte[] imageData = readPartBytes(imagePart);
                        if (imageData != null && imageData.length > 0) {
                            table.put(rowIndex, colIndex, imageData);
                            imageCount++;
                            log.info("读取到RichData嵌入图片: 行={}, 列={}, 大小={} bytes, 映射链: vm[{}]->rv[{}]->localId[{}]->{}->{}",
                                    rowIndex, colIndex, imageData.length, vmIndex, rvIndex, localImageId, rId, absolutePath);
                        }
                    } catch (Exception e) {
                        log.warn("读取RichData图片失败 (行={}, 列={}): {}", rowIndex, colIndex, e.getMessage());
                    }
                }
            }
            log.info("RichData解析完成,共读取 {} 张嵌入图片", imageCount);
        } catch (Exception e) {
            log.error("解析Office RichData图片异常", e);
        }
    }

    /**
     * Parses the WPS {@code /xl/cellimages.xml} part and returns picture id -> picture bytes.
     * <p>
     * The picture id is the {@code name} attribute of the {@code cNvPr} element; it is the
     * value referenced by the {@code DISPIMG} formula. The bytes come from the part targeted
     * by the {@code r:embed} relationship of the {@code blip} element.
     *
     * @param workbook the workbook whose package is inspected
     * @return map of picture id to picture bytes; empty (never {@code null}) when the part
     *         is missing or cannot be parsed
     */
    public static Map<String, byte[]> parseCellImages(XSSFWorkbook workbook) {
        Map<String, byte[]> imageMap = new HashMap<>();

        try {
            OPCPackage pkg = workbook.getPackage();

            // 1. Locate cellimages.xml; its absence simply means "no WPS in-cell pictures".
            PackagePart cellImagesPart;
            try {
                cellImagesPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/cellimages.xml"));
            } catch (Exception e) {
                log.debug("未找到/xl/cellimages.xml: {}", e.getMessage());
                return imageMap;
            }
            if (cellImagesPart == null) {
                return imageMap;
            }

            // 2. Relationship id -> image target path (from cellimages.xml.rels).
            Map<String, String> rIdToPathMap =
                    collectImageTargets(cellImagesPart.getRelationships(), "rId映射: {} -> {}");

            // 3. Walk every cellImage element and load the picture it points to.
            Document doc;
            try (InputStream is = cellImagesPart.getInputStream()) {
                doc = parseXml(is);
            }
            NodeList cellImages = doc.getElementsByTagNameNS("*", "cellImage");
            log.info("找到 {} 个cellImage元素", cellImages.getLength());

            for (int i = 0; i < cellImages.getLength(); i++) {
                Element cellImage = (Element) cellImages.item(i);

                NodeList cNvPrList = cellImage.getElementsByTagNameNS("*", "cNvPr");
                NodeList blipList = cellImage.getElementsByTagNameNS("*", "blip");
                if (cNvPrList.getLength() == 0 || blipList.getLength() == 0) {
                    continue;
                }
                String imageId = ((Element) cNvPrList.item(0)).getAttribute("name");
                String rId = ((Element) blipList.item(0)).getAttributeNS(RELATIONSHIP_NS, "embed");

                String imagePath = rIdToPathMap.get(rId);
                if (imagePath == null) {
                    continue;
                }
                try {
                    // getTargetURI() already yields a package-absolute part name here.
                    PackagePart imagePart = pkg.getPart(PackagingURIHelper.createPartName(imagePath));
                    if (imagePart == null) {
                        log.warn("未找到图片文件: {}", imagePath);
                        continue;
                    }
                    byte[] imageData = readPartBytes(imagePart);
                    imageMap.put(imageId, imageData);
                    log.info("成功加载图片: ID={}, 大小={} bytes", imageId, imageData.length);
                } catch (Exception e) {
                    log.warn("读取图片失败: ID={}, 路径={}, 错误: {}",
                            imageId, imagePath, e.getMessage());
                }
            }
        } catch (Exception e) {
            log.warn("解析cellimages.xml失败: {}", e.getMessage(), e);
        }

        return imageMap;
    }

    /**
     * Extracts the picture id from a WPS {@code DISPIMG} formula.
     * <p>
     * Supported shapes: {@code _xlfn.DISPIMG("ID_XXX",1)} and {@code DISPIMG("ID_XXX")},
     * each optionally preceded by {@code =}. The id is the first argument with single
     * and double quotes removed.
     *
     * @param formula the cell formula
     * @return the picture id; {@code ""} for {@code null}; the cleaned-up formula itself
     *         when no parenthesised argument list is found
     */
    public static String extractImageIdFromFormula(String formula) {
        if (formula == null) {
            return "";
        }

        // Drop the leading '=' and the '_xlfn.' future-function prefix, if present.
        formula = formula.trim();
        if (formula.startsWith("=")) {
            formula = formula.substring(1).trim();
        }
        if (formula.startsWith("_xlfn.")) {
            formula = formula.substring(6).trim();
        }

        int open = formula.indexOf("(");
        int close = formula.indexOf(")");
        if (open <= 0 || close <= open) {
            return formula;
        }

        // Keep only the first argument, then strip the quotes around it.
        String args = formula.substring(open + 1, close).trim();
        int comma = args.indexOf(",");
        if (comma > 0) {
            args = args.substring(0, comma).trim();
        }
        return args.replaceAll("[\"']", "");
    }

    /**
     * Parses an XML stream with a namespace-aware DOM parser that has DOCTYPE
     * declarations and entity expansion disabled, since the workbook may be untrusted.
     */
    private static Document parseXml(InputStream in)
            throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        factory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
        factory.setExpandEntityReferences(false);
        DocumentBuilder builder = factory.newDocumentBuilder();
        return builder.parse(in);
    }

    /** Reads the whole content of a package part and closes its stream. */
    private static byte[] readPartBytes(PackagePart part) throws IOException {
        try (InputStream in = part.getInputStream()) {
            return IOUtils.toByteArray(in);
        }
    }

    /**
     * Collects relationship id -> target URI for every relationship whose type contains "image".
     *
     * @param rels        the relationships to inspect
     * @param debugFormat SLF4J format (two placeholders: id, target) used for the debug log
     */
    private static Map<String, String> collectImageTargets(PackageRelationshipCollection rels, String debugFormat) {
        Map<String, String> targets = new HashMap<>();
        for (PackageRelationship rel : rels) {
            if (rel.getRelationshipType().contains("image")) {
                targets.put(rel.getId(), rel.getTargetURI().toString());
                log.debug(debugFormat, rel.getId(), rel.getTargetURI());
            }
        }
        return targets;
    }

    /**
     * Returns the {@code r:id} values of the {@code <rel>} elements of richValueRel.xml in
     * document order. Returns an empty list when the part cannot be parsed, in which case
     * callers fall back to the {@code rId(n+1)} naming convention.
     */
    private static List<String> readRichValueRelIds(PackagePart richValueRelPart) {
        List<String> ids = new ArrayList<>();
        try (InputStream is = richValueRelPart.getInputStream()) {
            NodeList rels = parseXml(is).getElementsByTagNameNS("*", "rel");
            for (int i = 0; i < rels.getLength(); i++) {
                ids.add(((Element) rels.item(i)).getAttributeNS(RELATIONSHIP_NS, "id"));
            }
        } catch (IOException | ParserConfigurationException | SAXException e) {
            log.debug("解析richValueRel.xml失败,回退到rId序号约定: {}", e.getMessage());
            ids.clear();
        }
        return ids;
    }

    /**
     * Resolves a LocalImageIdentifier to a relationship id: the entry at that index of
     * richValueRel.xml when available, otherwise the convention 0 -> rId1, 1 -> rId2, ...
     */
    private static String relationshipIdFor(int localImageId, List<String> relIdsByIndex) {
        if (localImageId >= 0 && localImageId < relIdsByIndex.size()) {
            String id = relIdsByIndex.get(localImageId);
            if (id != null && !id.isEmpty()) {
                return id;
            }
        }
        return "rId" + (localImageId + 1);
    }

    /**
     * Parses rdrichvalue.xml into rich-value index -> value of the first {@code <v>} child,
     * which is taken to be the LocalImageIdentifier. Non-numeric values are logged and skipped.
     */
    private static Map<Integer, Integer> readRichValues(PackagePart rdrichvaluePart)
            throws ParserConfigurationException, SAXException, IOException {
        Map<Integer, Integer> rvIndexToImageId = new HashMap<>();
        Document doc;
        try (InputStream is = rdrichvaluePart.getInputStream()) {
            doc = parseXml(is);
        }

        NodeList rvList = doc.getElementsByTagName("rv");
        log.info("找到 {} 个RichValue定义", rvList.getLength());

        for (int i = 0; i < rvList.getLength(); i++) {
            NodeList vList = ((Element) rvList.item(i)).getElementsByTagName("v");
            if (vList.getLength() < 1) {
                continue;
            }
            String localImageIdStr = vList.item(0).getTextContent().trim();
            try {
                int localImageId = Integer.parseInt(localImageIdStr);
                rvIndexToImageId.put(i, localImageId);
                log.debug("RichValue[{}] -> LocalImageIdentifier={} (rId{})", i, localImageId, localImageId + 1);
            } catch (NumberFormatException e) {
                log.warn("LocalImageIdentifier解析失败: {}", localImageIdStr);
            }
        }
        return rvIndexToImageId;
    }

    /**
     * Parses metadata.xml into value-metadata index -> {@code v} attribute of the first
     * {@code <rc>} of each {@code <bk>} under the first {@code <valueMetadata>} element.
     * Only that element is searched, so {@code <bk>} entries elsewhere are not counted.
     */
    private static Map<Integer, Integer> readValueMetadata(PackagePart metadataPart)
            throws ParserConfigurationException, SAXException, IOException {
        Map<Integer, Integer> vmIndexToRvIndex = new HashMap<>();
        Document doc;
        try (InputStream is = metadataPart.getInputStream()) {
            doc = parseXml(is);
        }

        NodeList metadataNodes = doc.getElementsByTagName("valueMetadata");
        if (metadataNodes.getLength() == 0) {
            return vmIndexToRvIndex;
        }
        NodeList vmBkList = ((Element) metadataNodes.item(0)).getElementsByTagName("bk");
        log.info("找到 {} 个ValueMetadata定义", vmBkList.getLength());

        for (int i = 0; i < vmBkList.getLength(); i++) {
            NodeList rcList = ((Element) vmBkList.item(i)).getElementsByTagName("rc");
            if (rcList.getLength() == 0) {
                continue;
            }
            String vAttr = ((Element) rcList.item(0)).getAttribute("v");
            if (vAttr.isEmpty()) {
                continue;
            }
            try {
                int rvIndex = Integer.parseInt(vAttr);
                vmIndexToRvIndex.put(i, rvIndex);
                log.debug("ValueMetadata[{}] -> RichValue[{}]", i, rvIndex);
            } catch (NumberFormatException e) {
                log.warn("ValueMetadata v属性解析失败: {}", vAttr);
            }
        }
        return vmIndexToRvIndex;
    }

    /**
     * Turns a relationship target into a package-absolute part path. Targets starting with
     * {@code ../} are resolved against {@code /xl/}, other relative targets against
     * {@code /xl/richData/}; absolute targets are returned unchanged.
     */
    private static String toAbsolutePartPath(String imagePath) {
        if (imagePath.startsWith("../")) {
            return "/xl/" + imagePath.substring(3);
        }
        if (!imagePath.startsWith("/")) {
            return "/xl/richData/" + imagePath;
        }
        return imagePath;
    }

}

三、测试

java 复制代码
    /**
     * Smoke test: opens the workbook at {@code flightPath}, reads the pictures of the
     * first sheet from column 0 and prints the size of each picture found, row by row.
     */
    @Test
    public void testReadImage() throws Exception {
        // Path of the .xlsx file to read
        String flightPath = "";
        try (XSSFWorkbook workbook = new XSSFWorkbook(flightPath)) {
            // Sheet that contains the pictures
            XSSFSheet firstSheet = workbook.getSheetAt(0);
            // Output: (row, column) -> picture bytes
            HashBasedTable<Integer, Integer, byte[]> images = HashBasedTable.create();
            // Columns that hold pictures
            ArrayList<Integer> imageColumns = Lists.newArrayList(0);

            ExcelImgParseUtils.readImage(workbook, firstSheet, images, imageColumns);
            System.out.println("图片数量:" + images.size());

            // Print every (row, image column) pair in ascending row order
            images.rowKeySet().stream().sorted().forEachOrdered(rowIndex -> {
                for (Integer colIndex : imageColumns) {
                    byte[] data = images.get(rowIndex, colIndex);
                    int size = data == null ? 0 : data.length;
                    System.out.println("行:" + rowIndex + "列:" + colIndex + "图片数据大小:" + size);
                }
            });
        }
    }
相关推荐
天外天-亮1 小时前
Vue + excel下载 + 水印
前端·vue.js·excel
一个尚在学习的计算机小白2 小时前
java集合
java·开发语言
IUGEI2 小时前
synchronized的工作机制是怎样的?深入解析synchronized底层原理
java·开发语言·后端·c#
q***13612 小时前
Windows操作系统部署Tomcat详细讲解
java·windows·tomcat
z***I3942 小时前
Java桌面应用案例
java·开发语言
r***12382 小时前
SpringBoot最佳实践之 - 使用AOP记录操作日志
java·spring boot·后端
间彧2 小时前
Java进程内存深度解析:从JVM组件内存到RSS的全面视角
java
间彧2 小时前
对比GraalVM Native Image与传统JVM,在内存管理方面各自适合哪些具体业务场景?
java
daidaidaiyu2 小时前
Spring IOC 源码学习一 基本姿势
java·spring