Java解析Excel图片

目录

功能描述

主要是实现 Excel 中的图片提取

支持浮动图片和内嵌图片

支持 Office 和 WPS

社会我 T 哥,人狠话不多。上代码。


代码实现

依赖

xml 复制代码
<dependency>
   <groupId>com.alibaba</groupId>
   <artifactId>easyexcel-core</artifactId>
   <version>3.3.2</version>
</dependency>

代码

java 复制代码
package cn.texous.utils;

import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.usermodel.*;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
import org.springframework.util.CollectionUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.StreamSupport;

/**
 * 从 excel 中读取图片,兼容浮动图片,嵌入式图片(WPS,Office)
 *
 * @author liuxiaohua
 * @version 1.0.0
 */
@Slf4j
public class ExcelImageAnalysisUtils {


    /**
     * Reads the pictures of the first sheet of an Excel file.
     * <p>
     * Shortcut for {@link #readImage(String, Integer, Map)} with sheet index {@code 0}.
     *
     * @param filePath        path of the Excel file
     * @param excelImageTable receives the result as row index -> (column index -> image path)
     * @throws IOException propagated from the delegated overload
     */
    public static void readImage(String filePath,
                                 Map<Integer, Map<Integer, String>> excelImageTable) throws IOException {
        // The first sheet is the default target
        final Integer firstSheet = 0;
        readImage(filePath, firstSheet, excelImageTable);
    }

    /**
     * Reads the pictures of one sheet of an Excel (xlsx) file.
     * <p>
     * The package is opened with {@link PackageAccess#READ}. This utility never modifies the
     * workbook, and closing a workbook whose package was opened read-write saves the package
     * back to the file it was read from.
     *
     * @param filePath        path of the Excel file
     * @param sheetNo         zero-based index of the sheet to scan
     * @param excelImageTable receives the result as row index -> (column index -> image path)
     * @throws IOException if the file cannot be read or is not a valid OOXML package
     */
    public static void readImage(String filePath, Integer sheetNo,
                                 Map<Integer, Map<Integer, String>> excelImageTable) throws IOException {
        final OPCPackage pkg;
        try {
            pkg = OPCPackage.open(filePath, PackageAccess.READ);
        } catch (org.apache.poi.openxml4j.exceptions.InvalidFormatException e) {
            // Keep the declared contract: callers of this method only expect IOException
            throw new IOException(e.toString(), e);
        }
        try (XSSFWorkbook workbook = new XSSFWorkbook(pkg)) {
            // Sheet that holds the pictures
            XSSFSheet sheet = workbook.getSheetAt(sheetNo);
            ExcelImageAnalysisUtils.readImage(workbook, sheet, excelImageTable);
        } finally {
            // Releases the file handle even when the workbook constructor itself failed
            pkg.revert();
        }
    }

    /**
     * Reads the pictures of the first sheet of an Excel workbook supplied as a stream.
     * <p>
     * Shortcut for {@link #readImage(InputStream, Integer, Map)} with sheet index {@code 0}.
     *
     * @param inputStream     stream with the Excel file content
     * @param excelImageTable receives the result as row index -> (column index -> image path)
     * @throws IOException propagated from the delegated overload
     */
    public static void readImage(InputStream inputStream,
                                 Map<Integer, Map<Integer, String>> excelImageTable) throws IOException {
        // The first sheet is the default target
        final Integer firstSheet = 0;
        readImage(inputStream, firstSheet, excelImageTable);
    }

    /**
     * Reads the pictures of one sheet of an Excel workbook supplied as a stream.
     * <p>
     * The workbook built from the stream is closed before this method returns.
     *
     * @param inputStream     stream with the Excel file content
     * @param sheetNo         zero-based index of the sheet to scan
     * @param excelImageTable receives the result as row index -> (column index -> image path)
     * @throws IOException if the workbook cannot be read
     */
    public static void readImage(InputStream inputStream, Integer sheetNo,
                                 Map<Integer, Map<Integer, String>> excelImageTable) throws IOException {
        try (XSSFWorkbook book = new XSSFWorkbook(inputStream)) {
            // Resolve the target sheet, then hand over to the workbook-based overload
            XSSFSheet target = book.getSheetAt(sheetNo);
            readImage(book, target, excelImageTable);
        }
    }

    /**
     * Reads the pictures of a sheet and records where each one is located. Handles:
     * <ol>
     *   <li>floating pictures (anchored drawings; the anchor's top-left cell is used, so a
     *       carelessly placed picture may be attributed to a neighbouring cell),</li>
     *   <li>Office in-cell pictures (RichData mechanism),</li>
     *   <li>WPS in-cell pictures (DISPIMG formula).</li>
     * </ol>
     * Nothing is scanned when the workbook reports no pictures at all.
     *
     * @param workbook        the xlsx workbook
     * @param sheet           the sheet to scan; call once per sheet for multi-sheet workbooks
     * @param excelImageTable receives the result as row index -> (column index -> image path)
     */
    public static void readImage(XSSFWorkbook workbook, XSSFSheet sheet,
                                 Map<Integer, Map<Integer, String>> excelImageTable) {
        log.info("开始读取Excel图片...");

        List<XSSFPictureData> pictures = workbook.getAllPictures();
        int pictureCount = pictures == null ? 0 : pictures.size();
        log.info("工作簿中总图片数: {}", pictureCount);

        if (pictureCount == 0) {
            return;
        }

        // Floating pictures first, then in-cell pictures (an in-cell hit overwrites the same cell)
        readFloatingPictures(sheet, excelImageTable);
        readEmbeddedImages(workbook, sheet, excelImageTable);

        // The outer map is keyed by row, so its size is a row count; sum the cells instead
        int locatedImages = excelImageTable.values().stream()
                .mapToInt(Map::size)
                .sum();
        log.info("图片读取完成,共读取 {} 张图片", locatedImages);
    }

    /**
     * Collects the floating (drawing-layer) pictures of a sheet.
     * <p>
     * Each picture is stored under the row and column of its anchor's first cell, with the
     * package part name of its image data as the value. Pictures without a client anchor or
     * without picture data are skipped, so one unresolved shape does not abort the scan.
     *
     * @param sheet the sheet to scan; {@code null} yields no entries
     * @param table receives the result as row index -> (column index -> image path)
     */
    public static void readFloatingPictures(XSSFSheet sheet,
                                            Map<Integer, Map<Integer, String>> table) {
        Optional.ofNullable(sheet)
                .map(XSSFSheet::getDrawingPatriarch)
                .stream()
                .flatMap(s -> StreamSupport.stream(s.spliterator(), false))
                .filter(s -> s instanceof XSSFPicture)
                .map(shape -> (XSSFPicture) shape)
                .forEach(pic -> {
                    XSSFClientAnchor anchor = pic.getClientAnchor();
                    if (anchor == null) {
                        return;
                    }
                    XSSFPictureData picData = pic.getPictureData();
                    if (picData == null) {
                        // No image part behind this shape: nothing to record
                        return;
                    }
                    String imagePath = picData.getPackagePart().getPartName().getURI().toString();
                    int row1 = anchor.getRow1();
                    int col1 = anchor.getCol1();
                    table.computeIfAbsent(row1, k -> new HashMap<>()).put(col1, imagePath);
                    log.info("读取到浮动图片: 行={}, 列={}, 路径={}", row1, col1, imagePath);
                });
    }

    /**
     * Collects in-cell (embedded) pictures written by either Office or WPS.
     * <p>
     * Both lookup tables are built first, then the sheet is scanned once to place the pictures.
     *
     * @param workbook        the workbook that owns the package
     * @param sheet           the sheet to scan
     * @param excelImageTable receives the result as row index -> (column index -> image path)
     */
    public static void readEmbeddedImages(XSSFWorkbook workbook,
                                          XSSFSheet sheet,
                                          Map<Integer, Map<Integer, String>> excelImageTable) {
        // Office: value-metadata index -> image path (RichData mechanism)
        Map<Integer, String> officeLookup = readOfficeEmbeddedValueMetadataToImagePathMap(workbook);

        // WPS: image id -> image path; these pictures carry no position of their own
        Map<String, String> wpsLookup = readWpsEmbeddedImageIdToImagePathMap(workbook);

        // Walk the cells and resolve each picture's position
        readExcelEmbeddedImagesInfo(sheet, wpsLookup, officeLookup, excelImageTable);
    }

    /**
     * Loads the raw bytes of an image stored inside the Excel package.
     *
     * @param pkg    the opened Excel package
     * @param path   image path as recorded in the image table
     * @param office whether a bare relative path should be resolved against the Office
     *               richData directory
     * @return the bytes read from the image part
     */
    public static byte[] readImageDataFromImagePath(OPCPackage pkg, String path, boolean office) {
        // Single-slot holder so the callback can hand the bytes back
        AtomicReference<byte[]> holder = new AtomicReference<>();
        ExceptionConsumer<InputStream> copyOut = stream -> holder.set(IOUtils.toByteArray(stream));
        readImageDataFromImagePath(pkg, path, office, copyOut);
        return holder.get();
    }

    /**
     * Opens the image part at {@code path} and passes its stream to {@code consumer}.
     * <p>
     * The stream is closed once the consumer returns or throws. A path that resolves to no
     * part is reported as an {@link IllegalStateException}; like any other failure it reaches
     * the caller wrapped in the {@link RuntimeException} raised by
     * {@link #readImagePackagePartFromImagePath}.
     *
     * @param pkg      the opened Excel package
     * @param path     image path as recorded in the image table
     * @param office   whether a bare relative path should be resolved against the Office
     *                 richData directory
     * @param consumer callback receiving the image stream
     */
    public static void readImageDataFromImagePath(OPCPackage pkg, String path,
                                                  boolean office,
                                                  ExceptionConsumer<InputStream> consumer) {
        readImagePackagePartFromImagePath(pkg, path, office, part -> {
            if (part == null) {
                throw new IllegalStateException("image part not found: " + path);
            }
            // try-with-resources: the part stream was previously left open
            try (InputStream in = part.getInputStream()) {
                consumer.accept(in);
            }
        });
    }

    /**
     * Resolves the package part of an image and passes it to {@code consumer}.
     * <p>
     * Any exception raised while resolving or consuming is logged and rethrown wrapped in a
     * {@link RuntimeException}. A {@code null} consumer means the part is resolved only.
     *
     * @param pkg      the opened Excel package
     * @param path     image path as recorded in the image table
     * @param office   whether a bare relative path should be resolved against the Office
     *                 richData directory
     * @param consumer callback receiving the resolved part (may be {@code null})
     */
    public static void readImagePackagePartFromImagePath(OPCPackage pkg, String path,
                                                         boolean office,
                                                         ExceptionConsumer<PackagePart> consumer) {
        try {
            PackagePart part = getImagePackagePartFromImagePath(pkg, path, office);
            if (consumer == null) {
                return;
            }
            consumer.accept(part);
        } catch (Exception failure) {
            log.warn("readImageDataFromImagePath error: path = {}, error = {}", path, failure.getMessage());
            throw new RuntimeException("处理图片失败", failure);
        }
    }


    /**
     * Resolves an image path to its package part.
     * <p>
     * Path handling:
     * <ul>
     *   <li>{@code ../media/x.png} is rebased onto {@code /xl/},</li>
     *   <li>a path starting with {@code /} is used as is,</li>
     *   <li>any other path is placed under {@code /xl/richData/} when {@code office} is set,
     *       otherwise under {@code /xl/}.</li>
     * </ul>
     *
     * @param pkg    the opened Excel package
     * @param path   image path as recorded in the image table
     * @param office whether a bare relative path belongs to the Office richData directory
     * @return the part, or {@code null} when the path is blank or no part was found
     */
    public static PackagePart getImagePackagePartFromImagePath(OPCPackage pkg, String path, boolean office) {
        boolean blank = path == null || path.trim().isEmpty();
        if (blank) {
            return null;
        }

        final String partName;
        if (path.startsWith("../")) {
            // Relative to a sibling directory of /xl, e.g. ../media/image1.png
            partName = "/xl/" + path.substring(3);
        } else if (path.startsWith("/")) {
            partName = path;
        } else {
            String base = office ? "richData/" : "";
            partName = "/xl/" + base + path;
        }

        return readPackagePart(pkg, partName);
    }

    /**
     * Extracts the image id from a WPS {@code DISPIMG} formula.
     * <p>
     * Supported shapes: {@code _xlfn.DISPIMG("ID_XXX",1)}, {@code DISPIMG("ID_XXX")}, each
     * optionally prefixed with {@code =}, and a DISPIMG call wrapped in another function such
     * as {@code IF(A1>0,_xlfn.DISPIMG("ID_XXX",1),"")}. The argument list is taken from the
     * DISPIMG call itself; when the name is absent the first parenthesised group is used.
     *
     * @param formula the cell formula, may be {@code null}
     * @return the id without quotes; an empty string for {@code null}; the trimmed and
     *         prefix-stripped formula itself when no argument list is found
     */
    public static String readImageIdFromFormula(String formula) {
        if (formula == null) {
            return "";
        }

        // Strip the leading '=' and the '_xlfn.' prefix
        String text = formula.trim();
        if (text.startsWith("=")) {
            text = text.substring(1).trim();
        }
        if (text.startsWith("_xlfn.")) {
            text = text.substring(6).trim();
        }

        // Anchor on DISPIMG so an enclosing function's parenthesis is not mistaken for it
        int nameAt = text.indexOf("DISPIMG");
        int open = text.indexOf('(', Math.max(nameAt, 0));
        int close = open < 0 ? -1 : text.indexOf(')', open + 1);
        if (open > 0 && close > open) {
            String firstArg = text.substring(open + 1, close).trim();
            // Only the first argument is the id
            int comma = firstArg.indexOf(',');
            if (comma > 0) {
                firstArg = firstArg.substring(0, comma).trim();
            }
            // Drop the surrounding quotes
            return firstArg.replace("\"", "").replace("'", "");
        }

        return text;
    }

    /**
     * Builds the lookup used to place Office RichData in-cell pictures.
     * <p>
     * Chain followed: cell {@code vm} attribute -> metadata.xml -> rdrichvalue.xml
     * (LocalImageIdentifier) -> relationships of richValueRel.xml -> media image.
     *
     * @param workbook the workbook that owns the package
     * @return value-metadata index -> image path, or {@code null} when the workbook has no
     *         usable RichData parts or parsing failed
     */
    private static Map<Integer, String> readOfficeEmbeddedValueMetadataToImagePathMap(XSSFWorkbook workbook) {
        try {
            OPCPackage opc = workbook.getPackage();

            // Step 1: all three RichData parts must be present
            PackagePart metadata = readPackagePart(opc, "/xl/metadata.xml");
            PackagePart richValues = readPackagePart(opc, "/xl/richData/rdrichvalue.xml");
            PackagePart richValueRels = readPackagePart(opc, "/xl/richData/richValueRel.xml");
            boolean complete = metadata != null && richValues != null && richValueRels != null;
            if (!complete) {
                log.debug("【Office】:缺少 RichData 必要文件,跳过解析");
                return null;
            }

            log.info("【Office】:检测到 Office RichData 机制,开始解析嵌入图片...");

            // Step 2: relationship id -> image path
            Map<String, String> pathByRelId = readRIdToImagePathMap(richValueRels);
            if (CollectionUtils.isEmpty(pathByRelId)) {
                log.info("【Office】:richValueRel.xml.rels 中没有图片关系");
                return null;
            }

            // Step 3: LocalImageIdentifier -> rich value indexes
            Map<Integer, List<Integer>> richValuesByImageId = readImageIdToRichValueIndexMap(richValues);
            if (CollectionUtils.isEmpty(richValuesByImageId)) {
                return null;
            }

            // Step 4: rich value index -> value-metadata indexes
            Map<Integer, List<Integer>> valueMetadataByRichValue = readRichValueToValueMetadataMap(metadata);
            if (CollectionUtils.isEmpty(valueMetadataByRichValue)) {
                return null;
            }

            // Step 5: join the three tables
            return readValueMetadataToImagePathMap(
                    richValuesByImageId, valueMetadataByRichValue, pathByRelId);
        } catch (Exception failure) {
            log.warn("【Office】:解析 Office RichData 图片异常: {}", failure.getMessage());
            return null;
        }
    }

    /**
     * Scans the cells of a sheet and records the position of every in-cell picture.
     * <p>
     * Each existing cell is checked against the Office lookup and then the WPS lookup. The
     * scan is skipped when both lookups are empty, because neither check can match then.
     *
     * @param sheet                       the sheet to scan
     * @param imageIdToImagePathMap       WPS lookup: image id -> image path (may be {@code null})
     * @param valueMetadataToImagePathMap Office lookup: value-metadata index -> image path
     *                                    (may be {@code null})
     * @param tableImage                  receives the result as row index -> (column index ->
     *                                    image path)
     */
    private static void readExcelEmbeddedImagesInfo(
            XSSFSheet sheet,
            Map<String, String> imageIdToImagePathMap,
            Map<Integer, String> valueMetadataToImagePathMap,
            Map<Integer, Map<Integer, String>> tableImage) {
        boolean noWps = imageIdToImagePathMap == null || imageIdToImagePathMap.isEmpty();
        boolean noOffice = valueMetadataToImagePathMap == null || valueMetadataToImagePathMap.isEmpty();
        if (noWps && noOffice) {
            return;
        }

        // getLastRowNum() is the zero-based index of the last row, hence the inclusive bound
        int lastRow = sheet.getLastRowNum();
        for (int rowIndex = 0; rowIndex <= lastRow; rowIndex++) {
            XSSFRow row = sheet.getRow(rowIndex);
            if (row == null) {
                continue;
            }

            // getLastCellNum() is the last cell index PLUS ONE (-1 for a row without cells),
            // hence the exclusive bound
            int cellEnd = row.getLastCellNum();
            for (int colIndex = 0; colIndex < cellEnd; colIndex++) {
                XSSFCell cell = row.getCell(colIndex);
                if (cell == null) {
                    continue;
                }

                // Office in-cell picture
                readOfficeEmbeddedImagesPositionInfo(cell, valueMetadataToImagePathMap, tableImage);
                // WPS in-cell picture
                readWpsEmbeddedImagesPositionInfo(cell, imageIdToImagePathMap, tableImage);
            }
        }
    }

    /**
     * Records the position of a WPS in-cell picture when the cell holds a DISPIMG formula.
     * <p>
     * A formula whose id has no known image path is only logged.
     *
     * @param cell                  the cell to inspect
     * @param imageIdToImagePathMap WPS lookup: image id -> image path
     * @param table                 receives the result as row index -> (column index -> image path)
     */
    private static void readWpsEmbeddedImagesPositionInfo(XSSFCell cell,
                                                          Map<String, String> imageIdToImagePathMap,
                                                          Map<Integer, Map<Integer, String>> table) {
        if (CollectionUtils.isEmpty(imageIdToImagePathMap)) {
            return;
        }
        if (cell == null || cell.getCellType() != CellType.FORMULA) {
            return;
        }

        String expression = cell.getCellFormula();
        if (expression == null || !expression.contains("DISPIMG")) {
            return;
        }

        String id = readImageIdFromFormula(expression);
        String path = imageIdToImagePathMap.get(id);
        boolean resolved = path != null && !path.isEmpty();
        if (!resolved) {
            log.warn("【WPS】:未找到 ID = {} 对应的图片数据: row = {}, col = {}",
                    id, cell.getRowIndex(), cell.getColumnIndex());
            return;
        }

        table.computeIfAbsent(cell.getRowIndex(), rowKey -> new HashMap<>())
                .put(cell.getColumnIndex(), path);
    }

    /**
     * Records the position of an Office in-cell picture when the cell carries a {@code vm}
     * (value metadata) attribute.
     * <p>
     * A {@code vm} value without a known image path is only logged.
     *
     * @param cell                        the cell to inspect
     * @param valueMetadataToImagePathMap Office lookup: value-metadata index -> image path
     * @param table                       receives the result as row index -> (column index ->
     *                                    image path)
     */
    private static void readOfficeEmbeddedImagesPositionInfo(XSSFCell cell,
                                                             Map<Integer, String> valueMetadataToImagePathMap,
                                                             Map<Integer, Map<Integer, String>> table) {
        if (CollectionUtils.isEmpty(valueMetadataToImagePathMap)) {
            return;
        }

        // The vm attribute is only reachable through the underlying CTCell
        CTCell raw = cell.getCTCell();
        if (raw == null || !raw.isSetVm()) {
            return;
        }

        long vm = raw.getVm();
        // vm is 1-based, the lookup is keyed by 0-based index
        int lookupIndex = ((int) vm) - 1;
        String path = valueMetadataToImagePathMap.get(lookupIndex);
        boolean resolved = path != null && !path.isEmpty();
        if (!resolved) {
            log.warn("【Office】:未找到 vm = {} 对应的图片数据: row = {}, col = {}",
                    vm, cell.getRowIndex(), cell.getColumnIndex());
            return;
        }

        table.computeIfAbsent(cell.getRowIndex(), rowKey -> new HashMap<>())
                .put(cell.getColumnIndex(), path);
    }

    /**
     * Joins the three RichData tables into value-metadata index -> image path.
     * <p>
     * For every relationship id {@code rIdN} the local image id is taken to be {@code N - 1}.
     * NOTE(review): this assumes the LocalImageIdentifier order matches the rId numbering;
     * confirm against richValueRel.xml.
     * <p>
     * Entries that cannot be joined are skipped instead of failing the whole lookup: ids not
     * of the form {@code rIdN}, images without rich values, and rich values that no value
     * metadata refers to.
     *
     * @param imageIdToRichValueIndexMap  local image id -> rich value indexes
     * @param richValueToValueMetadataMap rich value index -> value-metadata indexes
     * @param rIdToImagePathMap           relationship id -> image path
     * @return value-metadata index -> image path, never {@code null}
     */
    private static Map<Integer, String> readValueMetadataToImagePathMap(
            Map<Integer, List<Integer>> imageIdToRichValueIndexMap,
            Map<Integer, List<Integer>> richValueToValueMetadataMap,
            Map<String, String> rIdToImagePathMap) {
        Map<Integer, String> valueMetadataToImagePath = new HashMap<>();
        for (Map.Entry<String, String> relation : rIdToImagePathMap.entrySet()) {
            String rid = relation.getKey();
            // Bounded digit count keeps parseInt from overflowing
            if (rid == null || !rid.matches("rId\\d{1,9}")) {
                continue;
            }
            int localImageId = Integer.parseInt(rid.substring(3)) - 1;

            List<Integer> richValueIndexes = imageIdToRichValueIndexMap.get(localImageId);
            if (richValueIndexes == null) {
                continue;
            }
            for (Integer richValueIndex : richValueIndexes) {
                List<Integer> valueMetadataIndexes = richValueToValueMetadataMap.get(richValueIndex);
                if (valueMetadataIndexes == null) {
                    // A rich value no cell metadata points at; previously this raised an NPE
                    continue;
                }
                for (Integer valueMetadataIndex : valueMetadataIndexes) {
                    valueMetadataToImagePath.put(valueMetadataIndex, relation.getValue());
                }
            }
        }
        return valueMetadataToImagePath;
    }

    /**
     * Parses metadata.xml into rich value index -> value-metadata indexes.
     * <p>
     * Only the {@code bk} elements under the first {@code valueMetadata} element are read;
     * the position of a {@code bk} is its value-metadata index, and the {@code v} attribute
     * of its first {@code rc} child is taken as the rich value index. Parse failures are
     * logged and leave the map with whatever was collected so far.
     *
     * @param metadataPart the metadata.xml part
     * @return the mapping, possibly empty, never {@code null}
     */
    private static Map<Integer, List<Integer>> readRichValueToValueMetadataMap(PackagePart metadataPart) {
        Map<Integer, List<Integer>> result = new HashMap<>();
        try (InputStream in = metadataPart.getInputStream()) {
            DocumentBuilder parser = createSecureDocumentBuilder();
            Document xml = parser.parse(in);

            // Nothing to do without any bk element or without a valueMetadata section
            if (xml.getElementsByTagName("bk").getLength() <= 0) {
                return result;
            }
            NodeList sections = xml.getElementsByTagName("valueMetadata");
            if (sections.getLength() <= 0) {
                return result;
            }

            Element section = (Element) sections.item(0);
            NodeList blocks = section.getElementsByTagName("bk");
            log.info("【Office】:找到 {} 个 ValueMetadata 定义", blocks.getLength());

            for (int vmIndex = 0; vmIndex < blocks.getLength(); vmIndex++) {
                Element block = (Element) blocks.item(vmIndex);
                NodeList records = block.getElementsByTagName("rc");
                if (records.getLength() <= 0) {
                    continue;
                }

                String rawRichValueIndex = ((Element) records.item(0)).getAttribute("v");
                if (rawRichValueIndex == null || rawRichValueIndex.isEmpty()) {
                    continue;
                }

                try {
                    int richValueIndex = Integer.parseInt(rawRichValueIndex);
                    result.computeIfAbsent(richValueIndex, key -> new ArrayList<>()).add(vmIndex);
                    log.info("【Office】:ValueMetadata[{}] -> RichValue[{}]", vmIndex, richValueIndex);
                } catch (NumberFormatException badNumber) {
                    log.warn("【Office】:ValueMetadata v属性解析失败: {}", rawRichValueIndex);
                }
            }
        } catch (Exception failure) {
            log.warn("【Office】:解析 metadata.xml 失败: {}", failure.getMessage());
        }
        return result;
    }

    /**
     * Parses rdrichvalue.xml into local image id -> rich value indexes.
     * <p>
     * The position of an {@code rv} element is its rich value index; the text of its first
     * {@code v} child is read as the LocalImageIdentifier (0 = rId1, 1 = rId2, ...). Values
     * that cannot be parsed are logged and skipped.
     *
     * @param rdRichValuePart the rdrichvalue.xml part
     * @return the mapping, possibly empty, never {@code null}
     */
    private static Map<Integer, List<Integer>> readImageIdToRichValueIndexMap(PackagePart rdRichValuePart) {
        Map<Integer, List<Integer>> result = new HashMap<>();
        try (InputStream in = rdRichValuePart.getInputStream()) {
            Document xml = createSecureDocumentBuilder().parse(in);

            NodeList richValues = xml.getElementsByTagName("rv");
            log.info("【Office】:找到 {} 个 RichValue 定义", richValues.getLength());

            for (int rvIndex = 0; rvIndex < richValues.getLength(); rvIndex++) {
                NodeList values = ((Element) richValues.item(rvIndex)).getElementsByTagName("v");
                if (values.getLength() < 1) {
                    continue;
                }

                // The first <v> holds the LocalImageIdentifier
                String rawImageId = values.item(0).getTextContent().trim();
                try {
                    int imageId = Integer.parseInt(rawImageId);
                    result.computeIfAbsent(imageId, key -> new ArrayList<>()).add(rvIndex);
                    log.info("【Office】:RichValue[{}] -> LocalImageIdentifier={} (rId{})",
                            rvIndex, imageId, imageId + 1);
                } catch (Exception bad) {
                    log.warn("【Office】:LocalImageIdentifier 解析失败: {}, {}", rawImageId, bad.getMessage());
                }
            }
        } catch (Exception failure) {
            log.warn("【Office】:解析 rdrichvalue.xml 失败: {}", failure.getMessage());
        }
        return result;
    }

    /**
     * Builds the WPS lookup image id -> image path from {@code /xl/cellimages.xml} and its
     * relationships.
     *
     * @param workbook the workbook that owns the package
     * @return image id -> image path, or {@code null} when the part is missing, has no image
     *         relationships, or parsing failed
     */
    private static Map<String, String> readWpsEmbeddedImageIdToImagePathMap(XSSFWorkbook workbook) {
        try {
            // 1. Locate cellimages.xml in the underlying package
            OPCPackage opc = workbook.getPackage();
            PackagePart cellImages = readPackagePart(opc, "/xl/cellimages.xml");
            if (cellImages == null) {
                return null;
            }

            // 2. Relationship id -> image path, from the part's relationships
            Map<String, String> pathByRelId = readRIdToImagePathMap(cellImages);
            if (CollectionUtils.isEmpty(pathByRelId)) {
                log.info("【WPS】:cellimages.xml.rels 中没有图片关系");
                return null;
            }

            // 3. Image id -> image path, from the XML content
            return readWpsEmbeddedImageIdPathMapFromXml(cellImages, pathByRelId);
        } catch (Exception failure) {
            log.warn("【WPS】:解析 cellimages.xml 失败: {}", failure.getMessage(), failure);
            return null;
        }
    }

    /**
     * Parses cellimages.xml into image id -> image path.
     * <p>
     * For every {@code cellImage} element the id is the {@code name} attribute of its first
     * {@code cNvPr} descendant and the relationship id is the {@code r:embed} attribute of
     * its first {@code blip} descendant. An unknown relationship id stores a {@code null} path.
     *
     * @param cellImagesPart the cellimages.xml part
     * @param rIdToPathMap   relationship id -> image path
     * @return image id -> image path
     * @throws Exception if the part cannot be read or parsed
     */
    private static Map<String, String> readWpsEmbeddedImageIdPathMapFromXml(
            PackagePart cellImagesPart, Map<String, String> rIdToPathMap) throws Exception {
        final String relationshipNs =
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
        Map<String, String> result = new HashMap<>();
        try (InputStream in = cellImagesPart.getInputStream()) {
            Document xml = createSecureDocumentBuilder().parse(in);

            NodeList images = xml.getElementsByTagNameNS("*", "cellImage");
            log.info("【WPS】:找到 {} 个 cellImage 元素", images.getLength());

            for (int index = 0; index < images.getLength(); index++) {
                Element image = (Element) images.item(index);

                // Image id lives in the name attribute of cNvPr
                NodeList properties = image.getElementsByTagNameNS("*", "cNvPr");
                if (properties.getLength() <= 0) {
                    continue;
                }
                String id = ((Element) properties.item(0)).getAttribute("name");

                // Relationship id lives in the embed attribute of blip
                NodeList blips = image.getElementsByTagNameNS("*", "blip");
                if (blips.getLength() <= 0) {
                    continue;
                }
                String relId = ((Element) blips.item(0)).getAttributeNS(relationshipNs, "embed");

                result.put(id, rIdToPathMap.get(relId));
            }
        }
        return result;
    }

    /**
     * Collects the image relationships of a part as relationship id -> target URI string.
     * <p>
     * A relationship counts as an image when its type contains {@code "image"}.
     *
     * @param cellImagesPart the part whose relationships are read
     * @return the mapping, or {@code null} when the relationships cannot be read
     */
    private static Map<String, String> readRIdToImagePathMap(PackagePart cellImagesPart) {
        try {
            Map<String, String> pathByRelId = new HashMap<>();
            for (PackageRelationship relation : cellImagesPart.getRelationships()) {
                if (!relation.getRelationshipType().contains("image")) {
                    continue;
                }
                String relId = relation.getId();
                String target = relation.getTargetURI().toString();
                pathByRelId.put(relId, target);
                log.info("获取 rId 映射: {} -> {}", relId, target);
            }
            return pathByRelId;
        } catch (Exception failure) {
            log.warn("获取 rid 和 image path 的关系映射失败: {}", failure.getMessage());
            return null;
        }
    }

    /**
     * Looks up a part of the package by name.
     * <p>
     * Any exception raised by the lookup is logged at info level and reported as {@code null}.
     *
     * @param pkg  the opened Excel package
     * @param path absolute part name, e.g. {@code /xl/metadata.xml}
     * @return the result of the package lookup, or {@code null} when the lookup threw
     */
    private static PackagePart readPackagePart(OPCPackage pkg, String path) {
        PackagePart found = null;
        try {
            found = pkg.getPart(PackagingURIHelper.createPartName(path));
        } catch (Exception failure) {
            log.info("未找到 {}: {}", path, failure.getMessage());
        }
        return found;
    }

    /**
     * Creates a namespace-aware {@link DocumentBuilderFactory} hardened against XXE.
     * <p>
     * DOCTYPE declarations, external entities, external DTD loading, XInclude and entity
     * expansion are switched off, and secure processing is switched on.
     *
     * @return the configured factory
     * @throws Exception if the underlying parser rejects one of the features
     */
    private static DocumentBuilderFactory createSecureDocumentBuilderFactory()
            throws Exception {

        DocumentBuilderFactory hardened = DocumentBuilderFactory.newInstance();

        // 1. Refuse DOCTYPE declarations altogether
        hardened.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

        // 2. No external general or parameter entities
        hardened.setFeature("http://xml.org/sax/features/external-general-entities", false);
        hardened.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

        // 3. No external DTD loading, no XInclude, no entity expansion
        hardened.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        hardened.setXIncludeAware(false);
        hardened.setExpandEntityReferences(false);

        // 4. Secure-processing mode
        hardened.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);

        // Namespace awareness is needed by the getElementsByTagNameNS / getAttributeNS callers
        hardened.setNamespaceAware(true);

        return hardened;
    }

    /**
     * Creates a {@link DocumentBuilder} from the hardened factory of this class.
     *
     * @return a new document builder
     * @throws Exception if the factory cannot be configured or the builder cannot be created
     */
    public static DocumentBuilder createSecureDocumentBuilder()
            throws Exception {
        return createSecureDocumentBuilderFactory().newDocumentBuilder();
    }

    @FunctionalInterface
    public interface ExceptionConsumer<T> {

        /**
         * Performs this operation on the given argument.
         *
         * @param t the input argument
         */
        void accept(T t) throws Exception;

        /**
         * Returns a composed {@code Consumer} that performs, in sequence, this
         * operation followed by the {@code after} operation. If performing either
         * operation throws an exception, it is relayed to the caller of the
         * composed operation.  If performing this operation throws an exception,
         * the {@code after} operation will not be performed.
         *
         * @param after the operation to perform after this operation
         * @return a composed {@code Consumer} that performs in sequence this
         * operation followed by the {@code after} operation
         * @throws NullPointerException if {@code after} is null
         */
        default ExceptionConsumer<T> andThen(ExceptionConsumer<? super T> after) {
            Objects.requireNonNull(after);
            return (T t) -> {
                accept(t);
                after.accept(t);
            };
        }

    }

    /**
     * Demo entry point: reads the pictures of the first sheet of a sample workbook and prints
     * the position and byte size of each one.
     *
     * @param args unused
     * @throws IOException if the workbook cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Sample workbook
        String filePath = "/temp/test_wps.xlsx";
        try (XSSFWorkbook workbook = new XSSFWorkbook(filePath)) {
            // Sheet that holds the pictures
            XSSFSheet sheet = workbook.getSheetAt(0);
            // Result: row index -> (column index -> image path)
            Map<Integer, Map<Integer, String>> excelImagePath = new HashMap<>();

            ExcelImageAnalysisUtils.readImage(workbook, sheet, excelImagePath);

            // The outer map is keyed by row; count the cells to get the number of pictures
            int imageCount = excelImagePath.values().stream().mapToInt(Map::size).sum();
            System.out.println("图片数量:" + imageCount);

            for (Map.Entry<Integer, Map<Integer, String>> rowEntry : excelImagePath.entrySet()) {
                for (Map.Entry<Integer, String> cellEntry : rowEntry.getValue().entrySet()) {
                    byte[] bytes = readImageDataFromImagePath(workbook.getPackage(), cellEntry.getValue(), true);
                    System.out.println("行:" + rowEntry.getKey() + "列:"
                            + cellEntry.getKey() + "图片数据大小:"
                            + (bytes == null ? 0 : bytes.length));
                }
            }
        }
    }

}
相关推荐
青云计划6 小时前
知光项目知文发布模块
java·后端·spring·mybatis
赶路人儿6 小时前
Jsoniter(java版本)使用介绍
java·开发语言
探路者继续奋斗7 小时前
IDD意图驱动开发之意图规格说明书
java·规格说明书·开发规范·意图驱动开发·idd
AI_56787 小时前
Excel数据透视表提速:Power Query预处理百万数据
数据库·excel
消失的旧时光-19438 小时前
第十九课:为什么要引入消息队列?——异步系统设计思想
java·开发语言
A懿轩A8 小时前
【Java 基础编程】Java 面向对象入门:类与对象、构造器、this 关键字,小白也能写 OOP
java·开发语言
乐观勇敢坚强的老彭9 小时前
c++寒假营day03
java·开发语言·c++
biubiubiu07069 小时前
谷歌浏览器无法访问localhost:8080
java
大黄说说9 小时前
新手选语言不再纠结:Java、Python、Go、JavaScript 四大热门语言全景对比与学习路线建议
java·python·golang
烟沙九洲9 小时前
Java 中的 封装、继承、多态
java