Java 兼容读取WPS和Office图片,结合EasyExcel读取单元格信息

在Java开发中,处理Excel文件中的图片(包括浮动图片和嵌入图片)是一个常见的需求。本文将介绍如何使用EasyExcel和Apache POI库来读取Excel文件中的图片,并将其与数据进行关联。

1. 引言

在许多应用场景中,Excel文件不仅包含数据,还可能嵌入了图片。为了有效地提取这些图片并与数据关联,我们需要使用合适的库和方法。本文将详细介绍如何实现这一过程。

浮动图片

内嵌图片

pom

Maven 依赖配置(pom.xml)如下:
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.3.2</version>
</dependency>
<dependency>
    <groupId>org.jdom</groupId>
    <artifactId>jdom2</artifactId>
    <version>2.0.6</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.9</version>
</dependency>

java代码

SAX 解析处理器 MySAXParserHandler(用于从工作表 XML 中提取每一行的 DISPIMG 图片 ID):
/**
 * SAX handler that scans a worksheet XML stream and records, for every {@code <row>} element,
 * the image id referenced by a {@code DISPIMG("id", ...)} formula found in that row's text.
 *
 * <p>The result list has one entry per {@code <row>} element, in document order: the entry is
 * the extracted id, or {@code null} when the row contains no well-formed {@code DISPIMG} call.
 * The list index is the ordinal of the {@code <row>} element, not the value of its {@code r}
 * attribute, so rows omitted from the XML do not leave gaps.
 *
 * <p>When a row contains several {@code DISPIMG} calls, the id of the last one wins.
 */
public class MySAXParserHandler extends DefaultHandler {

    /** Start of a DISPIMG call as it appears in the concatenated row text. */
    private static final String DISPIMG_CALL = "DISPIMG(";

    /** DISPIMG call immediately followed by the opening quote of the id argument. */
    private static final String DISPIMG_CALL_QUOTED = "DISPIMG(\"";

    /** Character data collected since the current {@code <row>} started. */
    private final StringBuilder rowText = new StringBuilder();

    /** One entry per {@code <row>} element: the image id, or {@code null}. */
    private final List<String> rows = new ArrayList<>();

    /**
     * Returns the per-row image ids collected so far.
     *
     * @return the live, mutable result list (callers in this file update it in place)
     */
    public List<String> getRows() {
        return rows;
    }

    /**
     * Resets the text buffer whenever a new {@code <row>} element begins, discarding any
     * character data seen outside of rows.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if ("row".equals(qName)) {
            rowText.setLength(0);
        }
    }

    /**
     * On the end of a {@code <row>} element, appends the extracted image id (or {@code null})
     * to the result list.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        super.endElement(uri, localName, qName);
        if ("row".equals(qName)) {
            rows.add(extractImageId(rowText.toString()));
            rowText.setLength(0);
        }
    }

    /**
     * Accumulates character data; the parser may deliver one text node in several chunks,
     * so the chunks are appended rather than replaced.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        rowText.append(ch, start, length);
    }

    /**
     * Extracts the quoted id of the last {@code DISPIMG("id"...)} call in {@code text}.
     *
     * @param text concatenated character data of one row
     * @return the id, or {@code null} when there is no call or no closing quote
     */
    private static String extractImageId(String text) {
        int callStart = text.lastIndexOf(DISPIMG_CALL);
        if (callStart < 0) {
            // "DISPIMG" may appear as plain text without being a call; treat as "no image".
            return null;
        }
        String tail = text.substring(callStart).replace(DISPIMG_CALL_QUOTED, "");
        int closingQuote = tail.indexOf('"');
        if (closingQuote < 0) {
            // Malformed call without a quoted id: previously this threw StringIndexOutOfBounds.
            return null;
        }
        return tail.substring(0, closingQuote);
    }
}
导入数据对应的实体类 BatchSupplierGoodImportForm:
/**
 * Form object for a batch import of supplier goods: supplier identification plus the
 * {@link GoodItem} row type describing one imported product line.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by Lombok's
 * {@code @Data}.
 */
@Data
public class BatchSupplierGoodImportForm {

    /** Identifier of the supplier the goods belong to. */
    private Integer supplierId;

    /** Code of the supplier the goods belong to. */
    private String supplierCode;

    /**
     * One product row of the import sheet.
     *
     * <p>Field declaration order matters: it defines the parameter order of the constructor
     * generated by {@code @AllArgsConstructor}. In the importer shown in this article the
     * fields are filled through setters by column index, not through the annotations.
     */
    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public static class GoodItem {

        /** Product name; annotated with the header title "产品名称". */
        @ExcelProperty(value = "产品名称")
        private String productName;
        /**
         * Path or URL of the product picture; annotated with the header title "图片".
         * NOTE(review): {@code ExcelImageProperty} and {@code ExcelUrlImageConverter} are
         * project types not shown here — confirm their semantics (index = 2 presumably
         * refers to a column position).
         */
        @ExcelImageProperty(value = {"imageUrl"}, index = 2)
        @ExcelProperty(value = "图片", converter = ExcelUrlImageConverter.class)
        private String imageUrl;
        /** Brand; annotated with the header title "品牌". */
        @ExcelProperty(value = "品牌")
        private String supplierBrand;
        /** Specification / SKU description; annotated with the header title "规格". */
        @ExcelProperty(value = "规格")
        private String skuDesc;
        /** Group purchase price, kept as text; annotated with the header title "集采价". */
        @ExcelProperty(value = "集采价")
        private String groupPurchasePrice;
        /** Selling point; annotated with the header title "卖点". */
        @ExcelProperty(value = "卖点")
        private String sellPoint;
    }
}
图片提取与数据关联的实现类 ExcelImageExtractor2:
public class ExcelImageExtractor2 {

    /**
     * Demo entry point: runs {@link #importExcelSpuItem(File, String)} on one workbook.
     *
     * <p>The standalone extraction steps (floating pictures of .xls / .xlsx, embedded pictures
     * of .xlsx) are not invoked individually here; only their progress messages are printed,
     * exactly as before. The combined import performs the .xlsx extraction itself.
     *
     * @param args optional: {@code args[0]} is the path of the .xlsx file to import and
     *             {@code args[1]} the directory pictures are written to; the sample paths
     *             below are used for any argument that is omitted
     * @throws Exception if reading the workbook or writing a picture fails
     */
    public static void main(String[] args) throws Exception {
        // Sample location used when no command-line arguments are supplied.
        String defaultDir = "E:\\WeChat Files\\xxx\\FileStorage\\File\\2025-01";
        File xlsxFile = args.length > 0
                ? new File(args[0])
                : new File("E:\\WeChat Files\\xxx\\FileStorage\\File\\2025-01\\测试.xlsx");
        String fileDir = args.length > 1 ? args[1] : defaultDir;

        // Progress messages of the individual steps (the steps themselves are disabled).
        System.out.println("读取 .xls 文件中的浮动图片...");
        System.out.println("读取 .xlsx 文件中的浮动图片...");
        System.out.println("读取 .xlsx 文件中的嵌入图片...");

        System.out.println("读取 ");
        importExcelSpuItem(xlsxFile, fileDir);
    }

    /**
     * Imports product rows from an .xlsx workbook and attaches to each row the picture found
     * on the same sheet row.
     *
     * <p>Steps: (1) read header and data rows of the first sheet with EasyExcel, remembering
     * the real sheet row index of every kept data row; (2) extract floating and embedded
     * pictures into {@code fileDir}, producing a "row index -> picture file" map; (3) build one
     * {@code GoodItem} per data row and print the resulting list.
     *
     * <p>Columns are read by fixed position: 0 = product name, 2 = brand, 3 = specification,
     * 4 = group purchase price, 5 = selling point (column 1 holds the picture).
     *
     * @param originalFile the .xlsx file to import
     * @param fileDir      directory the extracted pictures are written to
     * @throws RuntimeException if the file cannot be read or the sheet has no header row
     */
    @SneakyThrows
    public static void importExcelSpuItem(File originalFile, String fileDir) {
        try {
            // Cache the file in memory so it can be read more than once.
            byte[] fileBytes = FileUtils.readFileToByteArray(originalFile);

            // Step 1: read the header and all data rows dynamically.
            List<Map<Integer, String>> headerList = new ArrayList<>();
            List<Map<Integer, String>> rawDataList = new ArrayList<>();
            // Sheet row index (0-based) of each entry in rawDataList. Needed because skipped
            // empty rows make the list position differ from the position on the sheet.
            List<Integer> rowIndexList = new ArrayList<>();
            List<BatchSupplierGoodImportForm.GoodItem> goodItemExcelInputs = new ArrayList<>();

            EasyExcel.read(new ByteArrayInputStream(fileBytes))
                    .registerReadListener(new AnalysisEventListener<Map<Integer, String>>() {
                        @Override
                        public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
                            headerList.add(headMap); // capture the header row
                        }

                        @Override
                        public void invoke(Map<Integer, String> data, AnalysisContext context) {
                            if (isEmptyRow(data)) {
                                return; // skip rows without any text
                            }
                            rawDataList.add(data);
                            rowIndexList.add(context.readRowHolder().getRowIndex());
                        }

                        @Override
                        public void doAfterAllAnalysed(AnalysisContext context) {
                            // nothing to do
                        }
                    })
                    .sheet()
                    .doRead();

            // A sheet without a header row cannot be mapped to columns.
            if (headerList.isEmpty()) {
                throw new RuntimeException("Excel 文件中没有表头");
            }

            // Step 2: extract the pictures; key = sheet row index, value = picture file.
            Map<Integer, String> imagePathMap = new HashMap<>();
            readXlsxImages(originalFile, fileDir, imagePathMap);
            readXlsxEmbeddedImages2(originalFile, fileDir, imagePathMap);

            // Step 3: join every data row with the picture of the same sheet row.
            for (int i = 0; i < rawDataList.size(); i++) {
                Map<Integer, String> rowData = rawDataList.get(i);
                BatchSupplierGoodImportForm.GoodItem item = new BatchSupplierGoodImportForm.GoodItem();

                item.setProductName(rowData.get(0));
                // Look up by the real row index instead of "list position + 1", which drifts
                // as soon as an empty row has been skipped.
                item.setImageUrl(imagePathMap.get(rowIndexList.get(i)));
                item.setSupplierBrand(rowData.get(2));
                item.setSkuDesc(rowData.get(3));
                item.setGroupPurchasePrice(rowData.get(4));
                item.setSellPoint(rowData.get(5));

                goodItemExcelInputs.add(item);
            }

            System.out.println(goodItemExcelInputs);
        } catch (IOException e) {
            throw new RuntimeException("文件读取失败", e);
        }
    }

    /**
     * Tells whether a parsed row carries no usable text.
     *
     * @param data column index -> cell text of one row
     * @return {@code true} when no cell value is non-blank (including a row with no cells)
     */
    private static boolean isEmptyRow(Map<Integer, String> data) {
        return data.values().stream().noneMatch(StringUtils::isNotBlank);
    }

    /**
     * Extracts the floating pictures of a legacy {@code .xls} workbook into {@code fileDir}.
     *
     * <p>Each picture is written as {@code "<row>行,<col>列.<ext>"}, using the 0-based row and
     * column of the anchor's top-left cell. The name does not include the sheet, so pictures
     * anchored at the same cell (on any sheet) overwrite each other.
     *
     * @param file    the .xls file to read
     * @param fileDir target directory; created when the first picture is written
     * @throws Exception if the workbook cannot be read or a picture cannot be written
     */
    public static void readXlsImages(File file, String fileDir) throws Exception {
        // try-with-resources: the stream and workbook are released even when extraction fails.
        try (FileInputStream in = new FileInputStream(file);
             HSSFWorkbook workbook = new HSSFWorkbook(in)) {
            for (Sheet sheet : workbook) {
                HSSFPatriarch drawingPatriarch = ((HSSFSheet) sheet).getDrawingPatriarch();
                if (drawingPatriarch == null) {
                    continue; // sheet has no drawing layer
                }
                for (HSSFShape shape : drawingPatriarch.getChildren()) {
                    if (!(shape instanceof HSSFPicture) || !(shape.getAnchor() instanceof HSSFClientAnchor)) {
                        continue; // not a cell-anchored picture
                    }
                    HSSFPictureData picData = ((HSSFPicture) shape).getPictureData();
                    if (picData == null) {
                        continue; // picture shape without stored image bytes
                    }
                    HSSFClientAnchor anchor = (HSSFClientAnchor) shape.getAnchor();
                    String key = anchor.getRow1() + "行," + anchor.getCol1() + "列";
                    File dir = new File(fileDir);
                    if (!dir.exists()) {
                        dir.mkdirs();
                    }
                    FileUtils.writeByteArrayToFile(
                            new File(dir, key + "." + picData.suggestFileExtension()), picData.getData());
                }
            }
        }
    }

    /**
     * Extracts the floating pictures of an {@code .xlsx} workbook into {@code fileDir} and
     * registers them by row.
     *
     * <p>Each picture is written as {@code "<row>行,<col>列.<ext>"}, using the 0-based row and
     * column of the anchor's top-left cell, and that file name is stored in
     * {@code imagePathMap} under the 0-based row index. The key does not include the sheet or
     * column, so a later picture on the same row replaces an earlier entry.
     *
     * @param file         the .xlsx file to read
     * @param fileDir      target directory; created when the first picture is written
     * @param imagePathMap receives "row index -> picture file name"
     * @throws Exception if the workbook cannot be read or a picture cannot be written
     */
    public static void readXlsxImages(File file, String fileDir, Map<Integer, String> imagePathMap) throws Exception {
        // try-with-resources: the stream and workbook are released even when extraction fails.
        try (FileInputStream in = new FileInputStream(file);
             XSSFWorkbook workbook = new XSSFWorkbook(in)) {
            for (Sheet sheet : workbook) {
                XSSFDrawing drawing = ((XSSFSheet) sheet).getDrawingPatriarch();
                if (drawing == null) {
                    continue; // sheet has no drawing layer
                }
                for (XSSFShape shape : drawing.getShapes()) {
                    if (!(shape instanceof XSSFPicture) || !(shape.getAnchor() instanceof XSSFClientAnchor)) {
                        continue; // not a cell-anchored picture
                    }
                    XSSFPictureData picData = ((XSSFPicture) shape).getPictureData();
                    if (picData == null) {
                        continue; // picture shape without stored image bytes
                    }
                    XSSFClientAnchor anchor = (XSSFClientAnchor) shape.getAnchor();
                    String fileName = anchor.getRow1() + "行," + anchor.getCol1() + "列"
                            + "." + picData.suggestFileExtension();
                    File dir = new File(fileDir);
                    if (!dir.exists()) {
                        dir.mkdirs();
                    }
                    FileUtils.writeByteArrayToFile(new File(dir, fileName), picData.getData());
                    imagePathMap.put(anchor.getRow1(), fileName);
                }
            }
        }
    }

    /**
     * Extracts the in-cell (embedded, {@code DISPIMG}) pictures of an {@code .xlsx} package
     * into {@code fileDir} and registers them by row.
     *
     * <p>Each picture is written as {@code "工作表<sheet>_<id>_<media file name>"}, where
     * {@code <id>} is the value returned by {@code getCellPosition}. The same file name is
     * stored in {@code imagePathMap} under the row's ordinal within its sheet, so the map value
     * always names the file that was actually written.
     *
     * @param file         the .xlsx file to read
     * @param fileDir      target directory; created when the first picture is written
     * @param imagePathMap receives "row ordinal -> picture file name"
     * @throws Exception if the package cannot be read or a picture cannot be written
     */
    public static void readXlsxEmbeddedImages2(File file, String fileDir, Map<Integer, String> imagePathMap) throws Exception {
        // try-with-resources: the stream and package are released even when extraction fails.
        try (FileInputStream in = new FileInputStream(file);
             OPCPackage opcPackage = OPCPackage.open(in)) {
            List<PackagePart> parts = opcPackage.getParts();
            Map<Integer, List<PackagePart>> picturePath = getEmbedPictures(parts);

            for (Map.Entry<Integer, List<PackagePart>> sheetEntry : picturePath.entrySet()) {
                Integer sheetIndex = sheetEntry.getKey();
                List<PackagePart> rows = sheetEntry.getValue();
                for (int i = 0; i < rows.size(); i++) {
                    PackagePart part = rows.get(i);
                    if (part == null) {
                        continue; // row without an embedded picture
                    }
                    String name = part.getPartName().getName();

                    // Identifier recorded for this row in the sheet XML.
                    String cellPosition = getCellPosition(sheetIndex, i, parts);

                    // One name for both the file on disk and the map entry.
                    String fileName = "工作表" + sheetIndex + "_" + cellPosition + "_"
                            + name.substring(name.lastIndexOf("/") + 1);

                    File dir = new File(fileDir);
                    if (!dir.exists()) {
                        dir.mkdirs();
                    }
                    try (InputStream imgIs = part.getInputStream()) {
                        FileUtils.copyInputStreamToFile(imgIs, new File(dir, fileName));
                    }
                    imagePathMap.put(i, fileName);
                }
            }
        }
    }

    /**
     * Returns the identifier that {@code MySAXParserHandler} recorded for one row of a sheet.
     *
     * <p>Despite the method name, the value is not a row/column coordinate: it is whatever the
     * handler extracted for the row at position {@code imageIndex}, i.e. the id inside that
     * row's {@code DISPIMG("id", ...)} formula.
     *
     * @param sheetIndex 0-based sheet index; the part {@code /xl/worksheets/sheet<index+1>.xml} is parsed
     * @param imageIndex position of the row within the handler's result list
     * @param parts      all parts of the opened package
     * @return the recorded identifier, or {@code "未知行_未知列"} when the sheet part is missing,
     *         the index is out of range, or the row has no identifier
     * @throws Exception if the sheet XML cannot be parsed
     */
    private static String getCellPosition(int sheetIndex, int imageIndex, List<PackagePart> parts) throws Exception {
        String sheetPartName = "/xl/worksheets/sheet" + (sheetIndex + 1) + ".xml";
        for (PackagePart part : parts) {
            if (!sheetPartName.equals(part.getPartName().getName())) {
                continue;
            }
            SAXParserFactory factory = SAXParserFactory.newInstance();
            // Workbooks may come from uploads: refuse DOCTYPE declarations to rule out XXE.
            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            SAXParser parser = factory.newSAXParser();
            MySAXParserHandler handler = new MySAXParserHandler();
            try (InputStream sheetStream = part.getInputStream()) {
                parser.parse(sheetStream, handler);
            }

            List<String> rows = handler.getRows();
            if (imageIndex >= 0 && imageIndex < rows.size() && rows.get(imageIndex) != null) {
                return rows.get(imageIndex);
            }
        }
        return "未知行_未知列"; // fallback when nothing was found
    }

    /**
     * Resolves, for every sheet, which media part (picture) is embedded in which row.
     *
     * <p>Three sources inside the package are combined:
     * <ul>
     *   <li>{@code /xl/cellimages.xml}: relationship id -> picture names (the ids used in
     *       {@code DISPIMG} formulas);</li>
     *   <li>{@code /xl/_rels/cellimages.xml.rels}: relationship id -> target media path;</li>
     *   <li>{@code /xl/worksheets/sheetN.xml}: per row, the {@code DISPIMG} id found by
     *       {@code MySAXParserHandler}.</li>
     * </ul>
     *
     * @param parts all parts of the opened package
     * @return 0-based sheet index -> list with one entry per row of that sheet: the media part
     *         of the row's picture, or {@code null} when the row has none or it cannot be resolved
     */
    @SneakyThrows
    private static Map<Integer, List<PackagePart>> getEmbedPictures(List<PackagePart> parts) {
        Map<String, Set<String>> mapImg = new HashMap<>();      // relationship id -> picture names
        Map<String, String> mapImgPath = new HashMap<>();       // relationship id -> media target
        Map<Integer, List<String>> dataMap = new HashMap<>();   // sheet index -> per-row picture name

        for (PackagePart part : parts) {
            String name = part.getPartName().getName();

            if ("/xl/cellimages.xml".equals(name)) {
                Document doc;
                try (InputStream in = part.getInputStream()) {
                    doc = newHardenedBuilder().build(in);
                }
                for (Element imgEle : doc.getRootElement().getChildren()) {
                    // Positional navigation: pic -> (nvPicPr, blipFill) -> blip, as laid out in the part.
                    Element xdrPic = imgEle.getChildren().get(0);
                    Element xdrNvPicPr = xdrPic.getChildren().get(0);
                    Element xdrBlipFill = xdrPic.getChildren().get(1);
                    Element aBlip = xdrBlipFill.getChildren().get(0);
                    String id = aBlip.getAttributes().get(0).getValue();
                    String imgId = xdrNvPicPr.getChildren().get(0).getAttributeValue("name");
                    if (imgId != null) {
                        // A null name would later match every row that has no picture at all.
                        mapImg.computeIfAbsent(id, k -> new HashSet<>()).add(imgId);
                    }
                }
            }

            if ("/xl/_rels/cellimages.xml.rels".equals(name)) {
                Document doc;
                try (InputStream in = part.getInputStream()) {
                    doc = newHardenedBuilder().build(in);
                }
                /*
                  <Relationship Id="rId999" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1000.jpeg"/>
                 */
                for (Element relationship : doc.getRootElement().getChildren()) {
                    mapImgPath.put(relationship.getAttributeValue("Id"), relationship.getAttributeValue("Target"));
                }
            }

            if (name.contains("/xl/worksheets/sheet")) {
                String sheetNoStr = name.replace("/xl/worksheets/sheet", "").replace(".xml", "");
                if (sheetNoStr.isEmpty() || !sheetNoStr.chars().allMatch(Character::isDigit)) {
                    continue; // not a "sheet<N>.xml" part; nothing to index
                }
                Integer sheetNo = Integer.valueOf(sheetNoStr) - 1;

                SAXParserFactory factory = SAXParserFactory.newInstance();
                // Workbooks may come from uploads: refuse DOCTYPE declarations to rule out XXE.
                factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
                SAXParser parser = factory.newSAXParser();
                MySAXParserHandler handler = new MySAXParserHandler();
                try (InputStream in = part.getInputStream()) {
                    parser.parse(in, handler);
                }
                dataMap.put(sheetNo, handler.getRows());
            }
        }

        // picture name -> media target path
        Map<String, String> imgMap = new HashMap<>();
        for (Map.Entry<String, Set<String>> entry : mapImg.entrySet()) {
            String path = mapImgPath.get(entry.getKey());
            for (String imgId : entry.getValue()) {
                imgMap.put(imgId, path);
            }
        }

        // Replace each row's picture name by its media path where one is known.
        for (List<String> rows : dataMap.values()) {
            for (int i = 0; i < rows.size(); i++) {
                String imgId = rows.get(i);
                if (imgMap.containsKey(imgId)) {
                    rows.set(i, imgMap.get(imgId));
                }
            }
        }

        // Resolve each media path to the package part that holds the picture bytes.
        Map<Integer, List<PackagePart>> map = new HashMap<>();
        for (Map.Entry<Integer, List<String>> entry : dataMap.entrySet()) {
            List<String> pathList = entry.getValue();
            List<PackagePart> list = new ArrayList<>(pathList.size());
            for (String path : pathList) {
                list.add(StringUtils.isNotEmpty(path) ? findPartByPath(parts, path) : null);
            }
            map.put(entry.getKey(), list);
        }
        return map;
    }

    /** Creates a JDOM builder that rejects DOCTYPE declarations (XXE hardening). */
    private static SAXBuilder newHardenedBuilder() {
        SAXBuilder builder = new SAXBuilder();
        builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        return builder;
    }

    /**
     * Returns the first part whose name contains {@code path}
     * (e.g. {@code /xl/media/image1.jpeg} for {@code media/image1.jpeg}), or {@code null}.
     */
    private static PackagePart findPartByPath(List<PackagePart> parts, String path) {
        for (PackagePart part : parts) {
            if (part.getPartName().getName().contains(path)) {
                return part;
            }
        }
        return null;
    }
}

参考链接:https://blog.csdn.net/qq_23123177/article/details/133638391

相关推荐
Quantum&Coder8 分钟前
MATLAB语言的数据库交互
开发语言·后端·golang
网络空间站9 分钟前
MATLAB语言的软件工程
开发语言·后端·golang
C++小厨神13 分钟前
Rust语言的循环实现
开发语言·后端·golang
ss27316 分钟前
2025新年源码免费送
java·前端·javascript·spring boot·后端·html
感谢地心引力23 分钟前
【MATLAB】绘制投资组合的有效前沿
开发语言·matlab·金融·股票·有效前沿
旧物有情23 分钟前
蓝桥杯历届真题--#好数,简单模拟(C++,Java)
java·c++·蓝桥杯
嘻嘻哈哈曹先生24 分钟前
Token
java
xianwu54326 分钟前
反向代理模块。
linux·开发语言·网络·c++·git
前端啊龙1 小时前
eslint.config.js和.eslintrc.js有什么区别
开发语言·前端·javascript
BinaryBardC1 小时前
R语言的软件工程
开发语言·后端·golang