Java读取Excel图片技术详解:悬浮式与嵌入式图片的三种实现方案(支持WPS嵌入和Office Excel嵌入)

一、概要

在实际业务中,我们经常需要从Excel文件中提取数据,其中包括单元格数据和图片信息。本文将详细介绍三种读取Excel图片的技术方案,分别针对悬浮式图片、WPS嵌入式图片和Office嵌入式图片。

二、核心类说明

| 类名 | 功能描述 |
| --- | --- |
| ExcelController | 提供三个上传接口,分别对应三种图片读取方式 |
| ExcelService | 抽象基类,包含通用解析逻辑和图片保存方法 |
| ExcelServiceFloatImpl | 处理悬浮式图片的实现类 |
| ExcelServiceImplantWpsImpl | 处理WPS嵌入式图片的实现类 |
| ExcelServiceImplantOfficeImpl | 处理Office嵌入式图片的实现类 |
| ExcelVO | 数据载体类,存储行号、单元格数据和图片位置信息 |
| ImagePosition | 图片位置信息类 |
| ImageInfo | 图片信息辅助类 |

三、控制器接口

java 复制代码
/**
 * REST entry points for uploading an Excel file, one endpoint per picture-reading strategy.
 *
 * NOTE(review): the three endpoint methods below are shown as signatures only; their bodies
 * are not part of this listing. Presumably each one delegates to the matching injected
 * ExcelService — confirm against the full source.
 */
@RestController
@RequestMapping("/excel")
public class ExcelController {
    
    // One ExcelService field per strategy; the field names match the implementation class names.
    @Resource
    private ExcelService excelServiceFloatImpl;
    @Resource
    private ExcelService excelServiceImplantWpsImpl;
    @Resource
    private ExcelService excelServiceImplantOfficeImpl;
    
    // Endpoint for reading floating (anchored-over-cells) pictures
    @PostMapping("/upload/float")
    public ResponseEntity<List<ExcelVO>> uploadExcelFloat(@RequestParam("file") MultipartFile file)
    
    // Endpoint for reading pictures embedded in cells by WPS
    @PostMapping("/upload/implant/wps")
    public ResponseEntity<List<ExcelVO>> uploadExcelImplantWps(@RequestParam("file") MultipartFile file)
    
    // Endpoint for reading pictures embedded in cells by Office Excel
    @PostMapping("/upload/implant/office")
    public ResponseEntity<List<ExcelVO>> uploadExcelImplantOffice(@RequestParam("file") MultipartFile file)
}

四、抽象父类

java 复制代码
import com.central.common.exception.BusinessException;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;

/**
 * Base class for the Excel picture readers.
 *
 * <p>{@link #parseExcel(MultipartFile)} validates the upload, copies it to a temporary file,
 * reads the cell text of the first sheet starting at {@link #START_ROW_INDEX}, and then hands
 * the rows to {@link #readExcel(List, OPCPackage, XSSFSheet)}, which each subclass implements
 * to attach pictures to those rows.
 */
@Slf4j
public abstract class ExcelService {

    /** Zero-based index of the first row to read (6 means the seventh row of the sheet). */
    public static final int START_ROW_INDEX = 6;

    /** Largest accepted upload, in bytes (55 MB). */
    private static final long MAX_FILE_SIZE_BYTES = 55L * 1024 * 1024;

    /** Directory that extracted pictures are written to. */
    private static final String IMAGE_SAVE_DIR = "D:\\image";

    /**
     * Parses an uploaded Excel file.
     *
     * @param file the uploaded Excel file
     * @return the parsed rows; an empty list when {@code file} is {@code null} or empty
     * @throws BusinessException if the file is not {@code .xlsx} or is larger than 55 MB
     * @throws Exception         if the workbook cannot be read; the failure is logged and then
     *                           rethrown so that callers never receive a {@code null} list
     */
    public List<ExcelVO> parseExcel(MultipartFile file) throws Exception {
        if (file == null || file.isEmpty()) {
            return Collections.emptyList();
        }
        // Validate the file extension.
        String filename = file.getOriginalFilename();
        if (filename == null || !filename.toLowerCase(Locale.ROOT).endsWith(".xlsx")) {
            throw new BusinessException("仅支持.xlsx格式文件");
        }
        if (file.getSize() > MAX_FILE_SIZE_BYTES) {
            throw new BusinessException("文件大小不能超过55MB");
        }

        File tempFile = null;
        try {
            // The package is opened from a temporary copy of the upload.
            tempFile = File.createTempFile("excel_temp_", ".xlsx");
            file.transferTo(tempFile);
            try (OPCPackage pkg = OPCPackage.open(tempFile, PackageAccess.READ);
                 XSSFWorkbook workbook = new XSSFWorkbook(pkg)) {
                XSSFSheet sheet = workbook.getSheetAt(0);
                return readExcel(readCellValues(sheet), pkg, sheet);
            }
        } catch (Exception e) {
            log.error("解析失败", e);
            // Propagate instead of returning null: the method already declares the exception.
            throw e;
        } finally {
            deleteTempFile(tempFile);
        }
    }

    /**
     * Reads the formatted text of every existing row from {@link #START_ROW_INDEX} onwards.
     * Rows for which the sheet returns {@code null} produce no entry, so list positions do
     * not necessarily equal row offsets; use {@code ExcelVO.getRowNum()} (1-based) to
     * identify a row.
     */
    private static List<ExcelVO> readCellValues(XSSFSheet sheet) {
        List<ExcelVO> rows = new ArrayList<>();
        // One formatter is enough for the whole sheet.
        DataFormatter dataFormatter = new DataFormatter(Locale.CHINA);
        for (int i = START_ROW_INDEX; i <= sheet.getLastRowNum(); i++) {
            Row row = sheet.getRow(i);
            if (row == null) {
                continue;
            }
            List<String> cellValues = new ArrayList<>();
            for (Cell cell : row) {
                cellValues.add(dataFormatter.formatCellValue(cell));
            }
            ExcelVO vo = new ExcelVO();
            vo.setRowNum(i + 1);
            vo.setCellValues(cellValues);
            rows.add(vo);
        }
        return rows;
    }

    /** Deletes the temporary copy of the upload, logging a warning when deletion fails. */
    private static void deleteTempFile(File tempFile) {
        if (tempFile != null && tempFile.exists() && !tempFile.delete()) {
            log.warn("临时文件删除失败: {}", tempFile.getAbsolutePath());
        }
    }

    /**
     * Saves a picture to the local image directory.
     *
     * @param imageData picture bytes
     * @param rowNum    row number of the picture, used in the file name
     * @param colNum    column number of the picture, used in the file name
     * @param extension file extension without the leading dot
     * @return the path of the written file, or {@code null} if writing failed (the failure is logged)
     */
    String saveImageToLocal(byte[] imageData, int rowNum, int colNum, String extension) {
        try {
            Path saveDir = Paths.get(IMAGE_SAVE_DIR);
            if (!Files.exists(saveDir)) {
                Files.createDirectories(saveDir);
            }

            // The random UUID keeps file names unique across uploads.
            String fileName = String.format("row_%d_col_%d_%s.%s",
                    rowNum, colNum, UUID.randomUUID(), extension);
            Path filePath = saveDir.resolve(fileName);
            Files.write(filePath, imageData);
            return filePath.toString();
        } catch (IOException e) {
            log.error("保存图片到本地失败", e);
            return null;
        }
    }

    /**
     * Attaches pictures to the rows that were read from the sheet.
     *
     * @param result rows read by {@link #parseExcel(MultipartFile)}
     * @param pkg    the opened package of the uploaded workbook
     * @param sheet  the first sheet of the workbook
     * @return the rows, with picture information attached where a picture was found
     * @throws Exception if the pictures cannot be read
     */
    public abstract List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception;

}

五、悬浮式图片读取

java 复制代码
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.usermodel.*;
import org.springframework.stereotype.Service;

import java.util.*;

/**
 * Reads floating pictures, i.e. pictures listed as shapes of the sheet's drawing and
 * positioned by a client anchor.
 */
@Service
@Slf4j
public class ExcelServiceFloatImpl extends ExcelService {

    /**
     * Attaches each floating picture to the row its anchor starts in.
     *
     * <p>Rows are matched by their 1-based row number rather than by list position, because
     * the row list contains no entry for rows that are missing from the sheet.
     *
     * @param result rows read from the sheet
     * @param pkg    the opened package (not used by this implementation)
     * @param sheet  the sheet whose drawing is scanned
     * @return {@code result}, with picture positions set on the matching rows
     */
    @Override
    public List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception {
        XSSFDrawing drawing = sheet.getDrawingPatriarch();
        if (drawing == null || result.isEmpty()) {
            return result;
        }

        Map<Integer, ExcelVO> voByRowNum = new HashMap<>();
        for (ExcelVO vo : result) {
            voByRowNum.put(vo.getRowNum(), vo);
        }

        for (XSSFShape shape : drawing.getShapes()) {
            XSSFClientAnchor anchor = clientAnchorOf(shape);
            if (anchor == null) {
                continue;
            }
            // Only rows that were actually read have an entry, which also excludes
            // pictures anchored above START_ROW_INDEX.
            ExcelVO vo = voByRowNum.get(anchor.getRow1() + 1);
            if (vo != null) {
                vo.setImagePositions(saveAndLocate((XSSFPicture) shape, anchor));
            }
        }
        return result;
    }


    /**
     * Collects the floating pictures anchored at or below {@link #START_ROW_INDEX}, keyed by
     * 1-based row number. Every such picture is saved, whether or not a data row exists for it.
     * This helper is not called from within this class.
     */
    private Map<Integer, ImagePosition> getFloatingPictures(XSSFSheet sheet) throws Exception {
        Map<Integer, ImagePosition> pictures = new HashMap<>();
        XSSFDrawing drawing = sheet.getDrawingPatriarch();
        if (drawing == null) {
            return pictures;
        }
        for (XSSFShape shape : drawing.getShapes()) {
            XSSFClientAnchor anchor = clientAnchorOf(shape);
            if (anchor != null && anchor.getRow1() >= START_ROW_INDEX) {
                pictures.put(anchor.getRow1() + 1, saveAndLocate((XSSFPicture) shape, anchor));
            }
        }
        return pictures;
    }

    /** Returns the client anchor of {@code shape} if it is a picture that has one, else {@code null}. */
    private static XSSFClientAnchor clientAnchorOf(XSSFShape shape) {
        if (!(shape instanceof XSSFPicture)) {
            return null;
        }
        Object anchor = ((XSSFPicture) shape).getAnchor();
        return anchor instanceof XSSFClientAnchor ? (XSSFClientAnchor) anchor : null;
    }

    /** Saves the picture and returns its position as 1-based row/column plus the saved path. */
    private ImagePosition saveAndLocate(XSSFPicture picture, XSSFClientAnchor anchor) {
        int rowNum = anchor.getRow1() + 1;
        int colNum = anchor.getCol1() + 1;
        String filePath = saveImageToLocal(
                picture.getPictureData().getData(),
                rowNum,
                colNum,
                picture.getPictureData().suggestFileExtension()
        );

        ImagePosition imagePosition = new ImagePosition();
        imagePosition.setCol(colNum);
        imagePosition.setRow(rowNum);
        imagePosition.setUrl(filePath);
        return imagePosition;
    }

}

六、Office Excel嵌入式图片读取

通过观察发现,Office 嵌入图片后,在内部的工作表 XML 文件(xl\worksheets\sheet1.xml)中,嵌入图片所在单元格的标签(`<c>` 标签)会带有 "vm" 属性,该属性的取值恰好与图片文件名(图片存放在 xl\media 目录下,如 image1.png)中的递增序号一致。基于这一对应关系,读取单元格 `<c>` 标签的 "vm" 属性值,即可匹配到对应的图片文件,从而完成嵌入式图片的读取。

Excel

xl\media

xl\worksheets\sheet1.xml

实现逻辑:

  • 解析工作表XML(sheet1.xml),获取AB列单元格的vm属性(该属性与图片索引关联)和行号。
  • 遍历Excel包中的媒体文件(位于/xl/media/),建立图片索引和图片数据的映射。
  • 根据vm值匹配图片,将图片保存到本地,并设置到对应的行

存在问题

仅使用 Office Excel 编辑时,vm 属性与图片序号的对应关系不会出现问题;但如果混用 WPS 与 Office Excel 嵌入不同格式的图片,vm 属性值可能与图片文件名中的序号对应不上,请注意!!!

java 复制代码
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.springframework.stereotype.Service;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads pictures that Office Excel embedded in cells of column AB.
 *
 * <p>The worksheet XML marks such a cell with a {@code vm} attribute; this class pairs that
 * value with the number in the media file name ({@code /xl/media/image<N>.png}).
 */
@Service
@Slf4j
public class ExcelServiceImplantOfficeImpl extends ExcelService {
    private static final Pattern CELL_REF_PATTERN = Pattern.compile("AB(\\d+)");
    private static final Pattern IMAGE_PATH_PATTERN = Pattern.compile("/xl/media/image(\\d+)\\.(\\w+)");

    /** 1-based number of column AB, the only column scanned for embedded pictures. */
    private static final int IMAGE_COLUMN = 28;


    /**
     * Attaches the embedded picture of each AB cell to the row of that cell.
     *
     * <p>Rows are matched by their 1-based row number rather than by list position, because
     * the row list contains no entry for rows that are missing from the sheet.
     *
     * @param result rows read from the sheet
     * @param pkg    the opened package, scanned for media parts
     * @param sheet  the sheet whose XML part is inspected
     * @return {@code result}, with picture positions set on the matching rows
     */
    @Override
    public List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception {
        // Read the XML part of the sheet that was actually parsed instead of assuming sheet1.xml.
        Map<Integer, Integer> vmByRow;
        try (InputStream sheetStream = sheet.getPackagePart().getInputStream()) {
            vmByRow = parseSheetXML(sheetStream);
        }
        if (vmByRow.isEmpty()) {
            return result;
        }

        Map<Integer, ImageInfo> imageMap = getAllImages(pkg);

        Map<Integer, ExcelVO> voByRowNum = new HashMap<>();
        for (ExcelVO vo : result) {
            voByRowNum.put(vo.getRowNum(), vo);
        }

        for (Map.Entry<Integer, Integer> entry : vmByRow.entrySet()) {
            int rowNum = entry.getKey();
            ImageInfo imageInfo = imageMap.get(entry.getValue());
            ExcelVO vo = voByRowNum.get(rowNum);
            if (imageInfo == null || vo == null) {
                continue;
            }
            ImagePosition imagePosition = new ImagePosition();
            imagePosition.setRow(rowNum);
            imagePosition.setCol(IMAGE_COLUMN);
            imagePosition.setUrl(saveImageToLocal(
                    imageInfo.getFileBytes(),
                    rowNum,
                    IMAGE_COLUMN,
                    imageInfo.getExtension()
            ));
            vo.setImagePositions(imagePosition);
        }
        return result;
    }


    /**
     * Parses the worksheet XML and collects the {@code vm} attribute of the AB cells.
     *
     * @param is stream of the worksheet XML; the caller closes it
     * @return map of 1-based row number to {@code vm} value, keyed by row so that several
     *         cells carrying the same {@code vm} value are all kept
     */
    private Map<Integer, Integer> parseSheetXML(InputStream is) throws Exception {
        Map<Integer, Integer> vmByRow = new HashMap<>();
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // The XML comes from an uploaded file: refuse DOCTYPE declarations to rule out XXE.
        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(is);

        NodeList cells = doc.getElementsByTagName("c");
        for (int i = 0; i < cells.getLength(); i++) {
            Node cellNode = cells.item(i);
            if (cellNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element cellElement = (Element) cellNode;

            // Only column AB, and only rows from the first data row onwards.
            Matcher matcher = CELL_REF_PATTERN.matcher(cellElement.getAttribute("r"));
            if (!matcher.matches()) {
                continue;
            }
            int rowNum = Integer.parseInt(matcher.group(1));
            if (rowNum < START_ROW_INDEX + 1) {
                continue;
            }
            String vmAttr = cellElement.getAttribute("vm");
            if (!vmAttr.isEmpty()) {
                vmByRow.put(rowNum, Integer.parseInt(vmAttr));
            }
        }
        return vmByRow;
    }

    /**
     * Loads the PNG media parts of the package.
     *
     * @return map of the number in the file name ({@code image<N>.png}) to the picture data
     */
    private Map<Integer, ImageInfo> getAllImages(OPCPackage pkg) throws Exception {
        Map<Integer, ImageInfo> imageMap = new HashMap<>();
        for (PackagePart part : pkg.getParts()) {
            String partName = part.getPartName().toString();
            if (!partName.startsWith("/xl/media/")) {
                continue;
            }
            Matcher matcher = IMAGE_PATH_PATTERN.matcher(partName);
            // Only PNG parts are indexed; media parts in other formats are skipped.
            if (!matcher.matches() || !"png".equalsIgnoreCase(matcher.group(2))) {
                continue;
            }
            try (InputStream is = part.getInputStream();
                 ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = is.read(buffer)) != -1) {
                    bos.write(buffer, 0, bytesRead);
                }
                ImageInfo imageInfo = new ImageInfo();
                imageInfo.setExtension(matcher.group(2).toLowerCase());
                imageInfo.setFileBytes(bos.toByteArray());
                imageMap.put(Integer.parseInt(matcher.group(1)), imageInfo);
            }
        }
        return imageMap;
    }

}

七、WPS嵌入式图片读取

具体实现逻辑可参考其他人博客,以下代码参考该文章修改实现

实现类:

java 复制代码
import com.alibaba.cloud.commons.lang.StringUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;
import org.springframework.stereotype.Service;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.util.*;


/**
 * @author mashb
 */
@Service
@Slf4j
public class ExcelServiceImplantWpsImpl extends ExcelService {

    @Override
    public List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception {
        //获取所有包文件
        List<PackagePart> parts = pkg.getParts();
        //获取每个工作表中的包文件
        Map<Integer, List<PackagePart>> picturePath = getEmbedPictures(parts);
        for (Integer key : picturePath.keySet()) {
            List<PackagePart> rows = picturePath.get(key);
            if (rows.size() < START_ROW_INDEX) {
                return result;
            }
            for (int i = START_ROW_INDEX; i < rows.size(); i++) {
                int resultIndex = i - START_ROW_INDEX;
                if (resultIndex < result.size()) {
                    PackagePart part = rows.get(i);
                    // 获取文件名
                    String partName = part.getPartName().getName();
                    String fileName = new File(partName).getName();
                    String extension = fileName.substring(fileName.lastIndexOf('.') + 1);
                    ExcelVO vo = result.get(resultIndex);
                    ImagePosition imagePosition = new ImagePosition();
                    imagePosition.setCol(28);
                    imagePosition.setRow(i + 1);
                    imagePosition.setUrl(saveImageToLocal(
                            IOUtils.toByteArray(part.getInputStream()),
                            i + 1,
                            28,
                            extension.toLowerCase()
                    ));
                    vo.setImagePositions(imagePosition);
                }
            }
        }
        return result;
    }

    private static Map<Integer, List<PackagePart>> getEmbedPictures(List<PackagePart> parts) throws Exception {
        Map<String, Set<String>> mapImg = new HashMap<>();
        Map<String, String> mapImgPath = new HashMap<>();
        Map<Integer, List<String>> dataMap = new HashMap<>();

        for (PackagePart part : parts) {
            PackagePartName partName = part.getPartName();
            String name = partName.getName();
            if ("/xl/cellimages.xml".equals(name)) {
                SAXBuilder builder = new SAXBuilder();
                // 获取文档
                Document doc = builder.build(part.getInputStream());
                // 获取根节点
                Element root = doc.getRootElement();
                List<Element> cellImageList = root.getChildren();
                for (Element imgEle : cellImageList) {
                    Element xdrPic = imgEle.getChildren().get(0);
                    Element xdrNvPicPr = xdrPic.getChildren().get(0);
                    Element xdrBlipFill = xdrPic.getChildren().get(1);
                    Element aBlip = xdrBlipFill.getChildren().get(0);
                    Attribute attr = aBlip.getAttributes().get(0);
                    String imgId = xdrNvPicPr.getChildren().get(0).getAttributeValue("name");
                    String id = attr.getValue();
                    if (mapImg.containsKey(id)) {
                        mapImg.get(id).add(imgId);
                    } else {
                        Set<String> set = new HashSet<>();
                        set.add(imgId);
                        mapImg.put(id, set);
                    }
                }
            }

            if ("/xl/_rels/cellimages.xml.rels".equals(name)) {
                SAXBuilder builder = new SAXBuilder();
                // 获取文档
                Document doc = builder.build(part.getInputStream());
                // 获取根节点
                Element root = doc.getRootElement();
                List<Element> relationshipList = root.getChildren();
                for (Element relationship : relationshipList) {
                    String id = relationship.getAttributeValue("Id");
                    String target = relationship.getAttributeValue("Target");
                    mapImgPath.put(id, target);

                }
            }

            if (name.contains("/xl/worksheets/sheet")) {
                // 获取文档
                String sheetNoStr = name.replace("/xl/worksheets/sheet", "").replace(".xml", "");
                Integer sheetNo = Integer.valueOf(sheetNoStr) - 1;
                // 步骤1:创建SAXParserFactory实例
                SAXParserFactory factory = SAXParserFactory.newInstance();
                // 步骤2:创建SAXParser实例
                SAXParser parser = factory.newSAXParser();
                MySAXParserHandler handler = new MySAXParserHandler();
                parser.parse(part.getInputStream(), handler);

                List<String> rows = handler.getRows();

                dataMap.put(sheetNo, rows);
            }

        }

        Map<String, String> imgMap = new HashMap<>();
        for (String id : mapImg.keySet()) {
            Set<String> imgIds = mapImg.get(id);
            String path = mapImgPath.get(id);
            for (String imgId : imgIds) {
                imgMap.put(imgId, path);
            }
        }
        for (Integer key : dataMap.keySet()) {
            List<String> rows = dataMap.get(key);
            for (int i = 0; i < rows.size(); i++) {
                String imgId = rows.get(i);
                if (imgMap.containsKey(imgId)) {
                    rows.set(i, imgMap.get(imgId));
                }
            }
        }

        Map<Integer, List<PackagePart>> map = new HashMap<>();
        for (Integer key : dataMap.keySet()) {
            List<PackagePart> list = new ArrayList<>();
            map.put(key, list);
            List<String> pathList = dataMap.get(key);
            for (int i = 0; i < pathList.size(); i++) {
                list.add(i, null);
                String path = pathList.get(i);
                if (StringUtils.isNotEmpty(path)) {
                    for (PackagePart part : parts) {
                        PackagePartName partName = part.getPartName();
                        String name = partName.getName();
                        if (name.contains(path)) {
                            list.set(i, part);
                            break;
                        }
                    }
                }

            }
        }
        return map;
    }
}

SAX解析类

java 复制代码
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.ArrayList;
import java.util.List;

/**
 * SAX handler that records, for every {@code row} element of a worksheet XML, the picture id
 * of the last {@code DISPIMG("<id>"...)} call found in the row's text.
 *
 * <p>{@link #rows} holds one entry per {@code row} element in document order; the entry is
 * {@code null} when the row contains no such call.
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class MySAXParserHandler extends DefaultHandler {

    /** Text that opens a picture reference, up to and including the opening quote of the id. */
    private static final String DISPIMG_OPEN = "DISPIMG(\"";

    /** Character data collected for the current row; {@code null} until the first row starts. */
    String value = null;

    /** Picture id per row, in document order. */
    List<String> rows = new ArrayList<>();

    /** Number of {@code row} elements completed so far. */
    int rowIndex = 0;

    /**
     * Starts collecting text when a {@code row} element opens.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if (("row").equals(qName)) {
            value = "";
        }
    }

    /**
     * Stores the picture id of the row that just closed, or {@code null} if it has none.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        super.endElement(uri, localName, qName);
        if (("row").equals(qName)) {
            rows.add(extractImageId(value));
            rowIndex++;
            value = "";
        }
    }

    /**
     * Appends character data to the text of the current row.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        // Text that precedes the first row is of no interest.
        if (value != null) {
            value += new String(ch, start, length);
        }
    }

    /**
     * Extracts the id between the quotes of the last {@code DISPIMG("<id>"} occurrence.
     *
     * @return the id, or {@code null} when the text has no complete occurrence; text that
     *         merely mentions "DISPIMG" therefore no longer raises an index exception
     */
    private static String extractImageId(String text) {
        if (text == null) {
            return null;
        }
        int open = text.lastIndexOf(DISPIMG_OPEN);
        if (open < 0) {
            return null;
        }
        int idStart = open + DISPIMG_OPEN.length();
        int idEnd = text.indexOf('"', idStart);
        return idEnd < 0 ? null : text.substring(idStart, idEnd);
    }
}

八、辅助类

ExcelVO

java 复制代码
import lombok.Data;

import java.util.List;
/**
 * Carrier for one row read from the sheet: its row number, the text of its cells, and
 * optionally the position of a picture found for the row.
 */
@Data
public class ExcelVO {
    // Row number
    private int rowNum;
    // List of cell values
    private List<String> cellValues;
    // Picture position information (optional); a single value, so a row holds at most one picture
    private ImagePosition imagePositions;
}

ImageInfo

java 复制代码
import lombok.Data;

/**
 * Holder for the data of one picture: its file extension and its raw bytes.
 */
@Data
public class ImageInfo {
    // File extension of the picture
    private String extension;
    // Raw bytes of the picture file
    private byte[] fileBytes;
}

ImagePosition

java 复制代码
import lombok.Data;

/**
 * Position of a picture in the sheet together with the location it was saved to.
 */
@Data
public class ImagePosition {
    // Row of the picture
    private int row;
    // Column of the picture
    private int col;
    // Location of the saved picture
    private String url;
}
相关推荐
拾贰_C2 小时前
[wps_clear]wps清理残余 ——注册表不干净
经验分享·wps
小七de尾巴2 小时前
WPS清除图片段落布局脚本
wps
爱上妖精的尾巴2 小时前
5-16WPS JS宏 map数组转换迭代应用-1(一维嵌套数组结构重组)
开发语言·前端·javascript·wps·jsa
神云瑟瑟2 小时前
spring boot拦截器获取requestBody的巨坑
java·spring boot·拦截器
博睿谷IT99_2 小时前
Linux 备份与恢复常用命令
java·linux·服务器
qq_402605653 小时前
JAVA大文件分片上传
java·大文件上传
ss2733 小时前
手写MyBatis第78弹:装饰器模式在MyBatis二级缓存中的应用:从LRU到防击穿的全方案实现
java·开发语言
凯尔萨厮3 小时前
Java学习笔记五(多态)
java·笔记·学习