在 Java 开发中,读取 Excel 文件里的图片(包括浮动图片和内嵌在单元格中的图片)是一个常见需求。本文介绍如何使用 EasyExcel 和 Apache POI 读取这两类图片,并把图片与所在行的数据关联起来。
1. 引言
在许多应用场景中,Excel文件不仅包含数据,还可能嵌入了图片。为了有效地提取这些图片并与数据关联,我们需要使用合适的库和方法。本文将详细介绍如何实现这一过程。
浮动图片
内嵌图片
Maven 依赖(pom.xml)
xml
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.3.2</version>
</dependency>
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom2</artifactId>
<version>2.0.6</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
</dependency>
java代码
java
/**
 * SAX handler that scans a worksheet XML part and records, for every {@code <row>} element,
 * the image ID referenced by a {@code DISPIMG("<id>"...)} expression found in that row's text.
 *
 * <p>{@link #getRows()} holds exactly one entry per {@code <row>} element, in document order:
 * the image ID when the row contains such an expression, otherwise {@code null}. When a row
 * contains several expressions, only the last one is kept.
 *
 * <p>Element names are compared against {@code qName}, so the handler expects the un-prefixed
 * element name {@code row}.
 */
public class MySAXParserHandler extends DefaultHandler {

    /** Start of a cell-image expression, up to and including the quote that opens the image ID. */
    private static final String DISPIMG_PREFIX = "DISPIMG(\"";

    /** Character data collected for the row currently being parsed. */
    private final StringBuilder value = new StringBuilder();

    /** One entry per {@code <row>} element, in document order. */
    private final List<String> rows = new ArrayList<>();

    /**
     * Returns the per-row results collected so far.
     *
     * @return a list with one entry per parsed {@code <row>}: the image ID, or {@code null}
     *         when the row references no image
     */
    public List<String> getRows() {
        return rows;
    }

    /**
     * Starts a fresh text buffer whenever a {@code <row>} element opens.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if ("row".equals(qName)) {
            value.setLength(0);
        }
    }

    /**
     * On {@code </row>}, stores the image ID found in the row's text (or {@code null}).
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        super.endElement(uri, localName, qName);
        if ("row".equals(qName)) {
            rows.add(extractImageId(value));
            value.setLength(0);
        }
    }

    /**
     * Appends character data to the buffer of the current row.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        // The parser may deliver one text node in several chunks, so always append.
        value.append(ch, start, length);
    }

    /**
     * Extracts the quoted ID of the last {@code DISPIMG("...")} expression in the given text.
     *
     * @param rowText all character data of one row
     * @return the ID between the quotes, or {@code null} when the text has no such expression
     *         or the closing quote is missing
     */
    private static String extractImageId(StringBuilder rowText) {
        int prefixAt = rowText.lastIndexOf(DISPIMG_PREFIX);
        if (prefixAt < 0) {
            // Covers rows that merely mention "DISPIMG" without an opening DISPIMG(" expression.
            return null;
        }
        int idStart = prefixAt + DISPIMG_PREFIX.length();
        int idEnd = rowText.indexOf("\"", idStart);
        if (idEnd < 0) {
            return null;
        }
        return rowText.substring(idStart, idEnd);
    }
}
java
/**
 * Form object for importing a batch of goods for one supplier.
 *
 * <p>Holds the supplier's identifiers and declares {@link GoodItem}, the per-row model of the
 * imported sheet. Accessors are not written by hand; they are presumably generated by the
 * {@code @Data} annotation (Lombok) -- confirm against the file's imports.
 */
@Data
public class BatchSupplierGoodImportForm {
// Numeric identifier of the supplier the goods belong to.
private Integer supplierId;
// Code of the supplier the goods belong to.
private String supplierCode;
/**
 * One imported goods row. Each field carries an {@code @ExcelProperty} annotation whose value
 * is the header text of the corresponding column.
 *
 * <p>NOTE(review): field order determines the parameter order of the generated all-args
 * constructor, so fields should not be reordered casually.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public static class GoodItem {
// Column "产品名称" (product name).
@ExcelProperty(value = "产品名称")
private String productName;
// Column "图片" (picture). ExcelImageProperty and ExcelUrlImageConverter are declared outside
// this snippet; their exact semantics (including index = 2) cannot be confirmed from here.
@ExcelImageProperty(value = {"imageUrl"}, index = 2)
@ExcelProperty(value = "图片", converter = ExcelUrlImageConverter.class)
private String imageUrl;
// Column "品牌" (brand).
@ExcelProperty(value = "品牌")
private String supplierBrand;
// Column "规格" (specification).
@ExcelProperty(value = "规格")
private String skuDesc;
// Column "集采价" (group purchase price), kept as text.
@ExcelProperty(value = "集采价")
private String groupPurchasePrice;
// Column "卖点" (selling point).
@ExcelProperty(value = "卖点")
private String sellPoint;
}
}
java
public class ExcelImageExtractor2 {
/**
 * Demo entry point: imports one {@code .xlsx} workbook together with its pictures.
 *
 * <p>Earlier revisions printed progress lines for three separate extraction passes whose calls
 * were commented out (and no longer matched the current method signatures); those misleading
 * lines are gone, and only the step that actually runs is announced.
 *
 * @param args optional: {@code args[0]} is the path of the {@code .xlsx} file and
 *             {@code args[1]} the directory extracted pictures are written to. Without
 *             arguments the sample paths of the original demo are used; with only
 *             {@code args[0]}, pictures are written next to the workbook.
 * @throws Exception if the import fails
 */
public static void main(String[] args) throws Exception {
    // Sample location used when no arguments are supplied.
    String sampleDir = "E:\\WeChat Files\\xxx\\FileStorage\\File\\2025-01";

    File xlsxFile = args.length > 0
            ? new File(args[0])
            : new File("E:\\WeChat Files\\xxx\\FileStorage\\File\\2025-01\\测试.xlsx");

    String fileDir;
    if (args.length > 1) {
        fileDir = args[1];
    } else if (args.length > 0) {
        fileDir = xlsxFile.getAbsoluteFile().getParent();
    } else {
        fileDir = sampleDir;
    }

    System.out.println("读取 " + xlsxFile.getPath());
    importExcelSpuItem(xlsxFile, fileDir);
}
/**
 * Reads the first sheet of a workbook, extracts its pictures into {@code fileDir} and prints
 * the resulting list of {@code GoodItem}s, each paired with the picture found on its row.
 *
 * <p>Steps:
 * <ol>
 *   <li>read the header and all non-blank data rows with EasyExcel, remembering the sheet row
 *       index of every kept row;</li>
 *   <li>collect pictures via {@code readXlsxImages} and {@code readXlsxEmbeddedImages2} into a
 *       row-index to file-name map;</li>
 *   <li>build one item per data row, taking columns 0, 2, 3, 4, 5 as text and the picture from
 *       the map.</li>
 * </ol>
 *
 * <p>The picture is looked up by the row's real sheet index instead of its position in the
 * list of kept rows, so rows skipped as blank no longer shift the pictures of later rows.
 *
 * @param originalFile the {@code .xlsx} file to import
 * @param fileDir      directory the extracted pictures are written to
 * @throws RuntimeException if the file cannot be read or the sheet has no header row
 */
@SneakyThrows
public static void importExcelSpuItem(File originalFile, String fileDir) {
    try {
        // Load the file once so EasyExcel reads from memory.
        byte[] fileBytes = FileUtils.readFileToByteArray(originalFile);

        // Step 1: read the header and every non-blank data row.
        List<Map<Integer, String>> headerList = new ArrayList<>();
        List<Map<Integer, String>> rawDataList = new ArrayList<>();
        // Sheet row index of rawDataList.get(i), kept in lock-step with rawDataList.
        List<Integer> rowIndexList = new ArrayList<>();
        List<BatchSupplierGoodImportForm.GoodItem> goodItemExcelInputs = new ArrayList<>();

        EasyExcel.read(new ByteArrayInputStream(fileBytes))
                .registerReadListener(new AnalysisEventListener<Map<Integer, String>>() {
                    @Override
                    public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
                        headerList.add(headMap);
                    }

                    @Override
                    public void invoke(Map<Integer, String> data, AnalysisContext context) {
                        if (isEmptyRow(data)) {
                            return;
                        }
                        rawDataList.add(data);
                        rowIndexList.add(context.readRowHolder().getRowIndex());
                    }

                    @Override
                    public void doAfterAllAnalysed(AnalysisContext context) {
                        // Nothing to do once the sheet has been read.
                    }
                })
                .sheet()
                .doRead();

        if (headerList.isEmpty()) {
            throw new RuntimeException("Excel 文件中没有表头");
        }

        // Step 2: extract pictures; the map goes from sheet row index to picture file name.
        // NOTE: the embedded-picture reader keys by the position of the <row> element in the
        // sheet XML, which equals the sheet row index as long as no row is missing from the XML.
        Map<Integer, String> imagePathMap = new HashMap<>();
        readXlsxImages(originalFile, fileDir, imagePathMap);
        readXlsxEmbeddedImages2(originalFile, fileDir, imagePathMap);

        // Step 3: pair every data row with the picture on the same sheet row.
        for (int i = 0; i < rawDataList.size(); i++) {
            Map<Integer, String> rowData = rawDataList.get(i);
            BatchSupplierGoodImportForm.GoodItem item = new BatchSupplierGoodImportForm.GoodItem();
            item.setProductName(rowData.get(0));
            // Column 1 is the picture column; null when the row has no picture.
            item.setImageUrl(imagePathMap.get(rowIndexList.get(i)));
            item.setSupplierBrand(rowData.get(2));
            item.setSkuDesc(rowData.get(3));
            item.setGroupPurchasePrice(rowData.get(4));
            item.setSellPoint(rowData.get(5));
            goodItemExcelInputs.add(item);
        }
        System.out.println(goodItemExcelInputs);
    } catch (IOException e) {
        throw new RuntimeException("文件读取失败", e);
    }
}
/**
 * Tells whether a parsed row carries no content at all.
 *
 * @param data cell values of one row, keyed by column index
 * @return {@code true} when every value is null, empty or whitespace only
 */
private static boolean isEmptyRow(Map<Integer, String> data) {
    return data.values().stream().noneMatch(StringUtils::isNotBlank);
}
/**
 * Extracts the floating pictures of every sheet of an {@code .xls} workbook and writes each
 * one to {@code fileDir}.
 *
 * <p>Files are named {@code <row>行,<col>列.<ext>} after the picture's top-left anchor cell
 * (0-based indices as reported by the anchor). Pictures anchored at the same cell, including
 * on different sheets, therefore overwrite each other.
 *
 * @param file    the {@code .xls} workbook
 * @param fileDir target directory; created on demand when the first picture is written
 * @throws Exception if the workbook cannot be read or a picture cannot be written
 */
public static void readXlsImages(File file, String fileDir) throws Exception {
    File dir = new File(fileDir);
    // try-with-resources closes the workbook and the stream even when extraction fails.
    try (FileInputStream in = new FileInputStream(file);
         HSSFWorkbook workbook = new HSSFWorkbook(in)) {
        for (Sheet sheet : workbook) {
            HSSFPatriarch drawingPatriarch = ((HSSFSheet) sheet).getDrawingPatriarch();
            if (drawingPatriarch == null) {
                continue; // sheet has no drawing layer
            }
            for (HSSFShape shape : drawingPatriarch.getChildren()) {
                if (!(shape instanceof HSSFPicture)) {
                    continue;
                }
                HSSFPictureData picData = ((HSSFPicture) shape).getPictureData();
                HSSFClientAnchor anchor = (HSSFClientAnchor) shape.getAnchor();
                String key = anchor.getRow1() + "行," + anchor.getCol1() + "列";
                String suffix = picData.suggestFileExtension();
                // writeByteArrayToFile creates missing parent directories itself.
                FileUtils.writeByteArrayToFile(new File(dir, key + "." + suffix), picData.getData());
            }
        }
    }
}
/**
 * Extracts the floating pictures of every sheet of an {@code .xlsx} workbook, writes each one
 * to {@code fileDir} and records which row it belongs to.
 *
 * <p>Files are named {@code <row>行,<col>列.<ext>} after the picture's top-left anchor cell
 * (0-based indices as reported by the anchor). When several pictures share an anchor row, the
 * last one processed wins in {@code imagePathMap}.
 *
 * @param file         the {@code .xlsx} workbook
 * @param fileDir      target directory; created on demand when the first picture is written
 * @param imagePathMap receives anchor row index mapped to the written file name (relative to
 *                     {@code fileDir})
 * @throws Exception if the workbook cannot be read or a picture cannot be written
 */
public static void readXlsxImages(File file, String fileDir, Map<Integer, String> imagePathMap) throws Exception {
    File dir = new File(fileDir);
    // try-with-resources closes the workbook and the stream even when extraction fails.
    try (FileInputStream in = new FileInputStream(file);
         XSSFWorkbook workbook = new XSSFWorkbook(in)) {
        for (Sheet sheet : workbook) {
            XSSFDrawing drawing = ((XSSFSheet) sheet).getDrawingPatriarch();
            if (drawing == null) {
                continue; // sheet has no drawing layer
            }
            for (XSSFShape shape : drawing.getShapes()) {
                if (!(shape instanceof XSSFPicture)) {
                    continue;
                }
                XSSFPictureData picData = ((XSSFPicture) shape).getPictureData();
                XSSFClientAnchor anchor = (XSSFClientAnchor) shape.getAnchor();
                String key = anchor.getRow1() + "行," + anchor.getCol1() + "列";
                String fileName = key + "." + picData.suggestFileExtension();
                // writeByteArrayToFile creates missing parent directories itself.
                FileUtils.writeByteArrayToFile(new File(dir, fileName), picData.getData());
                imagePathMap.put(anchor.getRow1(), fileName);
            }
        }
    }
}
/**
 * Extracts the pictures embedded in cells of an {@code .xlsx} workbook (those resolved by
 * {@code getEmbedPictures}), writes each one to {@code fileDir} and records which row it
 * belongs to.
 *
 * <p>Each file is named {@code 工作表<sheetIndex>_<label>_<mediaFileName>}, where the label is
 * whatever {@code getCellPosition} returns for that row. The same name is stored in
 * {@code imagePathMap}, so the stored value always identifies the file that was written
 * (previously the stored name lacked the sheet prefix and matched no file on disk).
 *
 * <p>The map key is the position of the row within its sheet's row list; rows of different
 * sheets with the same position overwrite each other in the map.
 *
 * @param file         the {@code .xlsx} workbook
 * @param fileDir      target directory; created on demand when the first picture is written
 * @param imagePathMap receives row position mapped to the written file name (relative to
 *                     {@code fileDir})
 * @throws Exception if the package cannot be opened or a picture cannot be written
 */
public static void readXlsxEmbeddedImages2(File file, String fileDir, Map<Integer, String> imagePathMap) throws Exception {
    File dir = new File(fileDir);
    // try-with-resources closes the package and the stream even when extraction fails.
    try (FileInputStream in = new FileInputStream(file);
         OPCPackage opcPackage = OPCPackage.open(in)) {
        List<PackagePart> parts = opcPackage.getParts();
        Map<Integer, List<PackagePart>> picturePath = getEmbedPictures(parts);
        for (Map.Entry<Integer, List<PackagePart>> sheetEntry : picturePath.entrySet()) {
            Integer sheetIndex = sheetEntry.getKey();
            List<PackagePart> rows = sheetEntry.getValue();
            for (int i = 0; i < rows.size(); i++) {
                PackagePart part = rows.get(i);
                if (part == null) {
                    continue; // row without an embedded picture
                }
                String partName = part.getPartName().getName();
                String mediaFileName = partName.substring(partName.lastIndexOf("/") + 1);
                String cellPosition = getCellPosition(sheetIndex, i, parts);
                String fileName = "工作表" + sheetIndex + "_" + cellPosition + "_" + mediaFileName;
                // copyInputStreamToFile creates missing parent directories and closes the source stream.
                FileUtils.copyInputStreamToFile(part.getInputStream(), new File(dir, fileName));
                imagePathMap.put(i, fileName);
            }
        }
    }
}
/**
 * Returns the label recorded for one row of a worksheet, used to name extracted picture files.
 *
 * <p>The worksheet part {@code /xl/worksheets/sheet<sheetIndex + 1>.xml} is parsed with
 * {@code MySAXParserHandler}, and the handler's entry at position {@code imageIndex} is
 * returned. Despite the method name, that entry is the image ID taken from the row's
 * {@code DISPIMG("...")} expression, not a row/column coordinate.
 *
 * @param sheetIndex 0-based worksheet index
 * @param imageIndex position of the row within the handler's row list
 * @param parts      all parts of the opened package
 * @return the row's recorded label, or {@code "未知行_未知列"} when the sheet part is missing,
 *         the index is out of range or the row has no label
 * @throws Exception if the worksheet XML cannot be parsed
 */
private static String getCellPosition(int sheetIndex, int imageIndex, List<PackagePart> parts) throws Exception {
    String sheetPartName = "/xl/worksheets/sheet" + (sheetIndex + 1) + ".xml";
    for (PackagePart part : parts) {
        if (!sheetPartName.equals(part.getPartName().getName())) {
            continue;
        }
        SAXParserFactory factory = SAXParserFactory.newInstance();
        // Workbooks may come from untrusted uploads: refuse DOCTYPE declarations to rule out XXE.
        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        MySAXParserHandler handler = new MySAXParserHandler();
        factory.newSAXParser().parse(part.getInputStream(), handler);

        List<String> rows = handler.getRows();
        if (imageIndex >= 0 && imageIndex < rows.size() && rows.get(imageIndex) != null) {
            return rows.get(imageIndex);
        }
    }
    // Nothing usable found: fall back to the placeholder instead of returning null.
    return "未知行_未知列";
}
/**
 * Resolves, for every worksheet, which media part is embedded in each row.
 *
 * <p>Three kinds of parts are read:
 * <ul>
 *   <li>{@code /xl/cellimages.xml}: links each picture's name to a relationship ID. The
 *       elements are navigated by position (first child, second child, first attribute), so
 *       this relies on the layout of that part staying as it is.</li>
 *   <li>{@code /xl/_rels/cellimages.xml.rels}: links each relationship ID to its
 *       {@code Target} (for example {@code media/image1000.jpeg}).</li>
 *   <li>every part whose name contains {@code /xl/worksheets/sheet}: parsed with
 *       {@code MySAXParserHandler} to get the picture name referenced by each row.</li>
 * </ul>
 * Picture names are then translated to targets, and each target to the first part whose name
 * contains it.
 *
 * <p>All XML parsers refuse DOCTYPE declarations, because the workbook may come from an
 * untrusted upload (XXE protection).
 *
 * @param parts all parts of the opened package
 * @return 0-based sheet number mapped to a list with one entry per row of that sheet: the
 *         media part embedded in the row, or {@code null} when the row has no resolvable picture
 */
@SneakyThrows
private static Map<Integer, List<PackagePart>> getEmbedPictures(List<PackagePart> parts) {
    final String disallowDoctype = "http://apache.org/xml/features/disallow-doctype-decl";
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    saxFactory.setFeature(disallowDoctype, true);

    // relationship ID -> names of the pictures that use it
    Map<String, Set<String>> mapImg = new HashMap<>();
    // relationship ID -> media target path
    Map<String, String> mapImgPath = new HashMap<>();
    // 0-based sheet number -> per-row picture name (null for rows without a picture)
    Map<Integer, List<String>> dataMap = new HashMap<>();

    for (PackagePart part : parts) {
        String name = part.getPartName().getName();

        if ("/xl/cellimages.xml".equals(name)) {
            SAXBuilder builder = new SAXBuilder();
            builder.setFeature(disallowDoctype, true);
            Element root = builder.build(part.getInputStream()).getRootElement();
            for (Element imgEle : root.getChildren()) {
                Element xdrPic = imgEle.getChildren().get(0);
                Element xdrNvPicPr = xdrPic.getChildren().get(0);
                Element xdrBlipFill = xdrPic.getChildren().get(1);
                Element aBlip = xdrBlipFill.getChildren().get(0);
                // The first attribute of the blip element carries the relationship ID.
                String id = aBlip.getAttributes().get(0).getValue();
                String imgId = xdrNvPicPr.getChildren().get(0).getAttributeValue("name");
                mapImg.computeIfAbsent(id, k -> new HashSet<>()).add(imgId);
            }
        }

        if ("/xl/_rels/cellimages.xml.rels".equals(name)) {
            SAXBuilder builder = new SAXBuilder();
            builder.setFeature(disallowDoctype, true);
            Element root = builder.build(part.getInputStream()).getRootElement();
            // Each child looks like:
            // <Relationship Id="rId999" Type=".../relationships/image" Target="media/image1000.jpeg"/>
            for (Element relationship : root.getChildren()) {
                mapImgPath.put(relationship.getAttributeValue("Id"), relationship.getAttributeValue("Target"));
            }
        }

        if (name.contains("/xl/worksheets/sheet")) {
            // "/xl/worksheets/sheet3.xml" -> "3" -> sheet number 2
            String sheetNoStr = name.replace("/xl/worksheets/sheet", "").replace(".xml", "");
            Integer sheetNo = Integer.valueOf(sheetNoStr) - 1;
            MySAXParserHandler handler = new MySAXParserHandler();
            saxFactory.newSAXParser().parse(part.getInputStream(), handler);
            dataMap.put(sheetNo, handler.getRows());
        }
    }

    // picture name -> media target path
    Map<String, String> imgMap = new HashMap<>();
    for (Map.Entry<String, Set<String>> usage : mapImg.entrySet()) {
        String path = mapImgPath.get(usage.getKey());
        for (String imgId : usage.getValue()) {
            imgMap.put(imgId, path);
        }
    }

    Map<Integer, List<PackagePart>> map = new HashMap<>();
    for (Map.Entry<Integer, List<String>> sheet : dataMap.entrySet()) {
        List<String> rows = sheet.getValue();
        List<PackagePart> list = new ArrayList<>(rows.size());
        for (int i = 0; i < rows.size(); i++) {
            // Translate a known picture name to its target; unknown names are left untouched.
            String imgId = rows.get(i);
            if (imgMap.containsKey(imgId)) {
                rows.set(i, imgMap.get(imgId));
            }
            String path = rows.get(i);

            PackagePart match = null;
            if (StringUtils.isNotEmpty(path)) {
                for (PackagePart part : parts) {
                    // e.g. part "/xl/media/image702.jpeg" contains target "media/image702.jpeg"
                    if (part.getPartName().getName().contains(path)) {
                        match = part;
                        break;
                    }
                }
            }
            list.add(match);
        }
        map.put(sheet.getKey(), list);
    }
    return map;
}
}
参考链接:https://blog.csdn.net/qq_23123177/article/details/133638391