Java操作Excel最佳实践
-
-
- 1、背景描述
- 2、Apache POI简介
- 3、Java解析Excel
-
1、背景描述
在数据开发中经常需要处理Excel文件。Java标准库和Apache Spark本身都不原生支持读取Excel文件;除了借助Python之外,在Java/Spark中操作Excel通常使用两个框架:Apache POI和Spark-Excel。
2、Apache POI简介
官网:http://poi.apache.org/index.html
官方文档:https://poi.apache.org/apidocs/index.html
3、Java解析Excel
3.1、导入依赖
xml
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>com.crealytics</groupId>
<artifactId>spark-excel_2.12</artifactId>
<version>0.13.7</version>
</dependency>
<!--日期时间格式化库-->
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.10.10</version>
</dependency>
3.2、工具类POIUtils.java
为方便使用,直接封装成工具类
java
package com.cc;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Static helpers for opening an Excel workbook with Apache POI and reading one sheet.
 *
 * <p>The open workbook and its input stream are kept in static fields with no
 * synchronization, so this class tracks a single workbook at a time. Call
 * {@link #excelDestroy()} once the sheet is no longer needed.
 */
public class POIUtils {
    /** Delimiter written between cell values by {@link #excelReader(Sheet)}. */
    private static final String CELL_SEPARATOR = "\001";

    private static Workbook workbook = null;
    private static InputStream excel = null;

    /**
     * Returns the number of defined cells in the first row (row index 0).
     *
     * @param sheet sheet to inspect
     * @return -1 when the sheet has no rows, 0 when row 0 is not defined although later
     *         rows are, otherwise the physical cell count of row 0
     */
    public static Integer getColsNum(Sheet sheet) {
        if (hasNoRows(sheet)) {
            return -1;
        }
        // getRow may return null for a row that was never written; avoid the NPE.
        Row header = sheet.getRow(0);
        return header == null ? 0 : header.getPhysicalNumberOfCells();
    }

    /**
     * Returns the number of defined rows in the sheet.
     *
     * @param sheet sheet to inspect
     * @return -1 when the sheet has no rows, otherwise its physical row count
     */
    public static Integer getRowsNum(Sheet sheet) {
        return hasNoRows(sheet) ? -1 : sheet.getPhysicalNumberOfRows();
    }

    /**
     * Opens the workbook at {@code path} and returns the sheet called {@code name}.
     *
     * <p>Any workbook left open by an earlier call is closed first so that it is not leaked.
     * If the workbook cannot be parsed, the freshly opened stream is closed before the
     * exception propagates.
     *
     * @param path  file system path of the Excel file
     * @param name  name of the sheet to return
     * @param ooxml {@code true} for the OOXML format (.xlsx, Office 2007 and later),
     *              {@code false} for the legacy binary format (.xls)
     * @return the result of {@code Workbook.getSheet(name)}; presumably {@code null} when
     *         no sheet has that name (confirm against the POI documentation)
     * @throws IOException          if the file cannot be opened or read
     * @throws NullPointerException if {@code ooxml} is {@code null}
     */
    public static Sheet excelInit(String path, String name, Boolean ooxml) throws IOException {
        // Fail before touching the file so a null flag cannot leave a stream open.
        if (ooxml == null) {
            throw new NullPointerException("ooxml must not be null");
        }
        // Release whatever a previous call left behind.
        excelDestroy();

        InputStream in = Files.newInputStream(Paths.get(path));
        Workbook opened;
        try {
            if (ooxml) {
                opened = new XSSFWorkbook(in);
            } else {
                opened = new HSSFWorkbook(in);
            }
        } catch (IOException | RuntimeException e) {
            try {
                in.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        excel = in;
        workbook = opened;
        return workbook.getSheet(name);
    }

    /**
     * Renders the sheet as text: one line per row, cell values separated by {@code \001}.
     *
     * <p>Each cell value is trimmed individually, matching {@link #excelParser(Sheet)}.
     * The assembled line is deliberately NOT trimmed: {@code String.trim()} also removes
     * the {@code \001} delimiter, which would drop leading/trailing blank cells and shift
     * the remaining columns.
     *
     * @param sheet sheet to read
     * @return the rendered text, or an empty string when the sheet has no rows
     */
    public static String excelReader(Sheet sheet) {
        if (hasNoRows(sheet)) {
            return "";
        }
        StringBuilder text = new StringBuilder();
        boolean firstRow = true;
        // Only the for-each iterators are used to walk rows and cells, as in excelParser.
        for (Row row : sheet) {
            if (!firstRow) {
                text.append('\n');
            }
            text.append(String.join(CELL_SEPARATOR, readCells(row)));
            firstRow = false;
        }
        return text.toString();
    }

    /**
     * Reads the sheet into a list of rows, each row being a list of trimmed cell strings.
     *
     * @param sheet sheet to read
     * @return the rows in iteration order; an empty list (never {@code null}) when the
     *         sheet has no rows
     */
    public static List<List<String>> excelParser(Sheet sheet) {
        List<List<String>> rows = new ArrayList<>();
        if (hasNoRows(sheet)) {
            return rows;
        }
        for (Row row : sheet) {
            rows.add(readCells(row));
        }
        return rows;
    }

    /**
     * Closes the workbook and input stream opened by {@link #excelInit}, if any.
     *
     * <p>Safe to call when nothing is open. The stream is closed even if closing the
     * workbook fails.
     *
     * @throws IOException if closing either resource fails
     */
    public static void excelDestroy() throws IOException {
        Workbook openWorkbook = workbook;
        InputStream openStream = excel;
        // Clear the fields first so a failed close cannot leave stale references behind.
        workbook = null;
        excel = null;
        try {
            if (openWorkbook != null) {
                openWorkbook.close();
            }
        } finally {
            if (openStream != null) {
                openStream.close();
            }
        }
    }

    /** Tells whether the sheet reports no rows, i.e. {@code getLastRowNum()} is -1. */
    private static boolean hasNoRows(Sheet sheet) {
        return sheet.getLastRowNum() == -1;
    }

    /** Collects the trimmed {@code toString()} value of every cell the row iterator yields. */
    private static List<String> readCells(Row row) {
        List<String> cells = new ArrayList<>();
        for (Cell cell : row) {
            cells.add(cell.toString().trim());
        }
        return cells;
    }
}
3.3、测试类POIUtilsTest.java
java
package com.cc;
import org.apache.poi.ss.usermodel.Sheet;
import org.junit.Test;
import java.io.IOException;
import java.util.List;
/**
 * Manual smoke test for {@code POIUtils}: opens a workbook, prints its row and column
 * counts and its content, then releases the workbook.
 */
public class POIUtilsTest {
    @Test
    public void test() throws IOException {
        // Workbook to read
        String path = "F:\\...\\file.xlsx";
        // Load the workbook and select the sheet
        Sheet sheet = POIUtils.excelInit(path, "Sheet1", true);
        try {
            // Row and column counts
            System.out.println(POIUtils.getRowsNum(sheet));
            System.out.println(POIUtils.getColsNum(sheet));
            // Sheet content as delimited text
            System.out.println(POIUtils.excelReader(sheet));
            // Alternative: read the sheet as a list of rows
            // List<List<String>> lines = POIUtils.excelParser(sheet);
            // lines.forEach(System.out::println);
        } finally {
            // Release the workbook even when reading or printing fails
            POIUtils.excelDestroy();
        }
    }
}
参考文章:https://blog.csdn.net/qq_47387991/article/details/136207565