Excel导入可能产生的问题
1、内存溢出问题
百万级数据量,一次性都读取到内存中,肯定是不现实的,那么好的办法就是基于流式读取的方式进行分批处理。
在技术选型上,我们选择使用EasyExcel,它特别针对大数据量和复杂Excel文件的处理进行了优化。在解析Excel时,EasyExcel不会将Excel一次性全部加载到内存中,而是从磁盘上一行行读取数据,逐个解析。
导入依赖
xml
<!-- Alibaba EasyExcel: the library this guide uses for row-by-row Excel reading. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.3.4</version>
</dependency>
Controller
java
/**
 * Imports an uploaded Excel file into the finance staging table and reports the outcome.
 *
 * <p>The staging table is emptied first, then the sheet is read through
 * {@code ExcelUtil.readByMillionLevel}. Row-level validation errors collected while reading
 * are returned under the {@code "error"} key. When there are none, the service's duplicate
 * check runs and its errors (if any) are returned under the same key; otherwise the map
 * contains {@code "succeed" -> "succeed"}.
 *
 * @param file the uploaded Excel file
 * @return a map holding either an {@code "error"} list or the {@code "succeed"} marker
 * @throws IOException if the upload's input stream cannot be opened
 */
@RequestMapping(value = "/millionListUpload", method = RequestMethod.POST)
public Map<String, Object> millionListUpload(MultipartFile file) throws IOException {
    // Clear the staging table before loading new rows.
    financeService.deleteTemp();

    List<ExcelError> validationErrors = new ArrayList<>();
    ExcelUtil.readByMillionLevel(file.getInputStream(), FinanceList.class, financeService, validationErrors)
            .sheet()
            .doRead();

    Map<String, Object> resultMap = new HashMap<>();

    // Basic per-row checks (@NotNull and similar) failed: report them and stop.
    if (!validationErrors.isEmpty()) {
        resultMap.put("error", validationErrors);
        return resultMap;
    }

    // Duplicate rows are detected with a database query rather than in memory.
    List<ExcelError> duplicateErrors = financeService.getDuplicateFinanceList();
    if (duplicateErrors.isEmpty()) {
        resultMap.put("succeed", "succeed");
    } else {
        resultMap.put("error", duplicateErrors);
    }
    return resultMap;
}
实现ExcelUtil
java
import java.io.File;
import java.io.InputStream;
import java.util.List;
/**
 * Static helpers that build EasyExcel readers wired to this project's listeners.
 *
 * <p>Every method returns an {@code ExcelReaderBuilder}; nothing is read until the caller
 * finishes the builder chain (for example {@code .sheet().doRead()}).
 */
public class ExcelUtil extends EasyExcel {

    /** Utility class; not meant to be instantiated. */
    private ExcelUtil() {}

    /**
     * Builds a reader for the file at {@code pathName} using an {@code ExcelListener}.
     *
     * @param pathName    path of the Excel file
     * @param head        row model class
     * @param consumer    list handed to the {@code ExcelListener}
     * @param excelErrors list handed to the {@code ExcelListener} for error reporting
     * @return the configured reader builder
     */
    public static <T> ExcelReaderBuilder read(String pathName, Class<T> head, List<T> consumer, List<ExcelError> excelErrors) {
        return read(pathName, head, new ExcelListener<T>(consumer, excelErrors));
    }

    /**
     * Builds a reader for {@code file} using an {@code ExcelListener}.
     *
     * @param file        the Excel file
     * @param head        row model class
     * @param consumer    list handed to the {@code ExcelListener}
     * @param excelErrors list handed to the {@code ExcelListener} for error reporting
     * @return the configured reader builder
     */
    public static <T> ExcelReaderBuilder read(File file, Class<T> head, List<T> consumer, List<ExcelError> excelErrors) {
        return read(file, head, new ExcelListener<T>(consumer, excelErrors));
    }

    /**
     * Builds a reader for {@code inputStream} using an {@code ExcelListener}.
     *
     * @param inputStream stream containing the Excel data
     * @param head        row model class
     * @param consumer    list handed to the {@code ExcelListener}
     * @param excelErrors list handed to the {@code ExcelListener} for error reporting
     * @return the configured reader builder
     */
    public static <T> ExcelReaderBuilder read(InputStream inputStream, Class<T> head, List<T> consumer, List<ExcelError> excelErrors) {
        return read(inputStream, head, new ExcelListener<T>(consumer, excelErrors));
    }

    /**
     * Builds a reader for large files: rows are passed to an {@code ExcelMillionListener},
     * which persists them in batches through {@code baseService}.
     *
     * <p>{@code baseService} is typed {@code BaseService<E>} so that {@code E} is inferred
     * from the service instead of relying on a raw type. The erased signature is the same
     * as before, so callers passing a raw service still compile (with an unchecked warning).
     *
     * @param inputStream stream containing the Excel data
     * @param head        row model class
     * @param baseService service used to create and batch-insert entities
     * @param excelErrors list that receives row validation errors
     * @param <E>         entity type persisted by {@code baseService}
     * @param <T>         row model type
     * @return the configured reader builder
     */
    public static <E, T> ExcelReaderBuilder readByMillionLevel(InputStream inputStream, Class<T> head, BaseService<E> baseService, List<ExcelError> excelErrors) {
        return read(inputStream, head, new ExcelMillionListener<E, T>(baseService, excelErrors));
    }
}
实现ExcelMillionListener
java
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import lombok.SneakyThrows;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
import java.util.List;
import java.util.ArrayList;
/**
 * Row listener for large Excel imports: validates each row, converts it to an entity and
 * writes entities to the database in fixed-size batches so the whole sheet is never held
 * in memory.
 *
 * @param <E> entity type persisted through the {@link BaseService}
 * @param <T> row model type produced by the Excel parser
 * @Author fly
 * @date 2024-07-02 23:35
 **/
public class ExcelMillionListener<E, T> extends AnalysisEventListener<T> {

    /**
     * Number of rows buffered before they are written to the database; the buffer is
     * cleared after each write so the memory can be reclaimed.
     */
    private static final int BATCH_COUNT = 200;

    /** Entities waiting for the next batch insert. */
    private final List<E> list = new ArrayList<>(BATCH_COUNT);

    /** Receives one entry per row that fails validation. */
    private final List<ExcelError> excelErrors;

    /** Service supplied through the constructor; creates the entity type and stores batches. */
    private final BaseService<E> baseService;

    /**
     * Creates a listener.
     *
     * @param baseService service used to resolve the entity class and batch-insert entities
     * @param excelErrors list that receives validation errors
     */
    public ExcelMillionListener(BaseService<E> baseService, List<ExcelError> excelErrors) {
        this.baseService = baseService;
        this.excelErrors = excelErrors;
    }

    /**
     * Called for every parsed row: validates the row, copies it into a new entity and
     * flushes the buffer once it holds {@code BATCH_COUNT} entities.
     *
     * <p>A row that fails validation is recorded in {@code excelErrors} but is still
     * buffered and stored, as in the original implementation.
     */
    @SneakyThrows
    @Override
    public void invoke(T data, AnalysisContext analysisContext) {
        validateBeforeAddData(data, analysisContext);

        Class<E> entityClass = baseService.currentModelClass();
        // Class.newInstance() is deprecated; go through the no-arg constructor instead.
        E entity = entityClass.getDeclaredConstructor().newInstance();
        BeanUtils.copyProperties(data, entity);
        list.add(entity);

        // Flush once the batch is full so rows do not pile up in memory.
        if (list.size() >= BATCH_COUNT) {
            saveData();
        }
    }

    /**
     * Called when parsing finishes; stores whatever is still buffered.
     */
    @Override
    public void doAfterAllAnalysed(AnalysisContext analysisContext) {
        saveData();
    }

    /**
     * Writes the buffered entities to the database and empties the buffer.
     *
     * <p>Does nothing when the buffer is empty. Without this guard a sheet with zero rows,
     * or with an exact multiple of {@code BATCH_COUNT} rows, would send an empty list to
     * the batch insert. Clearing here also prevents the same rows from being inserted twice
     * if this listener receives further callbacks.
     */
    private void saveData() {
        if (list.isEmpty()) {
            return;
        }
        baseService.insertBatch(list);
        list.clear();
    }

    /**
     * Validates one row and, if a message is produced, records it with the row index.
     */
    private void validateBeforeAddData(T data, AnalysisContext context) {
        String errorMessage;
        try {
            errorMessage = ValidationUtil.validateEntity(data);
        } catch (NoSuchFieldException e) {
            errorMessage = "该类没有指定名称的字段,error:" + e.getMessage();
        }
        if (StringUtils.isNotBlank(errorMessage)) {
            ExcelError excelError = new ExcelError();
            excelError.setErrorMessage(errorMessage);
            excelError.setRowNum(getCurrentRowIndex(context));
            excelErrors.add(excelError);
        }
    }

    /** Returns the row index reported by the parser for the row being processed. */
    private Integer getCurrentRowIndex(AnalysisContext context) {
        return context.readRowHolder().getRowIndex();
    }
}
通过自定义这个ExcelMillionListener,我们就可以在读取Excel文件的过程中处理数据。
每读取到一条数据之后会把它放入一个List,当List中积累200条之后,进行一次数据库的批量插入,再清空list,避免OOM。
实现BaseService
java
import java.util.List;
/**
 * Minimal persistence contract used by the Excel import listeners.
 * Implementing classes must implement every method.
 *
 * @param <T> entity type handled by the service
 */
public interface BaseService<T> {

    /**
     * Stores the given entities in one batch.
     *
     * @param entityList entities to store
     * @return the implementation's success flag
     */
    boolean insertBatch(List<T> entityList);

    /**
     * @return the entity class this service works with
     */
    Class<T> currentModelClass();
}
实现业务Service层
java
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.time.YearMonth;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
/**
 * Finance import service: stages uploaded rows in a temporary table, checks them for
 * duplicates and, when none are found, copies them into the formal table.
 *
 * @Author fly
 * @date 2024-07-03 0:33
 **/
@Service
public class FinanceServiceImpl<M extends BaseMapper<T>, T> implements BaseService<T> {

    @Resource
    private FinanceDao financeDao;

    /**
     * @return {@code FinanceList.class}, regardless of how {@code T} is bound
     */
    @Override
    @SuppressWarnings("unchecked") // this service only ever handles FinanceList entities
    public Class<T> currentModelClass() {
        return (Class<T>) FinanceList.class;
    }

    /**
     * Inserts the given rows into the staging table in one statement.
     *
     * <p>A {@code null} or empty list is a no-op: the mapper builds its {@code VALUES}
     * clause from the list, so an empty list would produce an incomplete statement.
     *
     * @param financeList rows to stage
     * @return always {@code true}
     */
    @Override
    @SuppressWarnings("unchecked") // this service only ever handles FinanceList entities
    public boolean insertBatch(List<T> financeList) {
        if (financeList == null || financeList.isEmpty()) {
            return true;
        }
        financeDao.saveBatch((List<FinanceList>) financeList);
        return true;
    }

    /** Removes all rows from the staging table. */
    public void deleteTemp() {
        financeDao.deleteTemp();
    }

    /**
     * Checks the staging table for duplicate keys and, if there are none, promotes the
     * staged rows.
     *
     * <p>Despite the name this method has side effects: when no duplicates are found it
     * stamps the staged rows with the current year-month and user name and then inserts
     * them into the formal table.
     *
     * @return a single-element list describing the duplicate count, or an empty list when
     *         the data was promoted
     */
    public List<ExcelError> getDuplicateFinanceList() {
        List<FinanceList> duplicateFinanceList = financeDao.getDuplicateFinanceList();
        List<ExcelError> errorList = new ArrayList<>();

        if (duplicateFinanceList != null && !duplicateFinanceList.isEmpty()) {
            ExcelError excelError = new ExcelError();
            excelError.setRowNum(0);
            excelError.setErrorMessage(duplicateFinanceList.size() + " duplicate Keys [Collomn1 & Collomn2]");
            errorList.add(excelError);
            return errorList;
        }

        // YearMonth.toString() yields "yyyy-MM", the same zero-padded form that was
        // previously assembled by hand from Calendar fields.
        String currentYearMonth = YearMonth.now().toString();

        // The audit columns are filled only after validation passes, not during upload.
        financeDao.updateUploadDateAndUpdateBy(currentYearMonth, UserDetail.getUserName());
        // Validation passed: copy the staged rows into the formal table with SQL.
        financeDao.insertFinanceListFromTemp();
        return errorList;
    }
}
Mapper
xml
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<!-- SQL statements for FinanceDao: staging table load, duplicate check and promotion. -->
<mapper namespace="com.***.dao.FinanceDao">

    <!-- Multi-row insert into the staging table. The list must not be empty,
         otherwise the VALUES clause has no tuples. -->
    <insert id="saveBatch" parameterType="com.***.model.FinanceList">
        INSERT INTO DB.dbo.FinanceListTemp
        (market, plantCode, fruPN, materialLifeCycle, price, enoQty, enoUSD,base)
        VALUES
        <foreach collection="list" item="financeList" separator=",">
            (#{financeList.market},#{financeList.plantCode},#{financeList.fruPn},#{financeList.materialLifeCycle},
            #{financeList.price},#{financeList.enoQty},#{financeList.enoUSD},#{financeList.base})
        </foreach>
    </insert>

    <!-- Returns each (plantCode, fruPN) pair that occurs more than once in the staging table.
         The id and resultType match the service call financeDao.getDuplicateFinanceList(),
         which expects FinanceList rows. -->
    <select id="getDuplicateFinanceList" resultType="com.***.model.FinanceList">
        SELECT plantCode,fruPN from DB.dbo.FinanceListTemp group by plantCode,fruPN HAVING COUNT(1)>1
    </select>

    <!-- Empties the staging table. -->
    <delete id="deleteTemp">
        delete from DB.dbo.FinanceListTemp
    </delete>

    <!-- Stamps staged rows that are missing uploadDate or UpdateBy. -->
    <update id="updateUploadDateAndUpdateBy" parameterType="java.lang.String">
        update DB.dbo.FinanceListTemp
        set uploadDate=#{currentYearMonth},UpdateBy=#{userName}
        where uploadDate is null or UpdateBy is null
    </update>

    <!-- Copies every staged row into the formal table. -->
    <insert id="insertFinanceListFromTemp">
        insert into DB.dbo.FinanceList
        (base,market,plantCode,fruPn,materialLifeCycle,price,enoQty,enoUSD,uploadDate,updateOn, updateBy)
        select base,market,plantCode,fruPn,materialLifeCycle,price,enoQty,enoUSD,uploadDate,updateOn, updateBy
        from DB.dbo.FinanceListTemp
    </insert>
</mapper>