Java实现百万级数据从Excel导入到数据库

Excel导入可能产生的问题

1、内存溢出问题

百万级的数据量,如果一次性全部读取到内存中,肯定是不现实的,更好的办法是基于流式读取的方式进行分批处理。

在技术选型上,我们选择使用EasyExcel,它特别针对大数据量和复杂Excel文件的处理进行了优化。在解析Excel时,EasyExcel不会将Excel一次性全部加载到内存中,而是从磁盘上一行行读取数据,逐个解析。

导入依赖

xml 复制代码
       <!-- EasyExcel: the library this article uses to read the workbook row by row
            instead of loading the whole file into memory. -->
       <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>easyexcel</artifactId>
            <version>3.3.4</version>
        </dependency>

Controller

java 复制代码
 /**
  * Imports an uploaded Excel file into the finance staging table and reports the outcome.
  *
  * <p>The staging table is emptied first, then the file is streamed through
  * {@code ExcelUtil.readByMillionLevel}. If row-level validation produced errors they are
  * returned under the {@code "error"} key. Otherwise the duplicate check of
  * {@code financeService.getDuplicateFinanceList()} decides between {@code "error"} and
  * {@code "succeed"}.
  *
  * @param file the uploaded Excel workbook
  * @return a map holding either {@code "error"} (list of {@code ExcelError}) or {@code "succeed"}
  * @throws IOException if the upload's input stream cannot be opened
  */
 @RequestMapping(value = "/millionListUpload",method = RequestMethod.POST)
    public Map<String,Object> millionListUpload(MultipartFile file) throws IOException {
        // Clear the staging (temp) table before loading the new file.
        financeService.deleteTemp();

        // Collects per-row validation failures (e.g. @NotNull checks) while the file is read.
        List<ExcelError> validationErrors = new ArrayList<>();
        ExcelUtil.readByMillionLevel(file.getInputStream(), FinanceList.class, financeService, validationErrors)
                .sheet()
                .doRead();

        Map<String, Object> response = new HashMap<>();
        if (!validationErrors.isEmpty()) {
            response.put("error", validationErrors);
            return response;
        }

        // Duplicate rows are detected with a database query rather than in memory.
        List<ExcelError> duplicateErrors = financeService.getDuplicateFinanceList();
        if (duplicateErrors.isEmpty()) {
            response.put("succeed", "succeed");
        } else {
            response.put("error", duplicateErrors);
        }
        return response;
    }

实现ExcelUtil

java 复制代码
import java.io.File;
import java.io.InputStream;
import java.util.List;

/**
 * Static convenience entry points that pair EasyExcel's {@code read(...)} builders with this
 * project's listeners.
 *
 * <p>The class extends {@code EasyExcel} only so the inherited static {@code read} overloads can
 * be called unqualified; it is never instantiated.
 */
public class ExcelUtil extends EasyExcel {
    private ExcelUtil() {}

    /**
     * Builds a reader for the workbook at {@code pathName} backed by an {@code ExcelListener}.
     *
     * @param pathName    path of the Excel file
     * @param head        row model class
     * @param consumer    list handed to the {@code ExcelListener} (presumably receives the parsed
     *                    rows; confirm against {@code ExcelListener})
     * @param excelErrors list handed to the listener for reporting row errors
     * @return the reader builder; call {@code sheet().doRead()} to start reading
     */
    public static <T> ExcelReaderBuilder read(String pathName, Class<T> head, List<T> consumer, List<ExcelError> excelErrors) {
        return read(pathName, head, new ExcelListener<T>(consumer, excelErrors));
    }

    /**
     * Same as {@link #read(String, Class, List, List)} but reads from a {@link File}.
     */
    public static <T> ExcelReaderBuilder read(File file, Class<T> head, List<T> consumer, List<ExcelError> excelErrors) {
        return read(file, head, new ExcelListener<T>(consumer, excelErrors));
    }

    /**
     * Same as {@link #read(String, Class, List, List)} but reads from an {@link InputStream}.
     */
    public static <T> ExcelReaderBuilder read(InputStream inputStream, Class<T> head, List<T> consumer, List<ExcelError> excelErrors) {
        return read(inputStream, head, new ExcelListener<T>(consumer, excelErrors));
    }

    /**
     * Builds a reader that streams rows into the database in batches through an
     * {@code ExcelMillionListener} instead of collecting them in memory.
     *
     * <p>{@code baseService} is typed as {@code BaseService<E>} (not raw) so that {@code E} is
     * bound to the service's entity type and the listener is constructed without an unchecked
     * conversion.
     *
     * @param inputStream stream of the Excel file
     * @param head        row model class
     * @param baseService service used by the listener to create and persist entities
     * @param excelErrors list the listener appends row validation errors to
     * @param <E>         entity type persisted by {@code baseService}
     * @param <T>         row model type
     * @return the reader builder; call {@code sheet().doRead()} to start reading
     */
    public static <E, T> ExcelReaderBuilder readByMillionLevel(InputStream inputStream, Class<T> head, BaseService<E> baseService, List<ExcelError> excelErrors) {
        return read(inputStream, head, new ExcelMillionListener<E, T>(baseService, excelErrors));
    }
}

实现ExcelMillionListener

java 复制代码
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import lombok.SneakyThrows;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
import java.util.List;
import java.util.ArrayList;

/**
 * EasyExcel listener that validates each parsed row, copies it into an entity and persists the
 * entities through a {@code BaseService} in batches of {@link #BATCH_COUNT}, so that only one
 * batch of entities is buffered at a time.
 *
 * @param <E> entity type created via {@code BaseService#currentModelClass()} and persisted
 * @param <T> row model type delivered by EasyExcel
 * @author fly
 */
public class ExcelMillionListener<E,T> extends AnalysisEventListener<T> {

    /**
     * Number of buffered entities that triggers a database write; the buffer is cleared
     * afterwards so the entities can be garbage-collected.
     */
    private static final int BATCH_COUNT = 200;

    /** Entities waiting for the next batch insert. */
    private final List<E> list = new ArrayList<>(BATCH_COUNT);

    /** Caller-owned list that receives one entry per row failing validation. */
    private final List<ExcelError> excelErrors;

    /**
     * Service supplied through the constructor; provides the entity class and the batch insert.
     */
    private final BaseService<E> baseService;

    /**
     * Creates a listener bound to a service and an error sink.
     *
     * @param baseService service used to instantiate and persist entities
     * @param excelErrors list that validation errors are appended to
     */
    public ExcelMillionListener(BaseService<E> baseService, List<ExcelError> excelErrors) {
        this.baseService = baseService;
        this.excelErrors = excelErrors;
    }


    /**
     * Called once for every parsed row: validates it, converts it to an entity and flushes the
     * buffer when it reaches {@link #BATCH_COUNT}.
     *
     * <p>NOTE(review): a row that fails validation is still converted and buffered, exactly as
     * before; only the error is recorded. Confirm this is intended.
     */
    @SneakyThrows
    @Override
    public void invoke(T data, AnalysisContext analysisContext) {
        validateBeforeAddData(data, analysisContext);

        Class<E> entityClass = baseService.currentModelClass();
        // Class.newInstance() is deprecated and rethrows constructor checked exceptions undeclared;
        // go through the no-arg constructor explicitly instead.
        E entity = entityClass.getDeclaredConstructor().newInstance();
        BeanUtils.copyProperties(data, entity);
        list.add(entity);

        // Flush once a full batch has accumulated so the buffer never grows beyond BATCH_COUNT.
        if (list.size() >= BATCH_COUNT) {
            saveData();
        }
    }

    /**
     * Called after the last row: persists whatever is still buffered.
     */
    @Override
    public void doAfterAllAnalysed(AnalysisContext analysisContext) {
        saveData();
    }

    /**
     * Writes the buffered entities to the database and empties the buffer.
     *
     * <p>Does nothing when the buffer is empty. Without this guard the final flush would send an
     * empty batch whenever the row count is an exact multiple of {@link #BATCH_COUNT} (or the
     * sheet has no rows), and a multi-row insert with no rows is not valid SQL.
     */
    private void saveData() {
        if (list.isEmpty()) {
            return;
        }
        baseService.insertBatch(list);
        list.clear();
    }

    /**
     * Validates a row with {@code ValidationUtil.validateEntity} and, when a non-blank message is
     * produced, records it together with the row index in {@code excelErrors}.
     */
    private void validateBeforeAddData(T data, AnalysisContext context) {
        String errorMessage;
        try {
            errorMessage = ValidationUtil.validateEntity(data);
        } catch (NoSuchFieldException e) {
            errorMessage = "该类没有指定名称的字段,error:" + e.getMessage();
        }
        if (StringUtils.isNotBlank(errorMessage)) {
            ExcelError excelError = new ExcelError();
            excelError.setErrorMessage(errorMessage);
            excelError.setRowNum(getCurrentRowIndex(context));
            excelErrors.add(excelError);
        }
    }

    /**
     * Returns the row index EasyExcel reports for the row currently being processed.
     */
    private Integer getCurrentRowIndex(AnalysisContext context) {
        return context.readRowHolder().getRowIndex();
    }
}

通过自定义这个ExcelMillionListener,我们就可以在读取Excel文件的过程中边读取边处理数据。

每读取到一条数据之后,会先把它放入一个List;当List中积累到200条之后,进行一次数据库的批量插入,再清空List,避免OOM。

实现BaseService

java 复制代码
import java.util.List;

public interface BaseService<T> {

    /**
     * Returns the entity class this service works with. Implementations must provide it.
     *
     * @return the class object for {@code T}
     */
    Class<T> currentModelClass();

    /**
     * Persists the given entities as one batch. Implementations must provide it.
     *
     * @param entityList entities to insert
     * @return implementation-defined success flag
     */
    boolean insertBatch(List<T> entityList);
}

实现业务Service层

java 复制代码
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.time.YearMonth;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;


/**
 * Finance import service: stages parsed rows in the temp table through {@code FinanceDao},
 * checks the staged rows for duplicates and, when none are found, stamps them and copies them
 * into the final table.
 *
 * @author fly
 */
@Service
public class FinanceServiceImpl <M extends BaseMapper<T>, T> implements BaseService<T> {

    @Resource
    private FinanceDao financeDao;

    /**
     * Returns the entity class handled by this service, which is always {@code FinanceList}.
     */
    @Override
    @SuppressWarnings("unchecked") // this service only ever works with FinanceList entities
    public Class<T> currentModelClass() {
        return (Class<T>) FinanceList.class;
    }

    /**
     * Inserts one batch of entities into the staging table.
     *
     * <p>A {@code null} or empty batch is skipped: the underlying statement is a multi-row
     * {@code INSERT ... VALUES}, which is not valid SQL when there are no rows.
     *
     * @param financeList entities to stage
     * @return always {@code true}
     */
    @Override
    public boolean insertBatch(List<T> financeList) {
        if (financeList == null || financeList.isEmpty()) {
            return true;
        }
        @SuppressWarnings("unchecked") // T is FinanceList for this service, see currentModelClass()
        List<FinanceList> rows = (List<FinanceList>) financeList;
        financeDao.saveBatch(rows);
        return true;
    }

    /**
     * Removes all rows from the staging table.
     */
    public void deleteTemp(){
        financeDao.deleteTemp();
    }


    /**
     * Checks the staged rows for duplicates and finalises the import when there are none.
     *
     * <p>If the DAO reports duplicates, a single summary error (row number 0) is returned and
     * nothing else happens. Otherwise the staged rows are stamped with the current year-month
     * and the current user name, then copied into the final table.
     *
     * <p>NOTE(review): the update and the insert are two separate DAO calls with no transaction
     * visible here; confirm they run atomically.
     *
     * @return a list with one summary error when duplicates exist, otherwise an empty list
     */
    public List<ExcelError> getDuplicateFinanceList() {
        List<FinanceList> duplicateFinanceList = financeDao.getDuplicateFinanceList();
        List<ExcelError> errorList = new ArrayList<>();
        if (duplicateFinanceList != null && !duplicateFinanceList.isEmpty()) {
            ExcelError excelError = new ExcelError();
            excelError.setRowNum(0);
            excelError.setErrorMessage(duplicateFinanceList.size() + " duplicate Keys [Collomn1 & Collomn2]");
            errorList.add(excelError);
            return errorList;
        }

        // YearMonth#toString() yields "yyyy-MM" with a zero-padded month, replacing the manual
        // Calendar arithmetic and padding.
        String currentYearMonth = YearMonth.now().toString();

        // UpdateBy/UpdateOn need not be set while uploading; they are stamped once validation passed.
        financeDao.updateUploadDateAndUpdateBy(currentYearMonth, UserDetail.getUserName());
        // After validation passed, copy the staged rows into the final table with plain SQL.
        financeDao.insertFinanceListFromTemp();
        return errorList;
    }
}

Mapper

xml 复制代码
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="com.***.dao.FinanceDao">

    <!-- Multi-row insert of one parsed batch into the staging table.
         The batch must not be empty: with no items the foreach renders "VALUES" followed by
         nothing, which is invalid SQL. -->
    <insert id="saveBatch" parameterType="com.***.model.FinanceList">
        INSERT INTO DB.dbo.FinanceListTemp
        (market, plantCode, fruPN, materialLifeCycle, price, enoQty, enoUSD,base)
        VALUES
        <foreach collection="list" item="financeList" separator=",">
            (#{financeList.market},#{financeList.plantCode},#{financeList.fruPn},#{financeList.materialLifeCycle},
            #{financeList.price},#{financeList.enoQty},#{financeList.enoUSD},#{financeList.base})
        </foreach>
    </insert>

    <!-- Returns one row per (plantCode, fruPN) key that occurs more than once in the staging table.
         The id and resultType match the service call financeDao.getDuplicateFinanceList(), which
         expects FinanceList rows; the previous id (getDuplicateReserveList) did not match it. -->
    <select id="getDuplicateFinanceList" resultType="com.***.model.FinanceList">
        SELECT plantCode,fruPN from  DB.dbo.FinanceListTemp group by plantCode,fruPN HAVING COUNT(1)>1
    </select>

    <!-- Empties the staging table before a new upload. -->
    <delete id="deleteTemp">
        delete from DB.dbo.FinanceListTemp 
    </delete>

    <!-- Stamps staged rows that have no uploadDate or UpdateBy yet.
         NOTE(review): two named parameters are used, so the DAO method presumably declares them
         with @Param("currentYearMonth") and @Param("userName"); confirm. -->
    <update id="updateUploadDateAndUpdateBy" parameterType="java.lang.String">
        update DB.dbo.FinanceListTemp
        set uploadDate=#{currentYearMonth},UpdateBy=#{userName}
        where uploadDate is null or UpdateBy is null
    </update>

    <!-- Copies every staged row into the final table.
         NOTE(review): updateOn is copied from the staging table, but no statement in this mapper
         sets it; confirm it is filled by a column default or elsewhere. -->
    <insert id="insertFinanceListFromTemp">
        insert into DB.dbo.FinanceList
       (base,market,plantCode,fruPn,materialLifeCycle,price,enoQty,enoUSD,uploadDate,updateOn, updateBy)
        select base,market,plantCode,fruPn,materialLifeCycle,price,enoQty,enoUSD,uploadDate,updateOn, updateBy
        from DB.dbo.FinanceListTemp
    </insert>

</mapper>
相关推荐
栀栀栀栀栀栀16 分钟前
JDBC 学习笔记+代码整理
数据库·笔记
java6666688881 小时前
使用Java实现区块链技术的应用开发
java·开发语言·区块链
huanhuan_m11 小时前
springboot2.7.6 集成swagger
java·开发语言·spring boot
marsjin2 小时前
MYSQL多个表进行笛卡尔积查询优化
数据库·mysql
数据发现3 小时前
昆虫学(书籍学习资料)
数据库·数据挖掘·数据分析
百事牛4 小时前
设置和取消Excel“打开密码”的3种方法
windows·excel
虫小宝5 小时前
如何在Java中实现批量数据处理
java·开发语言
king888866665 小时前
Java中的AQS
java
冰暮流星5 小时前
软设之类的继承与泛化,多重继承
java·开发语言
虫小宝5 小时前
Java中的多线程与并发编程详解
java·开发语言