基于FastExcel/EasyExcel多线程导出百万级数据Excel文件并利用 ResponseBodyEmitter 与caffeine技术实现进度查询

一、引言

在实际的业务开发中,经常会遇到需要导出大量数据到 Excel 文件的场景。当数据量达到百万级时,单线程导出会非常耗时,甚至可能导致内存溢出。为了提高导出效率,我们可以采用多线程的方式进行导出。同时,为了让用户了解导出进度,我们还需要实现进度查询功能。本文将介绍如何使用 FastExcel 实现多线程导出百万级数据 Excel 文件,并利用 ResponseBodyEmitter 与caffeine技术实现进度查询,让用户能够实时了解导出的进度。

二、技术选型

FastExcel:一个简单易用的 Java Excel 操作库,支持读写 Excel 文件,具有高性能和低内存占用的特点。Java 多线程:利用 Java 的线程池和 CountDownLatch 实现多线程导出。缓存:使用 Caffeine 作为本地缓存,记录导出进度。ResponseBodyEmitter:Spring 框架提供的异步响应工具,可以分多次向客户端推送数据,适合实现进度查询。

三、实现思路

数据拆分:将百万级数据拆分成多个小批次,每个批次对应一个 Excel 的 Sheet。多线程处理:使用线程池管理多个线程,每个线程负责查询一个 Sheet 的数据;全部查询完成后,由请求线程按顺序把各批数据写入对应的 Sheet。进度查询:使用 CountDownLatch 统计尚未完成的任务数,据此计算完成比例并写入缓存,供进度接口读取。Excel 写入:使用 FastExcel 将每个 Sheet 的数据写入 Excel 文件。

四、代码实现

项目依赖

首先,确保你的项目中引入了 FastExcel 和 caffeine的依赖。如果你使用的是 Maven 项目,可以在 pom.xml 中添加以下依赖:

xml 复制代码
        <!-- FastExcel toolkit: Excel read/write library used for the export -->
        <dependency>
            <groupId>cn.idev.excel</groupId>
            <artifactId>fastexcel</artifactId>
            <version>1.0.0</version>
        </dependency>
        <!-- Caffeine: in-memory cache used to record the export progress -->
        <dependency>
            <groupId>com.github.ben-manes.caffeine</groupId>
            <artifactId>caffeine</artifactId>
            <version>2.8.8</version>
        </dependency>

代码示例

1. CacheConfig类: 初始化caffeine
java 复制代码
/**
 * Spring configuration that owns the single Caffeine cache used to share export
 * progress between the export code and the progress endpoint.
 *
 * <p>The cache is reachable both statically ({@link #caffeineCache}) and as a Spring
 * bean ({@link #caffeineCache()}); both routes return the <em>same</em> instance.
 */
@Configuration
public class CacheConfig {

    /** The one shared cache instance; built exactly once when the class is initialized. */
    public static final Cache<String, Object> caffeineCache = buildCache();

    /**
     * Exposes the shared cache as a Spring bean.
     *
     * <p>Returns the static instance instead of building a new cache, so that code
     * injecting the bean and code using {@code CacheConfig.caffeineCache} directly
     * read and write the same entries.
     *
     * @return the shared cache instance
     */
    @Bean
    public static Cache<String, Object> caffeineCache() {
        return caffeineCache;
    }

    /**
     * Builds the cache with the settings used by the export feature.
     *
     * @return a new, empty cache
     */
    private static Cache<String, Object> buildCache() {
        return Caffeine.newBuilder()
                // Initial capacity hint (left disabled)
                //.initialCapacity(100)
                // Maximum number of entries kept in the cache
                .maximumSize(500)
                // Entries expire a fixed time after they were last written
                .expireAfterWrite(3, TimeUnit.MINUTES)
                // Invoked whenever an entry is removed (expired, evicted, replaced, ...)
                .removalListener((key, value, cause) -> System.out.println("key:" + key + " value:" + value + " cause:" + cause))
                // Enable hit/miss statistics
                .recordStats()
                .build();
    }
}
2. controller类: 入口
java 复制代码
    /**
     * HTTP entry point for the multi-threaded Excel export.
     *
     * <p>Delegates directly to {@code writeService.threadWrite}; this method adds no
     * logic of its own.
     *
     * @param response servlet response handed to the service (the visible service
     *                 implementation writes the workbook to its output stream)
     * @param count    required request parameter, passed through as the number of rows to export
     * @param uuid     required request parameter, passed through to the service
     *                 (presumably the key under which progress is published; confirm against the service)
     * @throws IOException propagated from {@code writeService.threadWrite}
     */
    @RequestMapping("/threadWrite")
    public void threadWrite(HttpServletResponse response, @RequestParam Integer count, @RequestParam String uuid) throws IOException {
        writeService.threadWrite(response, count, uuid);
    }
3. service类: 实现
java 复制代码
/**
 * Shared helpers for the Excel services: sample-data generators, response header
 * setup and small path/time utilities.
 */
public class BaseService {

    /**
     * Formatter behind {@link #getNow()}. {@code DateTimeFormatter} is immutable and
     * thread-safe, so a single instance is shared instead of rebuilding it per call.
     */
    private static final DateTimeFormatter NOW_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMddHHmmss");

    /**
     * Generates the default amount (10) of sample rows.
     *
     * @return a mutable list of 10 generated entities
     */
    protected static List<BaseEntity> data() {
        return data(10);
    }

    /**
     * Generates {@code num} sample rows to be written to a sheet.
     *
     * @param num number of rows to generate; zero or a negative value yields an empty list
     * @return a mutable list with the generated entities
     */
    protected static List<BaseEntity> data(int num) {
        // Presize for large exports; clamp so a negative count still returns an empty list.
        List<BaseEntity> list = new ArrayList<>(Math.max(num, 0));
        // One generator per call (not per row). It is a local, so concurrent callers never share it.
        Random random = new Random();
        for (int i = 0; i < num; i++) {
            BaseEntity data = new BaseEntity();
            data.setId(i);
            data.setString("字符串" + random.nextInt(100));
            data.setDate(new Date());
            data.setDoubleData(0.56);
            list.add(data);
        }
        return list;
    }

    /**
     * Generates 10 sample rows with random alphanumeric name, password and account.
     *
     * @return a mutable list of 10 generated entities
     */
    protected static List<FillBaseEntity> fillData() {
        List<FillBaseEntity> list = new ArrayList<>(10);
        for (int i = 0; i < 10; i++) {
            FillBaseEntity data = new FillBaseEntity();
            data.setName(RandomStringUtils.randomAlphanumeric(5));
            data.setPassword(RandomStringUtils.randomAlphanumeric(8));
            data.setAccount(RandomStringUtils.randomAlphanumeric(11));
            list.add(data);
        }
        return list;
    }

    /**
     * Resolves the {@code /static/} classpath resource relative to the given class.
     *
     * @param aClass class whose class loader is used for the lookup
     * @return the path component of the resource URL
     * @throws NullPointerException if the {@code /static/} resource cannot be found
     */
    protected static String getPath(Class<?> aClass) {
        // When reading files from target/, mind the Maven resource encoding; it has to be handled in the pom.
        return Objects.requireNonNull(aClass.getResource("/static/")).getPath();
    }

    /**
     * Prepares the response for an {@code .xlsx} attachment download.
     *
     * @param response response whose content type, encoding and Content-Disposition header are set
     * @param fileName raw (not yet encoded) file name without extension; it is percent-encoded here
     */
    protected void setResponse(HttpServletResponse response, String fileName) {
        response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
        response.setCharacterEncoding("utf-8");
        // The filename*=utf-8'' form requires a percent-encoded value; a raw name with spaces
        // or non-ASCII characters would produce a malformed header.
        response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + encodeFileName(fileName) + ".xlsx");
    }

    /**
     * Percent-encodes a file name for use in a {@code filename*=utf-8''...} header value.
     * Names made only of ASCII letters, digits, {@code -}, {@code _} and {@code .} are returned unchanged.
     */
    private static String encodeFileName(String fileName) {
        try {
            // URLEncoder targets HTML forms: it encodes a space as '+' and leaves '*' untouched,
            // so both are patched up to their percent-encoded forms.
            return java.net.URLEncoder.encode(fileName, "UTF-8")
                    .replace("+", "%20")
                    .replace("*", "%2A");
        } catch (java.io.UnsupportedEncodingException e) {
            // Every JVM is required to support UTF-8, so this is not expected to happen.
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }
    }

    /**
     * Returns the current local date-time formatted as {@code yyyyMMddHHmmss}.
     *
     * @return timestamp string, e.g. for building file names
     */
    protected static String getNow() {
        return LocalDateTime.now().format(NOW_FORMATTER);
    }
}
java 复制代码
/**
 * Excel export service. Small exports are written as a single sheet; larger ones are
 * split into pages that are loaded by a thread pool and then written one sheet per page.
 *
 * <p>Progress is published to {@code CacheConfig.caffeineCache} under the caller-supplied
 * uuid as a {@code Double} in the range {@code [0.0, 1.0]}.
 */
@Slf4j
@Service
public class WriteServiceImpl extends BaseService implements WriteService {

    /** Row count above which the export is split into several sheets. */
    private static final int SPLIT_COUNT = 100;
    /** Maximum number of rows per sheet in the multi-sheet export. */
    private static final int PAGE_COUNT = 100;
    private static final int CORE_POOL_SIZE = 10;
    // NOTE: the pool below uses an unbounded queue, and ThreadPoolExecutor only grows past the
    // core size when the queue is full, so this maximum is effectively never reached.
    private static final int MAXIMUM_POOL_SIZE = 20;
    private static final long KEEP_ALIVE_TIME = 100;
    private static final TimeUnit TIME_UNIT = TimeUnit.SECONDS;

    /**
     * Writes {@code count} generated rows to the response as an .xlsx download.
     *
     * @param response response that receives the headers and the workbook bytes
     * @param count    number of rows to export
     * @param uuid     key under which progress is published; progress is skipped when {@code null}
     * @throws IOException if the response output stream cannot be obtained or written
     */
    @Override
    public void threadWrite(HttpServletResponse response, Integer count, String uuid) throws IOException {

        String fileName = "threadWrite" + getNow() + "_" + count;
        setResponse(response, fileName);

        // Always start from 0.0 so a poller never sees a stale or non-fraction value for this uuid.
        resetProgress(uuid);

        // Above SPLIT_COUNT rows, export page by page with multiple threads.
        if (count > SPLIT_COUNT) {
            this.multiThreadList(response, count, uuid);
        } else {
            List<BaseEntity> data = data(count);
            writeSingleSheet(response, data);
            // The single-sheet path has no intermediate steps; report completion directly
            // so a progress poller can finish.
            advanceProgress(uuid, 1.0);
        }
    }

    /**
     * Loads the rows page by page on a thread pool, then writes one sheet per page.
     *
     * @param response response whose output stream receives the workbook
     * @param totalNum total number of rows to export
     * @param uuid     key under which progress is published
     * @throws IOException if the response output stream cannot be obtained
     */
    private void multiThreadList(HttpServletResponse response, Integer totalNum, String uuid) throws IOException {
        // Rows stored per sheet
        int num = PAGE_COUNT;
        // The worker threads are managed by a pool that lives for this one export
        ThreadPoolExecutor executor = new ThreadPoolExecutor(
                CORE_POOL_SIZE,
                MAXIMUM_POOL_SIZE,
                KEEP_ALIVE_TIME,
                TIME_UNIT,
                new LinkedBlockingQueue<>()
        );
        long start = System.currentTimeMillis();

        // The writer must be created once, outside the per-sheet loop, so that all sheets
        // go into the same workbook stream. try-with-resources closes it before the
        // finally block shuts the pool down.
        try (ExcelWriter excelWriter = FastExcel.write(response.getOutputStream()).build()) {
            List<Future<List<BaseEntity>>> futures = new ArrayList<>();

            // Number of sheets to write; identical to the number of pages/tasks
            int sheetCount = calcSheetCount(totalNum, num);
            log.info("多线程查询,总数:{},开启线程数:{}", totalNum, sheetCount);
            CountDownLatch countDownLatch = new CountDownLatch(sheetCount);
            for (int i = 0; i < sheetCount; i++) {
                // The last page only holds the remainder
                int currentLimit = (i == sheetCount - 1) ? totalNum - num * i : num;
                ListThread readExifInfoThread = new ListThread(countDownLatch, currentLimit, uuid, sheetCount);
                futures.add(executor.submit(readExifInfoThread));
            }

            try {
                countDownLatch.await();
                writeDataToSheets(excelWriter, futures);
            } catch (InterruptedException e) {
                // Only an actual interruption restores the interrupt flag.
                Thread.currentThread().interrupt();
                log.error("写入excel数据失败", e);
            } catch (ExecutionException e) {
                // A page task failed; the sheets written so far stay in the response.
                log.error("写入excel数据失败", e);
            }
        } finally {
            executor.shutdown();
        }

        log.info("查询结束,耗时:{}", System.currentTimeMillis() - start);
    }

    /** Task that loads one page of rows and reports its completion. */
    private static class ListThread implements Callable<List<BaseEntity>> {

        private final CountDownLatch countDownLatch;
        private final Integer limit;
        private final String uuid;
        private final int sheetCount;

        private ListThread(CountDownLatch countDownLatch, Integer limit, String uuid, int sheetCount) {
            this.countDownLatch = countDownLatch;
            this.limit = limit;
            this.uuid = uuid;
            this.sheetCount = sheetCount;
        }

        /**
         * Loads {@code limit} rows.
         *
         * <p>The latch is released in {@code finally}, so a failing task can never leave the
         * exporting thread blocked in {@code await()}. Failures are logged and rethrown; they
         * surface as {@code ExecutionException} from {@code Future.get()} instead of a
         * {@code null} row list.
         *
         * @return the loaded rows, never {@code null}
         * @throws InterruptedException if the worker thread is interrupted while waiting
         */
        @Override
        public List<BaseEntity> call() throws InterruptedException {
            long startTime = System.currentTimeMillis();
            try {
                List<BaseEntity> exifInfoList = data(limit);
                // Random 0-9 s pause (presumably simulates the latency of a real query)
                Thread.sleep(1000 * new Random().nextInt(10));
                long spendTime = System.currentTimeMillis() - startTime;
                log.info("{}查询耗时:{}", Thread.currentThread().getName(), spendTime);
                return exifInfoList;
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                log.error("生成失败", e);
                throw e;
            } catch (RuntimeException e) {
                log.error("生成失败", e);
                throw e;
            } finally {
                countDownLatch.countDown();
                // Progress counts finished tasks, whether they succeeded or failed.
                advanceProgress(uuid, 1 - (countDownLatch.getCount() / (double) sheetCount));
            }
        }
    }

    /**
     * Resets the published progress for {@code uuid} to 0.0; does nothing for a {@code null} uuid.
     */
    private static void resetProgress(String uuid) {
        if (uuid != null) {
            CacheConfig.caffeineCache.put(uuid, 0.0);
        }
    }

    /**
     * Publishes {@code progress} for {@code uuid} unless a larger value is already stored.
     *
     * <p>Tasks finish concurrently, so a slower task could otherwise overwrite 1.0 with a
     * smaller fraction and the export would never appear complete. The merge runs atomically
     * on the cache's map view and keeps the larger value.
     */
    private static void advanceProgress(String uuid, double progress) {
        if (uuid == null) {
            return;
        }
        CacheConfig.caffeineCache.asMap().merge(uuid, progress,
                (previous, current) -> previous instanceof Double && (Double) previous > (Double) current
                        ? previous
                        : current);
    }

    /**
     * Computes how many sheets are needed, rounding up.
     *
     * @param totalNum total number of rows
     * @param num      rows per sheet
     * @return number of sheets required to hold {@code totalNum} rows
     */
    private int calcSheetCount(int totalNum, int num) {
        return totalNum % num == 0 ? (totalNum / num) : (totalNum / num + 1);
    }

    /**
     * Writes all rows into one sheet named "0".
     *
     * @throws IOException if the response output stream cannot be obtained
     */
    private void writeSingleSheet(HttpServletResponse response, List<BaseEntity> data) throws IOException {
        FastExcel.write(response.getOutputStream(), BaseEntity.class)
                .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                .sheet("0")
                .doWrite(data);
    }

    /**
     * Writes each page to its own sheet, in submission order.
     *
     * @throws ExecutionException   if the task that loaded a page failed
     * @throws InterruptedException if interrupted while waiting for a page
     */
    private void writeDataToSheets(ExcelWriter excelWriter, List<Future<List<BaseEntity>>> futures) throws ExecutionException, InterruptedException {
        int i = 0;
        for (Future<List<BaseEntity>> future : futures) {
            List<BaseEntity> exifInfoList = future.get();
            WriteSheet writeSheet = createWriteSheet(i);
            excelWriter.write(exifInfoList, writeSheet);
            i++;
        }
    }

    /**
     * Builds the sheet definition for the given zero-based index; sheets are named
     * "sheet1", "sheet2", and so on.
     */
    private WriteSheet createWriteSheet(int index) {
        return FastExcel.writerSheet(index, "sheet" + (index + 1))
                .head(BaseEntity.class)
                .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                .build();
    }

}
4. 进度查询
java 复制代码
	@GetMapping("/progress")
    /**
     * Streams the export progress for {@code uuid} to the client as plain-text lines.
     *
     * <p>The cache is polled once per second. A line is sent whenever the progress has
     * increased, and the response is completed once the progress reaches 1.0. The emitter
     * times out after 60 seconds.
     *
     * @param uuid key under which the export publishes its progress
     * @return the emitter through which progress lines are pushed
     */
    public ResponseBodyEmitter progress(@RequestParam String uuid) {
        ResponseBodyEmitter emitter = new ResponseBodyEmitter(60000L);

        final double[] lastProgress = {-1.0};
        AtomicBoolean isTimedOut = new AtomicBoolean(false);
        AtomicBoolean isCompleted = new AtomicBoolean(false);

        // Timeout handling
        emitter.onTimeout(() -> {
            log.error("Emitter timed out for uuid: {}", uuid);
            emitter.completeWithError(new RuntimeException("Emitter timed out"));
            isTimedOut.set(true);
        });

        // Completion handling
        emitter.onCompletion(() -> {
            log.info("Emitter completed for uuid: {}", uuid);
            isCompleted.set(true);
        });

        // Error handling
        emitter.onError((throwable) -> {
            log.error("Error occurred for uuid: {}", uuid, throwable);
            isCompleted.set(true);
        });

        AtomicReference<ScheduledFuture<?>> futureRef = new AtomicReference<>();
        // The reference is read at cancel time, not at task start: the first run may begin
        // before futureRef has been set, and a value captured then would be null.
        Runnable cancelPolling = () -> {
            ScheduledFuture<?> scheduled = futureRef.get();
            if (scheduled != null) {
                scheduled.cancel(false);
            }
        };
        Runnable task = () -> {
            try {
                if (isTimedOut.get() || isCompleted.get()) {
                    cancelPolling.run();
                    return;
                }
                // Read-only lookup: polling an unknown uuid must not insert an entry, otherwise
                // arbitrary uuids sent by clients could evict live progress entries.
                Object cached = CacheConfig.caffeineCache.getIfPresent(uuid);
                double currentProgress;
                if (cached == null) {
                    // Nothing published yet
                    currentProgress = 0.0;
                } else if (cached instanceof Number) {
                    currentProgress = ((Number) cached).doubleValue();
                } else {
                    currentProgress = Double.parseDouble(String.valueOf(cached));
                }
                // Reject values that are not a fraction between 0 and 1
                if (currentProgress < 0 || currentProgress > 1) {
                    log.error("Invalid progress value {} for uuid: {}", currentProgress, uuid);
                    return;
                }
                // Only push to the client when the progress has advanced
                if (currentProgress > lastProgress[0]) {
                    emitter.send("Progress: " + String.format("%.2f", currentProgress * 100) + "%\n", MediaType.TEXT_PLAIN);
                    lastProgress[0] = currentProgress;
                }
                // At 100% the response is completed and polling stops
                if (Double.compare(currentProgress, 1) == 0) {
                    emitter.complete();
                    isCompleted.set(true);
                    cancelPolling.run();
                }
            } catch (Exception e) {
                log.error("Error while getting progress for uuid: {}", uuid, e);
                emitter.completeWithError(e);
                isCompleted.set(true);
                cancelPolling.run();
            }
        };

        ScheduledFuture<?> future = executorService.scheduleAtFixedRate(task, 0, 1, TimeUnit.SECONDS);
        futureRef.set(future);

        return emitter;
    }
相关推荐
佳佳_8 分钟前
Spring Boot SSE 示例
spring boot·后端
Seven971 小时前
【设计模式】使用解释器模式简化复杂的语法规则
java·后端·设计模式
李长渊哦1 小时前
Spring Boot 接口延迟响应的实现与应用场景
spring boot·后端·php
Seven971 小时前
【设计模式】通过访问者模式实现分离算法与对象结构
java·后端·设计模式
Seven972 小时前
【设计模式】遍历集合的艺术:深入探索迭代器模式的无限可能
java·后端·设计模式
小杨4042 小时前
springboot框架项目应用实践五(websocket实践)
spring boot·后端·websocket
浪九天2 小时前
Java直通车系列28【Spring Boot】(数据访问Spring Data JPA)
java·开发语言·spring boot·后端·spring
bobz9652 小时前
IKEv1 和 IKEv2 发展历史和演进背景
后端
大鹏dapeng3 小时前
Gone v2 goner/gin——试试用依赖注入的方式打开gin-gonic/gin
后端·go
tan180°3 小时前
版本控制器Git(1)
c++·git·后端