一、引言
在实际的业务开发中,经常会遇到需要导出大量数据到 Excel 文件的场景。当数据量达到百万级时,单线程导出会非常耗时,甚至可能导致内存溢出。为了提高导出效率,我们可以采用多线程的方式进行导出。同时,为了让用户了解导出进度,我们还需要实现进度查询功能。本文将介绍如何使用 FastExcel 实现多线程导出百万级数据 Excel 文件,并利用 ResponseBodyEmitter 与 Caffeine 缓存实现进度查询,让用户能够实时了解导出进度。
二、技术选型
FastExcel:一个简单易用的 Java Excel 操作库,支持读写 Excel 文件,具有高性能和低内存占用的特点。 Java 多线程:利用 Java 的线程池和 CountDownLatch 实现多线程导出。 缓存:使用 Caffeine 作为本地缓存,记录导出进度。 ResponseBodyEmitter:Spring 框架提供的一个工具,用于异步发送响应数据,适合实现进度查询。
三、实现思路
数据拆分:将百万级数据拆分成多个小批次,每个批次对应一个 Excel 的 Sheet。 多线程处理:使用线程池管理多个线程,每个线程负责查询一个 Sheet 的数据。 进度查询:使用 CountDownLatch 统计尚未完成的任务数量,据此计算导出进度并写入缓存。 Excel 写入:所有批次查询完成后,使用 FastExcel 将每个 Sheet 的数据依次写入 Excel 文件。
四、代码实现
项目依赖
首先,确保你的项目中引入了 FastExcel 和 Caffeine 的依赖。如果你使用的是 Maven 项目,可以在 pom.xml 中添加以下依赖:
xml
<!-- FastExcel toolkit (Excel read/write library) -->
<dependency>
<groupId>cn.idev.excel</groupId>
<artifactId>fastexcel</artifactId>
<version>1.0.0</version>
</dependency>
<!-- Caffeine cache library -->
<dependency>
<groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>caffeine</artifactId>
<version>2.8.8</version>
</dependency>
代码示例
1. CacheConfig类: 初始化caffeine
java
/**
 * Caffeine cache configuration.
 *
 * <p>A single cache instance is shared between the static field
 * {@link #caffeineCache} (used directly by non-injected code) and the Spring
 * bean returned by {@link #caffeineCache()}, so both access paths observe the
 * same entries.
 */
@Configuration
public class CacheConfig {
    /** The one shared cache instance; also exposed as a Spring bean. */
    public static final Cache<String, Object> caffeineCache = buildCache();

    /**
     * Exposes the shared cache as a Spring bean.
     *
     * <p>Previously this method built a brand-new cache on every call, so the
     * bean and the static field were two unrelated caches.
     *
     * @return the shared cache instance
     */
    @Bean
    public static Cache<String, Object> caffeineCache() {
        return caffeineCache;
    }

    /** Builds the cache with the size, expiry, listener and stats settings. */
    private static Cache<String, Object> buildCache() {
        return Caffeine.newBuilder()
                // Initial capacity (left at the library default)
                //.initialCapacity(100)
                // Maximum number of cached entries
                .maximumSize(500)
                // Entries expire a fixed time after they were written
                .expireAfterWrite(3, TimeUnit.MINUTES)
                // Listener invoked when an entry is removed
                .removalListener((key, value, cause) -> System.out.println("key:" + key + " value:" + value + " cause:" + cause))
                // Enable statistics recording
                .recordStats()
                .build();
    }
}
2. controller类: 入口
java
/**
 * Multi-threaded Excel write endpoint.
 *
 * <p>Delegates directly to {@code writeService.threadWrite}, passing all
 * three arguments through unchanged.
 *
 * @param response servlet response handed to the write service
 * @param count    required request parameter forwarded to the write service
 * @param uuid     required request parameter forwarded to the write service
 * @throws IOException if the write service throws it
 */
@RequestMapping("/threadWrite")
public void threadWrite(
        HttpServletResponse response,
        @RequestParam Integer count,
        @RequestParam String uuid) throws IOException {
    writeService.threadWrite(response, count, uuid);
}
3. service类: 实现
java
/**
 * Shared helpers for the Excel demo services: sample-data generation,
 * resource path lookup, download response headers and timestamp formatting.
 */
public class BaseService {
    /** Formatter for {@link #getNow()}; built once because it is immutable and thread-safe. */
    private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormatter.ofPattern("yyyyMMddHHmmss");

    /** Number of rows produced by the no-argument generators. */
    private static final int DEFAULT_ROW_COUNT = 10;

    /**
     * Generates the default number of sample rows.
     *
     * @return a mutable list of 10 sample entities
     */
    protected static List<BaseEntity> data() {
        return data(DEFAULT_ROW_COUNT);
    }

    /**
     * Generates sample rows to be written.
     *
     * @param num number of rows to generate; zero or negative yields an empty list
     * @return a mutable list containing {@code num} sample entities
     */
    protected static List<BaseEntity> data(int num) {
        // Presize the list; clamp so a negative count still yields an empty list.
        List<BaseEntity> list = new ArrayList<>(Math.max(num, 0));
        // One generator for the whole batch instead of a new Random per row.
        Random random = new Random();
        for (int i = 0; i < num; i++) {
            BaseEntity data = new BaseEntity();
            data.setId(i);
            data.setString("字符串" + random.nextInt(100));
            data.setDate(new Date());
            data.setDoubleData(0.56);
            list.add(data);
        }
        return list;
    }

    /**
     * Generates 10 rows of random alphanumeric fill data.
     *
     * @return a mutable list of fill entities
     */
    protected static List<FillBaseEntity> fillData() {
        List<FillBaseEntity> list = new ArrayList<>(DEFAULT_ROW_COUNT);
        for (int i = 0; i < DEFAULT_ROW_COUNT; i++) {
            FillBaseEntity data = new FillBaseEntity();
            data.setName(RandomStringUtils.randomAlphanumeric(5));
            data.setPassword(RandomStringUtils.randomAlphanumeric(8));
            data.setAccount(RandomStringUtils.randomAlphanumeric(11));
            list.add(data);
        }
        return list;
    }

    /**
     * Resolves the path of the {@code /static/} resource directory.
     *
     * @param aClass class whose class loader is used for the lookup
     * @return the path component of the resource URL
     * @throws NullPointerException if the {@code /static/} resource is not found
     */
    protected static String getPath(Class<?> aClass) {
        // Files under target/ are affected by Maven's resource encoding; handle that in the pom.
        return Objects.requireNonNull(aClass.getResource("/static/")).getPath();
    }

    /**
     * Sets the response headers for an {@code .xlsx} attachment download.
     *
     * @param response response to configure
     * @param fileName file name without extension
     */
    protected void setResponse(HttpServletResponse response, String fileName) {
        response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
        response.setCharacterEncoding("utf-8");
        // NOTE(review): fileName is inserted as-is; a non-ASCII name would need percent-encoding here.
        response.setHeader("Content-disposition", "attachment;filename*=utf-8''" + fileName + ".xlsx");
    }

    /**
     * Formats the current local date-time as {@code yyyyMMddHHmmss}.
     *
     * @return the formatted timestamp
     */
    protected static String getNow() {
        return LocalDateTime.now().format(TIMESTAMP_FORMAT);
    }
}
java
/**
 * Writes generated {@link BaseEntity} rows to the HTTP response as an Excel
 * workbook. Small exports go to a single sheet; larger ones are split into
 * pages that are produced concurrently and written one sheet per page, while
 * the completed fraction is published to {@link CacheConfig#caffeineCache}
 * under the caller-supplied uuid.
 */
@Slf4j
@Service
public class WriteServiceImpl extends BaseService implements WriteService {
    /** Row count above which the export is split into multiple sheets. */
    private static final int SPLIT_COUNT = 100;
    /** Maximum number of rows per sheet in the multi-sheet export. */
    private static final int PAGE_COUNT = 100;
    private static final int CORE_POOL_SIZE = 10;
    /**
     * NOTE(review): the pool uses an unbounded queue, and ThreadPoolExecutor only
     * grows beyond the core size when the queue is full, so this limit is never reached.
     */
    private static final int MAXIMUM_POOL_SIZE = 20;
    private static final long KEEP_ALIVE_TIME = 100;
    private static final TimeUnit TIME_UNIT = TimeUnit.SECONDS;

    /**
     * Streams an Excel export to the response.
     *
     * @param response response that receives the download headers and workbook bytes
     * @param count    number of rows to export
     * @param uuid     key under which export progress is stored in the cache
     * @throws IOException if the response output stream cannot be obtained or written
     */
    @Override
    public void threadWrite(HttpServletResponse response, Integer count, String uuid) throws IOException {
        String fileName = "threadWrite" + getNow() + "_" + count;
        setResponse(response, fileName);
        // Above SPLIT_COUNT rows the export is paged across several sheets.
        if (count > SPLIT_COUNT) {
            this.multiThreadList(response, count, uuid);
        } else {
            List<BaseEntity> data = data(count);
            writeSingleSheet(response, data);
            // Publish completion so a progress poll for a small export also reaches 100%.
            CacheConfig.caffeineCache.put(uuid, 1.0d);
        }
    }

    /**
     * Produces the pages on a thread pool, waits for all of them, then writes
     * each page to its own sheet in submission order.
     *
     * @param response response whose output stream receives the workbook
     * @param totalNum total number of rows to export
     * @param uuid     progress cache key
     * @throws IOException if the response output stream cannot be obtained
     */
    private void multiThreadList(HttpServletResponse response, Integer totalNum, String uuid) throws IOException {
        // Rows stored per sheet
        int num = PAGE_COUNT;
        // Worker pool for the page queries
        ThreadPoolExecutor executor = new ThreadPoolExecutor(
                CORE_POOL_SIZE,
                MAXIMUM_POOL_SIZE,
                KEEP_ALIVE_TIME,
                TIME_UNIT,
                new LinkedBlockingQueue<>()
        );
        long start = System.currentTimeMillis();
        try {
            // The writer is created once, outside the loop (per the original note, otherwise the stream is flushed).
            try (ExcelWriter excelWriter = FastExcel.write(response.getOutputStream()).build()) {
                List<Future<List<BaseEntity>>> futures = new ArrayList<>();
                // Number of sheets to write; equals the number of pages
                int sheetCount = calcSheetCount(totalNum, num);
                // Progress is stored as a fraction in [0, 1]; start at zero
                // (the sheet count itself would be rejected by the progress reader).
                CacheConfig.caffeineCache.put(uuid, 0.0d);
                log.info("多线程查询,总数:{},开启线程数:{}", totalNum, sheetCount);
                CountDownLatch countDownLatch = new CountDownLatch(sheetCount);
                for (int i = 0; i < sheetCount; i++) {
                    // The last page takes whatever rows remain
                    int currentLimit = (i == sheetCount - 1) ? totalNum - num * i : num;
                    ListThread readExifInfoThread = new ListThread(countDownLatch, currentLimit, uuid, sheetCount);
                    futures.add(executor.submit(readExifInfoThread));
                }
                try {
                    countDownLatch.await();
                    writeDataToSheets(excelWriter, futures);
                } catch (InterruptedException e) {
                    // Restore the flag only for a genuine interruption
                    Thread.currentThread().interrupt();
                    log.error("写入excel数据失败", e);
                } catch (ExecutionException e) {
                    log.error("写入excel数据失败", e);
                }
            }
        } finally {
            executor.shutdown();
        }
        log.info("查询结束,耗时:{}", System.currentTimeMillis() - start);
    }

    /** Worker that produces one page of rows and reports progress when done. */
    private static class ListThread implements Callable<List<BaseEntity>> {
        private final CountDownLatch countDownLatch;
        private final Integer limit;
        private final String uuid;
        private final int sheetCount;

        private ListThread(CountDownLatch countDownLatch, Integer limit, String uuid, int sheetCount) {
            this.countDownLatch = countDownLatch;
            this.limit = limit;
            this.uuid = uuid;
            this.sheetCount = sheetCount;
        }

        /**
         * Generates this page's rows, then counts down the latch and publishes
         * the completed fraction.
         *
         * @return the generated rows, or {@code null} if generation failed
         */
        @Override
        public List<BaseEntity> call() {
            long startTime = System.currentTimeMillis();
            List<BaseEntity> exifInfoList = null;
            try {
                exifInfoList = data(limit);
                // Random 0-9 second pause per page
                Thread.sleep(1000 * new Random().nextInt(10));
                long endTime = System.currentTimeMillis();
                long spendTime = endTime - startTime;
                log.info("{}查询耗时:{}", Thread.currentThread().getName(), spendTime);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                log.error("生成失败", e);
            } catch (Exception e) {
                log.error("生成失败", e);
            } finally {
                // Always count down, otherwise a failed page leaves await() blocked forever.
                // Count-down and publish happen under one lock so a slower thread cannot
                // overwrite a newer progress value with a stale one.
                synchronized (countDownLatch) {
                    countDownLatch.countDown();
                    CacheConfig.caffeineCache.put(uuid, 1 - (countDownLatch.getCount() / (double) sheetCount));
                }
            }
            return exifInfoList;
        }
    }

    /**
     * Computes how many pages of {@code num} rows are needed for {@code totalNum} rows.
     *
     * @param totalNum total row count
     * @param num      rows per page
     * @return {@code totalNum / num}, rounded up when there is a remainder
     */
    private int calcSheetCount(int totalNum, int num) {
        return totalNum % num == 0 ? (totalNum / num) : (totalNum / num + 1);
    }

    /**
     * Writes all rows into a single sheet named "0".
     *
     * @param response response whose output stream receives the workbook
     * @param data     rows to write
     * @throws IOException if the response output stream cannot be obtained
     */
    private void writeSingleSheet(HttpServletResponse response, List<BaseEntity> data) throws IOException {
        FastExcel.write(response.getOutputStream(), BaseEntity.class)
                .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                .sheet("0")
                .doWrite(data);
    }

    /**
     * Writes each page's rows to its own sheet, in submission order.
     *
     * @param excelWriter open writer for the target workbook
     * @param futures     page results in sheet order
     * @throws ExecutionException   if a page task failed with an exception
     * @throws InterruptedException if interrupted while waiting for a page
     */
    private void writeDataToSheets(ExcelWriter excelWriter, List<Future<List<BaseEntity>>> futures) throws ExecutionException, InterruptedException {
        int i = 0;
        for (Future<List<BaseEntity>> future : futures) {
            List<BaseEntity> exifInfoList = future.get();
            if (exifInfoList == null) {
                // A failed page returns null; write an empty sheet rather than passing null on.
                exifInfoList = new ArrayList<>();
            }
            WriteSheet writeSheet = createWriteSheet(i);
            excelWriter.write(exifInfoList, writeSheet);
            i++;
        }
    }

    /**
     * Builds the sheet definition for the given zero-based index; the sheet is
     * named {@code "sheet" + (index + 1)}.
     *
     * @param index zero-based sheet index
     * @return the sheet definition
     */
    private WriteSheet createWriteSheet(int index) {
        return FastExcel.writerSheet(index, "sheet" + (index + 1))
                .head(BaseEntity.class)
                .registerWriteHandler(new LongestMatchColumnWidthStyleStrategy())
                .build();
    }
}
4. 进度查询
java
/**
 * Streams export progress for the given uuid as plain-text lines.
 *
 * <p>A task scheduled once per second reads the progress fraction stored in
 * {@link CacheConfig#caffeineCache}, sends a line whenever the value has
 * increased, and completes the emitter when the fraction reaches 1. The
 * emitter has a 60-second timeout.
 *
 * @param uuid cache key under which the export publishes its progress
 * @return the emitter the progress lines are sent through
 */
@GetMapping("/progress")
public ResponseBodyEmitter progress(@RequestParam String uuid) {
    ResponseBodyEmitter emitter = new ResponseBodyEmitter(60000L);
    // Single-element array so the lambda can update the last value sent
    final double[] lastProgress = {-1.0};
    AtomicBoolean isTimedOut = new AtomicBoolean(false);
    AtomicBoolean isCompleted = new AtomicBoolean(false);
    // Timeout handling
    emitter.onTimeout(() -> {
        log.error("Emitter timed out for uuid: {}", uuid);
        emitter.completeWithError(new RuntimeException("Emitter timed out"));
        isTimedOut.set(true);
    });
    // Completion handling
    emitter.onCompletion(() -> {
        log.info("Emitter completed for uuid: {}", uuid);
        isCompleted.set(true);
    });
    // Error handling
    emitter.onError((throwable) -> {
        log.error("Error occurred for uuid: {}", uuid, throwable);
        isCompleted.set(true);
    });
    AtomicReference<ScheduledFuture<?>> futureRef = new AtomicReference<>();
    // Looks the future up at cancel time: on the very first run it may not be
    // published yet, in which case the next run cancels it instead.
    Runnable cancelSelf = () -> {
        ScheduledFuture<?> scheduled = futureRef.get();
        if (scheduled != null) {
            scheduled.cancel(false);
        }
    };
    Runnable task = () -> {
        try {
            if (isTimedOut.get() || isCompleted.get()) {
                cancelSelf.run();
                return;
            }
            // Read progress without inserting an entry for unknown uuids
            Object cached = CacheConfig.caffeineCache.getIfPresent(uuid);
            double currentProgress;
            if (cached == null) {
                currentProgress = 0.0;
            } else if (cached instanceof Number) {
                currentProgress = ((Number) cached).doubleValue();
            } else {
                currentProgress = Double.parseDouble(String.valueOf(cached));
            }
            // Validate the value before reporting it
            if (currentProgress < 0 || currentProgress > 1) {
                log.error("Invalid progress value {} for uuid: {}", currentProgress, uuid);
                return;
            }
            // Send an update only when progress has advanced
            if (currentProgress > lastProgress[0]) {
                emitter.send("Progress: " + String.format("%.2f", currentProgress * 100) + "%\n", MediaType.TEXT_PLAIN);
                lastProgress[0] = currentProgress;
            }
            // Finish the response once progress reaches 100%
            if (Double.compare(currentProgress, 1) == 0) {
                emitter.complete();
                isCompleted.set(true);
                cancelSelf.run();
            }
        } catch (Exception e) {
            log.error("Error while getting progress for uuid: {}", uuid, e);
            emitter.completeWithError(e);
            isCompleted.set(true);
            cancelSelf.run();
        }
    };
    ScheduledFuture<?> future = executorService.scheduleAtFixedRate(task, 0, 1, TimeUnit.SECONDS);
    futureRef.set(future);
    return emitter;
}