背景
供应商系统下载的csv文件不支持域控(主要是第三方wps服务不能对csv文件加密,但是可以对office系列产品进行权限访问的加密控制)。因此思路就改为现将csv文件转为excel文件,然后对excel文件进行加域控制。本文主要介绍如何将csv文件转为excel文件。
要求
- Csv文件可能比较大,达到40-60M,需要控制内存使用率;
- 考虑接口的并发,需要进行接口的限流
方案
- 采用alibaba的easyexcel,降低内存占用率,根据压测结果,设置合理的接口限流参数(限流
- 本文不再介绍,可以使用java注解+redis+lua, 或者nginx限流等)
#### 代码
-
CsvController
java
package com.xxx.xxx.controller;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import javax.annotation.Resource;
import javax.servlet.http.HttpServletResponse;
import com.xxx.xxx.common.utils.EasyExcelUtil;
import com.xxx.xxx.common.utils.ObjectUtil;
import com.xxx.xxx.service.ExcelAnalysisService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
/**
* description:
*
* @author: lgq
* @create: 2024-04-16 11:06
*/
@Slf4j
@RestController
@RequestMapping("/csv")
public class CsvController {
@Resource
private ExcelAnalysisService excelAnalysisService;
/**
* 读取传入的csv 文本的内容可以存入数据库
*
* @param file
* @return
*/
@PostMapping("/uploadCsvAndImportExcel")
public void uploadCsvAndImportExcel(@RequestParam("file") MultipartFile file, HttpServletResponse response) {
String[] splitName = file.getOriginalFilename().split(".csv");
if (ObjectUtil.isEmpty(splitName) || ObjectUtil.isEmpty(splitName[0])) {
return;
}
EasyExcelUtil.setResponseParam(response, splitName[0]);
long startTime = System.currentTimeMillis();
log.info("导出开始时间:{}", startTime);
try {
// 输出流可以为本地文件
// OutputStream outputStream = new FileOutputStream("D:\\templateExcel\\filename.xlsx");
OutputStream outputStream = response.getOutputStream();
InputStream inputStream = file.getInputStream();
Future<String> future = excelAnalysisService.csv2Excel(inputStream, outputStream);
future.get();
} catch (IOException ioException) {
log.error("csv转为excel出错!", ioException.getMessage());
ioException.printStackTrace();
} catch (InterruptedException interruptedException) {
log.error("csv转为excel出错!", interruptedException.getMessage());
interruptedException.printStackTrace();
} catch (ExecutionException executionException) {
log.error("csv转为excel出错!", executionException.getMessage());
executionException.printStackTrace();
}
// 导出时间结束
long endTime = System.currentTimeMillis();
log.info("导出结束时间:{}", endTime + "ms");
log.info("导出所用时间:{}", (endTime - startTime) / 1000 + "秒");
}
}
EasyExcelGeneralCsvListener
4.
java
package com.xxx.xxx.listener;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.xxx.xxx.constants.ExcelConstants;
/**
* description:
*
* @author: lgq
* @create: 2024-04-16 11:25
*/
public class EasyExcelGeneralCsvListener extends AnalysisEventListener<Map<Integer, String>> {
/**
* 用于存储读取的数据
*/
private List<Map<Integer, String>> dataList = new ArrayList<>();
private ExcelWriter excelWriter;
private WriteSheet writeSheet;
public EasyExcelGeneralCsvListener() {
}
public EasyExcelGeneralCsvListener(ExcelWriter excelWriter, WriteSheet writeSheet) {
this.excelWriter = excelWriter;
this.writeSheet = writeSheet;
}
@Override
public void invoke(Map<Integer, String> data, AnalysisContext context) {
// 数据add进入集合
dataList.add(data);
// size是否为2000条:这里其实就是分批.当数据等于2k的时候执行一次写入excel
if (dataList.size() >= ExcelConstants.PER_WRITE_EXCEL_ROW_COUNT) {
save2Excel();
// 清理集合便于GC回收
dataList.clear();
}
}
@Override
public void invokeHeadMap(Map<Integer, String> headers, AnalysisContext context) {
List<List<String>> titles = new ArrayList<>();
for (int i = 0; i < headers.size(); i++) {
titles.add(Collections.singletonList(headers.get(i)));
}
this.writeSheet.setHead(titles);
}
/**
* 保存数据到 excel
*/
private void save2Excel() {
if (dataList.size() > 0) {
List<List<String>> consumerDataList = new ArrayList<>();
dataList.stream().forEach( e ->
{
List<String> objects = new ArrayList<>();
for (int i = 0; i < e.size(); i++) {
objects.add(e.get(i));
}
consumerDataList.add(objects);
}
);
this.excelWriter.write(consumerDataList, writeSheet);
}
}
/**
* Excel 中所有数据解析完毕会调用此方法
*/
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
save2Excel();
dataList.clear();
}
}
VisiableThreadPoolTaskExecutor
5.
java
package com.xxx.xxx.task;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.util.concurrent.ListenableFuture;
/**
* description:VisiableThreadPoolTaskExecutor
*
* @author: lgq
* @create: 2024-04-17 10:52
*/
@Slf4j
public class VisiableThreadPoolTaskExecutor extends ThreadPoolTaskExecutor {
private void showThreadPoolInfo(String prefix){
ThreadPoolExecutor threadPoolExecutor = getThreadPoolExecutor();
if(null==threadPoolExecutor){
return;
}
log.info("{}, {},taskCount [{}], completedTaskCount [{}], activeCount [{}], queueSize [{}]",
this.getThreadNamePrefix(),
prefix,
threadPoolExecutor.getTaskCount(),
threadPoolExecutor.getCompletedTaskCount(),
threadPoolExecutor.getActiveCount(),
threadPoolExecutor.getQueue().size());
}
@Override
public void execute(Runnable task) {
showThreadPoolInfo("1. do execute");
super.execute(task);
}
@Override
public void execute(Runnable task, long startTimeout) {
showThreadPoolInfo("2. do execute");
super.execute(task, startTimeout);
}
@Override
public Future<?> submit(Runnable task) {
showThreadPoolInfo("1. do submit");
return super.submit(task);
}
@Override
public <T> Future<T> submit(Callable<T> task) {
showThreadPoolInfo("2. do submit");
return super.submit(task);
}
@Override
public ListenableFuture<?> submitListenable(Runnable task) {
showThreadPoolInfo("1. do submitListenable");
return super.submitListenable(task);
}
@Override
public <T> ListenableFuture<T> submitListenable(Callable<T> task) {
showThreadPoolInfo("2. do submitListenable");
return super.submitListenable(task);
}
}
ExcelAnalysisService
java
package com.xxx.xxx.service;
import java.io.OutputStream;
import java.io.InputStream;
import java.util.concurrent.Future;
/**
* description:excel文档分析处理类
*
* @author: lgq
* @create: 2024-04-17 11:42
*/
public interface ExcelAnalysisService {
/**
* csv文档转为excel文档
*/
Future<String> csv2Excel(InputStream inputStream, OutputStream outputStream);
}
ExcelAnalysisServiceImpl
7.
java
package com.xxx.xxx.service.impl;
import java.io.OutputStream;
import java.nio.charset.Charset;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.xxx.xxx.listener.EasyExcelGeneralCsvListener;
import com.xxx.xxx.service.ExcelAnalysisService;
import lombok.extern.slf4j.Slf4j;
import java.io.InputStream;
import java.util.concurrent.Future;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.AsyncResult;
import org.springframework.stereotype.Service;
/**
* description:ExcelAnalysisService实现类
*
* @author: lgq
* @create: 2024-04-17 14:53
*/
@Service
@Slf4j
public class ExcelAnalysisServiceImpl implements ExcelAnalysisService {
@Async("asyncExcelAnalysisServiceExecutor")
@Override
public Future<String> csv2Excel(InputStream inputStream, OutputStream outputStream) {
try {
ExcelWriter writer = EasyExcel.write(outputStream).excelType(ExcelTypeEnum.XLSX).build();
EasyExcel.read(inputStream, new EasyExcelGeneralCsvListener(writer, new WriteSheet()))
.excelType(ExcelTypeEnum.CSV)
.charset(Charset.forName("UTF-8"))
.sheet()
.doRead();
writer.finish();
outputStream.flush();
} catch (Exception e) {
log.error("csv转为excel出错!", e.getMessage());
e.printStackTrace();
} finally {
if (outputStream != null) {
try {
outputStream.close();
} catch (Exception e) {
log.error("outputStream.close() -> csv转为excel出错!", e.getMessage());
e.printStackTrace();
}
}
if (inputStream != null) {
try {
inputStream.close();
} catch (Exception e) {
log.error("inputStream.close() -> csv转为excel出错!", e.getMessage());
e.printStackTrace();
}
}
}
return new AsyncResult<>("task complete!");
}
}
ExecutorConfig
8.
java
package com.xxx.xxx.config;
import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;
import com.xxx.xxx.task.VisiableThreadPoolTaskExecutor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
/**
* description:线程池配置类
*
* @author: lgq
* @create: 2024-04-17 10:28
*/
@Configuration
@Slf4j
@EnableAsync
public class ExecutorConfig {
private static int corePoolSize = Runtime.getRuntime().availableProcessors() + 1;
private static int maxPoolSize = Runtime.getRuntime().availableProcessors() + 1;
private static int queueCapacity = 100;
private static final String namePrefix = "ExcelAnalysis";
@Bean(name = "asyncExcelAnalysisServiceExecutor")
public Executor asyncExcelServiceExecutor() {
log.info("start asyncExcelAnalysisServiceExecutor----------------");
//ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
//使用可视化运行状态的线程池
ThreadPoolTaskExecutor executor = new VisiableThreadPoolTaskExecutor();
//配置核心线程数
executor.setCorePoolSize(corePoolSize);
//配置最大线程数
executor.setMaxPoolSize(maxPoolSize);
//配置队列大小
executor.setQueueCapacity(queueCapacity);
//配置线程池中的线程的名称前缀
executor.setThreadNamePrefix(namePrefix);
// rejection-policy:当pool已经达到max size的时候,如何处理新任务
// CALLER_RUNS:不在新线程中执行任务,而是有调用者所在的线程来执行
executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
//执行初始化
executor.initialize();
log.info("end asyncExcelAnalysisServiceExecutor------------");
return executor;
}
}
ExcelConstants
9.
java
package com.xxx.xxx.constants;
/**
* description:线程池配置类
*
* @author: lgq
* @create: 2024-04-17 10:28
*/
public class ExcelConstants {
public static final Integer PER_SHEET_ROW_COUNT = 100*10000;
public static final Integer PER_WRITE_ROW_COUNT = 20*10000;
public static final Integer PER_WRITE_EXCEL_ROW_COUNT = 2 * 1000;
public static final Integer GENERAL_ONCE_SAVE_TO_DB_ROWS_JDBC = 10*10000;
public static final Integer GENERAL_ONCE_SAVE_TO_DB_ROWS_MYBATIS = 5*10000;
}
配置文件
10.
```java
spring:
servlet:
multipart:
enabled: true
max-file-size: 100MB # 单个文件的最大值
max-request-size: 100MB # 上传文件总的最大值
```
pom依赖
```java
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.3.2</version>
</dependency>
```
#### 压测
- jvm参数(本地电脑,性能较差)
- -Xms2g -Xmx2g
- 导出日志
性能监控
压测结果