引入依赖
XML
<!-- EasyPoi Spring Boot starter -->
<!-- NOTE(review): no <version> is declared here; presumably it is managed by a parent POM or a dependencyManagement section - confirm -->
<dependency>
<groupId>cn.afterturn</groupId>
<artifactId>easypoi-spring-boot-starter</artifactId>
</dependency>
<!-- The version below must match the POI version pulled in by the dependency above, otherwise conflicts may occur -->
<!-- This dependency is mainly needed to parse .doc files with Apache POI's native WordExtractor -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.1</version>
</dependency>
<!-- Hutool utility library -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-core</artifactId>
<version>5.8.10</version>
</dependency>
工具类封装
java
/**
 * Helpers for extracting text from uploaded Word documents and for streaming
 * Word templates stored under the classpath directory {@code /word/}.
 */
public class WordDocumentUtil {

    /** Suffix of legacy binary Word documents. */
    private static final String DOC_SUFFIX = ".doc";

    /** Suffix of OOXML Word documents. */
    private static final String DOCX_SUFFIX = ".docx";

    /** Size of the chunks used when copying a template to the response. */
    private static final int COPY_BUFFER_SIZE = 8 * 1024;

    /**
     * Extracts the plain text of an uploaded Word document.
     *
     * <p>The format is chosen from the original file name (case-insensitive):
     * {@code .doc} is read with {@code WordExtractor}, {@code .docx} with
     * {@code XWPFWordExtractor}. Any other suffix is ignored.
     *
     * @param file uploaded document; may be {@code null}
     * @return the extracted text, or an empty string when the file is
     *         {@code null}, has no name, has an unsupported suffix, or cannot be parsed
     */
    public static String parseWord(MultipartFile file) {
        if (file == null) {
            return "";
        }
        String fileName = file.getOriginalFilename();
        if (fileName == null) {
            return "";
        }
        try {
            if (hasSuffix(fileName, DOC_SUFFIX)) {
                // Close both the upload stream and the extractor once the text is read.
                try (InputStream stream = file.getInputStream();
                     WordExtractor extractor = new WordExtractor(stream)) {
                    return extractor.getText();
                }
            }
            if (hasSuffix(fileName, DOCX_SUFFIX)) {
                // Closing the document releases the underlying package; the extractor
                // only wraps this document, so it is not closed separately.
                try (InputStream stream = file.getInputStream();
                     XWPFDocument document = new XWPFDocument(stream)) {
                    return new XWPFWordExtractor(document).getText();
                }
            }
        } catch (Exception e) {
            // Best-effort contract kept for existing callers: report and return "".
            // Callers that need to distinguish failures should validate the upload first.
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Splits extracted text into lines using the separator that matches the
     * document type: {@code "\r\n"} for {@code .doc}, {@code "\n"} otherwise
     * (i.e. for {@code .docx}).
     *
     * @param file    file name, normally {@code file.getOriginalFilename()}
     * @param wordTxt extracted document text
     * @return the lines of the text; an empty array when {@code wordTxt} is {@code null}
     */
    public static String[] judgeType(String file, String wordTxt) {
        if (wordTxt == null) {
            return new String[0];
        }
        String separator = hasSuffix(file, DOC_SUFFIX) ? "\r\n" : "\n";
        return wordTxt.split(separator);
    }

    /**
     * Streams a Word template from the classpath directory {@code /word/} to
     * the HTTP response as an attachment with a random (UUID based) file name.
     *
     * <p>Responds with {@code 404} when the name is {@code null}, contains a
     * {@code ..} sequence, or does not resolve to a classpath resource.
     *
     * @param fileName template file name relative to {@code /word/}
     * @param response {@link HttpServletResponse} to write the file to
     */
    public void exportTemplate(String fileName, HttpServletResponse response) {
        try {
            // Reject parent-directory sequences so callers cannot escape /word/.
            if (fileName == null || fileName.contains("..")) {
                response.sendError(HttpServletResponse.SC_NOT_FOUND);
                return;
            }
            try (InputStream inputStream = this.getClass().getResourceAsStream("/word/" + fileName)) {
                if (inputStream == null) {
                    response.sendError(HttpServletResponse.SC_NOT_FOUND);
                    return;
                }
                String newFileName = IdUtil.simpleUUID() + StrUtil.DOT + FileUtil.extName(fileName);
                response.setCharacterEncoding("UTF-8");
                response.setContentType("application/msword");
                response.setHeader("Access-Control-Expose-Headers", "Content-disposition");
                response.setHeader("Content-Disposition", "attachment;filename=" + newFileName);

                // Copy exactly the bytes that were read: a single read() may return
                // fewer bytes than requested, and writing the whole buffer would pad
                // the download with zeros or truncate large templates.
                java.io.OutputStream out = response.getOutputStream();
                byte[] buffer = new byte[COPY_BUFFER_SIZE];
                int read;
                while ((read = inputStream.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
                out.flush();
            }
        } catch (Exception e) {
            // Best-effort contract kept for existing callers: report and return.
            e.printStackTrace();
        }
    }

    /**
     * Tells whether {@code fileName} ends with {@code suffix}, ignoring case.
     *
     * @param fileName file name to test; may be {@code null}
     * @param suffix   suffix including the leading dot
     * @return {@code true} when the name ends with the suffix
     */
    private static boolean hasSuffix(String fileName, String suffix) {
        if (fileName == null || fileName.length() < suffix.length()) {
            return false;
        }
        int offset = fileName.length() - suffix.length();
        return fileName.regionMatches(true, offset, suffix, 0, suffix.length());
    }
}
乱码问题
如果读取 resources 下的文件并返回给前端时出现乱码或文件损坏：除了检查 HttpServletResponse 响应中设置的字符编码之外，还有可能是因为 Maven 在构建阶段对资源文件做了过滤（resource filtering），导致二进制的 Word 模板在打包时就已经被破坏，所以需要在 pom.xml 中增加以下配置，把这类文件排除在过滤之外。
XML
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-resources-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <encoding>UTF-8</encoding>
                <!-- Binary Word templates must not go through resource filtering, which would corrupt them -->
                <nonFilteredFileExtensions>
                    <nonFilteredFileExtension>doc</nonFilteredFileExtension>
                    <!-- .docx templates are binary (zip) files as well and need the same exclusion -->
                    <nonFilteredFileExtension>docx</nonFilteredFileExtension>
                </nonFilteredFileExtensions>
            </configuration>
        </plugin>
    </plugins>
</build>