一、提取字段:
依赖
xml
<!-- Apache POI dependencies for the Word template code below; all three artifacts are pinned to the same version (3.17). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- NOTE(review): poi-ooxml is presumably the artifact that supplies the XWPF classes (XWPFDocument, XWPFParagraph, XWPFRun) used by the Java code; confirm. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<!-- NOTE(review): poi-ooxml-schemas may already be pulled in transitively by poi-ooxml; verify before removing the explicit declaration. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
代码
java
/**
 * Replaces template placeholders in the body paragraphs of a Word (.docx) document.
 *
 * <p>Every run of every body paragraph is inspected; each placeholder key found in the
 * run's first text node is replaced by its mapped value. All keys are applied to a run,
 * so a run that holds several different placeholders is fully substituted.
 *
 * <p>Limitations: only {@code xwpfDocument.getParagraphs()} is walked, and only the text
 * at position 0 of each run is read, so a placeholder that is split across several runs
 * is not matched.
 *
 * @param modelMapValue placeholder-to-value map, for example {@code "${编码}" -> "6666"};
 *                      null or empty keys are skipped and a null value is written as an
 *                      empty string
 * @param inputStream   stream containing the .docx template
 * @return an in-memory stream holding the document after substitution
 * @throws BusinessException if {@code modelMapValue} is null or empty
 * @throws RuntimeException  if the document cannot be read or written
 */
public InputStream wordReplaceModel(Map<String, String> modelMapValue, InputStream inputStream) {
    if (modelMapValue == null || modelMapValue.isEmpty()) {
        throw new BusinessException("500", "模板值参数不能为空");
    }
    try (XWPFDocument xwpfDocument = new XWPFDocument(inputStream);
         ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
        for (XWPFParagraph paragraph : xwpfDocument.getParagraphs()) {
            List<XWPFRun> runs = paragraph.getRuns();
            if (runs == null) {
                continue; // paragraph without any run
            }
            for (XWPFRun run : runs) {
                String text = run.getText(0);
                if (text == null || text.trim().isEmpty()) {
                    continue;
                }
                String replaced = text;
                // Apply every placeholder: one run may contain more than one of them.
                for (Map.Entry<String, String> entry : modelMapValue.entrySet()) {
                    String key = entry.getKey();
                    // An empty key would match between every character; a null key would throw.
                    if (key == null || key.isEmpty() || !replaced.contains(key)) {
                        continue;
                    }
                    String value = entry.getValue();
                    // String.replace rejects a null replacement, so fall back to "".
                    replaced = replaced.replace(key, value == null ? "" : value);
                }
                // Touch the run only when something actually changed.
                if (!replaced.equals(text)) {
                    run.setText(replaced, 0);
                }
            }
        }
        xwpfDocument.write(outputStream);
        return new ByteArrayInputStream(outputStream.toByteArray());
    } catch (IOException e) {
        throw new RuntimeException("Word模板替换失败", e);
    }
}
二、Word 转 PDF:安装 LibreOffice,安装中文字体,通过 Java 调用
1、安装后的路径如下(原文此处为截图,示例路径为 /usr/bin/libreoffice);安装过程不再赘述,网上有很多教程。
2、java代码:因为很多场景是上传一个word文件,最后将转换好的文件保存到文件服务器,比如minio,所以入参出参中都包含输入流,可以适配很多场景。
流程:输入一个 Word 文档流,先保存为临时文件;然后在该临时文件所在的目录下生成转换好的 PDF,文件名与临时 Word 文件相同(仅扩展名不同);转换成功后读取 PDF 的内容流,最后删除临时文件。
java
/**
 * Simple carrier that pairs a content stream with the name of the file it represents.
 *
 * <p>Exposes an all-arguments constructor and read-only accessors for both fields.
 */
public class FileDTO {
    /** Stream over the file content. */
    private InputStream inputStream;
    /** Name associated with the content. */
    private String fileName;

    /**
     * Creates a carrier for the given stream and name.
     *
     * @param inputStream stream over the file content
     * @param fileName    name associated with the content
     */
    public FileDTO(InputStream inputStream, String fileName) {
        this.inputStream = inputStream;
        this.fileName = fileName;
    }

    /**
     * @return the stream over the file content
     */
    public InputStream getInputStream() {
        return this.inputStream;
    }

    /**
     * @return the name associated with the content
     */
    public String getFileName() {
        return this.fileName;
    }
}
java
/**
 * Converts a Word document to PDF by running LibreOffice in headless mode.
 *
 * <p>The input stream is copied to a temporary .docx file, LibreOffice writes the PDF
 * next to it, the PDF is loaded fully into memory, and both temporary files are deleted
 * before the method returns. Because the result is held in memory, the returned stream
 * stays readable after the temporary files are gone.
 *
 * <p>The executable path {@code /usr/bin/libreoffice} is hard-coded; adjust it to the
 * local installation.
 *
 * @param wordInputStream stream of the uploaded Word document; it is closed by this method
 * @param fileName        original file name, passed to {@code changeFileExtensionToPdf}
 *                        to derive the name stored in the result
 * @return the PDF content and its file name, or {@code null} if the input is empty,
 *         the conversion fails, or no PDF was produced (the cause is logged)
 */
public FileDTO wordToPdf(InputStream wordInputStream, String fileName) {
    File tempWordFile = null;
    File pdfFile = null;
    Process process = null;
    try {
        tempWordFile = File.createTempFile("tempContract", ".docx");
        // Copy the upload into the temporary Word file.
        try (FileOutputStream fos = new FileOutputStream(tempWordFile);
             InputStream in = wordInputStream) {
            byte[] buffer = new byte[8192];
            int length;
            while ((length = in.read(buffer)) != -1) {
                fos.write(buffer, 0, length);
            }
        }
        // File.length() is 0 for a missing file as well, so one check covers both cases.
        if (tempWordFile.length() == 0) {
            throw new BusinessException("500", "输入的word文件为空");
        }
        log.info("输出目录" + tempWordFile.getParent());

        // Derive the PDF name from the file name only, so a ".docx" fragment in a
        // directory name can never be rewritten. Assigned before the conversion so the
        // finally block removes a partially written PDF as well.
        String wordName = tempWordFile.getName();
        String baseName = wordName.substring(0, wordName.length() - ".docx".length());
        pdfFile = new File(tempWordFile.getParentFile(), baseName + ".pdf");

        // Pass arguments as a list: no shell-style tokenising, so paths with spaces are safe.
        ProcessBuilder builder = new ProcessBuilder(
                "/usr/bin/libreoffice", "--headless", "--convert-to", "pdf",
                "--outdir", tempWordFile.getParent(), tempWordFile.getAbsolutePath());
        // Merge stderr into stdout so a single reader can drain everything.
        builder.redirectErrorStream(true);
        process = builder.start();

        // Drain the output BEFORE waiting; a full pipe buffer would otherwise block the child.
        StringBuilder processOutput = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                processOutput.append(line).append("\n");
            }
        }
        int exitCode = process.waitFor();
        log.info("命令执行结果是" + exitCode);
        if (exitCode != 0) {
            throw new BusinessException("500", "转换失败 code: " + exitCode + ". Error: " + processOutput);
        }

        if (!pdfFile.exists()) {
            log.error("未找到转换后的PDF文件: " + pdfFile.getAbsolutePath());
            return null;
        }
        log.info("找到 转换后的PDF 文件: " + pdfFile.getAbsolutePath());
        // Load the PDF into memory: the temporary file is deleted in the finally block,
        // so a FileInputStream would leak a handle to a deleted file.
        byte[] pdfBytes = java.nio.file.Files.readAllBytes(pdfFile.toPath());
        log.info("成功读取 PDF 文件流");
        return new FileDTO(new java.io.ByteArrayInputStream(pdfBytes), changeFileExtensionToPdf(fileName));
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can react to it.
        Thread.currentThread().interrupt();
        log.error("Error occurred: ", e);
        return null;
    } catch (Exception e) {
        log.error("Error occurred: ", e);
        return null;
    } finally {
        // No-op when the process has already exited; stops it after an interrupt or error.
        if (process != null) {
            process.destroy();
        }
        // Report success only when the delete really happened.
        if (tempWordFile != null && tempWordFile.delete()) {
            log.info("删除word临时文件成功");
        }
        if (pdfFile != null && pdfFile.exists() && pdfFile.delete()) {
            log.info("删除PDF临时文件成功");
        }
    }
}