将doc文件转为pdf
itextpdf+poi 方案,针对doc文件,非docx
缺点:不支持复杂的格式,不支持图表、图片等(该方案只提取段落的纯文本写入 PDF)。
需要的依赖
xml
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
<!-- iText 5 core: creates the PDF document. -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian -->
<!-- CJK font resources for iText 5. The Java code below creates the font
     "STSong-Light" with encoding "UniGB-UCS2-H"; without this artifact
     BaseFont.createFont cannot resolve that font. -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- Full OOXML schema jar. Version 1.4 is the release that pairs with POI 4.x;
     the previously listed 1.1 targets much older POI releases. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<!-- Provides the HWPF classes (HWPFDocument, WordExtractor) used to read .doc files. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- NOTE: the Java code below also imports Guava (com.google.common.io.Files)
     and Lombok (@Slf4j); both must already be on the project's classpath. -->
java代码
java
import com.google.common.io.Files;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import java.io.*;
/**
* @Description 将doc文件转换为pdf
*/
@Slf4j
public class DocToPdfUtil {
public static void toPdf(File orgFile, String outputFile) throws IOException {
File file = new File(outputFile);
try {
ByteArrayOutputStream outputStream = toPdf(orgFile);
Files.write(outputStream.toByteArray(), new File(outputFile));
} catch (Exception e) {
e.printStackTrace();
}
}
public static ByteArrayOutputStream toPdf(File file) throws IOException, DocumentException {
Document pdfDoc = null;
WordExtractor extractor = null;
try {
// 读取DOC文件
HWPFDocument doc = new HWPFDocument(new FileInputStream(file));
extractor = new WordExtractor(doc);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
// 创建PDF文档
pdfDoc = new Document();
PdfWriter.getInstance(pdfDoc, outputStream);
pdfDoc.open();
// 写入内容
String[] paragraphs = extractor.getParagraphText();
// 加载中文字体
BaseFont baseFont = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
Font font = new Font(baseFont, 12);
for (String text : paragraphs) {
Paragraph p = new Paragraph(text, font);
pdfDoc.add(p);
}
log.info("==========doc转换pdf成功===============" + file.getAbsolutePath());
return outputStream;
} catch (Exception e) {
log.info("==========doc转换pdf失败===============" + file.getAbsolutePath());
e.printStackTrace();
throw e;
} finally {
// 关闭资源
if (null != pdfDoc) {
pdfDoc.close();
}
if (null != extractor) {
extractor.close();
}
}
}
}
测试用例
java
DocToPdfUtil.toPdf(new File("C:\\mydoc\\明朝那点事.doc"), "C:\\mydoc\\明朝那点事" + System.currentTimeMillis() + ".pdf");

遇到的问题
转换的文件不显示中文
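原因:字体问题。未显式指定中文字体时,中文字符无法显示,需要像下面这样设置中文字体;同时请确认已引入 itext-asian 依赖(com.itextpdf:itext-asian),否则 STSong-Light 字体无法加载。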
java
// Load a Chinese font and wrap it in a 12pt Font to pass to each Paragraph.
// NOTE(review): "STSong-Light" is presumably provided by the separate itext-asian
// artifact rather than the core itextpdf jar — confirm it is on the classpath.
BaseFont baseFont = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
Font font = new Font(baseFont, 12);