使用 Apache POI 和 iTextPDF 将 Word 转换为 PDF 需要分两步操作:先用 POI 读取 Word 内容,再用 iText 生成 PDF。
Apache POI 官方文档:Apache POI™ - Javadocs(https://poi.apache.org/apidocs/index.html)
以下是详细的代码实现示例:
环境准备
在 pom.xml 中添加依赖(其中 ${poi.version} 需要在 pom.xml 的 <properties> 中自行定义):
<!-- Apache POI artifacts. ${poi.version} is a Maven property that is not defined in this
     snippet; declare it under <properties> in the same pom.xml. -->
<!-- The sample classes import org.apache.poi.xwpf.usermodel.* and
     org.openxmlformats.schemas.wordprocessingml.x2006.main.* to read .docx files. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${poi.version}</version>
</dependency>
<!-- NOTE(review): none of the sample classes imports anything from html2pdf; it looks
     unused here. Confirm whether it is needed alongside the com.itextpdf.text API. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>html2pdf</artifactId>
<version>3.0.2</version>
</dependency>
<!-- Provides the com.itextpdf.text.* / com.itextpdf.text.pdf.* classes imported by the
     sample code (Document, PdfWriter, PdfPTable, BaseFont, ...). -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.6</version>
</dependency>
<!-- Presumably supplies the CJK font resources behind the "STSong-Light" /
     "UniGB-UCS2-H" names passed to BaseFont.createFont in the sample code; verify. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
<version>5.2.0</version>
</dependency>
完整代码示例
1.入口类
package org.example;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.*;
import java.util.List;
/**
 * Entry point that converts a .docx file to PDF: Apache POI reads the Word document and
 * iText writes the PDF.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class Main {
    /**
     * Reads the hard-coded source .docx and writes the hard-coded target PDF.
     *
     * @param args unused
     * @throws IOException if the Word file cannot be read or the PDF output cannot be created
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the stream and the Word document even when conversion fails.
        try (FileInputStream fis = new FileInputStream("D:\\testaa\\11.docx");
             XWPFDocument doc = new XWPFDocument(fis)) {
            // Body elements are the top-level paragraphs and tables, in document order.
            List<IBodyElement> bodyElementList = doc.getBodyElements();
            Document pdfDoc = createPdf("D:\\testaa\\22.pdf");
            if (pdfDoc == null) {
                // createPdf reports failure by returning null; fail with a clear message
                // instead of a NullPointerException further down.
                throw new IOException("Unable to create PDF output: D:\\testaa\\22.pdf");
            }
            try {
                BodyElementUtil.createBody(bodyElementList, pdfDoc);
            } catch (DocumentException e) {
                throw new RuntimeException(e);
            } finally {
                closeIfStillOpen(pdfDoc);
            }
        }
    }

    /**
     * Closes the PDF document if the conversion did not get as far as closing it itself,
     * so the output stream is released on the failure path too.
     *
     * @param pdfDoc the document to close; must not be null
     */
    private static void closeIfStillOpen(Document pdfDoc) {
        if (!pdfDoc.isOpen()) {
            return;
        }
        try {
            pdfDoc.close();
        } catch (RuntimeException closeFailure) {
            // Only reached while another failure is already propagating; report it without
            // masking the original exception.
            System.err.println("Failed to close PDF document: " + closeFailure);
        }
    }

    /**
     * Creates the target file (and any missing parent directories), binds a PDF writer to
     * it and opens the document for writing.
     *
     * @param pdfFilePath path of the PDF file to create
     * @return the opened document, or {@code null} if it could not be created
     *         (the cause is printed to standard error)
     */
    public static Document createPdf(String pdfFilePath) {
        try {
            File pdfFile = new File(pdfFilePath);
            // getParentFile() is null for a bare file name such as "out.pdf"; in that case
            // there is no directory to create.
            File parentDir = pdfFile.getParentFile();
            if (parentDir != null && !parentDir.exists()) {
                parentDir.mkdirs();
            }
            Document pdfDoc = new Document();
            // FileOutputStream creates the file itself, so no separate createNewFile() is needed.
            FileOutputStream out = new FileOutputStream(pdfFile);
            try {
                PdfWriter.getInstance(pdfDoc, out);
                pdfDoc.open();
            } catch (DocumentException | RuntimeException e) {
                // Do not leak the file handle when the writer cannot be set up.
                out.close();
                throw e;
            }
            return pdfDoc;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
}
2.处理body元素的工具类
package org.example;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import java.util.List;
/**
 * Utility that walks the body elements of a Word document and writes their iText
 * equivalents into a PDF document.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class BodyElementUtil {
    /**
     * Converts each body element to iText content, adds it to the PDF in order, and then
     * closes the PDF document.
     *
     * @param bodyElementList body elements of the Word document, in document order
     * @param pdfDoc          an opened PDF document; it is closed before this method returns normally
     * @throws DocumentException if iText rejects an element
     */
    public static void createBody(List<IBodyElement> bodyElementList, Document pdfDoc) throws DocumentException {
        for (IBodyElement bodyElement : bodyElementList) {
            System.out.println(bodyElement.getElementType().name());
            switch (bodyElement.getElementType()) {
                case PARAGRAPH:
                    addParagraphs(bodyElement, pdfDoc);
                    break;
                case TABLE:
                    addTable(bodyElement, pdfDoc);
                    break;
                default:
                    // Other element types are not converted.
                    break;
            }
        }
        pdfDoc.close();
        System.out.println("结束:");
    }

    /**
     * Adds the paragraphs produced for one paragraph element.
     */
    private static void addParagraphs(IBodyElement bodyElement, Document pdfDoc) throws DocumentException {
        List<Paragraph> paragraphs = ParagraphUtil.createParagraph(bodyElement);
        if (paragraphs == null) {
            return;
        }
        for (Paragraph paragraph : paragraphs) {
            // Guard against null entries: Document.add(null) would abort the whole conversion.
            if (paragraph != null) {
                pdfDoc.add(paragraph);
            }
        }
    }

    /**
     * Adds exactly the table represented by this body element. Looking up
     * {@code bodyElement.getBody().getTables()} here instead would return every table of
     * the enclosing body, so each table element would render all tables again.
     */
    private static void addTable(IBodyElement bodyElement, Document pdfDoc) throws DocumentException {
        if (!(bodyElement instanceof XWPFTable)) {
            return;
        }
        PdfPTable pdfTable = TableUtil.createTable((XWPFTable) bodyElement);
        // Keep the borderless single-column wrapper so the table is laid out as before.
        PdfPCell wrapperCell = new PdfPCell(pdfTable);
        wrapperCell.setBorder(0);
        PdfPTable wrapper = new PdfPTable(1);
        wrapper.addCell(wrapperCell);
        pdfDoc.add(wrapper);
    }
}
3.处理段落:读取文字大小和字体类型,并转换成 PDF 中对应的字体和文字大小
package org.example;
import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfPCell;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcBorders;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Converts Word paragraphs (text runs and embedded pictures) into iText paragraphs.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class ParagraphUtil {
    /**
     * Converts a body element into iText paragraphs.
     *
     * <p>When the element itself is a paragraph, only that paragraph is converted, so a
     * caller iterating over body elements gets each paragraph exactly once. For any other
     * element type, all paragraphs of the element's enclosing body are converted.
     *
     * @param bodyElement the Word body element to convert
     * @return the converted paragraphs; never null and containing no null entries
     */
    public static List<Paragraph> createParagraph(IBodyElement bodyElement) {
        System.out.println("创建段落");
        List<XWPFParagraph> sources;
        if (bodyElement instanceof XWPFParagraph) {
            sources = new ArrayList<>();
            sources.add((XWPFParagraph) bodyElement);
        } else {
            sources = bodyElement.getBody().getParagraphs();
        }
        List<Paragraph> converted = new ArrayList<>();
        for (XWPFParagraph paragraph : sources) {
            logParagraph(paragraph);
            converted.add(getPhase(paragraph));
        }
        return converted;
    }

    /**
     * Prints diagnostic information about a paragraph to standard output.
     */
    private static void logParagraph(XWPFParagraph paragraph) {
        System.out.println("段落内容:" + paragraph.getText());
        System.out.println("part:" + paragraph.getPart().getPackagePart().getContentType());
        System.out.println("part name:" + paragraph.getPart().getPackagePart().getPartName());
        ParagraphAlignment alignment = paragraph.getAlignment();
        System.out.println("段落对齐方式 name:" + alignment.name());
        System.out.println("段落对齐方式 value:" + alignment.getValue());
        System.out.println("style" + paragraph.getStyle());
        System.out.println("文字对齐方式" + paragraph.getFontAlignment());
    }

    /**
     * Converts one Word paragraph: pictures and text of every run are appended in run
     * order, then the paragraph alignment is applied.
     *
     * @param paragraph the Word paragraph
     * @return the iText paragraph; never null (it is empty when the paragraph has no content)
     */
    private static Paragraph getPhase(XWPFParagraph paragraph) {
        Paragraph pa = new Paragraph();
        for (XWPFRun run : paragraph.getRuns()) {
            if (run == null) {
                continue;
            }
            for (Image image : setPicture(run)) {
                pa.add(image);
            }
            String text = run.getText(0);
            if (text == null) {
                // A run without text (e.g. a picture-only run) must not discard the
                // whole paragraph; just move on to the next run.
                continue;
            }
            pa.add(new Chunk(text, setFont(run)));
        }
        applyAlignment(pa, paragraph.getAlignment());
        return pa;
    }

    /**
     * Maps the Word alignment to the iText alignment; other values keep iText's default.
     */
    private static void applyAlignment(Paragraph target, ParagraphAlignment alignment) {
        if (alignment == null) {
            return;
        }
        if (alignment == ParagraphAlignment.CENTER) {
            target.setAlignment(Element.ALIGN_CENTER);
        } else if (alignment == ParagraphAlignment.RIGHT) {
            target.setAlignment(Element.ALIGN_RIGHT);
        } else if (alignment == ParagraphAlignment.LEFT) {
            target.setAlignment(Element.ALIGN_LEFT);
        }
    }

    /**
     * Loads the pictures embedded in a run.
     *
     * @param run the Word run
     * @return the pictures as iText images, in run order; empty when the run has none
     */
    private static List<Image> setPicture(XWPFRun run) {
        List<Image> images = new ArrayList<>();
        List<XWPFPicture> pictures = run.getEmbeddedPictures();
        if (pictures == null) {
            return images;
        }
        for (XWPFPicture pic : pictures) {
            try {
                images.add(getimage(pic.getPictureData()));
            } catch (BadElementException | IOException e) {
                throw new RuntimeException(e);
            }
        }
        return images;
    }

    /**
     * Builds the iText font for a run, using the "STSong-Light" CJK base font.
     * The font is bold when the run is bold or its font family is "黑体".
     *
     * @param run the Word run
     * @return the font to render the run's text with
     */
    private static Font setFont(XWPFRun run) {
        try {
            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            boolean bold = run.isBold() || "黑体".equals(run.getFontFamily());
            // NOTE(review): getFontSize() presumably returns -1 when the run has no explicit
            // size, which iText would treat as an undefined (default) size - verify for the
            // POI version in use.
            return new Font(bf, run.getFontSize(), bold ? Font.BOLD : Font.NORMAL, BaseColor.BLACK);
        } catch (DocumentException | IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates an iText image from the raw bytes of a Word picture.
     *
     * @param picdata the picture data stored in the Word document
     * @return the iText image
     * @throws BadElementException if iText cannot build an image from the data
     * @throws IOException         if the image bytes cannot be read
     */
    private static Image getimage(XWPFPictureData picdata) throws BadElementException, IOException {
        return Image.getInstance(picdata.getData());
    }
}
4.处理表格:包含行合并、列合并,以及表格中含有图片的情况
package org.example;
import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcBorders;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Converts Word tables (cell text, cell borders and embedded pictures) into iText tables.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class TableUtil {
    /**
     * Converts a Word table. Every Word row becomes its own nested table placed in a
     * borderless cell of a single-column outer table, so rows with different cell counts
     * can coexist.
     *
     * @param doctable the Word table
     * @return the outer iText table; it has no rows when the Word table has none
     */
    public static PdfPTable createTable(XWPFTable doctable) {
        PdfPTable allTable = new PdfPTable(1);
        List<XWPFTableRow> rows = doctable.getRows();
        if (rows.isEmpty()) {
            // Nothing to convert; also avoids dereferencing a missing first row.
            return allTable;
        }
        // The cell count of the first row is used as the column count of the table.
        int cols = rows.get(0).getTableCells().size();
        for (XWPFTableRow row : rows) {
            // A List keeps the cells in their left-to-right order; a hash-based set would not.
            List<PdfPCell> cells = new ArrayList<>();
            int noBorder = 0;
            for (XWPFTableCell cell : row.getTableCells()) {
                CTTcPr cellPr = cell.getCTTc().getTcPr();
                CTTcBorders cellBorders = (cellPr == null) ? null : cellPr.getTcBorders();
                PdfPCell pdfCell = getPhase(cell);
                if (cellBorders != null) {
                    pdfCell = setBorder(pdfCell, cellBorders);
                } else {
                    pdfCell.setBorder(0);
                    noBorder++;
                }
                cells.add(pdfCell);
            }
            if (cells.isEmpty()) {
                // PdfPTable cannot be created with zero columns.
                continue;
            }
            PdfPTable ptable = new PdfPTable(cells.size());
            // A row in which no cell declares borders is treated as a merged row.
            boolean borderlessRow = noBorder == cells.size();
            for (PdfPCell cell : cells) {
                if (borderlessRow) {
                    // NOTE(review): the previous code guarded this with a counter that was
                    // never incremented, so the column span has always been applied to every
                    // cell of such a row; that behaviour is kept. Confirm whether only the
                    // first cell was meant to span.
                    cell.setColspan(cols);
                }
                ptable.addCell(cell);
            }
            PdfPCell lastCell = new PdfPCell(ptable);
            lastCell.setBorder(0);
            allTable.addCell(lastCell);
        }
        return allTable;
    }

    /**
     * Converts one Word cell. Each Word paragraph becomes one aligned iText paragraph
     * holding the text of all its runs; pictures are added to the cell as separate
     * elements, keeping their position relative to the surrounding text.
     *
     * @param cell the Word table cell
     * @return the iText cell; never null
     */
    private static PdfPCell getPhase(XWPFTableCell cell) {
        PdfPCell pdfCell = new PdfPCell();
        for (XWPFParagraph paragraph : cell.getParagraphs()) {
            ParagraphAlignment alignment = paragraph.getAlignment();
            Paragraph pdfParagraph = newAlignedParagraph(alignment);
            for (XWPFRun run : paragraph.getRuns()) {
                if (run == null) {
                    continue;
                }
                List<Image> images = setPicture(run);
                if (!images.isEmpty()) {
                    // Flush the text collected so far so it stays in front of the picture.
                    if (!pdfParagraph.isEmpty()) {
                        pdfCell.addElement(pdfParagraph);
                        pdfParagraph = newAlignedParagraph(alignment);
                    }
                    for (Image image : images) {
                        pdfCell.addElement(image);
                    }
                }
                String text = run.getText(0);
                if (text != null) {
                    // Collect runs in one paragraph: adding each chunk to the cell directly
                    // would lose the alignment and put every run on its own line.
                    pdfParagraph.add(new Chunk(text, setFont(run)));
                }
            }
            if (!pdfParagraph.isEmpty()) {
                pdfCell.addElement(pdfParagraph);
            }
        }
        return pdfCell;
    }

    /**
     * Creates an empty iText paragraph carrying the given Word alignment; alignments other
     * than center, right and left keep iText's default.
     */
    private static Paragraph newAlignedParagraph(ParagraphAlignment alignment) {
        Paragraph pdfParagraph = new Paragraph();
        if (alignment == ParagraphAlignment.CENTER) {
            pdfParagraph.setAlignment(Element.ALIGN_CENTER);
        } else if (alignment == ParagraphAlignment.RIGHT) {
            pdfParagraph.setAlignment(Element.ALIGN_RIGHT);
        } else if (alignment == ParagraphAlignment.LEFT) {
            pdfParagraph.setAlignment(Element.ALIGN_LEFT);
        }
        return pdfParagraph;
    }

    /**
     * Applies the Word cell borders to an iText cell: every side that has a border entry
     * gets a black border of width 1, all other sides get none.
     *
     * @param cell1       the cell to style; a new cell is created when null
     * @param cellBorders the Word border definition; when null the cell is left borderless
     * @return the styled cell
     */
    private static PdfPCell setBorder(PdfPCell cell1, CTTcBorders cellBorders) {
        if (null == cell1) {
            cell1 = new PdfPCell();
        }
        // Start without any border, then enable the sides that are defined.
        cell1.setBorder(0);
        if (null == cellBorders) {
            return cell1;
        }
        if (null != cellBorders.getBottom()) {
            cell1.setBorderColorBottom(BaseColor.BLACK);
            cell1.setBorderWidthBottom(1);
        }
        if (null != cellBorders.getTop()) {
            cell1.setBorderColorTop(BaseColor.BLACK);
            cell1.setBorderWidthTop(1);
        }
        if (null != cellBorders.getLeft()) {
            cell1.setBorderColorLeft(BaseColor.BLACK);
            cell1.setBorderWidthLeft(1);
        }
        if (null != cellBorders.getRight()) {
            cell1.setBorderWidthRight(1);
            cell1.setBorderColorRight(BaseColor.BLACK);
        }
        return cell1;
    }

    /**
     * Loads the pictures embedded in a run.
     *
     * @param run the Word run
     * @return the pictures as iText images, in run order; empty when the run has none
     */
    private static List<Image> setPicture(XWPFRun run) {
        List<Image> images = new ArrayList<>();
        List<XWPFPicture> pictures = run.getEmbeddedPictures();
        if (pictures == null) {
            return images;
        }
        for (XWPFPicture pic : pictures) {
            try {
                images.add(getimage(pic.getPictureData()));
            } catch (BadElementException | IOException e) {
                throw new RuntimeException(e);
            }
        }
        return images;
    }

    /**
     * Builds the iText font for a run, using the "STSong-Light" CJK base font.
     * The font is bold when the run is bold or its font family is "黑体".
     *
     * @param run the Word run
     * @return the font to render the run's text with
     */
    private static Font setFont(XWPFRun run) {
        try {
            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            boolean bold = run.isBold() || "黑体".equals(run.getFontFamily());
            return new Font(bf, run.getFontSize(), bold ? Font.BOLD : Font.NORMAL, BaseColor.BLACK);
        } catch (DocumentException | IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates an iText image from the raw bytes of a Word picture.
     *
     * @param picdata the picture data stored in the Word document
     * @return the iText image
     * @throws BadElementException if iText cannot build an image from the data
     * @throws IOException         if the image bytes cannot be read
     */
    private static Image getimage(XWPFPictureData picdata) throws BadElementException, IOException {
        return Image.getInstance(picdata.getData());
    }
}