使用poi+itextpdf把word转成pdf

使用 Apache POI 和 iTextPDF 将 Word 转换为 PDF 需要分两步操作:先用 POI 读取 Word 内容,再用 iText 生成 PDF。

apache poi官方文档:Apache POI™ - Javadocs

以下是详细的代码实现示例:


环境准备

在 pom.xml 中添加依赖(注意:下面用到的 ${poi.version} 需要先在 <properties> 中定义,指定所使用的 POI 版本):

复制代码
        <!-- Apache POI OOXML module; the sample classes read .docx content through org.apache.poi.xwpf. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <!-- NOTE(review): the sample code only imports org.apache.poi.xwpf classes; poi-scratchpad looks unused here. Confirm before removing. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>${poi.version}</version>
        </dependency>

        <!-- Apache POI core; must use the same ${poi.version} as the other POI modules. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>${poi.version}</version>
        </dependency>
        <!-- NOTE(review): html2pdf presumably targets iText 7, while the sample code only imports iText 5 packages (com.itextpdf.text.*); it appears unused. Confirm. -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>html2pdf</artifactId>
            <version>3.0.2</version>
        </dependency>
        <!-- iText 5 core; supplies the com.itextpdf.text.* classes used to write the PDF. -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.6</version>
        </dependency>
        <!-- Presumably provides the CJK font resources behind the "STSong-Light" / "UniGB-UCS2-H" font requested in setFont. TODO confirm compatibility with itextpdf 5.5.6. -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext-asian</artifactId>
            <version>5.2.0</version>
        </dependency>

完整代码示例

1.入口类
复制代码
package org.example;


import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.*;
import java.util.List;

/**
 * Entry point that converts a .docx file to PDF: Apache POI reads the Word body
 * elements and iText writes them into a PDF document.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class Main {
    /**
     * Reads the sample .docx file and renders its body elements into the sample PDF.
     *
     * @param args unused
     * @throws IOException if the Word file cannot be read or the PDF output cannot be created
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources releases the stream and the POI document even when conversion fails.
        try (FileInputStream fis = new FileInputStream("D:\\testaa\\11.docx");
             XWPFDocument doc = new XWPFDocument(fis)) {
            List<IBodyElement> bodyElementList = doc.getBodyElements();
            Document pdfDoc = createPdf("D:\\testaa\\22.pdf");
            if (pdfDoc == null) {
                // createPdf reports failure by returning null; fail with a clear message instead of an NPE later.
                throw new IOException("Unable to create PDF output: D:\\testaa\\22.pdf");
            }
            try {
                BodyElementUtil.createBody(bodyElementList, pdfDoc);
            } catch (DocumentException e) {
                throw new RuntimeException(e);
            } finally {
                closeIfStillOpen(pdfDoc);
            }
        }
    }

    /**
     * Creates the target PDF file (including missing parent directories) and returns an
     * opened iText document that writes to it.
     *
     * @param pdfFilePath path of the PDF file to write; an existing file is overwritten
     * @return the opened document, or {@code null} if the file or writer could not be created
     *         (the cause is printed to standard error)
     */
    public static Document createPdf(String pdfFilePath) {
        FileOutputStream out = null;
        try {
            File pdfFile = new File(pdfFilePath);
            // getParentFile() is null for a bare file name; only create directories when there is a parent.
            File parentDir = pdfFile.getParentFile();
            if (parentDir != null && !parentDir.exists()) {
                parentDir.mkdirs();
            }
            Document pdfDoc = new Document();
            // FileOutputStream creates the file itself, so no separate createNewFile() call is needed.
            out = new FileOutputStream(pdfFile);
            PdfWriter.getInstance(pdfDoc, out);
            pdfDoc.open();
            return pdfDoc;
        } catch (Exception e) {
            e.printStackTrace();
            if (out != null) {
                // The writer never took over the stream successfully; release the file handle here.
                try {
                    out.close();
                } catch (IOException closeError) {
                    closeError.printStackTrace();
                }
            }
        }
        return null;
    }

    /**
     * Closes the PDF document if the conversion left it open (i.e. it failed part-way),
     * so the output file handle is released.
     *
     * @param pdfDoc the document to close; must not be {@code null}
     */
    private static void closeIfStillOpen(Document pdfDoc) {
        if (!pdfDoc.isOpen()) {
            return;
        }
        try {
            pdfDoc.close();
        } catch (RuntimeException ignored) {
            // Closing a half-written document can itself fail (e.g. nothing was added yet);
            // the exception that interrupted the conversion is the one worth propagating.
        }
    }
}
2.处理body元素的工具类
复制代码
package org.example;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import org.apache.poi.xwpf.usermodel.IBodyElement;

import org.apache.poi.xwpf.usermodel.XWPFTable;

import java.util.List;

/**
 * Utility that walks the body elements of a Word document and writes them to an iText PDF document.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class BodyElementUtil {

    /**
     * Converts each body element to its iText counterpart, appends it to the PDF in
     * document order, and closes the PDF document when all elements have been written.
     *
     * @param bodyElementList body elements of the Word document, in document order
     * @param pdfDoc          an opened iText document that receives the content; closed on success
     * @throws DocumentException if iText rejects an element
     */
    public static void createBody(List<IBodyElement> bodyElementList, Document pdfDoc) throws DocumentException {
        for (IBodyElement bodyElement : bodyElementList) {
            System.out.println(bodyElement.getElementType().name());
            switch (bodyElement.getElementType()) {
                case PARAGRAPH:
                    for (Paragraph paragraph : ParagraphUtil.createParagraph(bodyElement)) {
                        // Skip entries the paragraph converter could not build rather than failing in add().
                        if (paragraph != null) {
                            pdfDoc.add(paragraph);
                        }
                    }
                    break;
                case TABLE:
                    // Convert only the table this element represents. Asking the enclosing body for
                    // all of its tables here would re-render every table once per table element.
                    if (bodyElement instanceof XWPFTable) {
                        PdfPTable pdfTable = TableUtil.createTable((XWPFTable) bodyElement);
                        // Borderless single-column wrapper, kept so the table is laid out as before.
                        PdfPTable wrapper = new PdfPTable(1);
                        PdfPCell cell = new PdfPCell(pdfTable);
                        cell.setBorder(0);
                        wrapper.addCell(cell);
                        pdfDoc.add(wrapper);
                    }
                    break;
                default:
                    // Other element types (e.g. content controls) are not converted.
                    break;
            }
        }
        pdfDoc.close();
        System.out.println("结束:");
    }


}
3.处理段落:读取 Word 中的文字大小和字体类型,并转换成 PDF 中对应的字体和文字大小
复制代码
package org.example;

import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfPCell;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcBorders;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Converts Word paragraphs (Apache POI XWPF) into iText paragraphs, carrying over
 * run text, bold/font-size information, embedded pictures and horizontal alignment.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class ParagraphUtil {

    /** Line spacing applied to a paragraph, as a multiple of its largest font size. */
    private static final float LEADING_FACTOR = 1.5f;

    /**
     * Converts a body element into iText paragraphs.
     *
     * <p>When the element is itself a paragraph, only that paragraph is converted, so calling
     * this once per body element does not duplicate content. For any other element type the
     * paragraphs of the element's enclosing body are converted.
     *
     * @param bodyElement the Word body element to convert
     * @return the converted paragraphs; never contains {@code null}
     */
    public static List<Paragraph> createParagraph(IBodyElement bodyElement) {
        System.out.println("创建段落");
        List<XWPFParagraph> paragraphs = new ArrayList<>();
        if (bodyElement instanceof XWPFParagraph) {
            paragraphs.add((XWPFParagraph) bodyElement);
        } else {
            paragraphs.addAll(bodyElement.getBody().getParagraphs());
        }

        List<Paragraph> paragraphsList = new ArrayList<>(paragraphs.size());
        for (XWPFParagraph paragraph : paragraphs) {
            System.out.println("段落内容:" + paragraph.getText());
            System.out.println("part:" + paragraph.getPart().getPackagePart().getContentType());
            System.out.println("part name:" + paragraph.getPart().getPackagePart().getPartName());
            ParagraphAlignment alignment = paragraph.getAlignment();
            System.out.println("段落对齐方式 name:" + alignment.name());
            System.out.println("段落对齐方式 value:" + alignment.getValue());
            System.out.println("style" + paragraph.getStyle());
            System.out.println("文字对齐方式" + paragraph.getFontAlignment());

            paragraphsList.add(getPhase(paragraph));
        }
        return paragraphsList;

    }

    /**
     * Builds one iText paragraph from a Word paragraph: pictures and text of every run are
     * appended in run order, then the Word alignment is applied.
     *
     * @param paragraph the Word paragraph to convert
     * @return the iText paragraph; empty (never {@code null}) when the paragraph has no content
     */
    private static Paragraph getPhase(XWPFParagraph paragraph) {
        Paragraph pa = new Paragraph();
        float maxFontSize = 0f;
        for (XWPFRun run : paragraph.getRuns()) {
            if (null == run) {
                continue;
            }
            for (Image image : setPicture(run)) {
                pa.add(image);
            }
            String text = run.getText(0);
            if (null == text) {
                // A run without text (e.g. picture-only) must not discard the rest of the paragraph.
                continue;
            }
            Font font = setFont(run);
            maxFontSize = Math.max(maxFontSize, font.getCalculatedSize());
            pa.add(new Chunk(text, font));
        }

        // A paragraph created empty keeps its default leading regardless of the fonts added later,
        // so raise it when the text is large enough that lines would otherwise overlap.
        float neededLeading = maxFontSize * LEADING_FACTOR;
        float currentLeading = pa.getLeading();
        if (maxFontSize > 0f && (Float.isNaN(currentLeading) || neededLeading > currentLeading)) {
            pa.setLeading(neededLeading);
        }

        ParagraphAlignment alignment = paragraph.getAlignment();
        if (alignment.getValue() == ParagraphAlignment.CENTER.getValue()) {
            pa.setAlignment(Element.ALIGN_CENTER);
        } else if (alignment.getValue() == ParagraphAlignment.RIGHT.getValue()) {
            pa.setAlignment(Element.ALIGN_RIGHT);
        } else if (alignment.getValue() == ParagraphAlignment.LEFT.getValue()) {
            pa.setAlignment(Element.ALIGN_LEFT);
        }

        return pa;
    }


    /**
     * Converts the pictures embedded in a run into iText images.
     *
     * @param run the Word run to inspect
     * @return the images in the order they appear in the run; empty when the run has no pictures
     */
    private static List<Image> setPicture(XWPFRun run) {
        List<Image> imagelist = new ArrayList<>();
        List<XWPFPicture> pictures = run.getEmbeddedPictures();
        if (null == pictures) {
            return imagelist;
        }
        for (XWPFPicture pic : pictures) {
            try {
                imagelist.add(getimage(pic.getPictureData()));
            } catch (BadElementException | IOException e) {
                throw new RuntimeException(e);
            }
        }
        return imagelist;
    }

    /**
     * Creates the iText font for a run: a CJK-capable base font at the run's font size,
     * bold when the run is bold or uses the "黑体" (SimHei) font family.
     *
     * @param run the Word run whose formatting is read
     * @return the font to use for the run's text
     */
    private static Font setFont(XWPFRun run) {
        try {
            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            boolean bold = run.isBold() || "黑体".equals(run.getFontFamily());
            return new Font(bf, run.getFontSize(), bold ? Font.BOLD : Font.NORMAL, BaseColor.BLACK);
        } catch (DocumentException | IOException e) {
            throw new RuntimeException(e);
        }

    }

    /**
     * Creates an iText image from the raw bytes of a Word picture.
     *
     * @param picdata the picture data stored in the Word document
     * @return the iText image
     * @throws BadElementException if iText cannot build an image from the data
     * @throws IOException         if the image bytes cannot be read
     */
    private static Image getimage(XWPFPictureData picdata) throws BadElementException, IOException {
        return Image.getInstance(picdata.getData());
    }

}

4.处理表格:包含行合并、列合并,以及表格内图片的处理

复制代码
package org.example;

import com.itextpdf.text.*;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcBorders;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Converts a Word table (Apache POI XWPF) into an iText table, carrying over cell text,
 * embedded pictures, paragraph alignment and explicitly declared cell borders.
 *
 * @author lyl
 * @version v1.0
 * @since 2025/4/11
 */
public class TableUtil {

    /** Line spacing applied to a cell paragraph, as a multiple of its largest font size. */
    private static final float LEADING_FACTOR = 1.5f;

    /**
     * Converts a Word table into a single-column iText table in which every Word row is
     * rendered as its own nested table, so rows may have different cell counts.
     *
     * @param doctable the Word table to convert
     * @return the iText table; contains no rows when the Word table has none
     */
    public static PdfPTable createTable(XWPFTable doctable) {
        PdfPTable allTable = new PdfPTable(1);
        List<XWPFTableRow> rows = doctable.getRows();
        if (rows.isEmpty()) {
            return allTable;
        }
        // Column count of the first row, used as the span for borderless rows.
        int cols = rows.get(0).getTableCells().size();

        for (XWPFTableRow row : rows) {
            // A list keeps the cells in their original left-to-right order and never drops one.
            List<PdfPCell> cells = new ArrayList<>();
            int noBorder = 0;
            for (XWPFTableCell cell : row.getTableCells()) {
                CTTcPr cellPr = cell.getCTTc().getTcPr();
                CTTcBorders cellBorders = (null != cellPr) ? cellPr.getTcBorders() : null;

                PdfPCell pdfCell = getPhase(cell);
                if (cellBorders != null) {
                    pdfCell = setBorder(pdfCell, cellBorders);
                } else {
                    pdfCell.setBorder(0);
                    noBorder++;
                }
                cells.add(pdfCell);
            }
            if (cells.isEmpty()) {
                // A table needs at least one column; a row without cells has nothing to render.
                continue;
            }

            PdfPTable ptable = new PdfPTable(cells.size());
            // When no cell of the row declares borders, every cell is asked to span the first row's
            // column count. NOTE(review): this keeps the existing behaviour, in which all cells (not
            // only the first one) receive the span — confirm whether merging just the first cell was intended.
            boolean borderlessRow = noBorder == cells.size();
            for (PdfPCell cell : cells) {
                if (borderlessRow) {
                    cell.setColspan(cols);
                }
                ptable.addCell(cell);
            }

            PdfPCell lastCell = new PdfPCell(ptable);
            lastCell.setBorder(0);
            allTable.addCell(lastCell);
        }
        return allTable;
    }


    /**
     * Converts the content of a Word table cell into an iText cell. The text runs of one Word
     * paragraph are collected into one aligned iText paragraph; pictures are added as separate
     * elements at the position where they occur.
     *
     * @param cell the Word table cell to convert
     * @return the iText cell; never {@code null}
     */
    private static PdfPCell getPhase(XWPFTableCell cell) {
        PdfPCell cell1 = new PdfPCell();
        for (XWPFParagraph paragraph : cell.getParagraphs()) {
            ParagraphAlignment alignment = paragraph.getAlignment();
            Paragraph pending = new Paragraph();
            float maxFontSize = 0f;

            for (XWPFRun run : paragraph.getRuns()) {
                if (null == run) {
                    continue;
                }
                List<Image> images = setPicture(run);
                if (!images.isEmpty()) {
                    // Emit the text collected so far first, so pictures keep their place in the flow.
                    addParagraph(cell1, pending, alignment, maxFontSize);
                    pending = new Paragraph();
                    maxFontSize = 0f;
                    for (Image image : images) {
                        cell1.addElement(image);
                    }
                    continue;
                }
                String text = run.getText(0);
                if (null == text) {
                    continue;
                }
                Font font = setFont(run);
                maxFontSize = Math.max(maxFontSize, font.getCalculatedSize());
                pending.add(new Chunk(text, font));
            }
            addParagraph(cell1, pending, alignment, maxFontSize);
        }
        return cell1;
    }

    /**
     * Applies alignment and line spacing to a collected paragraph and adds it to the cell.
     * Empty paragraphs are skipped.
     *
     * @param target      the cell that receives the paragraph
     * @param paragraph   the paragraph holding the collected run text
     * @param alignment   the alignment of the source Word paragraph
     * @param maxFontSize the largest font size used in the paragraph, in points
     */
    private static void addParagraph(PdfPCell target, Paragraph paragraph,
                                     ParagraphAlignment alignment, float maxFontSize) {
        if (paragraph.isEmpty()) {
            return;
        }
        if (alignment.getValue() == ParagraphAlignment.CENTER.getValue()) {
            paragraph.setAlignment(Element.ALIGN_CENTER);
        } else if (alignment.getValue() == ParagraphAlignment.RIGHT.getValue()) {
            paragraph.setAlignment(Element.ALIGN_RIGHT);
        } else if (alignment.getValue() == ParagraphAlignment.LEFT.getValue()) {
            paragraph.setAlignment(Element.ALIGN_LEFT);
        }
        // Size the line spacing from the text itself so large fonts do not overlap.
        float neededLeading = maxFontSize * LEADING_FACTOR;
        float currentLeading = paragraph.getLeading();
        if (maxFontSize > 0f && (Float.isNaN(currentLeading) || neededLeading > currentLeading)) {
            paragraph.setLeading(neededLeading);
        }
        target.addElement(paragraph);
    }


    /**
     * Draws a 1pt black border on every side of the cell for which the Word cell declares a border.
     *
     * @param cell1       the iText cell to style; a new cell is created when {@code null}
     * @param cellBorders the border declaration of the Word cell; may be {@code null}
     * @return the styled cell (borderless when {@code cellBorders} is {@code null})
     */
    private static PdfPCell setBorder(PdfPCell cell1, CTTcBorders cellBorders) {
        if (null == cell1) {
            cell1 = new PdfPCell();
        }
        // Start borderless, then switch on only the declared sides.
        cell1.setBorder(0);
        if (null == cellBorders) {
            return cell1;
        }

        if (null != cellBorders.getBottom()) {
            cell1.setBorderColorBottom(BaseColor.BLACK);
            cell1.setBorderWidthBottom(1);
        }
        if (null != cellBorders.getTop()) {
            cell1.setBorderColorTop(BaseColor.BLACK);
            cell1.setBorderWidthTop(1);
        }
        if (null != cellBorders.getLeft()) {
            cell1.setBorderColorLeft(BaseColor.BLACK);
            cell1.setBorderWidthLeft(1);
        }
        if (null != cellBorders.getRight()) {
            cell1.setBorderWidthRight(1);
            cell1.setBorderColorRight(BaseColor.BLACK);
        }
        return cell1;
    }

    /**
     * Converts the pictures embedded in a run into iText images.
     *
     * @param run the Word run to inspect
     * @return the images in the order they appear in the run; empty when the run has no pictures
     */
    private static List<Image> setPicture(XWPFRun run) {
        List<Image> images = new ArrayList<>();
        List<XWPFPicture> pictures = run.getEmbeddedPictures();
        if (null == pictures) {
            return images;
        }
        for (XWPFPicture pic : pictures) {
            try {
                images.add(getimage(pic.getPictureData()));
            } catch (BadElementException | IOException e) {
                throw new RuntimeException(e);
            }
        }
        return images;
    }

    /**
     * Creates the iText font for a run: a CJK-capable base font at the run's font size,
     * bold when the run is bold or uses the "黑体" (SimHei) font family.
     *
     * @param run the Word run whose formatting is read
     * @return the font to use for the run's text
     */
    private static Font setFont(XWPFRun run) {
        try {
            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            boolean bold = run.isBold() || "黑体".equals(run.getFontFamily());
            return new Font(bf, run.getFontSize(), bold ? Font.BOLD : Font.NORMAL, BaseColor.BLACK);
        } catch (DocumentException | IOException e) {
            throw new RuntimeException(e);
        }

    }

    /**
     * Creates an iText image from the raw bytes of a Word picture.
     *
     * @param picdata the picture data stored in the Word document
     * @return the iText image
     * @throws BadElementException if iText cannot build an image from the data
     * @throws IOException         if the image bytes cannot be read
     */
    private static Image getimage(XWPFPictureData picdata) throws BadElementException, IOException {
        return Image.getInstance(picdata.getData());
    }
}
相关推荐
java_强哥9 小时前
springboot+tabula解析pdf中的表格数据
spring boot·pdf·解析pdf表格
HarrisHaword10 小时前
JAVA 导出 word
java·开发语言·word
zooKevin11 小时前
前端实现docx格式word文件预览,可以兼容原生、vue2、以及uni-app 项目,详细步骤。
前端·uni-app·word·docx
我最厉害。,。13 小时前
XSS 跨站&SVG&PDF&Flash&MXSS&UXSS&配合上传&文件添加脚本
android·pdf·xss
java_强哥18 小时前
SpringBoot集成阿里云文档格式转换实现pdf转换word,excel
spring boot·阿里云·pdf
yyywxk1 天前
Word / WPS 页面顶部标题 段前间距 失效 / 不起作用 / 不显示,标题紧贴页眉 问题及解决
word·wps
段ヤシ.1 天前
.pdf,.docx,.doc文档在一页纸上显示4页内容(详细步骤)
pdf·文档·一张纸显示4页内容
hello_simon1 天前
免费在线文档工具,在线PDF添加空白页,免费在任意位置插入空白页,多样化的文件处理
pdf·html·excel·pdf转html·excel转pdf格式
_Hannibal_2 天前
通过百度OCR在线API识别带水印扫描图片文字
pdf·ocr