使用 POI 读取 Word 文档中的目录大纲并导入

在做一个导入 Word 文档的业务时,发现 POI 并不能很好地直接读取到文档中的目录及其层级,因此把下面这段代码记录下来。

java 复制代码
 /**
  * Imports the heading outline of an uploaded Word (.docx) document.
  *
  * <p>Every paragraph for which {@code getTitleLvl} yields a numeric level is
  * collected as a {@code ReadDto} (text + level). The headings are then given
  * a hierarchical number: level 0 fills {@code one}, level 1 fills
  * {@code one}/{@code two}, level 2 adds {@code thr} and level 3 adds
  * {@code fou}. The resulting list is printed to standard output.
  *
  * @param file the uploaded document
  * @return a success or failure response, chosen by {@code status}
  * @throws BusinessException if a heading has a level greater than 3
  *                           (i.e. the outline is more than four levels deep)
  * @throws RuntimeException  wrapping any {@link IOException} raised while
  *                           reading or closing the document
  */
 public ResponseResult importDirectory(MultipartFile file) {
        // try-with-resources: both the upload stream and the POI document hold
        // resources and must be closed on every exit path, including when a
        // BusinessException is thrown while numbering.
        try (InputStream inputStream = file.getInputStream();
             XWPFDocument xdoc = new XWPFDocument(inputStream)) {
            List<ReadDto> readDtos = collectOutlineHeadings(xdoc);
            assignOutlineNumbers(readDtos);
            System.out.println(readDtos); // dump of the parsed heading list
            // NOTE(review): `status` is not declared in this method; presumably it is a
            // field or the result of a persistence step omitted here - confirm.
            return status ? ResponseResult.success("操作成功") : ResponseResult.failure("操作失败");
        } catch (IOException e) {
            // The cause is preserved in the rethrown exception, so no printStackTrace().
            throw new RuntimeException(e);
        }
    }

    /**
     * Collects, in document order, the paragraphs that carry a numeric title level.
     *
     * <p>{@code getTitleLvl} may return a style ID instead of a level; when that
     * value is not an integer the paragraph is skipped rather than aborting the
     * whole import with a {@link NumberFormatException}.
     */
    private static List<ReadDto> collectOutlineHeadings(XWPFDocument xdoc) {
        List<ReadDto> readDtos = new ArrayList<>();
        for (XWPFParagraph paragraph : xdoc.getParagraphs()) {
            String titleLvl = getTitleLvl(xdoc, paragraph);
            if (!StringUtil.isNotEmpty(titleLvl)) {
                continue;
            }
            int level;
            try {
                level = Integer.parseInt(titleLvl);
            } catch (NumberFormatException ignored) {
                // Non-numeric value (a style ID such as "a3"): not usable as a level.
                continue;
            }
            // NOTE(review): Word appears to use outline level 9 for body text; such
            // paragraphs would be rejected as "too deep" below - confirm whether they
            // should be skipped here instead.
            ReadDto readDto = new ReadDto();
            readDto.setText(paragraph.getText());
            readDto.setTitleLevel(level);
            readDtos.add(readDto);
        }
        return readDtos;
    }

    /**
     * Assigns hierarchical numbers to the headings, in place.
     *
     * <p>The very first heading always becomes top-level entry 1, whatever its
     * level. After that a heading is numbered only if its level is at most one
     * deeper than the previously numbered heading; a heading that skips a level
     * is left without a number.
     *
     * @throws BusinessException if any heading has a level greater than 3
     */
    private static void assignOutlineNumbers(List<ReadDto> readDtos) {
        int zeroCount = 0;  // number of level-0 headings seen so far
        int oneCount = 0;   // level-1 headings under the current level-0 heading
        int twoCount = 0;   // level-2 headings under the current level-1 heading
        int threeCount = 0; // level-3 headings under the current level-2 heading
        int curPoint = 0;   // deepest level the next heading may have; 0 = nothing numbered yet

        for (ReadDto current : readDtos) {
            int curLevel = current.getTitleLevel();
            // Levels are 0-based, so 0..3 are the four supported layers.
            if (curLevel > 3) {
                throw new BusinessException("暂不支持目录层级超过4层!!!");
            }
            if (curPoint == 0) {
                // First heading: always opens the first top-level entry.
                zeroCount++;
                curPoint = 1;
                current.setOne(zeroCount);
                continue;
            }
            if (curLevel > curPoint) {
                // Heading skips a level relative to its predecessor: leave it unnumbered.
                continue;
            }
            switch (curLevel) {
                case 0:
                    zeroCount++;
                    oneCount = 0;
                    twoCount = 0;
                    threeCount = 0;
                    curPoint = 1;
                    current.setOne(zeroCount);
                    break;
                case 1:
                    oneCount++;
                    twoCount = 0;
                    curPoint = 2;
                    current.setOne(zeroCount);
                    current.setTwo(oneCount);
                    break;
                case 2:
                    twoCount++;
                    threeCount = 0;
                    curPoint = 3;
                    current.setOne(zeroCount);
                    current.setTwo(oneCount);
                    current.setThr(twoCount);
                    break;
                case 3:
                    // curPoint stays at 3: any level 0..3 may follow a level-3 heading.
                    threeCount++;
                    current.setOne(zeroCount);
                    current.setTwo(oneCount);
                    current.setThr(twoCount);
                    current.setFou(threeCount);
                    break;
                default:
                    // Negative levels are not numbered.
                    break;
            }
        }
    }

    /**
     * Resolves the title level of a paragraph.
     *
     * <p>The lookup tries, in order:
     * <ol>
     *   <li>the outline level set directly on the paragraph;</li>
     *   <li>the outline level of the paragraph's style;</li>
     *   <li>the outline level of that style's direct base style (only one
     *       level of inheritance is consulted);</li>
     *   <li>the paragraph's style ID itself, returned as-is.</li>
     * </ol>
     * Missing parts of the document model are handled with explicit null
     * checks instead of catching and discarding exceptions.
     *
     * @param doc  the document that owns the style table
     * @param para the paragraph to inspect
     * @return the outline level as a decimal string, or the paragraph's style
     *         ID when no outline level is found, or an empty string when the
     *         paragraph has neither; callers must be prepared for a
     *         non-numeric value in the style-ID case
     */
    private static String getTitleLvl(XWPFDocument doc, XWPFParagraph para) {
        if (para == null) {
            return "";
        }
        // 1. Outline level set directly on the paragraph.
        String direct = java.util.Optional.ofNullable(para.getCTP())
                .map(ctp -> ctp.getPPr())
                .map(pPr -> pPr.getOutlineLvl())
                .map(lvl -> String.valueOf(lvl.getVal()))
                .orElse(null);
        if (direct != null) {
            return direct;
        }
        // 2. Outline level defined by the paragraph's style.
        String styleId = para.getStyle();
        String fromStyle = outlineLvlOfStyle(doc, styleId);
        if (fromStyle != null) {
            return fromStyle;
        }
        // 3. Outline level defined by the style's base style.
        String fromBaseStyle = outlineLvlOfStyle(doc, baseStyleIdOf(doc, styleId));
        if (fromBaseStyle != null) {
            return fromBaseStyle;
        }
        // 4. Fall back to the raw style ID.
        String fallback = para.getStyleID();
        return fallback != null ? fallback : "";
    }

    /** Returns the outline level declared by the given style, or {@code null} if there is none. */
    private static String outlineLvlOfStyle(XWPFDocument doc, String styleId) {
        if (doc == null || styleId == null) {
            return null;
        }
        return java.util.Optional.ofNullable(doc.getStyles())
                .map(styles -> styles.getStyle(styleId))
                .map(style -> style.getCTStyle())
                .map(ctStyle -> ctStyle.getPPr())
                .map(pPr -> pPr.getOutlineLvl())
                .map(lvl -> String.valueOf(lvl.getVal()))
                .orElse(null);
    }

    /** Returns the ID of the style the given style is based on, or {@code null} if there is none. */
    private static String baseStyleIdOf(XWPFDocument doc, String styleId) {
        if (doc == null || styleId == null) {
            return null;
        }
        return java.util.Optional.ofNullable(doc.getStyles())
                .map(styles -> styles.getStyle(styleId))
                .map(style -> style.getCTStyle())
                .map(ctStyle -> ctStyle.getBasedOn())
                .map(basedOn -> basedOn.getVal())
                .orElse(null);
    }
相关推荐
缺点内向2 天前
C#: 告别繁琐!轻松移除Word文档中的文本与图片水印
c#·自动化·word·.net
徐小夕@趣谈前端2 天前
拒绝重复造轮子?我们偏偏花365天,用Vue3写了款AI协同的Word编辑器
人工智能·编辑器·word
kingwebo'sZone2 天前
C#使用Aspose.Words把 word转成图片
前端·c#·word
科技D人生2 天前
Vue.js 学习总结(20)—— Vue-Office 实战:word、pdf、excel、ppt 多种文档的在线预览
vue.js·word·vue-pdf·stylesheet·docx-preview·vue-office
weixin_416660073 天前
技术分析:豆包生成带公式文案导出Word乱码的底层机理
人工智能·word·豆包
骆驼爱记录3 天前
Word样式库不显示的8种修复方法
word·wps·新人首发
苍煜3 天前
超简单 poi-tl 学习博客:从0到1掌握Word生成(无需模板+模板填充)
学习·word
请为小H留灯3 天前
Word论文 封面、目录、页码设置步骤!(2026详细版教程)
毕业设计·word·论文格式
星尘库4 天前
在word中怎么把段落回车替换成空 删除空行
word
weixin_416660074 天前
AI 导出 Word 不正规?10 类文档样式模板(可直接套用,含字体/字号/行距/缩进)
人工智能·word·论文·排版·数学公式