使用 POI 读取 Word 文档中的目录大纲并导入

在做一个导入 Word 的业务时,发现 POI 并不能很好地直接读取文档中的目录及其大纲级别,这里把可用的代码记录一下。

java 复制代码
 /**
     * Imports the heading outline of an uploaded Word (.docx) document.
     *
     * <p>Each paragraph for which {@code getTitleLvl} yields a non-negative integer is collected
     * as a {@code ReadDto} (text plus zero-based level). The collected headings are then given
     * their hierarchical position through {@code setOne} .. {@code setFou}: the running index of
     * the heading itself and of each of its ancestors.
     *
     * @param file the uploaded document
     * @return the success or failure response selected by {@code status}
     * @throws BusinessException if a heading is deeper than the four supported levels (0-3)
     * @throws RuntimeException  wrapping the {@link IOException} raised while reading the upload
     */
    public ResponseResult importDirectory(MultipartFile file) {
        // try-with-resources closes both the upload stream and the parsed document on every path.
        try (InputStream inputStream = file.getInputStream();
             XWPFDocument xdoc = new XWPFDocument(inputStream)) {
            List<ReadDto> readDtos = new ArrayList<>();
            for (XWPFParagraph paragraph : xdoc.getParagraphs()) {
                int level = parseTitleLevel(getTitleLvl(xdoc, paragraph));
                if (level < 0) {
                    // Not a heading: no level found, or the fallback style ID is not a number.
                    continue;
                }
                ReadDto readDto = new ReadDto();
                readDto.setText(paragraph.getText());
                readDto.setTitleLevel(level);
                readDtos.add(readDto);
            }

            assignNumbering(readDtos);

            System.out.println(readDtos); // collected headings with their numbering
            // NOTE(review): `status` is not declared anywhere in the visible code; it presumably
            // comes from a persistence step that was removed from this snippet - confirm.
            return status ? ResponseResult.success("操作成功") : ResponseResult.failure("操作失败");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Converts the string produced by {@code getTitleLvl} into a heading level.
     *
     * @return the parsed level, or {@code -1} when the value is missing, not an integer
     *         (e.g. a textual style ID) or negative
     */
    private static int parseTitleLevel(String titleLvl) {
        if (titleLvl == null || titleLvl.isEmpty()) {
            return -1;
        }
        try {
            int level = Integer.parseInt(titleLvl);
            return level < 0 ? -1 : level;
        } catch (NumberFormatException ignored) {
            // A non-numeric style ID simply means the paragraph is not a heading.
            return -1;
        }
    }

    /**
     * Assigns hierarchical numbering to the headings, in document order.
     *
     * <p>The first heading always opens the first top-level entry, whatever its own level is.
     * After that a heading may be at most one level deeper than the previously numbered one;
     * a heading that skips a level is left unnumbered.
     *
     * @throws BusinessException if any heading level is greater than 3
     */
    private static void assignNumbering(List<ReadDto> readDtos) {
        final int deepestLevel = 3;
        int[] counters = new int[deepestLevel + 1];
        int allowedLevel = -1; // -1 until the first heading has been numbered

        for (ReadDto dto : readDtos) {
            int level = dto.getTitleLevel();
            // Levels are zero-based, so level 3 is already the fourth layer.
            if (level > deepestLevel) {
                throw new BusinessException("暂不支持目录层级超过4层!!!");
            }
            if (allowedLevel < 0) {
                level = 0;
            } else if (level > allowedLevel) {
                continue;
            }

            counters[level]++;
            // A new entry at this level restarts the numbering of everything beneath it.
            for (int deeper = level + 1; deeper <= deepestLevel; deeper++) {
                counters[deeper] = 0;
            }
            allowedLevel = Math.min(level + 1, deepestLevel);

            dto.setOne(counters[0]);
            if (level >= 1) {
                dto.setTwo(counters[1]);
            }
            if (level >= 2) {
                dto.setThr(counters[2]);
            }
            if (level >= 3) {
                dto.setFou(counters[3]);
            }
        }
    }

    /**
     * Resolves the outline (heading) level of a paragraph as a string.
     *
     * <p>Lookup order:
     * <ol>
     *   <li>the outline level set directly on the paragraph;</li>
     *   <li>the outline level of the paragraph's style, then of each style it is based on,
     *       following the {@code basedOn} chain until a level is found or the chain ends;</li>
     *   <li>the paragraph's style ID, returned as-is (it is not necessarily numeric).</li>
     * </ol>
     *
     * @param doc  the document that owns the style table
     * @param para the paragraph to inspect
     * @return the outline level, the style ID as a fallback, or an empty string if the paragraph
     *         has neither
     */
    private static String getTitleLvl(XWPFDocument doc, XWPFParagraph para) {
        // 1. Outline level set directly on the paragraph.
        if (para.getCTP().getPPr() != null
                && para.getCTP().getPPr().getOutlineLvl() != null
                && para.getCTP().getPPr().getOutlineLvl().getVal() != null) {
            return String.valueOf(para.getCTP().getPPr().getOutlineLvl().getVal());
        }

        // 2. Outline level inherited from the paragraph's style or one of its base styles.
        //    `visited` guards against a malformed document whose basedOn chain loops.
        List<String> visited = new ArrayList<>();
        String styleId = para.getStyle();
        while (styleExists(doc, styleId) && !visited.contains(styleId)) {
            visited.add(styleId);
            String inherited = outlineLvlOfStyle(doc, styleId);
            if (inherited != null) {
                return inherited;
            }
            styleId = baseStyleIdOf(doc, styleId);
        }

        // 3. Fall back to the raw style ID.
        String fallback = para.getStyleID();
        return fallback != null ? fallback : "";
    }

    /** Returns whether the document has a style table containing a style with the given ID. */
    private static boolean styleExists(XWPFDocument doc, String styleId) {
        return styleId != null
                && doc.getStyles() != null
                && doc.getStyles().getStyle(styleId) != null
                && doc.getStyles().getStyle(styleId).getCTStyle() != null;
    }

    /** Returns the outline level declared by an existing style, or {@code null} if it has none. */
    private static String outlineLvlOfStyle(XWPFDocument doc, String styleId) {
        if (doc.getStyles().getStyle(styleId).getCTStyle().getPPr() == null
                || doc.getStyles().getStyle(styleId).getCTStyle().getPPr().getOutlineLvl() == null
                || doc.getStyles().getStyle(styleId).getCTStyle().getPPr().getOutlineLvl().getVal() == null) {
            return null;
        }
        return String.valueOf(
                doc.getStyles().getStyle(styleId).getCTStyle().getPPr().getOutlineLvl().getVal());
    }

    /** Returns the ID of the style an existing style is based on, or {@code null} if it has none. */
    private static String baseStyleIdOf(XWPFDocument doc, String styleId) {
        if (doc.getStyles().getStyle(styleId).getCTStyle().getBasedOn() == null) {
            return null;
        }
        return doc.getStyles().getStyle(styleId).getCTStyle().getBasedOn().getVal();
    }
相关推荐
Ιτ-ωoгκεг19 小时前
在 Java 中使用 Apache POI 为 Word 文档添加水印
java·word·apache·poi·水印
LiQiang3319 小时前
WPS Word中英文混杂空格和行间距不一致调整方案
word·wps
HACKNOE2 天前
python从入门到精通(二十六):python文件操作之Word全攻略(基于python-docx)
python·c#·word
inxunoffice2 天前
删除或替换 Word 中的首页、尾页以及其它指定范围的页
word
inxunoffice2 天前
批量在 Word 的指定位置插入页,如插入封面、末尾插入页面
word
勘察加熊人2 天前
python将pdf转换成word
python·pdf·word
LongtengGensSupreme3 天前
Net8 Spire最新版去水印,去页数限制,转word/pptx/ofd等
word
万物得其道者成3 天前
Vue 3 实现 HTML 内容预览功能:仿 Word 样式与内存安全实践
vue.js·html·word
懂点技术的abin3 天前
使用Word时无法粘贴,弹出错误提示:运行时错误‘53‘:文件未找到:MathPage.WLL
word
inxunoffice3 天前
批量将 Word 拆分成多个文件
word