使用 AsposeWord 向 word 中的文字添加标签

背景

最近在做套打服务,产品希望用户点击模板中由系统插入的占位符时,前端能够弹出属性设置页面。在线编辑服务使用的是 onlyoffice,本文不讨论 onlyoffice 如何感知被点击的是哪个标签,仅讨论该功能完成后,如何将历史模板升级为带标签(书签)的模板。

相关工具

在线编辑服务:onlyoffice 社区版 —— 免费开源。

文档处理工具:Aspose.Words for Java —— 需要购买,也有破解版。

单元测试

服务中插入的占位符是 ${xxx.xxx} 格式的,所以测试用例中根据此进行书签的添加。有需要的话可以根据自身条件编写不同的日志。

java 复制代码
    /**
     * Loads the legacy template from the classpath, wraps every placeholder in a
     * bookmark via {@code processDocument}, saves the upgraded document and prints
     * the generated bookmark-name to placeholder-text mapping.
     *
     * @throws Exception if the template cannot be loaded, processed or saved; the
     *                   exception is propagated so that the test actually fails
     *                   instead of being logged and silently passing
     */
    @Test
    public void testAddBookMark() throws Exception {
        try (InputStream resourceAsStream = DocumentTest.class.getClassLoader().getResourceAsStream("历史模板.docx")) {
            // getResourceAsStream returns null (rather than throwing) when the resource is missing.
            if (resourceAsStream == null) {
                throw new IllegalStateException("Classpath resource not found: 历史模板.docx");
            }

            Document doc = new Document(resourceAsStream);
            // Bookmark name -> placeholder text, kept in the order the placeholders were found.
            Map<String, String> bookmarkData = new LinkedHashMap<>();
            processDocument(doc, bookmarkData);

            // NOTE(review): absolute, machine-specific output path — adjust before running elsewhere.
            doc.save("E:\\project\\java\\ggfw-upgrade-tool-op\\upgrade-service\\src\\test\\resources\\历史模板升级后.docx");

            for (Map.Entry<String, String> entry : bookmarkData.entrySet()) {
                System.out.println("UUID: " + entry.getKey() + ", 标签文本: " + entry.getValue());
            }
        }
    }

    /**
     * Matches a placeholder of the form {@code ${...}}: a literal {@code ${}, then
     * as few characters as possible (line terminators excluded), up to the first
     * closing brace.
     */
    private static final Pattern TAG_PATTERN = Pattern.compile("\\$\\{.*?}");

    /**
     * Finds every placeholder matched by {@code TAG_PATTERN} in the concatenated
     * run text of the document and wraps each one in a uniquely named bookmark.
     *
     * @param doc          document to modify in place
     * @param bookmarkData receives one entry per bookmark created: generated
     *                     bookmark name mapped to the matched placeholder text
     * @throws Exception if the document model cannot be traversed or modified
     */
    private static void processDocument(Document doc, Map<String, String> bookmarkData) throws Exception {
        // Splitting a run keeps its text intact, so offsets computed against this
        // snapshot of the text remain valid while bookmarks are being inserted.
        String fullText = buildFullText(collectRunInfo(doc));

        Matcher matcher = TAG_PATTERN.matcher(fullText);
        while (matcher.find()) {
            // Applying a bookmark replaces the affected Run nodes with new ones, so the
            // run list must be rebuilt for every match; otherwise a later placeholder
            // that touches an already-split run would reference a detached node.
            List<RunInfo> currentRuns = collectRunInfo(doc);
            List<RunSegment> segments = locateSegments(currentRuns, matcher.start(), matcher.end());
            if (segments.isEmpty()) {
                continue;
            }

            String bookmarkName = UUID.randomUUID().toString();
            applyBookmarks(doc, segments, bookmarkName);
            bookmarkData.put(bookmarkName, matcher.group());
        }
    }

    /**
     * Collects every run of the document in document order together with the
     * offset of its text inside the concatenation of all run texts.
     *
     * <p>A single deep traversal for runs is used. Iterating all paragraphs deeply
     * and then all runs of each paragraph deeply would visit a run nested in an
     * inline container that itself holds paragraphs (text box, comment, footnote)
     * more than once, duplicating its text and shifting every later offset.
     *
     * @param doc document to scan
     * @return one {@code RunInfo} per run, ordered as the runs appear in the document
     * @throws Exception if the document model cannot be traversed
     */
    private static List<RunInfo> collectRunInfo(Document doc) throws Exception {
        List<RunInfo> runs = new ArrayList<>();
        int currentPosition = 0;

        for (Run run : (Iterable<Run>) doc.getChildNodes(NodeType.RUN, true)) {
            int length = run.getText().length();
            runs.add(new RunInfo(run, currentPosition, length));
            currentPosition += length;
        }
        return runs;
    }

    /**
     * Concatenates the text of the given runs, in list order, into one string.
     *
     * @param runs run descriptors whose run text is joined
     * @return the joined text of all runs
     */
    private static String buildFullText(List<RunInfo> runs) {
        final StringBuilder joined = new StringBuilder();
        runs.forEach(info -> joined.append(info.run.getText()));
        return joined.toString();
    }

    /**
     * Maps the global text range {@code [globalStart, globalEnd)} onto the runs it
     * covers, yielding for each affected run the local start/end offsets of the
     * covered part.
     *
     * @param allRuns     run descriptors carrying their global start offset and length
     * @param globalStart inclusive start offset in the concatenated text
     * @param globalEnd   exclusive end offset in the concatenated text
     * @return the covered part of each affected run, in the order of {@code allRuns}
     */
    private static List<RunSegment> locateSegments(List<RunInfo> allRuns, int globalStart, int globalEnd) {
        final List<RunSegment> hits = new ArrayList<>();
        int cursor = globalStart;               // next global offset still to be mapped
        int charsLeft = globalEnd - globalStart; // characters not yet assigned to a run

        for (int i = 0, count = allRuns.size(); i < count; i++) {
            final RunInfo info = allRuns.get(i);
            final int runBegin = info.globalStart;

            // Run ends at or before the cursor: nothing of the range lies in it.
            if (runBegin + info.length <= cursor) {
                continue;
            }
            // Run starts at or after the end of the range: no later run can overlap.
            if (runBegin >= globalEnd) {
                break;
            }

            final int from = cursor > runBegin ? cursor - runBegin : 0;
            final int to = Math.min(from + charsLeft, info.length);
            final int taken = to - from;

            hits.add(new RunSegment(info.run, from, to));

            charsLeft -= taken;
            cursor += taken;
            if (charsLeft <= 0) {
                break;
            }
        }
        return hits;
    }

    /**
     * Wraps the text described by {@code segments} in a bookmark: every affected
     * run is split so that the covered text sits in runs of its own, then a
     * bookmark start is placed directly before the first such run and a bookmark
     * end directly after the last one.
     *
     * @param doc          owner document for the new bookmark nodes
     * @param segments     covered part of each affected run, in document order
     * @param bookmarkName name given to both the bookmark start and end node
     * @throws Exception if the document model rejects the modification
     */
    private static void applyBookmarks(Document doc, List<RunSegment> segments, String bookmarkName) throws Exception {
        if (segments.isEmpty()) {
            return;
        }

        // Only the runs holding covered text end up here, so index 0 and the last
        // index are exactly the bookmark boundaries regardless of whether a run
        // had leading or trailing text outside the placeholder.
        List<Node> targetRuns = new ArrayList<>(segments.size());
        for (int i = 0; i < segments.size(); i++) {
            RunSegment seg = segments.get(i);
            splitRun(seg.run, seg.start, seg.end, targetRuns, i == 0);
        }

        BookmarkStart start = new BookmarkStart(doc, bookmarkName);
        BookmarkEnd end = new BookmarkEnd(doc, bookmarkName);

        Node targetStart = targetRuns.get(0);
        Node targetEnd = targetRuns.get(targetRuns.size() - 1);

        // Each boundary is inserted into the parent of its own run; the first and
        // last run are not guaranteed to share a parent node.
        targetStart.getParentNode().insertBefore(start, targetStart);
        targetEnd.getParentNode().insertAfter(end, targetEnd);
    }

    /**
     * Replaces {@code original} by up to three formatted clones: the text before
     * {@code start}, the text in {@code [start, end)} and the text from
     * {@code end} onwards. Empty leading/trailing pieces are not created.
     *
     * @param original run to split; it is removed from its parent
     * @param start    inclusive local offset of the covered text
     * @param end      exclusive local offset of the covered text
     * @param newNodes collector that receives only the clone holding the covered
     *                 text; leading/trailing clones are inserted into the document
     *                 but not added here
     * @param isFirst  whether this is the first run of the placeholder; retained
     *                 for signature compatibility, the leading piece is kept
     *                 whenever {@code start > 0} so that no text is ever dropped
     */
    private static void splitRun(Run original, int start, int end, List<Node> newNodes, boolean isFirst) {
        String text = original.getText();
        CompositeNode parent = original.getParentNode();
        if (parent == null) {
            throw new IllegalStateException("Run is no longer attached to the document: " + text);
        }

        // Pieces of this run only; nodes produced for earlier runs must not be re-inserted.
        List<Node> pieces = new ArrayList<>(3);

        // Leading piece
        if (start > 0) {
            Run before = (Run) original.deepClone(true);
            before.setText(text.substring(0, start));
            pieces.add(before);
        }

        // Covered piece
        Run target = (Run) original.deepClone(true);
        target.setText(text.substring(start, end));
        pieces.add(target);
        newNodes.add(target);

        // Trailing piece
        if (end < text.length()) {
            Run after = (Run) original.deepClone(true);
            after.setText(text.substring(end));
            pieces.add(after);
        }

        // Put the pieces where the original run was, then drop the original.
        for (Node piece : pieces) {
            parent.insertBefore(piece, original);
        }
        parent.removeChild(original);
    }

    // Helper classes

    /** A run together with the position of its text in the concatenated run text. */
    private static class RunInfo {
        /** The run node this entry describes. */
        final Run run;
        /** Offset of the run's first character in the concatenated text. */
        final int globalStart;
        /** Number of characters in the run's text. */
        final int length;

        RunInfo(Run run, int globalStart, int length) {
            this.run = run;
            this.globalStart = globalStart;
            this.length = length;
        }
    }

    /** The part of a run's text, given as local offsets, that belongs to a placeholder. */
    private static class RunSegment {
        /** The run that contains the covered text. */
        final Run run;
        /** Inclusive local start offset within the run's text. */
        final int start;
        /** Exclusive local end offset within the run's text. */
        final int end;

        RunSegment(Run run, int start, int end) {
            this.run = run;
            this.start = start;
            this.end = end;
        }
    }
相关推荐
Asthenia041229 分钟前
Elasticsearch分片与副本设置/拼写纠错原理/Linux下部署优化/安装依赖组件/服务器启动流程/Cluster与Node简述/数据库对比/映射
后端
Asthenia04121 小时前
ES:高量级数据聚合/ES数据类型/ES存储原理/ES怎么读文档/ES怎么删文档/为何脑裂/集群监控/如何调优
后端
kkk哥1 小时前
基于springboot的旅游网站(013)
java·spring boot·后端
Asthenia04121 小时前
ES:数据一致性/和Lucene关系/中文分词/最小主节点与脑裂/路由选定特定节点/更删文档过程
后端
Real_man1 小时前
JSON-RPC 详解
后端
Asthenia04121 小时前
ES:数据模型/搜索过程/Master选举细节/索引文档过程
后端
my11217169511 小时前
Spring Boot拦截器(Interceptor)与过滤器(Filter)深度解析:区别、实现与实战指南
java·spring boot·后端
xjz18422 小时前
Liquibase管理数据库变更脚本的最佳实践
后端
遥不可及3872 小时前
Spring Boot 配置文件切换的全面指南
后端
idMiFeng2 小时前
Go语言反射机制在数据库同步中的实战应用 —— 动态赋值与类型转换详解
数据库·后端·程序员