基于 Java 的 PostgreSQL 多层嵌套 JSON 字段判重

场景:把复杂的 CommonCondition 条件树以 JSON 形式存入 PostgreSQL,并要求:

  • 子条件顺序、

  • valueList 重复值、

  • 展示用字段(如别名)
    不影响 "相同业务逻辑"的判定。

    本文给出一条 Java → PostgreSQL 的端到端可复制方案。


1. 数据库层设计(PostgreSQL)

复制代码
-- One row per stored condition tree; signature is the application-computed dedup key.
CREATE TABLE report_condition (
    id        bigserial PRIMARY KEY,
    condition jsonb        NOT NULL,
    signature char(64)     NOT NULL,   -- length of a hex-encoded SHA-256 digest
    created_at timestamptz DEFAULT now()
);

-- Unique index on signature: a second row with the same signature is rejected at write time
CREATE UNIQUE INDEX uk_signature ON report_condition(signature);

触发器(可选):如果希望完全由 DB 计算签名,可用 plpython3u 编写触发器调用 Python 归一化脚本。

下文演示 Java 端计算签名后写入,逻辑更清晰。


2. Java 端核心实现

2.1 枚举:白名单字段规则

复制代码
package com.example.condition;

import java.util.List;

/**
 * Field whitelists used when normalizing a condition tree: only the JSON
 * properties named in a rule's list are kept before hashing.
 */
public enum ConditionNormalizeRule {
    /** Keeps every business-relevant property. */
    DEFAULT(List.of("table", "field", "fieldType", "type", "logicalOperator", "operator", "valueList", "commonConditions")),

    /**
     * Same as {@link #DEFAULT} but without {@code fieldType}.
     * {@code logicalOperator} is kept: without it an AND tree and an OR tree over
     * the same children would normalize to the same JSON and share one signature.
     */
    REPORT(List.of("table", "field", "type", "logicalOperator", "operator", "valueList", "commonConditions")),

    // NOTE(review): AUDIT also omits "logicalOperator", so AND/OR trees with identical
    // children collide under this rule as well. Left unchanged because no stated intent
    // is visible here — confirm whether this is deliberate.
    AUDIT(List.of("table", "field", "fieldType", "type", "operator", "valueList", "commonConditions"));

    private final List<String> whiteList;

    ConditionNormalizeRule(List<String> whiteList) {
        this.whiteList = whiteList;
    }

    /** @return the property names retained by this rule (immutable list) */
    public List<String> getWhiteList() {
        return whiteList;
    }
}

2.2 工具类 ConditionHashUtil

复制代码
/**
 * Computes a signature for a {@code CommonCondition} tree that is insensitive to
 * child order, duplicate {@code valueList} entries, and any property not named in
 * the chosen {@link ConditionNormalizeRule} whitelist.
 */
@Slf4j
public final class ConditionHashUtil {
    /**
     * Shared mapper, configured once here: null properties are skipped and
     * {@code Map} entries are serialized in key order.
     */
    private static final ObjectMapper MAPPER = new ObjectMapper()
            .setSerializationInclusion(JsonInclude.Include.NON_NULL)
            .configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true);

    /**
     * Computes the business-equivalence hash of a condition tree.
     *
     * @param root the condition tree to hash
     * @param rule decides which properties are kept during normalization
     * @return the SHA-256 digest of the normalized JSON, as a 64-character hex string
     * @throws IllegalStateException if normalization or serialization fails
     */
    public static String sha256(CommonCondition root, ConditionNormalizeRule rule) {
        try {
            JsonNode tree = normalize(MAPPER.valueToTree(root), rule);
            String jsonStr = MAPPER.writeValueAsString(tree);
            return DigestUtils.sha256Hex(jsonStr);
        } catch (Exception e) {
            throw new IllegalStateException("compute hash failed", e);
        }
    }

    /* ---------- private normalization logic ---------- */

    /**
     * Recursively rebuilds an object node keeping only whitelisted properties.
     * Non-object input (including null) is returned unchanged.
     */
    private static JsonNode normalize(JsonNode node, ConditionNormalizeRule rule) {
        if (node == null || !node.isObject()) {
            return node;
        }

        // Properties are inserted in sorted key order so the resulting ObjectNode
        // always serializes its fields in the same order.
        List<String> sortedKeys = rule.getWhiteList().stream()
                .filter(node::has)
                .sorted()
                .toList();

        ObjectNode normalized = MAPPER.createObjectNode();
        for (String key : sortedKeys) {
            JsonNode value = node.get(key);
            switch (key) {
                case "valueList" -> normalized.set(key, normalizeValueList(value));
                case "commonConditions" -> normalized.set(key, normalizeChildren(value, rule));
                default -> {
                    // Explicit JSON nulls are dropped rather than copied.
                    if (value != null && !value.isNull()) {
                        normalized.set(key, value);
                    }
                }
            }
        }
        return normalized;
    }

    /**
     * Converts the array elements to strings, drops nulls and duplicates, and sorts
     * the remainder in natural {@code String} order. Anything that is not a
     * non-empty array yields an empty array node.
     */
    private static JsonNode normalizeValueList(JsonNode listNode) {
        if (listNode == null || !listNode.isArray() || listNode.isEmpty()) {
            return MAPPER.createArrayNode();
        }
        List<String> values = MAPPER.convertValue(listNode, new TypeReference<List<String>>() {});
        List<String> canonical = values.stream()
                .filter(Objects::nonNull)
                .distinct()
                .sorted()
                .collect(Collectors.toList());
        return MAPPER.valueToTree(canonical);
    }

    /**
     * Normalizes every non-null child and orders the results by their serialized
     * JSON text. Anything that is not a non-empty array yields an empty array node.
     */
    private static JsonNode normalizeChildren(JsonNode childrenNode, ConditionNormalizeRule rule) {
        if (childrenNode == null || !childrenNode.isArray() || childrenNode.isEmpty()) {
            return MAPPER.createArrayNode();
        }

        // Serialize each child exactly once; a key-extracting comparator would
        // re-serialize whole subtrees on every comparison during the sort.
        List<SortableChild> keyed = new ArrayList<>();
        for (JsonNode child : childrenNode) {
            if (child == null || child.isNull()) {
                continue;
            }
            JsonNode normalizedChild = normalize(child, rule);
            keyed.add(new SortableChild(sortKeyOf(normalizedChild), normalizedChild));
        }
        keyed.sort(Comparator.comparing(SortableChild::sortKey));

        ArrayNode result = MAPPER.createArrayNode();
        for (SortableChild entry : keyed) {
            result.add(entry.node());
        }
        return result;
    }

    /** Serialized form used as the ordering key for sibling conditions. */
    private static String sortKeyOf(JsonNode node) {
        try {
            return MAPPER.writeValueAsString(node);
        } catch (Exception ignored) {
            // Best-effort fallback kept from the original implementation.
            return node.toString();
        }
    }

    /** A normalized child paired with its precomputed ordering key. */
    private record SortableChild(String sortKey, JsonNode node) {}

    private ConditionHashUtil() {}

}

3. 使用示例

3.1 构造两个"业务等价"的对象

复制代码
// Leaf: user.age GT {18, 20}; the repeated "18" is deliberate
CommonCondition ageLeafA = CommonCondition.builder()
        .type("base")
        .table("user")
        .field("age")
        .operator("GT")
        .valueList(List.of("18", "18", "20"))
        .build();

// Leaf: user.status IN {ACTIVE, LOCKED}
CommonCondition statusLeafA = CommonCondition.builder()
        .type("base")
        .table("user")
        .field("status")
        .operator("IN")
        .valueList(List.of("ACTIVE", "LOCKED"))
        .build();

// Root: logical AND over the two leaves, age first
CommonCondition condA = CommonCondition.builder()
        .type("logical")
        .logicalOperator("AND")
        .commonConditions(List.of(ageLeafA, statusLeafA))
        .build();

/* Same business meaning as condA: children swapped, values shuffled/duplicated, plus a display-only alias */

// Leaf: user.status IN {LOCKED, ACTIVE, ACTIVE} — duplicated and out of order
CommonCondition statusLeafB = CommonCondition.builder()
        .type("base")
        .table("user")
        .field("status")
        .operator("IN")
        .valueList(List.of("LOCKED", "ACTIVE", "ACTIVE"))
        .build();

// Leaf: user.age GT {20, 18} — out of order
CommonCondition ageLeafB = CommonCondition.builder()
        .type("base")
        .table("user")
        .field("age")
        .operator("GT")
        .valueList(List.of("20", "18"))
        .build();

// Root: logical AND, status first; tableNameAlias is display-only and not in any whitelist
CommonCondition condB = CommonCondition.builder()
        .type("logical")
        .logicalOperator("AND")
        .tableNameAlias("u")
        .commonConditions(List.of(statusLeafB, ageLeafB))
        .build();

3.2 计算哈希并判重

复制代码
// Hash both trees with the DEFAULT whitelist; the utility class is ConditionHashUtil.
String sigA = ConditionHashUtil.sha256(condA, ConditionNormalizeRule.DEFAULT);
String sigB = ConditionHashUtil.sha256(condB, ConditionNormalizeRule.DEFAULT);

System.out.println("sigA = " + sigA);
System.out.println("sigB = " + sigB);
System.out.println("same = " + sigA.equals(sigB));   // expected: true

3.3 插入 PostgreSQL

复制代码
// Insert the condition with its signature; a row with an existing signature is skipped.
String sql = """
    INSERT INTO report_condition(condition, signature)
    VALUES (?::jsonb, ?)
    ON CONFLICT (signature) DO NOTHING
    """;
try (PreparedStatement insertStmt = conn.prepareStatement(sql)) {
    String conditionJson = new ObjectMapper().writeValueAsString(condA);
    insertStmt.setString(1, conditionJson);   // bound as text, cast by ?::jsonb in the SQL
    insertStmt.setString(2, sigA);
    insertStmt.executeUpdate();
}

ON CONFLICT (signature) DO NOTHING 利用唯一索引实现幂等写入,天然判重。


4. 性能 & 扩展

| 子条件规模 | 耗时(MacBook M2) | 建议 |
| --- | --- | --- |
| < 100 | < 1 ms | 无需优化 |
| 100~1 000 | 1~8 ms | 缓存哈希 |
| > 1 000 | > 10 ms | 并行流 + 缓存 |
缓存示例(Lombok):

复制代码
// Computed on the first getHash() call and cached afterwards (Lombok lazy getter).
// Excluded from Jackson serialization: sha256 serializes this object, and a visible
// getHash() bean getter would be invoked during that serialization and recurse.
// NOTE(review): assumes the enclosing class is serialized via default getter detection — confirm.
@com.fasterxml.jackson.annotation.JsonIgnore
@Getter(lazy = true)
private final String hash = ConditionHashUtil.sha256(this, ConditionNormalizeRule.DEFAULT);

5. 小结

  1. 数据库 :JSONB + 唯一索引 signature,一行 SQL 完成判重。

  2. Java :ConditionHashUtil.sha256(root, rule) 统一出口,顺序、重复、别名全部抹平。

  3. 枚举 :新增业务场景只需再加一个 ConditionNormalizeRule 值,零侵入。