【MongoDB的RLE压缩数据存储】

MongoDB的RLE压缩数据存储

1. RLE(Run-Length Encoding)算法详解

1.1 RLE基本概念

RLE是一种简单的无损数据压缩算法,特别适用于处理连续重复数据。其核心思想是将连续的重复数据值序列替换为一个值和重复次数。

1.2 RLE编码原理

原始数据: [1,1,1,1,1,2,2,3,3,3,3]

RLE编码: [(1,5), (2,2), (3,4)]

1.3 RLE优势与适用场景

优势: 算法简单、压缩效率高(针对重复数据)、解压快速

适用场景: 图像处理、日志数据、传感器数据等包含大量连续重复值的场景

  1. MongoDB与Spring Data MongoDB配置
    2.1 添加依赖
    xml
java 复制代码
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-mongodb</artifactId>
</dependency>

2.2 配置文件

yaml

java 复制代码
spring:
  data:
    mongodb:
      uri: mongodb://localhost:27017/record_db
      database: record_db
  1. 完整项目实现
    3.1 数据库文档设计
    3.1.1 RLE编码文档结构
    json
java 复制代码
{
  "_id": ObjectId("..."),
  "type": "q",
  "version": "123",
  "createTime": ISODate("2024-01-01T00:00:00Z"),
  "compressed": true,
  "rleValues": [
    {"value": 1.0, "count": 100},
    {"value": 2.0, "count": 50}
  ],
  "originalSize": 150,
  "lastUpdateTime": ISODate("2024-01-01T01:00:00Z")
}

3.1.2 未压缩文档结构(小数据量)

json

java 复制代码
{
  "_id": ObjectId("..."),
  "type": "q", 
  "version": "124",
  "createTime": ISODate("2024-01-01T00:00:00Z"),
  "compressed": false,
  "values": [1.23, 4.21, 5.90],
  "lastUpdateTime": ISODate("2024-01-01T01:00:00Z")
}

3.2 实体类设计

3.2.1 RLE条目类

java

java 复制代码
package com.example.entity;

import org.springframework.data.annotation.PersistenceCreator;
import org.springframework.data.mongodb.core.mapping.Field;

public class RLEEntry {
    @Field("value")
    private float value;
    
    @Field("count")
    private int count;
    
    public RLEEntry(float value, int count) {
        this.value = value;
        this.count = count;
    }
    
    @PersistenceCreator
    public RLEEntry() {}
    
    // Getters and Setters
    public float getValue() { return value; }
    public void setValue(float value) { this.value = value; }
    public int getCount() { return count; }
    public void setCount(int count) { this.count = count; }
}

3.2.2 主文档实体类

java

java 复制代码
package com.example.entity;

import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.index.CompoundIndex;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;

import java.util.Date;
import java.util.List;

@Document(collection = "records")
@CompoundIndex(name = "type_version_idx", def = "{'type': 1, 'version': 1}", unique = true)
public class RecordDocument {
    @Id
    private String id;
    
    private String type;
    private String version;
    
    @Field("createTime")
    private Date createTime;
    
    @Field("lastUpdateTime")
    private Date lastUpdateTime;
    
    @Field("compressed")
    private boolean compressed;
    
    @Field("values")
    private List<Float> values;
    
    @Field("rleValues")
    private List<RLEEntry> rleValues;
    
    @Field("originalSize")
    private Integer originalSize;
    
    // Constructors
    public RecordDocument() {
        this.createTime = new Date();
        this.lastUpdateTime = new Date();
    }
    
    public RecordDocument(String type, String version, List<Float> values) {
        this();
        this.type = type;
        this.version = version;
        this.values = values;
        this.compressed = false;
    }
    
    // Getters and Setters
    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    
    public String getType() { return type; }
    public void setType(String type) { this.type = type; }
    
    public String getVersion() { return version; }
    public void setVersion(String version) { this.version = version; }
    
    public Date getCreateTime() { return createTime; }
    public void setCreateTime(Date createTime) { this.createTime = createTime; }
    
    public Date getLastUpdateTime() { return lastUpdateTime; }
    public void setLastUpdateTime(Date lastUpdateTime) { this.lastUpdateTime = lastUpdateTime; }
    
    public boolean isCompressed() { return compressed; }
    public void setCompressed(boolean compressed) { this.compressed = compressed; }
    
    public List<Float> getValues() { return values; }
    public void setValues(List<Float> values) { this.values = values; }
    
    public List<RLEEntry> getRleValues() { return rleValues; }
    public void setRleValues(List<RLEEntry> rleValues) { this.rleValues = rleValues; }
    
    public Integer getOriginalSize() { return originalSize; }
    public void setOriginalSize(Integer originalSize) { this.originalSize = originalSize; }
}

3.3 RLE工具类

java

java 复制代码
package com.example.util;

import com.example.entity.RLEEntry;
import java.util.ArrayList;
import java.util.List;

public class RLEUtil {
    
    // 压缩阈值:当连续重复值超过此阈值时使用RLE压缩
    private static final int COMPRESSION_THRESHOLD = 10;
    
    /**
     * RLE编码压缩
     */
    public static List<RLEEntry> encode(List<Float> values) {
        if (values == null || values.isEmpty()) {
            return new ArrayList<>();
        }
        
        List<RLEEntry> encoded = new ArrayList<>();
        float currentValue = values.get(0);
        int count = 1;
        
        for (int i = 1; i < values.size(); i++) {
            if (values.get(i).equals(currentValue)) {
                count++;
            } else {
                encoded.add(new RLEEntry(currentValue, count));
                currentValue = values.get(i);
                count = 1;
            }
        }
        encoded.add(new RLEEntry(currentValue, count));
        
        return encoded;
    }
    
    /**
     * RLE解码解压
     */
    public static List<Float> decode(List<RLEEntry> rleEntries) {
        List<Float> decoded = new ArrayList<>();
        if (rleEntries == null) return decoded;
        
        for (RLEEntry entry : rleEntries) {
            for (int i = 0; i < entry.getCount(); i++) {
                decoded.add(entry.getValue());
            }
        }
        return decoded;
    }
    
    /**
     * 判断是否应该使用压缩(基于数据特征)
     */
    public static boolean shouldCompress(List<Float> values) {
        if (values == null || values.size() < COMPRESSION_THRESHOLD) {
            return false;
        }
        
        // 检查连续重复模式
        int maxRunLength = 1;
        int currentRunLength = 1;
        
        for (int i = 1; i < values.size(); i++) {
            if (values.get(i).equals(values.get(i - 1))) {
                currentRunLength++;
                maxRunLength = Math.max(maxRunLength, currentRunLength);
            } else {
                currentRunLength = 1;
            }
        }
        
        return maxRunLength >= COMPRESSION_THRESHOLD;
    }
    
    /**
     * 计算压缩比
     */
    public static double calculateCompressionRatio(List<Float> original, List<RLEEntry> compressed) {
        if (original == null || compressed == null) return 0.0;
        
        int originalSize = original.size();
        int compressedSize = compressed.size() * 2; // 每个RLE条目包含值和计数
        
        return originalSize == 0 ? 0.0 : (double) compressedSize / originalSize;
    }
}

3.4 Repository接口

java

java 复制代码
package com.example.repository;

import com.example.entity.RecordDocument;
import org.springframework.data.mongodb.repository.MongoRepository;
import org.springframework.data.mongodb.repository.Query;
import org.springframework.data.mongodb.repository.Update;

import java.util.Date;
import java.util.List;
import java.util.Optional;

public interface RecordRepository extends MongoRepository<RecordDocument, String> {
    
    Optional<RecordDocument> findByTypeAndVersion(String type, String version);
    
    List<RecordDocument> findByType(String type);
    
    List<RecordDocument> findByCompressed(boolean compressed);
    
    @Query("{'type': ?0, 'version': ?1}")
    @Update("{$set: {'lastUpdateTime': ?2}}")
    void updateLastUpdateTime(String type, String version, Date lastUpdateTime);
    
    @Query("{'type': ?0, 'version': ?1, 'compressed': false}")
    @Update("{$push: {'values': {$each: ?2}}, $set: {'lastUpdateTime': ?3}}")
    void appendValues(String type, String version, List<Float> newValues, Date lastUpdateTime);
    
    @Query("{'type': ?0, 'version': ?1, 'compressed': true}")
    @Update("{$set: {'rleValues': ?2, 'lastUpdateTime': ?3, 'originalSize': ?4}}")
    void updateRleValues(String type, String version, List<RLEEntry> rleValues, Date lastUpdateTime, int originalSize);
    
    @Query("{'createTime': {$gte: ?0, $lte: ?1}}")
    List<RecordDocument> findByCreateTimeBetween(Date start, Date end);
}

3.5 服务实现类

java

package com.example.service;

import com.example.entity.RLEEntry;

import com.example.entity.RecordDocument;

import com.example.repository.RecordRepository;

import com.example.util.RLEUtil;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.data.domain.Page;

import org.springframework.data.domain.Pageable;

import org.springframework.stereotype.Service;

import java.util.ArrayList;

import java.util.Date;

import java.util.List;

import java.util.Optional;

@Service

public class RecordService {

复制代码
@Autowired
private RecordRepository recordRepository;

// 压缩阈值配置
private static final int COMPRESSION_SIZE_THRESHOLD = 1000; // 数据量超过1000条时考虑压缩

/**
 * 插入或更新记录(自动处理压缩)
 */
public RecordDocument saveOrUpdate(String type, String version, List<Float> newValues) {
    Optional<RecordDocument> existingRecord = recordRepository.findByTypeAndVersion(type, version);
    
    if (existingRecord.isPresent()) {
        return appendToExistingRecord(existingRecord.get(), newValues);
    } else {
        return createNewRecord(type, version, newValues);
    }
}

/**
 * 创建新记录
 */
private RecordDocument createNewRecord(String type, String version, List<Float> values) {
    RecordDocument record = new RecordDocument(type, version, values);
    
    // 判断是否需要压缩
    if (shouldCompress(values)) {
        compressRecord(record);
    }
    
    return recordRepository.save(record);
}

/**
 * 向现有记录追加数据
 */
private RecordDocument appendToExistingRecord(RecordDocument existingRecord, List<Float> newValues) {
    List<Float> allValues = getAllValues(existingRecord);
    allValues.addAll(newValues);
    
    // 检查是否需要重新评估压缩策略
    if (existingRecord.isCompressed()) {
        // 已压缩的记录,直接更新RLE编码
        updateCompressedRecord(existingRecord, allValues);
    } else {
        // 未压缩的记录,检查是否需要压缩
        if (shouldCompress(allValues)) {
            compressRecord(existingRecord);
            existingRecord.setValues(null);
            recordRepository.save(existingRecord);
        } else {
            // 保持未压缩状态,直接追加
            recordRepository.appendValues(
                existingRecord.getType(), 
                existingRecord.getVersion(), 
                newValues, 
                new Date()
            );
        }
    }
    
    return recordRepository.findByTypeAndVersion(
        existingRecord.getType(), 
        existingRecord.getVersion()
    ).orElse(null);
}

/**
 * 获取记录的所有值(自动解压)
 */
public List<Float> getValues(String type, String version) {
    Optional<RecordDocument> record = recordRepository.findByTypeAndVersion(type, version);
    return record.map(this::getAllValues).orElse(new ArrayList<>());
}

/**
 * 根据ID查找记录
 */
public Optional<RecordDocument> findById(String id) {
    return recordRepository.findById(id);
}

/**
 * 根据类型查找记录
 */
public List<RecordDocument> findByType(String type) {
    return recordRepository.findByType(type);
}

/**
 * 分页查询
 */
public Page<RecordDocument> findAll(Pageable pageable) {
    return recordRepository.findAll(pageable);
}

/**
 * 删除记录
 */
public void deleteRecord(String type, String version) {
    Optional<RecordDocument> record = recordRepository.findByTypeAndVersion(type, version);
    record.ifPresent(recordRepository::delete);
}

/**
 * 手动触发压缩
 */
public RecordDocument compressRecord(String type, String version) {
    Optional<RecordDocument> recordOpt = recordRepository.findByTypeAndVersion(type, version);
    if (recordOpt.isPresent()) {
        RecordDocument record = recordOpt.get();
        if (!record.isCompressed() && shouldCompress(getAllValues(record))) {
            compressRecord(record);
            return recordRepository.save(record);
        }
    }
    return recordOpt.orElse(null);
}

/**
 * 手动解压缩
 */
public RecordDocument decompressRecord(String type, String version) {
    Optional<RecordDocument> recordOpt = recordRepository.findByTypeAndVersion(type, version);
    if (recordOpt.isPresent()) {
        RecordDocument record = recordOpt.get();
        if (record.isCompressed()) {
            List<Float> values = RLEUtil.decode(record.getRleValues());
            record.setValues(values);
            record.setRleValues(null);
            record.setCompressed(false);
            record.setOriginalSize(null);
            return recordRepository.save(record);
        }
    }
    return recordOpt.orElse(null);
}

/**
 * 获取统计信息
 */
public RecordStats getStats(String type, String version) {
    Optional<RecordDocument> recordOpt = recordRepository.findByTypeAndVersion(type, version);
    if (recordOpt.isPresent()) {
        RecordDocument record = recordOpt.get();
        List<Float> values = getAllValues(record);
        
        RecordStats stats = new RecordStats();
        stats.setTotalValues(values.size());
        stats.setCompressed(record.isCompressed());
        stats.setCompressionRatio(record.isCompressed() ? 
            RLEUtil.calculateCompressionRatio(values, record.getRleValues()) : 1.0);
        stats.setCreateTime(record.getCreateTime());
        stats.setLastUpdateTime(record.getLastUpdateTime());
        
        return stats;
    }
    return null;
}

// 内部辅助方法
private List<Float> getAllValues(RecordDocument record) {
    if (record.isCompressed()) {
        return RLEUtil.decode(record.getRleValues());
    } else {
        return record.getValues() != null ? record.getValues() : new ArrayList<>();
    }
}

private boolean shouldCompress(List<Float> values) {
    return values.size() > COMPRESSION_SIZE_THRESHOLD && RLEUtil.shouldCompress(values);
}

private void compressRecord(RecordDocument record) {
    List<Float> values = getAllValues(record);
    List<RLEEntry> rleValues = RLEUtil.encode(values);
    
    record.setRleValues(rleValues);
    record.setValues(null);
    record.setCompressed(true);
    record.setOriginalSize(values.size());
    record.setLastUpdateTime(new Date());
}

private void updateCompressedRecord(RecordDocument record, List<Float> allValues) {
    List<RLEEntry> rleValues = RLEUtil.encode(allValues);
    recordRepository.updateRleValues(
        record.getType(),
        record.getVersion(),
        rleValues,
        new Date(),
        allValues.size()
    );
}

// 统计信息类
public static class RecordStats {
    private int totalValues;
    private boolean compressed;
    private double compressionRatio;
    private Date createTime;
    private Date lastUpdateTime;
    
    // Getters and Setters
    public int getTotalValues() { return totalValues; }
    public void setTotalValues(int totalValues) { this.totalValues = totalValues; }
    
    public boolean isCompressed() { return compressed; }
    public void setCompressed(boolean compressed) { this.compressed = compressed; }
    
    public double getCompressionRatio() { return compressionRatio; }
    public void setCompressionRatio(double compressionRatio) { this.compressionRatio = compressionRatio; }
    
    public Date getCreateTime() { return createTime; }
    public void setCreateTime(Date createTime) { this.createTime = createTime; }
    
    public Date getLastUpdateTime() { return lastUpdateTime; }
    public void setLastUpdateTime(Date lastUpdateTime) { this.lastUpdateTime = lastUpdateTime; }
}

}

3.6 控制器类

java

package com.example.controller;

import com.example.entity.RecordDocument;

import com.example.service.RecordService;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.data.domain.Page;

import org.springframework.data.domain.PageRequest;

import org.springframework.http.ResponseEntity;

import org.springframework.web.bind.annotation.*;

import java.util.List;

@RestController

@RequestMapping("/api/records")

public class RecordController {

复制代码
@Autowired
private RecordService recordService;

@PostMapping
public ResponseEntity<RecordDocument> createOrUpdateRecord(
        @RequestParam String type,
        @RequestParam String version,
        @RequestBody List<Float> values) {
    
    RecordDocument record = recordService.saveOrUpdate(type, version, values);
    return ResponseEntity.ok(record);
}

@GetMapping
public ResponseEntity<RecordDocument> getRecord(
        @RequestParam String type,
        @RequestParam String version) {
    
    List<Float> values = recordService.getValues(type, version);
    RecordDocument record = new RecordDocument();
    record.setType(type);
    record.setVersion(version);
    // 注意:这里返回的是包含解压后数据的简化对象
    record.setValues(values);
    
    return ResponseEntity.ok(record);
}

@GetMapping("/{id}")
public ResponseEntity<RecordDocument> getRecordById(@PathVariable String id) {
    return recordService.findById(id)
            .map(ResponseEntity::ok)
            .orElse(ResponseEntity.notFound().build());
}

@GetMapping("/type/{type}")
public ResponseEntity<List<RecordDocument>> getRecordsByType(@PathVariable String type) {
    List<RecordDocument> records = recordService.findByType(type);
    return ResponseEntity.ok(records);
}

@GetMapping("/page")
public ResponseEntity<Page<RecordDocument>> getRecordsPage(
        @RequestParam(defaultValue = "0") int page,
        @RequestParam(defaultValue = "10") int size) {
    
    Page<RecordDocument> records = recordService.findAll(PageRequest.of(page, size));
    return ResponseEntity.ok(records);
}

@PostMapping("/compress")
public ResponseEntity<RecordDocument> compressRecord(
        @RequestParam String type,
        @RequestParam String version) {
    
    RecordDocument record = recordService.compressRecord(type, version);
    return record != null ? ResponseEntity.ok(record) : ResponseEntity.notFound().build();
}

@PostMapping("/decompress")
public ResponseEntity<RecordDocument> decompressRecord(
        @RequestParam String type,
        @RequestParam String version) {
    
    RecordDocument record = recordService.decompressRecord(type, version);
    return record != null ? ResponseEntity.ok(record) : ResponseEntity.notFound().build();
}

@GetMapping("/stats")
public ResponseEntity<RecordService.RecordStats> getRecordStats(
        @RequestParam String type,
        @RequestParam String version) {
    
    RecordService.RecordStats stats = recordService.getStats(type, version);
    return stats != null ? ResponseEntity.ok(stats) : ResponseEntity.notFound().build();
}

@DeleteMapping
public ResponseEntity<Void> deleteRecord(
        @RequestParam String type,
        @RequestParam String version) {
    
    recordService.deleteRecord(type, version);
    return ResponseEntity.ok().build();
}

}

3.7 配置类

java

package com.example.config;

import org.springframework.context.annotation.Configuration;

import org.springframework.data.mongodb.config.EnableMongoAuditing;

import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;

@Configuration

@EnableMongoRepositories(basePackages = "com.example.repository")

@EnableMongoAuditing

public class MongoConfig {

}

  1. 使用示例

4.1 插入数据示例

java

// 插入第一条数据

List values1 = Arrays.asList(1.23f, 4.21f, 5.90f);

recordService.saveOrUpdate("q", "123", values1);

// 插入第二条数据(自动追加)

List values2 = Arrays.asList(5.78f, 9.88f);

recordService.saveOrUpdate("q", "123", values2);

// 插入大量重复数据(自动压缩)

List repeatedValues = new ArrayList<>();

for (int i = 0; i < 1000000; i++) {

repeatedValues.add(1.0f);

}

recordService.saveOrUpdate("q", "124", repeatedValues);

4.2 查询数据示例

java

// 查询数据(自动解压)

List values = recordService.getValues("q", "123");

// 获取统计信息

RecordService.RecordStats stats = recordService.getStats("q", "124");

System.out.println("压缩比: " + stats.getCompressionRatio());

  1. 性能优化建议

5.1 索引优化

java

// 在实体类上添加复合索引

@CompoundIndex(name = "type_version_idx", def = "{'type': 1, 'version': 1}", unique = true)

5.2 批量操作优化

对于大批量数据插入,考虑使用MongoDB的批量操作

实现数据分片策略,避免单个文档过大

5.3 监控与调优

监控压缩比和查询性能

根据实际数据特征调整压缩阈值

定期清理历史数据

  1. 总结
    本项目实现了基于Spring Boot和MongoDB的智能数据存储系统,具有以下特点:

自动压缩: 根据数据特征自动选择是否使用RLE压缩

透明操作: 提供统一的API,压缩/解压对用户透明

高性能: 针对重复数据优化存储效率

灵活配置: 可调整压缩阈值和策略

完整功能: 提供增删改查、统计等完整功能

相关推荐
安当加密3 小时前
通过TDE透明加密实现人大金仓数据库的免改造存储加密方案
数据库·金仓·透明加密
养生技术人3 小时前
Oracle OCP认证考试题目详解082系列第49题
运维·数据库·sql·oracle·database·开闭原则·ocp
white-persist3 小时前
SQL 注入详解:从原理到实战
前端·网络·数据库·sql·安全·web安全·原型模式
Databend4 小时前
Raft 中的 IO 执行顺序:内存状态与持久化状态的陷阱
数据库
兜兜风d'4 小时前
redis字符串命令
数据库·redis·缓存
忧郁的蛋~5 小时前
EFcore查询a表中符合b表列的值
数据库
xwz小王子6 小时前
ManipulationNet:开启真实世界机器人操作基准测试新时代
数据库·机器人
咯哦哦哦哦6 小时前
关于QT 打印中文 乱码问题
java·数据库·qt
野犬寒鸦6 小时前
从零起步学习Redis || 第十二章:Redis Cluster集群如何解决Redis单机模式的性能瓶颈及高可用分布式部署方案详解
java·数据库·redis·后端·缓存