【架构实战】时序数据库选型:InfluxDB vs TDengine

一、为什么需要时序数据库

传统数据库处理时序数据的问题:

关系型数据库的问题:

  • 写入性能差(无批量写入优化)
  • 存储成本高(无压缩)
  • 查询性能差(无预聚合)
  • 单机容量与水平扩展能力有限(历史数据膨胀后难以维护)

时序数据的特征:

  • 数据按时间顺序写入
  • 数据量大(IoT设备每秒产生大量数据)
  • 写入多,查询少
  • 通常需要聚合分析

二、InfluxDB实战

1. 核心概念

复制代码
Measurement(测量)
    ├── Tags(标签)- 索引列
    ├── Fields(字段)- 值列
    └── Timestamp(时间戳)

2. Docker部署

yaml 复制代码
version: '3'
services:
  influxdb:
    image: influxdb:2.7
    container_name: influxdb
    ports:
      - "8086:8086"   # HTTP API / UI
    volumes:
      - ./data:/var/lib/influxdb2   # TSM data
      - ./config:/etc/influxdb2     # server config
    environment:
      # DOCKER_INFLUXDB_INIT_* runs the v2 onboarding (org/bucket/admin) on first boot only.
      - DOCKER_INFLUXDB_INIT_MODE=setup
      - DOCKER_INFLUXDB_INIT_USERNAME=admin
      - DOCKER_INFLUXDB_INIT_PASSWORD=password   # must be >= 8 chars; use a secret in production
      - DOCKER_INFLUXDB_INIT_ORG=myorg
      - DOCKER_INFLUXDB_INIT_BUCKET=metrics
      # Fixed API token so clients (influxdb-client-java) can authenticate deterministically;
      # without this a random token is generated and must be read from the UI.
      - DOCKER_INFLUXDB_INIT_ADMIN_TOKEN=my-super-secret-token

3. InfluxQL查询

sql 复制代码
-- InfluxQL examples. NOTE(review): InfluxQL CREATE DATABASE / CONTINUOUS QUERY are
-- InfluxDB 1.x features; the docker-compose above deploys 2.7, where buckets and
-- tasks (Flux) replace them — confirm which major version this section targets.

-- Create the database (1.x)
CREATE DATABASE sensor_data;

-- Write one point via CLI line protocol: measurement,tags fields timestamp(ns)
INSERT temperature,device_id=sensor_001,location=beijing value=25.5 1704067200000000000

-- Query the last hour of data for one device
SELECT * FROM temperature 
WHERE time > now() - 1h 
AND device_id = 'sensor_001'

-- Per-device statistics over the last 24 hours
SELECT MEAN(value), MAX(value), MIN(value) 
FROM temperature 
WHERE time > now() - 24h 
GROUP BY device_id

-- Downsample: 1-hour windows per device over the last week
SELECT MEAN(value) 
FROM temperature 
WHERE time > now() - 7d 
GROUP BY time(1h), device_id

-- Continuous query (1.x pre-aggregation; in 2.x use a task instead)
-- NOTE(review): this CQ reads cpu_usage, unrelated to the temperature examples
-- above — verify the intended measurement name.
CREATE CONTINUOUS QUERY cpu_avg_1h ON sensor_data
BEGIN
  SELECT MEAN(value) 
  INTO cpu_avg_1h 
  FROM cpu_usage 
  GROUP BY time(1h), device_id
END

4. Java客户端

xml 复制代码
<dependency>
    <groupId>com.influxdb</groupId>
    <artifactId>influxdb-client-java</artifactId>
    <version>6.8.0</version>
</dependency>
java 复制代码
@Configuration
public class InfluxDBConfig {

    /**
     * Shared InfluxDB 2.x client bean.
     *
     * <p>NOTE(review): for 2.x the recommended authentication is an API token
     * ({@code InfluxDBClientFactory.create(url, token.toCharArray())}, matching the
     * DOCKER_INFLUXDB_INIT_ADMIN_TOKEN of the deployment); username/password is kept
     * to mirror the original example. Credentials should come from configuration,
     * not be hard-coded.
     */
    @Bean
    public InfluxDBClient influxDBClient() {
        // Signature is create(String url, String username, char[] password):
        // the username is a String — only the password is a char[].
        return InfluxDBClientFactory
            .create("http://localhost:8086", "admin", "password".toCharArray());
    }
}

@Service
@Slf4j
public class MetricsService {

    /** Organization/bucket created by the docker-compose onboarding above. */
    private static final String ORG = "myorg";
    private static final String BUCKET = "sensor_data";

    @Autowired
    private InfluxDBClient influxDBClient;

    /**
     * Writes a batch of readings as points of the "sensor_data" measurement.
     *
     * @param metrics readings to persist; tags identify the device, fields carry values
     */
    public void writeMetrics(List<Metric> metrics) {
        // Blocking write API: no lifecycle to manage. makeWriteApi() starts a background
        // batching worker and is meant to live as an application-scoped singleton —
        // creating and closing one per call (as the original did) is wasteful.
        // Note: Point is built fluently; it has no build() method.
        WriteApiBlocking writeApi = influxDBClient.getWriteApiBlocking();

        List<Point> points = metrics.stream()
            .map(m -> Point.measurement("sensor_data")
                .addTag("device_id", m.getDeviceId())
                .addTag("location", m.getLocation())
                .addField("temperature", m.getTemperature())
                .addField("humidity", m.getHumidity())
                .addField("pressure", m.getPressure())
                .time(m.getTimestamp(), WritePrecision.NS))
            .collect(Collectors.toList());

        writeApi.writePoints(BUCKET, ORG, points);
    }

    /**
     * Returns 1-minute mean-aggregated readings for one device over the trailing window.
     *
     * @param deviceId device tag value to filter on
     * @param duration how far back to query
     */
    public List<Metric> queryMetrics(String deviceId, Duration duration) {
        QueryApi queryApi = influxDBClient.getQueryApi();

        // Flux duration literals need a unit suffix like "3600s"; Duration.toString()
        // (used originally) yields ISO-8601 text such as "PT1H", which Flux rejects.
        // The pivot turns one-row-per-field output into one row per timestamp so the
        // temperature/humidity columns can be read from a single record.
        String flux = String.format("""
            from(bucket: "sensor_data")
              |> range(start: -%ds)
              |> filter(fn: (r) => r["device_id"] == "%s")
              |> aggregateWindow(every: 1m, fn: mean)
              |> pivot(rowKey: ["_time"], columnKey: ["_field"], valueColumn: "_value")
            """, duration.toSeconds(), deviceId);

        // query(flux, org) returns tables, each holding its records.
        List<FluxTable> tables = queryApi.query(flux, ORG);

        return tables.stream()
            .flatMap(t -> t.getRecords().stream())
            .map(r -> Metric.builder()
                // device_id is a tag column on the record; getMeasurement() (used
                // originally) would return "sensor_data" for every row, not the device.
                .deviceId((String) r.getValueByKey("device_id"))
                .timestamp(r.getTime())
                .temperature(getFieldValue(r, "temperature"))
                .humidity(getFieldValue(r, "humidity"))
                .build())
            .collect(Collectors.toList());
    }

    /**
     * Hourly mean temperature for one device, keyed by window timestamp (ISO-8601).
     *
     * @param deviceId device tag value to filter on
     * @param duration how far back to query
     */
    public Map<String, Double> getHourlyAvg(String deviceId, Duration duration) {
        QueryApi queryApi = influxDBClient.getQueryApi();

        String flux = String.format("""
            from(bucket: "sensor_data")
              |> range(start: -%ds)
              |> filter(fn: (r) => r["_measurement"] == "sensor_data")
              |> filter(fn: (r) => r["device_id"] == "%s")
              |> filter(fn: (r) => r["_field"] == "temperature")
              |> aggregateWindow(every: 1h, fn: mean)
            """, duration.toSeconds(), deviceId);

        // The original built the query but returned an empty map; actually execute it.
        // Empty windows produce null values and are skipped.
        Map<String, Double> hourly = new HashMap<>();
        for (FluxTable table : queryApi.query(flux, ORG)) {
            for (FluxRecord record : table.getRecords()) {
                Object value = record.getValue();
                if (value instanceof Number && record.getTime() != null) {
                    hourly.put(record.getTime().toString(), ((Number) value).doubleValue());
                }
            }
        }
        return hourly;
    }

    // Reads one numeric column from a pivoted record; null when absent or non-numeric.
    // (The original called this helper but never defined it.)
    private Double getFieldValue(FluxRecord record, String field) {
        Object value = record.getValueByKey(field);
        return value instanceof Number ? ((Number) value).doubleValue() : null;
    }
}

三、TDengine实战

1. 核心概念

复制代码
TDengine采用超级表+子表的设计:

超级表(Super Table)
├── 标签(Tag)- 静态属性
├── 表(Sub Table)- 每个设备一张表
└── 数据(Columns)- 时序数据

2. Docker部署

yaml 复制代码
version: '3'
services:
  tdengine:
    image: tdengine/tdengine:3.0
    container_name: tdengine
    ports:
      # 6030 is the native client protocol (taosc / JDBC native) — the original
      # labeled it "REST API", which is wrong.
      - "6030:6030"  # native client protocol
      # REST / WebSocket connections go through taosAdapter on 6041
      # (6035, exposed originally, is not a TDengine 3.0 service port).
      - "6041:6041"  # REST API (taosAdapter)
    volumes:
      - ./data:/var/lib/taos   # data files
      - ./log:/var/log/taos    # server logs
    environment:
      - TZ=Asia/Shanghai

3. SQL语法

sql 复制代码
-- Create the database (TDengine 3.0 syntax).
-- The original "KEEP 365 DAYS BLOCKS 10" is 2.x syntax: in 3.0 KEEP takes a
-- duration literal, DAYS was renamed DURATION, and BLOCKS was removed.
CREATE DATABASE sensor_db KEEP 365d DURATION 10d;

-- Create the super table: shared columns + per-device static tags
CREATE STABLE sensor_data (
    ts TIMESTAMP,
    temperature FLOAT,
    humidity FLOAT,
    pressure FLOAT
) TAGS (
    device_id BINARY(32),
    location BINARY(32),
    device_type BINARY(16)
);

-- Create sub tables (one per device; can also be auto-created on insert)
CREATE TABLE t_001 USING sensor_data TAGS('sensor_001', 'beijing', 'indoor');
CREATE TABLE t_002 USING sensor_data TAGS('sensor_002', 'shanghai', 'outdoor');

-- Insert single rows
INSERT INTO t_001 VALUES('2024-01-15 10:00:00.000', 25.5, 60.0, 101.3);
INSERT INTO t_001 VALUES('2024-01-15 10:01:00.000', 25.6, 60.1, 101.3);

-- Batch insert
INSERT INTO t_001 VALUES
    ('2024-01-15 10:00:00.000', 25.5, 60.0, 101.3),
    ('2024-01-15 10:01:00.000', 25.6, 60.1, 101.3),
    ('2024-01-15 10:02:00.000', 25.7, 60.2, 101.3);

-- Per-device aggregation over one day
SELECT AVG(temperature), MAX(temperature), MIN(temperature)
FROM sensor_data
WHERE ts > '2024-01-15 00:00:00'
AND ts < '2024-01-16 00:00:00'
PARTITION BY TBNAME;

-- Aggregate grouped by tag
SELECT device_id, AVG(temperature), MAX(temperature)
FROM sensor_data
WHERE ts > NOW() - 1d
GROUP BY device_id;

-- Downsample into 1-hour windows; _wstart is the window start timestamp
SELECT _wstart, AVG(temperature)
FROM sensor_data
WHERE ts > NOW() - 7d
INTERVAL(1h);

-- Continuous aggregation.
-- "CREATE CQ ... BEGIN ... END" (as in the original) is not valid TDengine syntax;
-- TDengine 3.0 uses streams for pre-aggregation:
CREATE STREAM temp_avg_1h INTO temp_avg_1h AS
  SELECT _wstart, AVG(temperature)
  FROM sensor_data
  PARTITION BY TBNAME
  INTERVAL(1h);

4. Java客户端

xml 复制代码
<dependency>
    <groupId>com.taosdata.jdbc</groupId>
    <artifactId>taos-jdbcdriver</artifactId>
    <version>3.2.10</version>
</dependency>
java 复制代码
@Configuration
public class TDengineConfig {

    /**
     * DataSource pointing at the local TDengine instance's sensor_db database.
     *
     * <p>NOTE(review): {@code TDengineDataSource} is assumed to be provided by the
     * taos-jdbcdriver dependency above — confirm the class name against the driver
     * version in use, and move the hard-coded credentials into configuration.
     */
    @Bean
    public DataSource dataSource() {
        TDengineDataSource tdengine = new TDengineDataSource();
        tdengine.setHost("localhost");
        tdengine.setPort(6030);
        tdengine.setDbName("sensor_db");
        tdengine.setUsername("root");
        tdengine.setPassword("taosdata");
        return tdengine;
    }
}

@Service
@Slf4j
public class SensorDataService {

    @Autowired
    private DataSource dataSource;

    /** Creates the sensor_data super table if it does not exist yet. */
    public void createSuperTable() {
        String sql = """
            CREATE STABLE IF NOT EXISTS sensor_data (
                ts TIMESTAMP,
                temperature FLOAT,
                humidity FLOAT,
                pressure FLOAT
            ) TAGS (
                device_id BINARY(32),
                location BINARY(32),
                device_type BINARY(16)
            )
            """;

        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement()) {
            stmt.execute(sql);
        } catch (SQLException e) {
            log.error("创建超级表失败", e);
        }
    }

    /**
     * Appends readings to the per-device sub table named {@code deviceId}.
     *
     * @param deviceId sub-table name; must be a plain identifier
     * @param readings rows to insert; no-op when empty
     * @throws IllegalArgumentException if deviceId is not a plain identifier
     */
    public void insertData(String deviceId, List<SensorReading> readings) {
        // deviceId becomes the table name and cannot be bound as a PreparedStatement
        // parameter, so whitelist it to block SQL injection from caller input.
        requireSafeTableName(deviceId);
        if (readings.isEmpty()) {
            return; // "INSERT INTO t VALUES" with no row list is a syntax error
        }

        StringBuilder sql = new StringBuilder();
        sql.append("INSERT INTO ").append(deviceId).append(" VALUES");

        for (SensorReading r : readings) {
            // NOTE(review): the timestamp is written as an epoch value in nanoseconds;
            // TDengine databases default to millisecond precision — confirm the
            // database was created with PRECISION 'ns', or convert to millis here.
            sql.append(String.format(" (%d, %.2f, %.2f, %.2f)",
                r.getTimestamp().toEpochNano(),
                r.getTemperature(),
                r.getHumidity(),
                r.getPressure()));
        }

        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement()) {
            stmt.execute(sql.toString());
        } catch (SQLException e) {
            log.error("写入数据失败", e);
        }
    }

    /**
     * Hourly temperature aggregates for one device table over the trailing window.
     *
     * @param deviceId sub-table name; must be a plain identifier
     * @param duration lookback window (rounded down to whole hours, minimum 1)
     * @return one map per window with ts / avg_temp / max_temp / min_temp
     */
    public List<Map<String, Object>> queryHourlyAvg(String deviceId, Duration duration) {
        requireSafeTableName(deviceId);

        // TDengine 3.x INTERVAL queries expose the window start as _wstart; it must be
        // selected explicitly (aliased to "ts" here) — the original read a "ts" column
        // that the result set does not contain. Math.max keeps sub-hour durations from
        // producing "NOW() - 0h".
        String sql = String.format("""
            SELECT _wstart AS ts,
                   AVG(temperature) as avg_temp,
                   MAX(temperature) as max_temp,
                   MIN(temperature) as min_temp
            FROM %s
            WHERE ts > NOW() - %dh
            INTERVAL(1h)
            """, deviceId, Math.max(1, duration.toHours()));

        List<Map<String, Object>> results = new ArrayList<>();

        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(sql)) {

            while (rs.next()) {
                Map<String, Object> row = new HashMap<>();
                row.put("ts", rs.getTimestamp("ts"));
                row.put("avg_temp", rs.getDouble("avg_temp"));
                row.put("max_temp", rs.getDouble("max_temp"));
                row.put("min_temp", rs.getDouble("min_temp"));
                results.add(row);
            }
        } catch (SQLException e) {
            log.error("查询失败", e);
        }

        return results;
    }

    /**
     * Average/maximum temperature per device over the last day, grouped by the
     * device_id tag across the whole super table.
     */
    public List<Map<String, Object>> queryByTag() {
        // Alias the aggregates: reading columns back by the raw expression text
        // ("AVG(temperature)"), as the original did, depends on driver-specific
        // column labels and is fragile.
        String sql = """
            SELECT device_id, AVG(temperature) AS avg_temp, MAX(temperature) AS max_temp
            FROM sensor_data
            WHERE ts > NOW() - 1d
            GROUP BY device_id
            """;

        List<Map<String, Object>> results = new ArrayList<>();

        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(sql)) {

            while (rs.next()) {
                Map<String, Object> row = new HashMap<>();
                row.put("device_id", rs.getString("device_id"));
                row.put("avg_temp", rs.getDouble("avg_temp"));
                row.put("max_temp", rs.getDouble("max_temp"));
                results.add(row);
            }
        } catch (SQLException e) {
            log.error("查询失败", e);
        }

        return results;
    }

    // Rejects identifiers that could smuggle SQL into a table-name position.
    private static void requireSafeTableName(String name) {
        if (name == null || !name.matches("[A-Za-z0-9_]+")) {
            throw new IllegalArgumentException("非法的设备表名: " + name);
        }
    }
}

四、InfluxDB vs TDengine对比

维度         | InfluxDB                      | TDengine
数据模型     | Measurement + Tag + Field     | 超级表 + 子表
SQL支持      | InfluxQL(类SQL)/ Flux       | 标准SQL
压缩率       | 3-10x                         | 10-20x
查询性能     | 良好                          | 更快(列式存储 + 每设备一表)
生态         | 丰富(Telegraf等)            | 发展中
集群         | 企业版支持                    | 开源版支持
学习成本     | 中等(需学习Flux)            | 低(标准SQL即可上手)
社区活跃度   | 高                            | 较高

五、选型建议

选择InfluxDB的场景:

  • 需要丰富的生态系统
  • 需要支持Flux查询语言
  • 已有Telegraf等工具
  • 需要企业级支持

选择TDengine的场景:

  • 超大规模时序数据(百万级设备)
  • 需要高压缩比节省存储
  • 已有Kafka/Flink生态
  • 成本敏感

六、总结

时序数据库是处理IoT和监控数据的利器:

  • InfluxDB:生态丰富,上手简单
  • TDengine:超高性能,高压缩比
  • 超级表设计:便于管理和聚合
  • 连续查询:预聚合提升查询性能

实施建议:

  1. 根据数据量选择合适的数据库
  2. 合理设计表结构
  3. 使用连续查询预聚合
  4. 做好数据生命周期管理

个人观点,仅供参考

相关推荐
lishutong10062 小时前
Android 性能诊断 V2:基于 Agent Skill 的原生 IDE 融合架构
android·ide·架构
wok1572 小时前
WebMVC 和 WebFlux 架构选型
java·spring·架构·mvc
zs宝来了2 小时前
Prometheus 监控体系原理:Pull 模式与 TSDB 时序数据库
prometheus·时序数据库·监控·tsdb·pull模式
fire-flyer2 小时前
ClickHouse系列(六):Kafka 到 ClickHouse 的生产级写入架构
clickhouse·架构·kafka
Bohemian—Rhapsody2 小时前
麒麟v10-arm架构部署rabbitmq
arm开发·架构·rabbitmq
猫仍在2 小时前
Playwright 架构UI 自动化质量保障平台
ui·架构·自动化
2603_954708312 小时前
微电网主从控制架构:集中式调度与分布式执行的协同机制
人工智能·分布式·物联网·架构·系统架构·能源
小猿姐5 小时前
# KubeBlocks for MSSQL 高可用实现
数据库·架构·sql server
古译汉书11 小时前
【IoT死磕系列】Day 9:架构一台“自动驾驶物流车”,看8种协议如何协同作战
网络·arm开发·单片机·物联网·tcp/ip·架构·自动驾驶