一、为什么需要时序数据库
传统数据库处理时序数据的问题:
关系型数据库的问题:
- 写入性能差(无批量写入优化)
- 存储成本高(无压缩)
- 查询性能差(无预聚合)
- 容量有限
时序数据的特征:
- 数据按时间顺序写入
- 数据量大(IoT设备每秒产生大量数据)
- 写入多,查询少
- 通常需要聚合分析
二、InfluxDB实战
1. 核心概念
Measurement(测量)
├── Tags(标签)- 索引列
├── Fields(字段)- 值列
└── Timestamp(时间戳)
2. Docker部署
yaml
version: '3'
services:
influxdb:
image: influxdb:2.7
container_name: influxdb
ports:
- "8086:8086"
volumes:
- ./data:/var/lib/influxdb2
- ./config:/etc/influxdb2
environment:
- DOCKER_INFLUXDB_INIT_MODE=setup
- DOCKER_INFLUXDB_INIT_USERNAME=admin
- DOCKER_INFLUXDB_INIT_PASSWORD=password
- DOCKER_INFLUXDB_INIT_ORG=myorg
- DOCKER_INFLUXDB_INIT_BUCKET=metrics
3. InfluxQL查询(注意:以下为InfluxDB 1.x的InfluxQL语法;上文部署的2.x版本默认使用Flux查询语言,如需继续用InfluxQL须先配置DBRP映射)
sql
-- 创建数据库
CREATE DATABASE sensor_data;
-- 写入数据
INSERT temperature,device_id=sensor_001,location=beijing value=25.5 1704067200000000000
-- 查询最近1小时的数据
SELECT * FROM temperature
WHERE time > now() - 1h
AND device_id = 'sensor_001'
-- 按设备分组统计
SELECT MEAN(value), MAX(value), MIN(value)
FROM temperature
WHERE time > now() - 24h
GROUP BY device_id
-- 按时间窗口聚合
SELECT MEAN(value)
FROM temperature
WHERE time > now() - 7d
GROUP BY time(1h), device_id
-- 连续查询(预聚合)
CREATE CONTINUOUS QUERY cpu_avg_1h ON sensor_data
BEGIN
SELECT MEAN(value)
INTO cpu_avg_1h
FROM cpu_usage
GROUP BY time(1h), device_id
END
4. Java客户端
xml
<dependency>
<groupId>com.influxdb</groupId>
<artifactId>influxdb-client-java</artifactId>
<version>6.8.0</version>
</dependency>
java
@Configuration
public class InfluxDBConfig {

    /**
     * InfluxDB 2.x客户端(线程安全,全局单例复用)。
     *
     * 注意:create(String url, String username, char[] password)的用户名是String,
     * 只有密码是char[] —— 原代码把用户名也转成char[],没有匹配的重载,无法编译。
     * 生产环境建议改用API Token认证:
     * InfluxDBClientFactory.create(url, token.toCharArray(), org, bucket)
     * 并把地址、账号等配置外部化到application.yml,不要硬编码。
     */
    @Bean(destroyMethod = "close") // 应用关闭时释放底层连接
    public InfluxDBClient influxDBClient() {
        return InfluxDBClientFactory
            .create("http://localhost:8086", "admin", "password".toCharArray());
    }
}
@Service
@Slf4j
public class MetricsService {

    @Autowired
    private InfluxDBClient influxDBClient;

    /**
     * 批量写入指标数据。
     *
     * 原实现每次调用makeWriteApi()再close():WriteApi是带后台批量调度的
     * 异步API,设计上应作为单例长期持有;且写入抛异常时close()不会执行,
     * 造成资源泄漏。这里改用WriteApiBlocking做同步写入,失败直接抛出
     * InfluxException,由调用方决定重试策略。
     *
     * @param metrics 待写入的指标列表,null或空列表直接返回
     */
    public void writeMetrics(List<Metric> metrics) {
        if (metrics == null || metrics.isEmpty()) {
            return;
        }
        List<Point> points = metrics.stream()
            .map(m -> Point.measurement("sensor_data")
                .addTag("device_id", m.getDeviceId())
                .addTag("location", m.getLocation())
                .addField("temperature", m.getTemperature())
                .addField("humidity", m.getHumidity())
                .addField("pressure", m.getPressure())
                .time(m.getTimestamp(), WritePrecision.NS)
                .build())
            .collect(Collectors.toList());
        influxDBClient.getWriteApiBlocking().writePoints(points);
    }

    /**
     * 查询某设备最近一段时间的指标(1分钟均值降采样)。
     *
     * 修复点:
     * - Flux的range需要"1h"这类duration字面量,Duration.toString()输出ISO-8601
     *   的"PT1H",直接拼接会导致查询报错,统一换算成秒;
     * - influxdb-client-java中不存在Query.builder(),query()返回的是
     *   List<FluxTable>而非List<FluxRecord>;
     * - device_id是tag,应从记录列中读取,原代码误取了measurement名;
     * - 不pivot时每条记录只含一个_field,无法同时取temperature/humidity。
     *
     * @param deviceId 设备ID(拼入Flux字符串,调用方需保证来源可信)
     * @param duration 查询时间范围
     * @return 指标列表
     */
    public List<Metric> queryMetrics(String deviceId, Duration duration) {
        QueryApi queryApi = influxDBClient.getQueryApi();
        String flux = String.format("""
            from(bucket: "sensor_data")
              |> range(start: -%ds)
              |> filter(fn: (r) => r["_measurement"] == "sensor_data")
              |> filter(fn: (r) => r["device_id"] == "%s")
              |> aggregateWindow(every: 1m, fn: mean)
              |> pivot(rowKey: ["_time"], columnKey: ["_field"], valueColumn: "_value")
            """, duration.toSeconds(), deviceId);
        List<FluxTable> tables = queryApi.query(flux, "myorg");
        return tables.stream()
            .flatMap(t -> t.getRecords().stream())
            .map(r -> Metric.builder()
                .deviceId(String.valueOf(r.getValueByKey("device_id")))
                .timestamp(r.getTime())
                .temperature(getFieldValue(r, "temperature"))
                .humidity(getFieldValue(r, "humidity"))
                .build())
            .collect(Collectors.toList());
    }

    /**
     * 查询某设备温度的逐小时平均值。
     * 原实现构造了查询却直接返回空Map,这里补全结果解析。
     *
     * @param deviceId 设备ID
     * @param duration 查询时间范围
     * @return key为窗口时间(ISO字符串)、value为该小时均值
     */
    public Map<String, Double> getHourlyAvg(String deviceId, Duration duration) {
        QueryApi queryApi = influxDBClient.getQueryApi();
        String flux = String.format("""
            from(bucket: "sensor_data")
              |> range(start: -%ds)
              |> filter(fn: (r) => r["_measurement"] == "sensor_data")
              |> filter(fn: (r) => r["device_id"] == "%s")
              |> filter(fn: (r) => r["_field"] == "temperature")
              |> aggregateWindow(every: 1h, fn: mean)
            """, duration.toSeconds(), deviceId);
        Map<String, Double> result = new HashMap<>();
        for (FluxTable table : queryApi.query(flux, "myorg")) {
            for (FluxRecord record : table.getRecords()) {
                Object value = record.getValue();
                // 窗口内无数据时mean为null,跳过
                if (record.getTime() != null && value instanceof Number n) {
                    result.put(record.getTime().toString(), n.doubleValue());
                }
            }
        }
        return result;
    }

    /** 从pivot后的记录中读取指定字段,缺失或非数值时返回null(原文引用了该helper但未定义)。 */
    private Double getFieldValue(FluxRecord record, String field) {
        Object value = record.getValueByKey(field);
        return value instanceof Number n ? n.doubleValue() : null;
    }
}
三、TDengine实战
1. 核心概念
TDengine采用超级表+子表的设计:
超级表(Super Table)
├── 标签(Tag)- 静态属性
├── 表(Sub Table)- 每个设备一张表
└── 数据(Columns)- 时序数据
2. Docker部署
yaml
version: '3'
services:
tdengine:
image: tdengine/tdengine:3.0
container_name: tdengine
ports:
- "6030:6030" # 原生连接(taosc)
- "6041:6041" # REST API(taosAdapter)
volumes:
- ./data:/var/lib/taos
- ./log:/var/log/taos
environment:
- TZ=Asia/Shanghai
3. SQL语法
sql
-- 创建数据库(TDengine 3.0中DAYS已更名为DURATION,BLOCKS参数已移除)
CREATE DATABASE sensor_db KEEP 365 DURATION 10;
-- 创建超级表
CREATE STABLE sensor_data (
ts TIMESTAMP,
temperature FLOAT,
humidity FLOAT,
pressure FLOAT
) TAGS (
device_id BINARY(32),
location BINARY(32),
device_type BINARY(16)
);
-- 创建子表(自动创建)
CREATE TABLE t_001 USING sensor_data TAGS('sensor_001', 'beijing', 'indoor');
CREATE TABLE t_002 USING sensor_data TAGS('sensor_002', 'shanghai', 'outdoor');
-- 写入数据
INSERT INTO t_001 VALUES('2024-01-15 10:00:00.000', 25.5, 60.0, 101.3);
INSERT INTO t_001 VALUES('2024-01-15 10:01:00.000', 25.6, 60.1, 101.3);
-- 批量写入
INSERT INTO t_001 VALUES
('2024-01-15 10:00:00.000', 25.5, 60.0, 101.3),
('2024-01-15 10:01:00.000', 25.6, 60.1, 101.3),
('2024-01-15 10:02:00.000', 25.7, 60.2, 101.3);
-- 按时间窗口聚合
SELECT AVG(temperature), MAX(temperature), MIN(temperature)
FROM sensor_data
WHERE ts > '2024-01-15 00:00:00'
AND ts < '2024-01-16 00:00:00'
PARTITION BY TBNAME;
-- 按标签分组聚合
SELECT device_id, AVG(temperature), MAX(temperature)
FROM sensor_data
WHERE ts > NOW() - 1d
GROUP BY device_id;
-- 降采样查询
SELECT AVG(temperature)
FROM sensor_data
WHERE ts > NOW() - 7d
INTERVAL(1h);
-- 连续查询(TDengine 3.0中CREATE CQ已被流计算CREATE STREAM取代)
CREATE STREAM temp_avg_1h INTO temp_avg_1h AS
SELECT _wstart, AVG(temperature)
FROM sensor_data
PARTITION BY TBNAME
INTERVAL(1h);
4. Java客户端
xml
<dependency>
<groupId>com.taosdata.jdbc</groupId>
<artifactId>taos-jdbcdriver</artifactId>
<version>3.2.10</version>
</dependency>
java
@Configuration
public class TDengineConfig {

    /**
     * TDengine数据源。
     *
     * taos-jdbcdriver并未提供TDengineDataSource类(原代码无法编译),
     * 标准用法是通过JDBC URL接入:
     *  - 原生连接: jdbc:TAOS://host:6030/db   (driver: com.taosdata.jdbc.TSDBDriver)
     *  - REST连接: jdbc:TAOS-RS://host:6041/db (driver: com.taosdata.jdbc.rs.RestfulDriver)
     * 这里用Spring自带的DriverManagerDataSource演示;生产环境请换成
     * HikariCP等连接池,并把连接信息外部化到配置文件。
     */
    @Bean
    public DataSource dataSource() {
        DriverManagerDataSource ds = new DriverManagerDataSource();
        ds.setDriverClassName("com.taosdata.jdbc.TSDBDriver");
        ds.setUrl("jdbc:TAOS://localhost:6030/sensor_db");
        ds.setUsername("root");
        ds.setPassword("taosdata");
        return ds;
    }
}
@Service
@Slf4j
public class SensorDataService {

    /** 表名白名单正则:表名无法用PreparedStatement占位,拼接前必须校验防注入。 */
    private static final String SAFE_TABLE_NAME = "[A-Za-z][A-Za-z0-9_]*";

    @Autowired
    private DataSource dataSource;

    /**
     * 创建超级表(IF NOT EXISTS,幂等可重复调用)。
     * 失败时仅记录日志,与本类其他方法的best-effort风格保持一致。
     */
    public void createSuperTable() {
        String sql = """
            CREATE STABLE IF NOT EXISTS sensor_data (
                ts TIMESTAMP,
                temperature FLOAT,
                humidity FLOAT,
                pressure FLOAT
            ) TAGS (
                device_id BINARY(32),
                location BINARY(32),
                device_type BINARY(16)
            )
            """;
        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement()) {
            stmt.execute(sql);
        } catch (SQLException e) {
            log.error("创建超级表失败", e);
        }
    }

    /**
     * 向指定子表批量写入数据(子表需已通过CREATE TABLE ... USING建立)。
     *
     * 修复点:
     * - 空readings会生成"INSERT INTO x VALUES"这样的非法SQL,提前返回;
     * - 库默认时间精度是毫秒,原代码写入纳秒epoch会造成时间戳错乱;
     * - %.2f受默认Locale影响(德语环境输出"25,50"会破坏SQL),固定用Locale.ROOT;
     * - 表名拼接前做白名单校验,防SQL注入。
     *
     * @param deviceId 子表名,仅允许字母、数字、下划线
     * @param readings 读数列表,null或空直接返回
     * @throws IllegalArgumentException 表名不合法时抛出
     */
    public void insertData(String deviceId, List<SensorReading> readings) {
        if (readings == null || readings.isEmpty()) {
            return;
        }
        if (!deviceId.matches(SAFE_TABLE_NAME)) {
            throw new IllegalArgumentException("非法表名: " + deviceId);
        }
        StringBuilder sql = new StringBuilder();
        sql.append("INSERT INTO ").append(deviceId).append(" VALUES");
        for (SensorReading r : readings) {
            // NOTE(review): 假设getTimestamp()返回java.time.Instant —— 请核对SensorReading定义
            sql.append(String.format(Locale.ROOT, " (%d, %.2f, %.2f, %.2f)",
                r.getTimestamp().toEpochMilli(),
                r.getTemperature(),
                r.getHumidity(),
                r.getPressure()));
        }
        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement()) {
            stmt.execute(sql.toString());
        } catch (SQLException e) {
            log.error("写入数据失败", e);
        }
    }

    /**
     * 查询指定子表最近duration内温度的逐小时聚合。
     *
     * TDengine 3.x中窗口聚合结果的时间列是_wstart而非ts,
     * 原实现rs.getTimestamp("ts")会因列不存在而失败;
     * 聚合列统一起别名,避免依赖驱动生成的列标签。
     *
     * @param deviceId 子表名,仅允许字母、数字、下划线
     * @param duration 查询时间范围(不足1小时按1小时处理)
     * @return 每行含ts/avg_temp/max_temp/min_temp的结果列表,失败时为空列表
     */
    public List<Map<String, Object>> queryHourlyAvg(String deviceId, Duration duration) {
        if (!deviceId.matches(SAFE_TABLE_NAME)) {
            throw new IllegalArgumentException("非法表名: " + deviceId);
        }
        String sql = String.format(Locale.ROOT, """
            SELECT _wstart,
                   AVG(temperature) AS avg_temp,
                   MAX(temperature) AS max_temp,
                   MIN(temperature) AS min_temp
            FROM %s
            WHERE ts > NOW() - %dh
            INTERVAL(1h)
            """, deviceId, Math.max(1, duration.toHours()));
        List<Map<String, Object>> results = new ArrayList<>();
        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(sql)) {
            while (rs.next()) {
                Map<String, Object> row = new HashMap<>();
                row.put("ts", rs.getTimestamp("_wstart"));
                row.put("avg_temp", rs.getDouble("avg_temp"));
                row.put("max_temp", rs.getDouble("max_temp"));
                row.put("min_temp", rs.getDouble("min_temp"));
                results.add(row);
            }
        } catch (SQLException e) {
            log.error("查询失败", e);
        }
        return results;
    }

    /**
     * 按设备标签分组统计最近1天的温度均值与最大值。
     * 聚合列起别名,避免用"AVG(temperature)"这种驱动相关的列名取值。
     *
     * @return 每行含device_id/avg_temp/max_temp的结果列表,失败时为空列表
     */
    public List<Map<String, Object>> queryByTag() {
        String sql = """
            SELECT device_id,
                   AVG(temperature) AS avg_temp,
                   MAX(temperature) AS max_temp
            FROM sensor_data
            WHERE ts > NOW() - 1d
            GROUP BY device_id
            """;
        List<Map<String, Object>> results = new ArrayList<>();
        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(sql)) {
            while (rs.next()) {
                Map<String, Object> row = new HashMap<>();
                row.put("device_id", rs.getString("device_id"));
                row.put("avg_temp", rs.getDouble("avg_temp"));
                row.put("max_temp", rs.getDouble("max_temp"));
                results.add(row);
            }
        } catch (SQLException e) {
            log.error("查询失败", e);
        }
        return results;
    }
}
四、InfluxDB vs TDengine对比
| 维度 | InfluxDB | TDengine |
|---|---|---|
| 数据模型 | Measurement + Tag + Field | Super Table + Sub Table |
| SQL支持 | InfluxQL(类SQL) | 标准SQL |
| 压缩率 | 3-10x | 10-20x |
| 查询性能 | 快 | 更快(列式存储) |
| 生态 | 丰富(Telegraf等) | 发展中 |
| 集群 | 企业版支持 | 开源支持 |
| 学习成本 | 中 | 低 |
| 社区活跃度 | 高 | 较高 |
五、选型建议
选择InfluxDB的场景:
- 需要丰富的生态系统
- 需要支持Flux查询语言
- 已有Telegraf等工具
- 需要企业级支持
选择TDengine的场景:
- 超大规模时序数据(百万级设备)
- 需要高压缩比节省存储
- 已有Kafka/Flink生态
- 成本敏感
六、总结
时序数据库是处理IoT和监控数据的利器:
- InfluxDB:生态丰富,上手简单
- TDengine:超高性能,高压缩比
- 超级表设计:便于管理和聚合
- 连续查询:预聚合提升查询性能
实施建议:
- 根据数据量选择合适的数据库
- 合理设计表结构
- 使用连续查询预聚合
- 做好数据生命周期管理
个人观点,仅供参考