Apache Flink State TTL (Time-To-Live) 详解
1. 基本概念
State TTL(Time-To-Live,生存时间)是 Flink 提供的一种状态管理机制,允许为状态设置过期时间。当状态数据超过设定的生存时间后,Flink 会自动清理这些过期数据,从而避免状态无限增长。
1.1 核心特性
- 自动清理:过期状态数据会被自动清理
- 灵活配置:支持多种更新和清理策略
- 性能优化:减少状态存储空间和访问开销
- 兼容性:适用于所有 keyed state 类型(ValueState、ListState、MapState、ReducingState、AggregatingState)
1.2 工作原理
State TTL 通过以下方式工作:
- 为每个状态值关联一个最近一次更新(或访问)的时间戳;目前 TTL 仅基于处理时间(processing time)计时
- 根据配置的 TTL 策略判断状态是否过期
- 在适当的时候清理过期状态
2. 适用场景
2.1 会话窗口状态管理
java
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
/**
 * Session-window example: groups each user's activity into event-time sessions
 * separated by a 5-second inactivity gap and prints one summary line per session.
 *
 * <p>Note: no {@code StateTtlConfig} is built anywhere in this class; the only state
 * involved is whatever the session window operator keeps internally.
 */
public class SessionStateTTLExample {

    /**
     * Builds and runs the demo pipeline.
     *
     * @param args unused command-line arguments
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Sample user-activity stream.
        DataStream<UserActivity> activities = env.fromElements(
                new UserActivity("user1", 1000L, "login"),
                new UserActivity("user1", 2000L, "browse"),
                new UserActivity("user2", 3000L, "login"),
                new UserActivity("user1", 8000L, "logout") // more than 5 s after user1's previous event
        );

        // Timestamps come straight from the event; the sample data is already in ascending order.
        WatermarkStrategy<UserActivity> watermarks = WatermarkStrategy
                .<UserActivity>forMonotonousTimestamps()
                .withTimestampAssigner((event, timestamp) -> event.timestamp);

        // Key by user, cut sessions on a 5-second gap, summarize each session.
        activities
                .assignTimestampsAndWatermarks(watermarks)
                .keyBy(activity -> activity.userId)
                .window(EventTimeSessionWindows.withGap(Time.seconds(5)))
                .process(new SessionWindowFunction())
                .print();

        env.execute("Session State TTL Example");
    }

    /**
     * Summarizes one session window: the space-separated list of actions and the
     * span between the smallest and largest event timestamp in the window.
     */
    public static class SessionWindowFunction
            extends ProcessWindowFunction<UserActivity, String, String, TimeWindow> {

        /**
         * Emits a single summary string for the given user's session.
         *
         * @param userId     key of the window
         * @param context    window context (not used)
         * @param activities all activities assigned to this session window
         * @param out        collector receiving the summary line
         */
        @Override
        public void process(String userId, Context context, Iterable<UserActivity> activities, Collector<String> out) {
            StringBuilder actions = new StringBuilder();
            long earliest = Long.MAX_VALUE;
            long latest = Long.MIN_VALUE;

            for (UserActivity activity : activities) {
                actions.append(activity.action).append(" ");
                if (activity.timestamp < earliest) {
                    earliest = activity.timestamp;
                }
                if (activity.timestamp > latest) {
                    latest = activity.timestamp;
                }
            }

            // trim() drops the trailing separator appended after the last action.
            String actionList = actions.toString().trim();
            long durationMs = latest - earliest;
            out.collect("User " + userId + " session: " + actionList
                    + " (Duration: " + durationMs + "ms)");
        }
    }

    /**
     * A single user activity record (public fields plus a no-arg constructor).
     */
    public static class UserActivity {
        public String userId;
        public long timestamp;
        public String action;

        public UserActivity() {}

        public UserActivity(String userId, long timestamp, String action) {
            this.userId = userId;
            this.timestamp = timestamp;
            this.action = action;
        }
    }
}
2.2 缓存状态管理
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
/**
 * Per-key user-profile cache kept in keyed state with a 30-minute TTL.
 *
 * <p>On a cache miss the profile is fetched (simulated) and stored; on a hit the
 * stored profile is reported. Expired entries are never returned, so an expired
 * entry behaves like a miss.
 */
public class CacheStateTTLExample
        // The nested request type must be qualified here: member classes are only in
        // scope inside the class body, not in the extends clause.
        extends KeyedProcessFunction<String, CacheStateTTLExample.UserProfileRequest, String> {

    /** Cached profile for the current key; initialized in {@link #open(Configuration)}. */
    private transient ValueState<UserProfile> cacheState;

    /**
     * Registers the TTL-enabled cache state.
     *
     * @param parameters runtime configuration (not used)
     */
    @Override
    public void open(Configuration parameters) {
        StateTtlConfig ttlConfig = StateTtlConfig
                .newBuilder(Time.minutes(30)) // entries expire after 30 minutes
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite) // timestamp refreshed on create/write only
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired) // expired entries are not returned
                .cleanupFullSnapshot() // drop expired entries when a full snapshot is taken
                .build();

        ValueStateDescriptor<UserProfile> descriptor =
                new ValueStateDescriptor<>("user-profile-cache", UserProfile.class);
        descriptor.enableTimeToLive(ttlConfig);

        cacheState = getRuntimeContext().getState(descriptor);
    }

    /**
     * Serves one profile request from the cache, loading the profile on a miss.
     *
     * @param request the incoming request
     * @param ctx     process-function context (not used)
     * @param out     collector receiving a "Cache hit"/"Cache miss" message
     * @throws Exception if state access fails
     */
    @Override
    public void processElement(UserProfileRequest request, Context ctx, Collector<String> out) throws Exception {
        UserProfile cached = cacheState.value();
        if (cached != null) {
            out.collect("Cache hit for user " + request.userId + ": " + cached);
            return;
        }

        // Miss (or expired entry): load the profile and cache it for later requests.
        UserProfile loaded = fetchUserProfileFromExternalSystem(request.userId);
        cacheState.update(loaded);
        out.collect("Cache miss for user " + request.userId + ", fetched and cached: " + loaded);
    }

    /**
     * Simulates a lookup against an external system.
     *
     * <p>NOTE(review): the sleep blocks the calling thread; a real lookup done this way
     * would stall record processing — consider an asynchronous lookup instead.
     *
     * @param userId id of the user to look up
     * @return a synthetic profile stamped with the current wall-clock time
     */
    private UserProfile fetchUserProfileFromExternalSystem(String userId) {
        try {
            Thread.sleep(100); // simulated call latency
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can observe the interruption.
            Thread.currentThread().interrupt();
        }
        return new UserProfile(userId, "Name_" + userId, System.currentTimeMillis());
    }

    /**
     * Request for a user's profile.
     */
    public static class UserProfileRequest {
        public String userId;

        public UserProfileRequest() {}

        public UserProfileRequest(String userId) {
            this.userId = userId;
        }
    }

    /**
     * Cached user profile.
     */
    public static class UserProfile {
        public String userId;
        public String name;
        public long timestamp;

        public UserProfile() {}

        public UserProfile(String userId, String name, long timestamp) {
            this.userId = userId;
            this.name = name;
            this.timestamp = timestamp;
        }

        @Override
        public String toString() {
            return "UserProfile{userId='" + userId + "', name='" + name + "', timestamp=" + timestamp + "}";
        }
    }
}
3. State TTL 配置详解
3.1 基本配置
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
/**
 * Minimal State TTL setup: a one-hour TTL with every other option left at the
 * builder's defaults.
 */
public class BasicStateTTLConfiguration {

    /**
     * Creates a String value-state descriptor named {@code "basic-ttl-state"} with a
     * one-hour TTL enabled.
     *
     * @return the TTL-enabled descriptor
     */
    public static ValueStateDescriptor<String> configureBasicTTL() {
        ValueStateDescriptor<String> stateDescriptor =
                new ValueStateDescriptor<>("basic-ttl-state", String.class);

        // Only the TTL duration is set explicitly.
        stateDescriptor.enableTimeToLive(
                StateTtlConfig.newBuilder(Time.hours(1)).build());

        return stateDescriptor;
    }
}
3.2 更新类型配置
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
/**
 * Shows the two TTL update types, i.e. which kinds of state access refresh the
 * TTL timestamp.
 */
public class UpdateTypeConfiguration {

    /**
     * {@code OnCreateAndWrite}: the timestamp is refreshed when the state is created
     * or written.
     *
     * @return descriptor {@code "on-create-write-state"} with a one-hour TTL
     */
    public static ValueStateDescriptor<String> configureOnCreateAndWrite() {
        return describe("on-create-write-state", StateTtlConfig.UpdateType.OnCreateAndWrite);
    }

    /**
     * {@code OnReadAndWrite}: the timestamp is refreshed when the state is read or
     * written.
     *
     * @return descriptor {@code "on-read-write-state"} with a one-hour TTL
     */
    public static ValueStateDescriptor<String> configureOnReadAndWrite() {
        return describe("on-read-write-state", StateTtlConfig.UpdateType.OnReadAndWrite);
    }

    /** Builds a String descriptor with a one-hour TTL and the given update type. */
    private static ValueStateDescriptor<String> describe(String stateName, StateTtlConfig.UpdateType updateType) {
        StateTtlConfig oneHourTtl = StateTtlConfig
                .newBuilder(Time.hours(1))
                .setUpdateType(updateType)
                .build();

        ValueStateDescriptor<String> stateDescriptor =
                new ValueStateDescriptor<>(stateName, String.class);
        stateDescriptor.enableTimeToLive(oneHourTtl);
        return stateDescriptor;
    }
}
3.3 状态可见性配置
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
/**
 * Shows the two TTL visibility modes, i.e. whether an expired value that has not
 * been cleaned up yet may still be returned to the reader.
 */
public class StateVisibilityConfiguration {

    /**
     * {@code NeverReturnExpired}: expired values are never returned.
     *
     * @return descriptor {@code "never-return-expired-state"} with a one-hour TTL
     */
    public static ValueStateDescriptor<String> configureNeverReturnExpired() {
        return describe(
                "never-return-expired-state",
                StateTtlConfig.StateVisibility.NeverReturnExpired);
    }

    /**
     * {@code ReturnExpiredIfNotCleanedUp}: an expired value is still returned as long
     * as it has not been cleaned up.
     *
     * @return descriptor {@code "return-expired-if-not-cleaned-state"} with a one-hour TTL
     */
    public static ValueStateDescriptor<String> configureReturnExpiredIfNotCleanedUp() {
        return describe(
                "return-expired-if-not-cleaned-state",
                StateTtlConfig.StateVisibility.ReturnExpiredIfNotCleanedUp);
    }

    /** Builds a String descriptor with a one-hour TTL and the given visibility mode. */
    private static ValueStateDescriptor<String> describe(
            String stateName, StateTtlConfig.StateVisibility visibility) {
        StateTtlConfig oneHourTtl = StateTtlConfig
                .newBuilder(Time.hours(1))
                .setStateVisibility(visibility)
                .build();

        ValueStateDescriptor<String> stateDescriptor =
                new ValueStateDescriptor<>(stateName, String.class);
        stateDescriptor.enableTimeToLive(oneHourTtl);
        return stateDescriptor;
    }
}
3.4 清理策略配置
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
/**
* State TTL 清理策略配置示例
*/
public class CleanupStrategyConfiguration {
/**
* 完整快照清理策略
* 在完整状态快照中清理过期状态
*/
public static ValueStateDescriptor<String> configureFullSnapshotCleanup() {
StateTtlConfig ttlConfig = StateTtlConfig
.newBuilder(Time.hours(1))
.cleanupFullSnapshot()
.build();
ValueStateDescriptor<String> descriptor = new ValueStateDescriptor<>(
"full-snapshot-cleanup-state",
String.class
);
descriptor.enableTimeToLive(ttlConfig);
return descriptor;
}
/**
* 增量清理策略
* 在处理过程中增量清理过期状态
*/
public static ValueStateDescriptor<String> configureIncrementalCleanup() {
StateTtlConfig ttlConfig = StateTtlConfig
.newBuilder(Time.hours(1))
.cleanupIncrementally(10, true) // 每次清理10个条目,启用清理
.build();
ValueStateDescriptor<String> descriptor = new ValueStateDescriptor<>(
"incremental-cleanup-state",
String.class
);
descriptor.enableTimeToLive(ttlConfig);
return descriptor;
}
/**
* RocksDB 紧凑清理策略
* 使用 RocksDB 紧凑操作清理过期状态
*/
public static ValueStateDescriptor<String> configureRocksDBCompactCleanup() {
StateTtlConfig ttlConfig = StateTtlConfig
.newBuilder(Time.hours(1))
.cleanupInRocksdbCompactFilter(1000) // 每1000次紧凑操作清理一次
.build();
ValueStateDescriptor<String> descriptor = new ValueStateDescriptor<>(
"rocksdb-compact-cleanup-state",
String.class
);
descriptor.enableTimeToLive(ttlConfig);
return descriptor;
}
}
4. 完整配置示例
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
/**
 * End-to-end State TTL example: remembers the last value seen per key for 30
 * minutes and emits the previous and current value for every element.
 */
public class CompleteStateTTLExample extends KeyedProcessFunction<String, String, String> {

    /** Reported as the previous value when the key has no stored value. */
    private static final String DEFAULT_VALUE = "default-value";

    /** Last value seen for the current key; initialized in {@link #open(Configuration)}. */
    private transient ValueState<String> completeTTLState;

    /**
     * Registers the TTL-enabled state.
     *
     * @param parameters runtime configuration (not used)
     */
    @Override
    public void open(Configuration parameters) {
        StateTtlConfig ttlConfig = StateTtlConfig
                .newBuilder(Time.minutes(30)) // 30-minute TTL
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite) // refresh timestamp on create/write
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired) // never return expired values
                .cleanupIncrementally(10, true) // check 10 entries per cleanup trigger, trigger on every record
                .build();

        // The descriptor constructor that takes a default value is deprecated in Flink;
        // the fallback is applied explicitly in processElement instead.
        ValueStateDescriptor<String> descriptor =
                new ValueStateDescriptor<>("complete-ttl-state", String.class);
        descriptor.enableTimeToLive(ttlConfig);

        completeTTLState = getRuntimeContext().getState(descriptor);
    }

    /**
     * Emits the previous and current value for the element's key, then stores the
     * current value.
     *
     * @param value the incoming element
     * @param ctx   context used to read the current key
     * @param out   collector receiving the summary line
     * @throws Exception if state access fails
     */
    @Override
    public void processElement(String value, Context ctx, Collector<String> out) throws Exception {
        String previousValue = completeTTLState.value();
        if (previousValue == null) {
            // No stored value (never written, or no longer visible): use the fallback.
            previousValue = DEFAULT_VALUE;
        }

        completeTTLState.update(value);

        out.collect("Key: " + ctx.getCurrentKey() + ", Previous: " + previousValue + ", Current: " + value);
    }
}
5. 不同状态类型的 TTL 配置
5.1 ValueState TTL
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
/**
 * TTL configuration for a {@code ValueState<Integer>}.
 */
public class ValueStateTTLExample {

    /**
     * Creates the descriptor {@code "value-state-ttl"} with default value {@code 0}
     * and a one-hour TTL: timestamp refreshed on create/write, expired values never
     * returned, cleanup on full snapshot.
     *
     * @return the TTL-enabled descriptor
     */
    public static ValueStateDescriptor<Integer> configureValueStateTTL() {
        // NOTE(review): the constructor taking a default value is marked deprecated in
        // Flink's Javadoc; it is kept so callers still see the same default.
        ValueStateDescriptor<Integer> stateDescriptor =
                new ValueStateDescriptor<>("value-state-ttl", Integer.class, 0);

        StateTtlConfig.Builder ttl = StateTtlConfig.newBuilder(Time.hours(1));
        ttl.setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite);
        ttl.setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired);
        ttl.cleanupFullSnapshot();

        stateDescriptor.enableTimeToLive(ttl.build());
        return stateDescriptor;
    }
}
5.2 ListState TTL
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.time.Time;
/**
 * TTL configuration for a {@code ListState<String>}.
 */
public class ListStateTTLExample {

    /**
     * Creates the descriptor {@code "list-state-ttl"} with a two-hour TTL: timestamp
     * refreshed on create/write, expired values never returned, incremental cleanup.
     *
     * @return the TTL-enabled descriptor
     */
    public static ListStateDescriptor<String> configureListStateTTL() {
        ListStateDescriptor<String> stateDescriptor =
                new ListStateDescriptor<>("list-state-ttl", String.class);

        StateTtlConfig.Builder ttl = StateTtlConfig.newBuilder(Time.hours(2));
        ttl.setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite);
        ttl.setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired);
        ttl.cleanupIncrementally(5, true); // check 5 entries per cleanup trigger, trigger on every record

        stateDescriptor.enableTimeToLive(ttl.build());
        return stateDescriptor;
    }
}
5.3 MapState TTL
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.time.Time;
/**
 * TTL configuration for a {@code MapState<String, Integer>}.
 */
public class MapStateTTLExample {

    /**
     * Creates the descriptor {@code "map-state-ttl"} with a three-hour TTL: timestamp
     * refreshed on read and write, expired-but-not-yet-cleaned values still returned,
     * cleanup in the RocksDB compaction filter.
     *
     * @return the TTL-enabled descriptor
     */
    public static MapStateDescriptor<String, Integer> configureMapStateTTL() {
        MapStateDescriptor<String, Integer> stateDescriptor =
                new MapStateDescriptor<>("map-state-ttl", String.class, Integer.class);

        StateTtlConfig.Builder ttl = StateTtlConfig.newBuilder(Time.hours(3));
        ttl.setUpdateType(StateTtlConfig.UpdateType.OnReadAndWrite);
        ttl.setStateVisibility(StateTtlConfig.StateVisibility.ReturnExpiredIfNotCleanedUp);
        ttl.cleanupInRocksdbCompactFilter(100);

        stateDescriptor.enableTimeToLive(ttl.build());
        return stateDescriptor;
    }
}
6. 实际应用场景
6.1 会话超时检测
java
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.util.Collector;
/**
 * Session-timeout detection: reports a timeout once a user has produced no event
 * for five seconds of event time.
 *
 * <p>The timeout itself is detected with event-time timers. State TTL only acts as
 * a safety net that eventually discards the last-activity entry of keys whose
 * timeout never gets reported.
 */
public class SessionTimeoutDetection {

    /**
     * Builds and runs the demo pipeline.
     *
     * @param args unused command-line arguments
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Sample user-event stream.
        DataStream<UserEvent> events = env.fromElements(
                new UserEvent("user1", 1000L, "login"),
                new UserEvent("user1", 2000L, "browse"),
                new UserEvent("user2", 3000L, "login"),
                new UserEvent("user1", 4000L, "browse"),
                new UserEvent("user2", 5000L, "browse")
                // user1 times out at 9000 ms (no activity for 5 seconds after 4000 ms)
        );

        // Key by user and run the timeout detector.
        events
                .assignTimestampsAndWatermarks(WatermarkStrategy
                        .<UserEvent>forMonotonousTimestamps()
                        .withTimestampAssigner((event, timestamp) -> event.timestamp))
                .keyBy(event -> event.userId)
                .process(new SessionTimeoutFunction())
                .print();

        env.execute("Session Timeout Detection");
    }

    /**
     * Tracks the last activity time per user and emits a message when the session
     * times out.
     */
    public static class SessionTimeoutFunction extends KeyedProcessFunction<String, UserEvent, String> {

        /** Inactivity period, in event-time milliseconds, after which a session times out. */
        private static final long SESSION_TIMEOUT_MS = 5000L;

        /** Timestamp of the latest event for the current key; initialized in {@code open}. */
        private transient ValueState<Long> lastActivityTimeState;

        /**
         * Registers the TTL-enabled last-activity state.
         *
         * @param parameters runtime configuration (not used)
         */
        @Override
        public void open(Configuration parameters) {
            // State TTL is measured in processing time while the timeout timers run in
            // event time. With a TTL equal to the session timeout and NeverReturnExpired,
            // the entry could already be hidden when the timeout timer fires, and the
            // timeout would be silently lost. The TTL is therefore kept well above the
            // session timeout and only serves as a cleanup safety net.
            StateTtlConfig ttlConfig = StateTtlConfig
                    .newBuilder(Time.minutes(1))
                    .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                    .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                    .cleanupFullSnapshot()
                    .build();

            ValueStateDescriptor<Long> descriptor =
                    new ValueStateDescriptor<>("last-activity-time", Long.class);
            descriptor.enableTimeToLive(ttlConfig);

            lastActivityTimeState = getRuntimeContext().getState(descriptor);
        }

        /**
         * Records the event as the user's latest activity and schedules a timeout check.
         *
         * @param event the incoming user event
         * @param ctx   context giving access to the timer service
         * @param out   collector receiving the activity message
         * @throws Exception if state access fails
         */
        @Override
        public void processElement(UserEvent event, Context ctx, Collector<String> out) throws Exception {
            lastActivityTimeState.update(event.timestamp);

            // Check for a timeout one timeout period after this event. Timers of earlier
            // events stay registered; onTimer ignores them because the stored activity
            // time is then too recent.
            ctx.timerService().registerEventTimeTimer(event.timestamp + SESSION_TIMEOUT_MS);

            out.collect("User " + event.userId + " performed " + event.action + " at " + event.timestamp);
        }

        /**
         * Emits a timeout message if no activity was recorded during the timeout period
         * preceding the timer.
         *
         * @param timestamp the timer's firing timestamp
         * @param ctx       timer context used to read the current key
         * @param out       collector receiving the timeout message
         * @throws Exception if state access fails
         */
        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
            Long lastActivityTime = lastActivityTimeState.value();
            if (lastActivityTime != null && timestamp - lastActivityTime >= SESSION_TIMEOUT_MS) {
                out.collect("Session timeout for user " + ctx.getCurrentKey() + " at " + timestamp);
                // The session is over: release the entry now instead of waiting for TTL.
                lastActivityTimeState.clear();
            }
        }
    }

    /**
     * A single user event (public fields plus a no-arg constructor).
     */
    public static class UserEvent {
        public String userId;
        public long timestamp;
        public String action;

        public UserEvent() {}

        public UserEvent(String userId, long timestamp, String action) {
            this.userId = userId;
            this.timestamp = timestamp;
            this.action = action;
        }
    }
}
6.2 热点数据统计
java
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
/**
 * Hot-item statistics: counts occurrences of each item id under the current key
 * and flags an item every time its count reaches a multiple of 100.
 *
 * <p>Counts live in a map state with a 10-minute TTL, so items that are no longer
 * updated stop being counted and are eventually cleaned up.
 */
public class HotDataStatistics
        // The nested item type must be qualified here: member classes are only in
        // scope inside the class body, not in the extends clause.
        extends KeyedProcessFunction<String, HotDataStatistics.DataItem, String> {

    /** Occurrence count per item id for the current key; initialized in {@code open}. */
    private transient MapState<String, Integer> itemCountState;

    /**
     * Registers the TTL-enabled count state.
     *
     * @param parameters runtime configuration (not used)
     */
    @Override
    public void open(Configuration parameters) {
        StateTtlConfig ttlConfig = StateTtlConfig
                .newBuilder(Time.minutes(10)) // 10-minute TTL
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                .cleanupIncrementally(20, true) // check 20 entries per cleanup trigger, trigger on every record
                .build();

        MapStateDescriptor<String, Integer> descriptor =
                new MapStateDescriptor<>("item-count", String.class, Integer.class);
        descriptor.enableTimeToLive(ttlConfig);

        itemCountState = getRuntimeContext().getMapState(descriptor);
    }

    /**
     * Increments the item's count and emits the running total, plus a "HOT ITEM"
     * line on every 100th occurrence.
     *
     * @param item the incoming data item
     * @param ctx  context used to read the current key (presumably the category —
     *             verify against the caller's {@code keyBy})
     * @param out  collector receiving the statistics lines
     * @throws Exception if state access fails
     */
    @Override
    public void processElement(DataItem item, Context ctx, Collector<String> out) throws Exception {
        Integer storedCount = itemCountState.get(item.itemId);
        // A missing (or expired) entry counts as zero.
        int updatedCount = (storedCount == null ? 0 : storedCount) + 1;
        itemCountState.put(item.itemId, updatedCount);

        out.collect("Category " + ctx.getCurrentKey() + " - Item " + item.itemId +
                ": " + updatedCount + " times");

        // Flag the item each time its count reaches a multiple of 100.
        if (updatedCount % 100 == 0) {
            out.collect("HOT ITEM in category " + ctx.getCurrentKey() + ": " + item.itemId +
                    " (count: " + updatedCount + ")");
        }
    }

    /**
     * A data item identified by category and item id.
     */
    public static class DataItem {
        public String category;
        public String itemId;

        public DataItem() {}

        public DataItem(String category, String itemId) {
            this.category = category;
            this.itemId = itemId;
        }
    }
}
7. 最佳实践建议
7.1 配置建议
- 合理设置 TTL 时间:
  - 根据业务需求设置合适的过期时间
  - 避免设置过短的 TTL 导致频繁清理
  - 避免设置过长的 TTL 导致状态膨胀
- 选择合适的更新类型:
  - OnCreateAndWrite(默认):仅在创建和写入时刷新时间戳,读取不会延长状态寿命;适用于写入频繁但读取较少的场景
  - OnReadAndWrite:读取和写入都会刷新时间戳;适用于“只要仍被访问就应保留”的场景
- 选择合适的可见性策略:
  - NeverReturnExpired(默认):过期数据即使尚未被物理清理也不会返回
  - ReturnExpiredIfNotCleanedUp:过期但尚未被清理的数据仍可被读取
7.2 性能优化建议
- 清理策略选择:
  - 小状态:使用 cleanupFullSnapshot(只减小完整快照的体积,不会清理本地状态)
  - 中等状态:使用 cleanupIncrementally(仅对基于堆内存的状态后端生效)
  - 大状态(RocksDB):使用 cleanupInRocksdbCompactFilter
- 增量清理配置:
  - 合理设置每次清理的条目数
  - 避免清理操作影响主处理流程
- 监控和调优:
  - 监控状态大小和增长趋势
  - 根据实际使用情况调整 TTL 配置
7.3 注意事项
- 状态恢复:
  - 从检查点恢复时,过期状态可能仍存在
  - 需要处理恢复后状态的清理
- 兼容性:
  - State TTL 需要 Flink 1.6+ 版本支持
  - 确保所有节点版本一致
- 测试验证:
  - 充分测试 TTL 行为
  - 验证清理策略的有效性
通过合理使用 State TTL,可以有效管理 Flink 应用程序中的状态生命周期,避免状态无限增长,提高系统性能和稳定性。