`distinctUntilChanged`(去重)是流处理中非常重要的操作符:只有当前值与上一个已发射的值不同时才会发射,因此它只过滤"连续重复"的值,而不是全局去重,从而避免不必要的处理和更新。
一、基本概念和用法
1. 基本语法
kotlin
// Default form: consecutive values are compared with equals()
fun <T> Flow<T>.distinctUntilChanged(): Flow<T>
// Custom comparator: return true when `new` should be treated as a repeat of `old`
fun <T> Flow<T>.distinctUntilChanged(
    areEquivalent: (old: T, new: T) -> Boolean
): Flow<T>
// Key-based comparison: this is a separate operator, distinctUntilChangedBy,
// not an overload of distinctUntilChanged
fun <T, K> Flow<T>.distinctUntilChangedBy(
    keySelector: (T) -> K
): Flow<T>
2. 工作原理
text
输入流: A → A → B → B → C → A → A
↓ ↓ ↓ ↓ ↓ ↓ ↓
distinct: ✓ ✗ ✓ ✗ ✓ ✓ ✗
输出流: A B C A
二、基本使用示例
1. 基本数据类型去重
kotlin
/**
 * Runs the default (equals-based) de-duplication over a short stream of integers
 * and prints every value that survives.
 */
fun main() = runBlocking {
    // The second 1, 2 and 3 repeat their predecessor and are dropped;
    // the later 1 follows a 2, so it is emitted again.
    val samples = listOf(1, 1, 2, 2, 1, 3, 3)
    val numbers = flow {
        for (sample in samples) {
            emit(sample)
        }
    }

    val deduplicated = numbers.distinctUntilChanged()
    deduplicated.collect { println("去重结果: $it") }
    // Values printed, in order: 1, 2, 1, 3
}
2. 字符串去重
kotlin
/**
 * De-duplicates a stream of search queries so that the same query is not
 * handled twice in a row.
 */
fun searchFlowExample() = runBlocking {
    // "apple" appears again after "banana"; it differs from its predecessor, so it passes.
    val typedQueries = listOf("apple", "apple", "banana", "banana", "apple", "cherry")
    val searchQueries = flow {
        typedQueries.forEach { emit(it) }
    }

    searchQueries
        .distinctUntilChanged()
        .collect { query ->
            println("搜索: $query")
            // A search request could be triggered here.
        }
    // Queries printed, in order: apple, banana, apple, cherry
}
三、自定义对象去重
1. 数据类去重
kotlin
/** Simple value type; being a data class, equality compares id, name and age. */
data class User(val id: Int, val name: String, val age: Int)

/** Shows that data-class (structural) equality drives the default de-duplication. */
fun main() = runBlocking {
    val users = listOf(
        User(1, "Alice", 25),
        User(1, "Alice", 25), // structurally equal to the previous value -> dropped
        User(2, "Bob", 30),
        User(1, "Alice", 26), // age differs -> emitted
        User(1, "Alice", 26), // repeat -> dropped
    )
    val userFlow = flow {
        users.forEach { emit(it) }
    }

    userFlow
        .distinctUntilChanged()
        .collect { println("User: ${it.id}, ${it.name}, ${it.age}") }
    // Prints three lines: "User: 1, Alice, 25", "User: 2, Bob, 30", "User: 1, Alice, 26"
}
2. 自定义比较逻辑
kotlin
/** Product snapshot used by the comparator and key-selector examples. */
data class Product(
    val id: String,
    val name: String,
    val price: Double,
    val lastUpdated: Long,
)

/** De-duplicates product snapshots with a comparator that ignores [Product.lastUpdated]. */
fun main() = runBlocking {
    val snapshots = listOf(
        Product("P1", "Phone", 999.99, 1000),
        Product("P1", "Phone", 899.99, 2000), // price changed -> emitted
        Product("P1", "Phone", 899.99, 3000), // only lastUpdated changed -> dropped
    )
    val productFlow = flow {
        snapshots.forEach { emit(it) }
    }

    // Two snapshots are treated as the same state when both ID and price match.
    productFlow
        .distinctUntilChanged { previous, current ->
            previous.id == current.id && previous.price == current.price
        }
        .collect { println("产品更新: ${it.name} - ${it.price}") }
    // Prints two lines: "产品更新: Phone - 999.99" and "产品更新: Phone - 899.99"
}
3. 使用 keySelector
kotlin
/**
 * De-duplicates products by a derived key.
 *
 * Key-based comparison is provided by `distinctUntilChangedBy`; passing a one-argument
 * lambda to `distinctUntilChanged` does not compile because that overload expects an
 * `(old, new) -> Boolean` comparator.
 */
fun main() = runBlocking {
    val products = flow {
        emit(Product("P1", "Phone", 999.99, 1000))
        emit(Product("P1", "Phone Pro", 1299.99, 2000)) // same ID as the previous value -> dropped when keyed by ID
        emit(Product("P2", "Tablet", 499.99, 3000))
        emit(Product("P1", "Phone", 999.99, 4000)) // ID was seen earlier, but the previous value is P2 -> emitted
    }

    // Keyed by ID only. Only *consecutive* repeats are removed, so P1 appears twice.
    products
        .distinctUntilChangedBy { it.id }
        .collect { println("按ID去重: ${it.id} - ${it.name}") }
    // Prints: P1 - Phone, P2 - Tablet, P1 - Phone

    // Keyed by several properties: a Pair compares component-wise, with no string formatting needed.
    products
        .distinctUntilChangedBy { it.id to it.price }
        .collect { println("按ID和价格去重: ${it.id} - ${it.price}") }
    // Prints: P1 - 999.99, P1 - 1299.99, P2 - 499.99, P1 - 999.99
}
四、复杂场景应用
1. 列表/集合去重
kotlin
/** De-duplicates a flow of lists, first by structural equality and then by size only. */
fun main() = runBlocking {
    val samples = listOf(
        listOf(1, 2, 3),
        listOf(1, 2, 3), // equal to the previous list -> dropped
        listOf(1, 2),
        listOf(1, 2, 3, 4),
        listOf(1, 2, 3, 4), // repeat -> dropped
    )
    val listFlow = flow {
        for (sample in samples) {
            emit(sample)
        }
    }

    // Approach 1: default comparison; List equality is element-wise.
    listFlow
        .distinctUntilChanged()
        .collect { println("列表: $it") }
    // Prints: [1, 2, 3], [1, 2], [1, 2, 3, 4]

    // Approach 2: lists of the same size are treated as repeats.
    listFlow
        .distinctUntilChanged { previous, current -> previous.size == current.size }
        .collect { println("按大小去重: $it") }
    // Prints: [1, 2, 3], [1, 2], [1, 2, 3, 4]
}
2. 网络状态管理
kotlin
/** Closed set of states a network request can be in. */
sealed class NetworkState {
    /** A request is in flight. */
    object Loading : NetworkState()

    /** The request finished and produced [data]. */
    data class Success(val data: String) : NetworkState()

    /** The request failed; [message] describes the failure. */
    data class Error(val message: String) : NetworkState()

    /** No request has been started. */
    object Idle : NetworkState()
}
/**
 * Exposes the current [NetworkState] as a UI-level state flow.
 *
 * NOTE(review): `viewModelScope` is presumably the androidx extension on `ViewModel`;
 * if so, this class must extend `ViewModel` for the snippet to compile. Confirm.
 */
class NetworkViewModel {
    private val _networkState = MutableStateFlow<NetworkState>(NetworkState.Idle)

    // No explicit distinctUntilChanged() here: a StateFlow already conflates values that are
    // equal to the current one, and applying distinctUntilChanged() to a StateFlow is
    // deprecated as having no effect.
    val uiState = _networkState
        .map { state ->
            when (state) {
                is NetworkState.Loading -> UiState.Loading
                is NetworkState.Success -> UiState.Success(state.data)
                is NetworkState.Error -> UiState.Error(state.message)
                is NetworkState.Idle -> UiState.Idle
            }
        }
        .stateIn(viewModelScope, SharingStarted.Lazily, UiState.Idle)

    /** Simulates a request: Loading, a one-second delay, then Success. */
    fun loadData() {
        viewModelScope.launch {
            _networkState.value = NetworkState.Loading
            // Simulated network request.
            delay(1000)
            _networkState.value = NetworkState.Success("Data loaded")
            // Setting an equal Success again is ignored by the StateFlow itself.
            _networkState.value = NetworkState.Success("Data loaded")
        }
    }
}
3. 实时数据流处理
kotlin
/** A single raw reading: which sensor produced it, the measured value, and when. */
data class SensorData(
    val sensorId: String,
    val value: Double,
    val timestamp: Long,
)
/**
 * Cleans up raw sensor readings and shares the processed stream.
 *
 * @param scope scope in which the shared upstream collection runs. It defaults to a fresh
 *   scope on [Dispatchers.Default], as before; pass a lifecycle-bound scope so the
 *   collection can be cancelled instead of living for the rest of the process.
 */
class SensorMonitor(
    scope: CoroutineScope = CoroutineScope(Dispatchers.Default),
) {
    private val _sensorReadings = MutableSharedFlow<SensorData>()

    val processedReadings = _sensorReadings
        .filter { it.value.isFinite() } // drop NaN and infinite readings
        .distinctUntilChanged { old, new ->
            // Equivalent (and therefore dropped) when the same sensor reports a value
            // within the threshold of the last value that was emitted.
            old.sensorId == new.sensorId &&
                abs(old.value - new.value) < CHANGE_THRESHOLD
        }
        .map { reading ->
            ProcessedReading(
                id = reading.sensorId,
                value = reading.value,
                normalized = normalize(reading.value),
                timestamp = reading.timestamp
            )
        }
        .shareIn(
            scope = scope,
            started = SharingStarted.Lazily,
            replay = 1
        )

    private companion object {
        /** Smallest absolute change in value that counts as a new reading. */
        const val CHANGE_THRESHOLD = 0.01
    }
}
五、高级用法和技巧
1. 多条件组合去重
kotlin
/** Order snapshot; a data class, so equality compares all four properties. */
data class Order(
    val id: String,
    val status: OrderStatus,
    val amount: Double,
    val customerId: String,
)

/** Status values an [Order] can carry. */
enum class OrderStatus {
    PENDING,
    PROCESSING,
    SHIPPED,
    DELIVERED,
}
/** De-duplicates order snapshots with a comparator that combines several conditions. */
fun main() = runBlocking {
    val snapshots = listOf(
        Order("O1", OrderStatus.PENDING, 100.0, "C1"),
        Order("O1", OrderStatus.PROCESSING, 100.0, "C1"), // status changed
        Order("O1", OrderStatus.PROCESSING, 150.0, "C1"), // amount changed
        Order("O1", OrderStatus.PROCESSING, 150.0, "C1"), // exact repeat -> dropped
        Order("O2", OrderStatus.PENDING, 200.0, "C2"), // different order
    )
    val orderFlow = flow {
        snapshots.forEach { emit(it) }
    }

    // A snapshot is a repeat only when every listed property matches its predecessor.
    orderFlow
        .distinctUntilChanged { previous, current ->
            previous.id == current.id &&
                previous.status == current.status &&
                previous.amount == current.amount &&
                previous.customerId == current.customerId
        }
        .collect { println("订单更新: ${it.id} - ${it.status} - ${it.amount}") }
}
2. 基于时间的去重
kotlin
/** An event with an arbitrary payload and the time at which it occurred. */
data class TimedEvent(
    val eventId: String,
    val data: Any,
    val timestamp: Long,
)
/**
 * Like `distinctUntilChanged`, but a repeated value is let through again once
 * [timeWindow] has elapsed since the last emission.
 *
 * A value is emitted when it is the first one, when it differs (by `equals`) from the last
 * emitted value, or when its timestamp is at least [timeWindow] later than the timestamp
 * of the last emitted value.
 *
 * If the timestamp is part of the element's `equals` (for example a data class with a
 * timestamp property), elements with different timestamps are never equal and nothing is
 * filtered.
 *
 * @param timeWindow minimum distance, in the unit used by [timestampExtractor], after which
 *   a repeated value is emitted again.
 * @param timestampExtractor reads the timestamp of an element.
 */
fun <T> Flow<T>.distinctUntilChangedByTime(
    timeWindow: Long,
    timestampExtractor: (T) -> Long
): Flow<T> = flow {
    // A separate flag is needed: when T is nullable, null is a legitimate "last value",
    // so `lastEmitted == null` cannot mean "nothing emitted yet".
    var hasEmitted = false
    var lastEmitted: T? = null
    var lastEmissionTime = 0L
    collect { value ->
        val currentTime = timestampExtractor(value)
        val shouldEmit = !hasEmitted ||
            value != lastEmitted ||
            currentTime - lastEmissionTime >= timeWindow
        if (shouldEmit) {
            emit(value)
            hasEmitted = true
            lastEmitted = value
            lastEmissionTime = currentTime
        }
    }
}
// Usage: a repeated event is dropped while fewer than 5000 timestamp units
// (5 seconds if timestamps are in milliseconds) have passed since the last emission.
timedEventFlow
    .distinctUntilChangedByTime(timeWindow = 5000) { event -> event.timestamp }
3. 窗口化去重
kotlin
/**
 * De-duplicates against a sliding window of the most recent upstream values.
 *
 * A value is emitted only when no value currently in the window is equivalent to it.
 * Every upstream value, emitted or not, is then appended to the window, and the oldest
 * entry is removed once the window holds more than [windowSize] values.
 *
 * @param windowSize maximum number of recent values kept for comparison.
 * @param areEquivalent called as `areEquivalent(windowValue, newValue)`; defaults to `==`.
 */
fun <T> Flow<T>.distinctInWindow(
    windowSize: Int,
    areEquivalent: (T, T) -> Boolean = { a, b -> a == b }
): Flow<T> = flow {
    val recent = ArrayDeque<T>()
    collect { candidate ->
        val seenRecently = recent.any { earlier -> areEquivalent(earlier, candidate) }
        if (!seenRecently) {
            emit(candidate)
        }

        // Slide the window forward by one element.
        recent.addLast(candidate)
        if (recent.size > windowSize) {
            recent.removeFirst()
        }
    }
}
// Usage: suppress any query that already occurred among the 10 most recent elements.
searchQueryFlow
    .distinctInWindow(windowSize = 10)
    .collect { /* ... */ }
六、与其他操作符的组合
1. 与防抖、节流组合
kotlin
/**
 * Turns the raw query text into search results, combining debounce, filtering and
 * de-duplication before the search is started.
 */
class SearchViewModel {
    private val _query = MutableStateFlow("")

    // Queries worth searching for: settled for 300 ms, at least two characters long,
    // and different from the previous query that passed the filter.
    private val searchableQueries = _query
        .debounce(300)
        .filter { text -> text.length >= 2 }
        .distinctUntilChanged()

    val searchResults = searchableQueries
        .flatMapLatest { query ->
            performSearch(query)
                .retry(2) // retry a failed search up to two times
                .catch { emit(emptyList()) } // fall back to an empty result list
        }
        .stateIn(viewModelScope, SharingStarted.Lazily, emptyList())
}
// Real-time monitoring pipeline.
// NOTE(review): throttleFirst is not one of the kotlinx.coroutines Flow operators;
// presumably it is a project-defined extension — confirm.
sensorFlow
    .throttleFirst(100) // throttle: sample at most once per 100 ms
    .filter { it.isValid } // drop invalid readings
    .distinctUntilChanged { old, new ->
        // The lambda answers "are these equivalent?": readings within 0.1 of the last
        // emitted one are repeats and get dropped, so only significant changes pass.
        abs(old.value - new.value) <= 0.1
    }
    .onEach { processData(it) }
    .collect()
2. 与转换操作符组合
kotlin
// De-duplicate after a transformation: compare the derived view, not the source object.
userFlow
    .map { user ->
        UserView(
            id = user.id,
            displayName = "${user.firstName} ${user.lastName}",
            // take(1) instead of first(): an empty name yields an empty initial rather
            // than a NoSuchElementException that would terminate the flow.
            initials = "${user.firstName.take(1)}${user.lastName.take(1)}"
        )
    }
    .distinctUntilChanged { old, new ->
        old.displayName == new.displayName
    }
    .collect()
// De-duplicate a merged stream: an item is a repeat when it has the same id and
// version as the item that came immediately before it, whichever source produced it.
val mergedFlow = merge(localDataFlow, remoteDataFlow)
    .distinctUntilChanged { previous, current ->
        previous.id == current.id && previous.version == current.version
    }
3. 与状态管理组合
kotlin
/**
 * Maps the application state to a UI state and suppresses updates in which only the
 * generated timestamp differs.
 */
class StateManager {
    private val _state = MutableStateFlow(AppState.initial())

    val uiState = _state
        .map { state ->
            UiState(
                isLoading = state.isLoading,
                data = state.data,
                error = state.error,
                timestamp = System.currentTimeMillis()
            )
        }
        .distinctUntilChanged(::sameVisibleState)
        .stateIn(viewModelScope, SharingStarted.Lazily, UiState.initial())

    /** True when [old] and [new] agree on everything except the timestamp. */
    private fun sameVisibleState(old: UiState, new: UiState): Boolean =
        old.isLoading == new.isLoading &&
            old.data == new.data &&
            old.error == new.error
}
七、性能优化和注意事项
1. 避免不必要的比较
kotlin
// Problematic: a fresh wrapper is created for every element. If DataWrapper does not
// define value-based equals() (for example, it is not a data class), every wrapper
// differs from the previous one and the de-duplication filters nothing.
dataFlow
    .map { DataWrapper(it) }
    .distinctUntilChanged()

// Better: de-duplicate the source values first and wrap only what survives.
// This avoids allocating wrappers for dropped elements, and it needs no id-keyed cache,
// which would keep returning a stale wrapper after the data behind an id changed and
// would grow without bound.
// NOTE(review): assumes the upstream values have a meaningful equals(); otherwise
// compare explicitly, e.g. distinctUntilChanged { old, new -> old.id == new.id }.
dataFlow
    .distinctUntilChanged()
    .map { DataWrapper(it) }
2. 合理选择比较策略
kotlin
// Scenario 1: exact comparison via equals() (the default)
.distinctUntilChanged()
// Scenario 2: compare a key field; key selectors use distinctUntilChangedBy
.distinctUntilChangedBy { it.id }
// Scenario 3: custom comparison logic
.distinctUntilChanged { old, new ->
    old.primaryKey == new.primaryKey &&
        old.status == new.status
}
// Scenario 4: ignore selected fields by normalising them on both sides before comparing
.distinctUntilChanged { old, new ->
    old.copy(timestamp = 0) == new.copy(timestamp = 0)
}
3. 内存管理
kotlin
/**
 * Drops elements whose key was already seen among the most recently remembered keys,
 * while bounding the memory used to remember them.
 *
 * Unlike `distinctUntilChanged`, this is not limited to consecutive repeats: an element is
 * dropped whenever its key is still among the remembered keys.
 *
 * @param keySelector derives the comparison key from an element.
 * @param cacheSize maximum number of keys remembered; the oldest key is forgotten first.
 */
class MemoryEfficientDistinct<T, K>(
    private val keySelector: (T) -> K,
    private val cacheSize: Int = 100
) {
    init {
        require(cacheSize >= 0) { "cacheSize must not be negative: $cacheSize" }
    }

    /**
     * Returns a cold flow that applies the bounded de-duplication to [flow].
     *
     * The function only builds a flow, so it is not `suspend`. Each collection gets its own
     * key cache, so concurrent or repeated collections do not share mutable state.
     */
    fun process(flow: Flow<T>): Flow<T> = flow {
        // Inside this builder lambda `flow` refers to the parameter (the upstream flow).
        val recentKeys = LinkedHashSet<K>() // iteration order = insertion order, oldest first
        flow.collect { value ->
            val key = keySelector(value)
            if (key !in recentKeys) {
                emit(value)
                recentKeys.add(key)
                // Keep the cache bounded by forgetting the oldest key.
                if (recentKeys.size > cacheSize) {
                    recentKeys.remove(recentKeys.first())
                }
            }
        }
    }
}
八、测试去重行为
1. 单元测试示例
kotlin
/** Verifies that only consecutive repeats are removed by the default operator. */
@Test
fun testDistinctUntilChanged() = runTest {
    // Arrange: the second 1 and the second 2 repeat their predecessor;
    // the later 1 follows a 2 and must be kept.
    val source = listOf(1, 1, 2, 2, 1, 3)
    val testFlow = flow {
        source.forEach { emit(it) }
    }
    val results = mutableListOf<Int>()

    // Act: collect in a child coroutine and run the scheduler until nothing is pending.
    val job = launch {
        testFlow.distinctUntilChanged().collect { results += it }
    }
    advanceUntilIdle()
    job.cancel()

    // Assert
    assertEquals(listOf(1, 2, 1, 3), results)
}
/** Verifies key-based de-duplication, which is provided by `distinctUntilChangedBy`. */
@Test
fun testDistinctUntilChangedWithKey() = runTest {
    data class Item(val id: Int, val value: String)

    val items = flow {
        emit(Item(1, "A"))
        emit(Item(1, "B")) // same ID as the previous item -> dropped
        emit(Item(2, "C"))
        emit(Item(1, "D")) // ID seen before, but it differs from the previous one -> emitted
    }
    val results = mutableListOf<Item>()

    // A one-argument key lambda is not accepted by distinctUntilChanged, which expects
    // an (old, new) -> Boolean comparator.
    items.distinctUntilChangedBy { it.id }
        .collect { results.add(it) }

    assertEquals(3, results.size)
    assertEquals(1, results[0].id)
    assertEquals(2, results[1].id)
    assertEquals(1, results[2].id)
}
2. 性能测试
kotlin
/** Times the default operator on a large flow that consists mostly of consecutive repeats. */
@Test
fun testDistinctPerformance() = runTest {
    val largeFlow = flow {
        repeat(100000) {
            // Integer division yields runs of 1000 equal values (0 x1000, 1 x1000, ...).
            // `it % 100` would cycle 0..99 and never repeat a value consecutively, so
            // nothing would be filtered and the count would be 100000.
            emit(it / 1000)
        }
    }

    val startTime = System.currentTimeMillis()
    val distinctCount = largeFlow
        .distinctUntilChanged()
        .count()
    val endTime = System.currentTimeMillis()

    println("处理耗时: ${endTime - startTime}ms")
    println("去重后数量: $distinctCount") // 100 runs -> 100 values
    assertEquals(100, distinctCount)
}
九、常见问题和解决方案
1. 数据类 equals 问题
kotlin
// Problem: the data class includes a timestamp, so two events that differ only in
// their timestamp are never equal.
data class Event(val id: String, val data: Any, val timestamp: Long)

// Solution 1: custom comparator over the fields that matter
eventFlow.distinctUntilChanged { old, new ->
    old.id == new.id && old.data == new.data
}

// Solution 2: compare timestamp-normalised copies inside the comparator.
// The copies exist only for the comparison; downstream still receives the original
// events with their real timestamps. Mapping to copy(timestamp = 0) before
// de-duplicating would erase the timestamp for every consumer.
eventFlow.distinctUntilChanged { old, new ->
    old.copy(timestamp = 0) == new.copy(timestamp = 0)
}
2. 异步数据竞争
kotlin
// Problem: StateFlow is conflated. It keeps only the latest value, so with rapid
// consecutive updates a collector may never observe the intermediate states.
val stateFlow = MutableStateFlow(State.A)

// Rapid updates
viewModelScope.launch {
    stateFlow.value = State.B
    stateFlow.value = State.C // A -> B -> C, but a collector may only see A -> C
}

// Fix: a comparator on the StateFlow cannot bring back values the StateFlow already
// conflated, and a StateFlow never re-emits a value equal to the current one.
// When every transition matters, publish through a SharedFlow, which does not conflate
// by equality, and apply the de-duplication policy there.
// It has no replay, so collectors must subscribe before values are emitted.
val stateEvents = MutableSharedFlow<State>(extraBufferCapacity = 64)

viewModelScope.launch {
    stateEvents.emit(State.B)
    stateEvents.emit(State.C)
}

stateEvents
    .distinctUntilChanged { old, new ->
        // Equal states are repeats, except LOADING, which may be emitted repeatedly.
        old == new && new != State.LOADING
    }
3. 首次发射问题
kotlin
// distinctUntilChanged always lets the first element through.
// To ignore an initial value, drop it before de-duplicating.
val flowWithInitial = flowOf("initial", "value1", "value1", "value2")

flowWithInitial
    .drop(1) // skips "initial"
    .distinctUntilChanged() // then passes "value1" once, followed by "value2"
    .collect()
十、最佳实践总结
-
选择合适的去重策略
- 简单相等比较:`.distinctUntilChanged()`
- 基于关键字段:`.distinctUntilChangedBy { it.key }`
- 自定义比较逻辑:提供比较函数 `.distinctUntilChanged { old, new -> ... }`
-
结合具体场景
- UI状态:防止界面闪烁
- 网络请求:避免重复请求
- 数据同步:忽略不必要更新
- 实时数据:过滤微小变化
-
性能考虑
- 对于大数据集,考虑使用基于key的去重
- 避免在去重前创建大量临时对象
- 合理使用缓存
-
测试验证
- 验证去重逻辑是否正确
- 测试边界条件和竞争情况
- 监控内存使用情况
distinctUntilChanged 是 Kotlin Flow 中非常实用的操作符,合理使用可以显著提高应用性能和用户体验。