准备数据
java
// Sample input: "li"/"yang" share one phone number, "jiang"/"huang" share another,
// and "han" has a number nobody else uses.
List<Person> persons = Arrays.asList(
        new Person("li", 34, "135555"),
        new Person("jiang", 33, "1366666"),
        new Person("yang", 22, "135555"),
        new Person("huang", 32, "1366666"),
        new Person("han", 31, "1376666"));
java
/**
 * Plain data holder used by every example: a person with a name, an age and a phone number.
 *
 * NOTE(review): the examples call getName(), getAge() and getPhoneNumber(), none of which are
 * written out here - presumably they are generated by the {@code @Data} annotation (Lombok);
 * confirm the annotation processor is configured.
 * NOTE(review): the class is declared static, so it is expected to be nested inside an
 * enclosing class that this snippet does not show.
 */
@Data
public static class Person {
// Display name of the person.
private String name;
// Age of the person.
private int age;
// Phone number; the attribute most examples use to detect duplicates.
private String phoneNumber;
/**
 * Creates a person with all three attributes set.
 *
 * @param name        the person's name
 * @param age         the person's age
 * @param phoneNumber the person's phone number
 */
public Person(String name, int age, String phoneNumber) {
this.name = name;
this.age = age;
this.phoneNumber = phoneNumber;
}
}
方式一、使用Collectors.groupingBy按属性分组后筛选计数大于1的条目
根据单个属性提取重复对象
java
// Step 1: bucket the persons by phone number (phone number -> persons sharing it).
Map<String, List<Person>> personsByPhone = persons.stream()
        .collect(Collectors.groupingBy(Person::getPhoneNumber));
// Step 2: keep only the buckets holding more than one person and flatten them into one list.
List<Person> list = personsByPhone.values().stream()
        .filter(sameNumber -> sameNumber.size() > 1)
        .flatMap(sameNumber -> sameNumber.stream())
        .collect(Collectors.toList());
System.out.println(JSON.toJSONString(list));
根据多个属性提取重复对象
通过自定义组合键(如 name + age)按多个属性分组,提取重复对象:
java
List<Person> duplicates = persons.stream()
.collect(Collectors.groupingBy(
p -> Arrays.asList(p.getName(), p.getAge()),
Collectors.toList()
))
.values().stream()
.filter(group -> group.size() > 1)
.flatMap(List::stream)
.collect(Collectors.toList());
转为map
这会返回属性值(如phoneNumber)与对应对象列表的Map;经过过滤后,Map中只保留value.size()>1(即该属性值重复)的条目
java
// Phone number -> all persons registered under it.
Map<String, List<Person>> groupedByPhone = persons.stream()
        .collect(Collectors.groupingBy(Person::getPhoneNumber, Collectors.toList()));
// Copy over only the entries whose list holds more than one person, i.e. the duplicated numbers.
Map<String, List<Person>> map = groupedByPhone.entrySet().stream()
        .filter(phoneGroup -> phoneGroup.getValue().size() > 1)
        .collect(Collectors.toMap(
                phoneGroup -> phoneGroup.getKey(),
                phoneGroup -> phoneGroup.getValue()));
System.out.println(JSON.toJSONString(map));
提取重复属性值:转为字符串"1,2,3,4"的形式
java
// Phone number -> persons using it.
Map<String, List<Person>> phoneGroups = persons.stream()
        .collect(Collectors.groupingBy(Person::getPhoneNumber));
// Take the phone numbers used by more than one person and join them with commas.
String duplicates = phoneGroups.entrySet().stream()
        .filter(phoneGroup -> phoneGroup.getValue().size() > 1)
        .map(phoneGroup -> phoneGroup.getKey())
        .collect(Collectors.joining(","));
System.out.println(duplicates);
方式二、使用toMap计数过滤。
结合Collectors.toMap统计出现次数:
直接返回重复的属性值列表
java
// Count how often each phone number occurs: every person contributes 1 and
// counts for the same number are added together.
Map<String, Integer> occurrences = persons.stream()
        .collect(Collectors.toMap(
                Person::getPhoneNumber,
                person -> 1,
                (left, right) -> left + right));
// A count above 1 marks a phone number that is used more than once.
List<String> duplicateValues = occurrences.entrySet().stream()
        .filter(count -> count.getValue() > 1)
        .map(count -> count.getKey())
        .collect(Collectors.toList());
System.out.println(duplicateValues);
提取重复属性值:转为字符串"1,2,3,4"的形式
java
String dupNumbers = persons.stream()
.collect(Collectors.toMap(
Person::getPhoneNumber,
e -> 1,
Integer::sum
))
.entrySet()
.stream()
.filter(entry -> entry.getValue() > 1)
.map(Map.Entry::getKey)
.collect(Collectors.joining(","));
方式三、根据特定属性去重。
若需基于属性去重而非提取重复值,可用:
通过TreeSet按phoneNumber去重
java
// Two persons count as "the same" for the set when their phone numbers compare equal.
Comparator<Person> byPhoneNumber = Comparator.comparing(Person::getPhoneNumber);
// Collecting into a TreeSet ordered by that comparator keeps one person per phone number;
// the set (and therefore the final list) is sorted by phone number.
TreeSet<Person> onePerPhoneNumber = persons.stream()
        .collect(Collectors.toCollection(() -> new TreeSet<>(byPhoneNumber)));
List<Person> distinct = new ArrayList<>(onePerPhoneNumber);
System.out.println(distinct);
完整代码示例:
java
public static void main(String[] args) {
// 原始列表
List<Person> persons = Arrays.asList(new Person("li", 34, "135555"), new Person("jiang", 33, "1366666"), new Person("yang", 22, "135555"), new Person("huang", 32, "1366666"), new Person("han", 31, "1376666"));
//方式一、使用Collectors.groupingBy按属性分组后筛选计数大于1的条目
List<Person> list =
persons.stream()
.collect(Collectors.groupingBy(Person::getPhoneNumber,Collectors.toList()))
.values()
.stream()
.filter(group -> group.size() > 1)
.flatMap(List::stream)
.collect(Collectors.toList());
System.out.println(JSON.toJSONString(list));
//转为map,这会返回属性值(如phoneNumber)与对应对象列表的Map,其中value.size()>1表示该属性值重复
Map<String, List<Person>> map = persons.stream()
.collect(Collectors.groupingBy(Person::getPhoneNumber))
.entrySet()
.stream()
.filter(entry -> entry.getValue().size() > 1)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
System.out.println(JSON.toJSONString(map));
//提取重复属性值
String duplicates =
persons.stream()
.collect(Collectors.groupingBy(Person::getPhoneNumber, Collectors.toList()))
.entrySet()
.stream()
.filter(en -> en.getValue().size() > 1)
.map(Map.Entry::getKey)
.collect(Collectors.joining(","));
System.out.println(duplicates);
//方式二、使用toMap计数过滤。结合Collectors.toMap统计出现次数:
//直接返回重复的属性值列表
List<String> duplicateValues = persons.stream()
.collect(Collectors.toMap(
Person::getPhoneNumber,
e -> 1,
Integer::sum
))
.entrySet()
.stream()
.filter(entry -> entry.getValue() > 1)
.map(Map.Entry::getKey)
.collect(Collectors.toList());
System.out.println(duplicateValues);
//转为字符串形式:"1,2,3,4"的形式
String dupNumbers = persons.stream()
.collect(Collectors.toMap(
Person::getPhoneNumber,
e -> 1,
Integer::sum
))
.entrySet()
.stream()
.filter(entry -> entry.getValue() > 1)
.map(Map.Entry::getKey)
.collect(Collectors.joining(","));
//根据特定属性去重。若需基于属性去重而非提取重复值,可用:
//通过TreeSet按phoneNumber去重
List<Person> distinct = persons.stream()
.collect(Collectors.collectingAndThen(
Collectors.toCollection(
() -> new TreeSet<>(Comparator.comparing(Person::getPhoneNumber))
),
ArrayList::new
));
System.out.println(distinct);
@Data
public static class Person {
private String name;
private int age;
private String phoneNumber;
public Person(String name, int age, String phoneNumber) {
this.name = name;
this.age = age;
this.phoneNumber = phoneNumber;
}
}
}
注意事项
- 作为分组键的对象(属性值或自定义组合键)需正确实现 equals() 和 hashCode() 方法,否则分组可能失效;
- 并行流处理时需注意线程安全问题;
- 复杂对象建议使用 Comparator.comparing() 指定比较逻辑。