ES使用聚合aggregations实战(2025.04.02更新)

**前提:** 本文使用的环境如下:

elasticsearch-7.16.3

kibana-7.16.3

elasticsearch-analysis-ik-7.16.3

logstash-7.16.3

pom.xml文件引入

xml 复制代码
<!-- Java Low Level REST Client: transport layer used by the high-level client. -->
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-client</artifactId>
    <version>7.16.3</version>
</dependency>

<!-- Java High Level REST Client (version must match the ES cluster, 7.16.3).
     NOTE(review): deprecated since Elasticsearch 7.15 in favor of the new
     Java API Client; fine on a 7.16.x cluster, but plan a migration before 8.x. -->
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.16.3</version>
</dependency>

同步表数据stu_sign学生签到记录表结构

复制代码
-- Student sign-in record table; rows are synced to the "stu_sign" ES index by Logstash.
-- NOTE(review): class_begin_time / class_end_time / appeal_time / modify_time store
--   datetimes as VARCHAR; the ES mapping compensates by declaring them as `date`.
--   Range comparisons in MySQL only work because the stored format sorts
--   lexicographically — confirm the format is consistent.
-- NOTE(review): `sing_date` looks like a typo for `sign_date`; kept as-is because
--   existing sync/queries reference the column name.
-- NOTE(review): consider utf8mb4 instead of utf8 (MySQL's utf8 is 3-byte and cannot
--   store all of Unicode, e.g. emoji in appeal_msg).
CREATE TABLE `stu_sign` (
 `id` int(11) NOT NULL AUTO_INCREMENT,
 `create_date` datetime DEFAULT NULL,
 `modify_date` datetime DEFAULT NULL, -- incremental-sync tracking column for Logstash
 `class_num_begin` varchar(32) DEFAULT NULL,
 `class_num_end` varchar(32) DEFAULT NULL,
 `classroom` varchar(128) DEFAULT NULL,
 `classroom_id` int(11) DEFAULT NULL,
 `course_id` int(11) DEFAULT NULL,
 `course_name` varchar(256) DEFAULT NULL,
 `course_sched_id` int(11) DEFAULT NULL,
 `roll_call_date` datetime DEFAULT NULL,
 `semester_id` int(11) DEFAULT NULL,
 `sign_status` int(11) DEFAULT NULL,
 `sing_date` datetime DEFAULT NULL,
 `stu_id` int(11) DEFAULT NULL,
 `stu_name` varchar(256) DEFAULT NULL,
 `teach_time` datetime DEFAULT NULL,
 `teach_time_str` varchar(32) DEFAULT NULL,
 `teacher_id` int(11) DEFAULT NULL,
 `teacher_name` varchar(256) DEFAULT NULL,
 `roll_call_status` int(11) DEFAULT NULL,
 `stu_num` varchar(64) DEFAULT NULL,
 `stu_sign_status` int(11) DEFAULT NULL,
 `academy_id` int(11) DEFAULT NULL,
 `academy_superior_id` int(11) DEFAULT NULL,
 `class_id` int(11) DEFAULT NULL,
 `grade_id` varchar(50) DEFAULT NULL,
 `class_begin_time` varchar(100) DEFAULT '',
 `class_end_time` varchar(100) DEFAULT '',
 `sign_type` int(11) DEFAULT NULL,
 `appeal_msg` varchar(255) DEFAULT NULL,
 `appeal_status` int(11) DEFAULT NULL,
 `appeal_time` varchar(255) DEFAULT NULL,
 `late_status` int(11) DEFAULT NULL,
 `leave_status` int(11) DEFAULT NULL,
 `absence_status` int(11) DEFAULT NULL COMMENT '1、事假    2、病假   3、公假',
 `modify_userId` varchar(255) DEFAULT NULL COMMENT '修改人id',
 `modify_userName` varchar(256) DEFAULT NULL COMMENT '修改人姓名',
 `modify_time` varchar(100) DEFAULT NULL COMMENT '修改时间',
 PRIMARY KEY (`id`),
 KEY `schedIdIndex` (`course_sched_id`) USING BTREE,
 -- removed: `index_course_sched_id` was an exact duplicate of `schedIdIndex`
 -- (same single column `course_sched_id`); duplicate indexes only cost write time.
 KEY `index_course_id` (`course_id`) USING BTREE,
 KEY `index_semester_id` (`semester_id`) USING BTREE,
 KEY `index_sign_status` (`sign_status`) USING BTREE,
 KEY `index_stu_id` (`stu_id`) USING BTREE,
 KEY `index_teacher_id` (`teacher_id`) USING BTREE,
 KEY `index_stu_num` (`stu_num`) USING BTREE,
 KEY `index_class_num_begin` (`class_num_begin`) USING BTREE,
 KEY `index_class_num_end` (`class_num_end`) USING BTREE,
 -- NOTE(review): `class_begin_time_index` is a leftmost prefix of the two composite
 -- indexes below and is likely redundant; verify with EXPLAIN before dropping.
 KEY `class_begin_time_index` (`class_begin_time`),
 KEY `class_end_time_index` (`class_end_time`),
 KEY `class_id_index` (`class_id`),
 KEY `teach_time_index` (`teach_time`),
 KEY `class_begin_end_time_index` (`class_begin_time`,`class_end_time`,`teach_time`),
 KEY `course_sched_id_stu_id_index` (`course_sched_id`,`stu_id`),
 KEY `idx_stu_sign_class_begin_time_course_id_stu_id` (`class_begin_time`,`course_id`,`stu_id`)
) ENGINE=InnoDB AUTO_INCREMENT=7923 DEFAULT CHARSET=utf8;

在es中的kibana执行创建索引对应mapping,由于很多查询基于class_begin_time字段去做,但是在表结构中该字段存储为varchar,特殊的设置其mapping的type为date和format时间格式

稍微解释一下下面的配置文件:根据追踪日志的文件last_run_stu_login.txt中记录的时间,并根据字段modify_date去增量同步stu_sign表中的数据,并设置每五分钟同步一次

复制代码
input {
   jdbc {
     # Database connection settings.
     # NOTE(review): credentials are committed in plain text — move them to
     # environment variables or the Logstash keystore before publishing.
     jdbc_connection_string => "jdbc:mysql://188.18.66.185:3306/eschool?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai"
     jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
     jdbc_user => "root"
     jdbc_password => "JYD.2015.internet"
     jdbc_paging_enabled => "true"
     jdbc_page_size => "50000"
     # NOTE(review): jdbc_default_timezone is UTC while the JDBC URL pins
     # serverTimezone=Asia/Shanghai — confirm synced timestamps are not shifted by 8h.
     jdbc_default_timezone => "UTC"
 	  # Path to the MySQL JDBC driver jar.
     jdbc_driver_library => "D:\environment\apache-maven-3.8.8\maven_repository\mysql\mysql-connector-java\8.0.27\mysql-connector-java-8.0.27.jar"
     # Incremental sync: pull only rows whose modify_date is newer than the last
     # run (:sql_last_value is read from / persisted to last_run_metadata_path).
     statement => "SELECT * FROM `stu_sign` WHERE modify_date > :sql_last_value AND modify_date < NOW() "
     use_column_value => true
     tracking_column => "modify_date"
     tracking_column_type => "timestamp"
     last_run_metadata_path => "E:\software\logstash\logstash-7.16.3\conf\metadata\last_run_stu_login.txt"
     # Run every 5 minutes.
     schedule => "*/5 * * * *"
     lowercase_column_names => false
   }
}
output {
   elasticsearch {
       hosts => ["127.0.0.1:9200"]
       index => "stu_sign"
       # Use the MySQL primary key as document id so re-synced rows upsert
       # instead of duplicating.
       document_id => "%{id}"
   }
   stdout {
       codec => json_lines
   }
}

在logstash文件下bin目录下启动命令

复制代码
logstash.bat -f E:\software\logstash\logstash-7.16.3\conf\stu_sign.conf

需求一:根据stu_id分组统计时间段内的各个签到率出勤率迟到率等

完成分页操作,并将统计结果按照正常签到率降序排序

对应mapper.xml方法中的sql代码为

xml 复制代码
<select id="getStuAttendanceListJson" resultType="java.util.Map">
   <!--
     Per-student attendance statistics for classes that have already started,
     optionally filtered by college, date window, and a stu_num/stu_name keyword.
     NOTE(review): stu_num, stu_name and a.NAME are selected but not in GROUP BY;
     this only works with MySQL's ONLY_FULL_GROUP_BY disabled - confirm sql_mode.
     NOTE(review): FORMAT() returns a locale-formatted STRING (it would insert
     thousands separators at >= 1000); harmless for percentages but the *_lv
     columns are strings, not numbers.
     NOTE(review): class_begin_time is a VARCHAR compared against CONCAT'd date
     strings and now() - relies on a lexicographically sortable datetime format.
     The final ORDER BY sorts by normal sign-in rate descending.
   -->
   SELECT
   ifnull(a.NAME,'') AS college_name,
   s.stu_id,
   s.stu_num,
   s.stu_name,
   count( s.id ) AS total_num,(
   ifnull( sum( sign_status ), 0 )- ifnull( sum( late_status ), 0 )) AS yes_num,
   ifnull( sum( late_status ), 0 ) AS late_num,(
   count( s.id )- ifnull( sum( sign_status ), 0 )) AS no_num,
   FORMAT((( ifnull( sum( sign_status ), 0 )- ifnull( sum( late_status ), 0 ))/ count( s.id )* 100 ), 1 ) AS
   yes_lv,
   FORMAT(( ifnull( sum( late_status ), 0 )/ count( s.id )* 100 ), 1 ) AS late_lv,
   FORMAT((( count( s.id )- ifnull( sum( sign_status ), 0 ))/ count( s.id )* 100 ), 1 ) AS no_lv
   FROM
   eschool.stu_sign s
   LEFT JOIN eschool.USER u ON s.stu_id = u.id
   LEFT JOIN eschool.academy a ON a.id = u.academy_id
   WHERE
   s.class_begin_time &lt; now()
   <if test="t.collegeId!=null and t.collegeId!=''">
       and u.academy_id = #{t.collegeId}
   </if>
   <if test="t.beginDate!=null and t.beginDate!=''">
       and s.class_begin_time &gt; CONCAT(#{t.beginDate},' 00:00:01')
   </if>
   <if test="t.endDate!=null and t.endDate!=''">
       and s.class_begin_time &lt; CONCAT(#{t.endDate},' 23:59:59')
   </if>
   <if test="t.keyWord!=null and t.keyWord!=''">
       and (s.stu_num like concat('%',#{t.keyWord},'%') or s.stu_name like concat('%',#{t.keyWord},'%'))
   </if>
   GROUP BY s.stu_id ORDER BY (( ifnull(sum(sign_status), 0) - ifnull(sum(late_status), 0)) / count(s.id)) DESC
</select>

java实现代码如下:

java 复制代码
/**
 * Requirement 1: per-student attendance statistics over a time window, computed
 * with an ES terms aggregation on stu_id (the ES equivalent of the MySQL
 * GROUP BY query in the mapper), then sorted by normal sign-in rate descending
 * and paged in memory.
 *
 * @param requestParam optional filters: collegeId, beginDate/endDate
 *                     (yyyy-MM-dd), keyWord (substring match on stu_num or
 *                     stu_name), plus page (1-based) and pageSize
 * @return success response with total_count / total_pages / size / list,
 *         or a failure response carrying the exception message
 */
public BaseResponse getStuAttendanceListJson(UserSignRequest requestParam) {
      Map<String, Object> result = new HashMap<>();
      try {
          SearchRequest searchRequest = new SearchRequest("stu_sign");
          SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
          BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();

          // Only classes that have already started; narrow further when both
          // beginDate and endDate are supplied.
          RangeQueryBuilder timeRangeQuery = QueryBuilders.rangeQuery("class_begin_time").lt("now");
          if (requestParam.getBeginDate() != null && requestParam.getEndDate() != null) {
              timeRangeQuery.gte(requestParam.getBeginDate() + " 00:00:00")
                      .lte(requestParam.getEndDate() + " 23:59:59");
          }
          boolQueryBuilder.must(timeRangeQuery);

          if (requestParam.getCollegeId() != null) {
              boolQueryBuilder.must(QueryBuilders.termQuery("academy_id", requestParam.getCollegeId()));
          }
          // Keyword search on student number / name (equivalent of SQL LIKE '%kw%').
          if (StringUtils.isNotBlank(requestParam.getKeyWord())) {
              BoolQueryBuilder keywordQuery = QueryBuilders.boolQuery()
                      .should(QueryBuilders.wildcardQuery("stu_name.keyword", "*" + requestParam.getKeyWord() + "*"))
                      .should(QueryBuilders.wildcardQuery("stu_num.keyword", "*" + requestParam.getKeyWord() + "*"))
                      .minimumShouldMatch(1);
              boolQueryBuilder.must(keywordQuery);
          }
          searchSourceBuilder.query(boolQueryBuilder);

          // top_hits(size=1) carries the per-student display fields alongside the sums.
          TopHitsAggregationBuilder topHits = AggregationBuilders.topHits("student_info")
                  .size(1)
                  .fetchField("stu_num")
                  .fetchField("stu_name")
                  .fetchField("academy_id");
          TermsAggregationBuilder aggregation = AggregationBuilders.terms("group_by_stu_id")
                  .field("stu_id")
                  .size(10000)  // upper bound on distinct students; tune for the real cardinality
                  .subAggregation(AggregationBuilders.sum("sum_sign").field("sign_status"))
                  .subAggregation(AggregationBuilders.sum("sum_late").field("late_status"))
                  .subAggregation(topHits);

          searchSourceBuilder.aggregation(aggregation);
          searchSourceBuilder.size(0); // aggregations only; no search hits needed
          searchRequest.source(searchSourceBuilder);
          SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

          // Flatten each terms bucket into a row map matching the SQL result columns.
          Map<String, Object> academyNameMap = getAcademyNameMap();
          List<Map<String, Object>> attendanceList = new ArrayList<>();
          // "0.0" keeps the leading zero; the original "#.0" rendered 0.5 as ".5".
          DecimalFormat df = new DecimalFormat("0.0");
          Terms terms = response.getAggregations().get("group_by_stu_id");
          for (Terms.Bucket bucket : terms.getBuckets()) {
              Map<String, Object> item = new HashMap<>();
              item.put("stu_id", bucket.getKeyAsString());
              SearchHit student_info = ((ParsedTopHits) bucket.getAggregations().get("student_info")).getHits().getHits()[0];
              item.put("stu_num", student_info.getDocumentFields().get("stu_num").getValue());
              item.put("stu_name", student_info.getDocumentFields().get("stu_name").getValue());
              if (student_info.getDocumentFields().get("academy_id") != null) {
                  String academyId = student_info.getDocumentFields().get("academy_id").getValue().toString();
                  item.put("college_id", academyId);
                  item.put("college_name", academyNameMap.getOrDefault(academyId, ""));
              }

              // A bucket always has at least one document, so the divisions below are safe.
              long totalNum = bucket.getDocCount();
              item.put("total_num", totalNum);
              Sum sumSign = bucket.getAggregations().get("sum_sign");
              Sum sumLate = bucket.getAggregations().get("sum_late");

              double sign = sumSign.getValue();
              double late = sumLate.getValue();
              item.put("yes_num", (int) (sign - late));   // signed-in on time
              item.put("late_num", (int) late);           // signed-in late
              item.put("no_num", (int) (totalNum - sign)); // absent

              // BigDecimal.valueOf avoids the binary-noise new BigDecimal(double) constructor.
              BigDecimal yesLv = BigDecimal.valueOf((sign - late) / totalNum * 100);
              BigDecimal lateLv = BigDecimal.valueOf(late / totalNum * 100);
              BigDecimal noLv = BigDecimal.valueOf((totalNum - sign) / totalNum * 100);
              item.put("yes_lv", yesLv.compareTo(BigDecimal.ZERO) == 0 ? "0.0" : df.format(yesLv));
              item.put("late_lv", lateLv.compareTo(BigDecimal.ZERO) == 0 ? "0.0" : df.format(lateLv));
              item.put("no_lv", noLv.compareTo(BigDecimal.ZERO) == 0 ? "0.0" : df.format(noLv));

              attendanceList.add(item);
          }

          // Sort by normal sign-in rate descending; ties broken by stu_id ascending.
          // stu_id is numeric in the source table, so compare numerically — the
          // original String compare ordered "10" before "9".
          attendanceList.sort((o1, o2) -> {
              double yesLv1 = Double.parseDouble(o1.get("yes_lv").toString());
              double yesLv2 = Double.parseDouble(o2.get("yes_lv").toString());
              int byRate = Double.compare(yesLv2, yesLv1);
              if (byRate != 0) {
                  return byRate;
              }
              return Long.compare(Long.parseLong((String) o1.get("stu_id")),
                      Long.parseLong((String) o2.get("stu_id")));
          });

          int pageSize = requestParam.getPageSize();
          int totalCount = attendanceList.size();
          result.put("total_count", totalCount);
          int pages = (int) Math.ceil((double) totalCount / pageSize);
          result.put("total_pages", pages);
          result.put("size", pageSize);
          // Clamp the slice: the original subList threw IndexOutOfBoundsException
          // for a page past the end; out-of-range pages now yield an empty list.
          int fromIndex = Math.min(Math.max(requestParam.getPage() - 1, 0) * pageSize, totalCount);
          int toIndex = Math.min(fromIndex + pageSize, totalCount);
          result.put("list", attendanceList.subList(fromIndex, toIndex));
          return BaseResponseBuilder.success(result);
      } catch (Exception e) {
          return BaseResponseBuilder.failure("查询失败", e.getMessage());
      }
  }
相关推荐
TDengine (老段)3 小时前
TDengine 中的关联查询
大数据·javascript·网络·物联网·时序数据库·tdengine·iotdb
这个懒人5 小时前
深入解析Translog机制:Elasticsearch的数据守护者
数据库·elasticsearch·nosql·translog
直裾7 小时前
Mapreduce的使用
大数据·数据库·mapreduce
LCY1339 小时前
spring 中的DAO是什么
运维·git·jenkins
wangjun51599 小时前
jenkins 参数化发布到服务器 publish over ssh、label、Parameterized publishing
服务器·ssh·jenkins
愿你天黑有灯下雨有伞9 小时前
Docker 安装 Elasticsearch 教程
运维·elasticsearch·docker
遇见火星9 小时前
自动化发布工具CI/CD实践Jenkins常用工具和插件的使用
运维·ci/cd·自动化·jenkins·自动化发布
麻芝汤圆9 小时前
使用 MapReduce 进行高效数据清洗:从理论到实践
大数据·linux·服务器·网络·数据库·windows·mapreduce
树莓集团10 小时前
树莓集团海南落子:自贸港布局的底层逻辑
大数据
不剪发的Tony老师10 小时前
Hue:一个大数据查询工具
大数据