1、es常用的聚合查询有三种
桶聚合
指标聚合
管道聚合
首先我们创建一个product的索引,并插入数据
bash
PUT /product
{
"mappings": {
"properties": {
"category": { "type": "keyword" },
"price": { "type": "float" },
"timestamp": { "type": "date" }
}
}
}
POST /product/_doc/1
{
"category": "iphone",
"price": 1200,
"timestamp": "2024-04-01"
}
POST /product/_doc/2
{
"category": "Electronics",
"price": 800,
"timestamp": "2024-04-10"
}
POST /product/_doc/3
{
"category": "Clothing",
"price": 50,
"timestamp": "2024-04-10"
}
POST /product/_doc/4
{
"category": "Clothing",
"price": 30,
"timestamp": "2024-04-15"
}
POST /product/_doc/5
{
"category": "Electronics",
"price": 1500,
"timestamp": "2024-05-21"
}
2、桶聚合:常用的桶聚合如下
Terms聚合 - 类似SQL的group by,根据字段唯一值分组;
Histogram聚合 - 根据数值间隔分组,例如: 价格按100间隔分组,0、100、200、300等等;
Date histogram聚合 - 根据时间间隔分组,例如:按月、按天、按小时分组;
Range聚合 - 按数值范围分组,例如: 0-150一组,150-200一组,200-500一组;
比如:我想根据category字段唯一值来分组
bash
GET /product/_search?size=0
{
"aggs": {
"shop": { //聚合查询的名字,随便取个名字
"terms": { //聚合类型为: terms
"field": "category" //要聚合分组的字段
}
}
}
}
以上好比sql为
bash
select category, count(*) from product group by category
结果为:
bash
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"shop" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Clothing", //key是category的各种情况
"doc_count" : 2 //是每种category的次数
},
{
"key" : "Electronics",
"doc_count" : 2
},
{
"key" : "iphone",
"doc_count" : 1
}
]
}
}
}
以上这种写法经常用到下拉框列表的聚合分组查询。
2、按照产品类别进行分组,并计算每个类别下的平均价格
bash
GET /product/_search
{
"size": 0,
"aggs": {
"category_buckets": {
"terms": {
"field": "category"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
结果如下:
bash
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"category_buckets" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Clothing",
"doc_count" : 2,
"avg_price" : {
"value" : 40.0
}
},
{
"key" : "Electronics",
"doc_count" : 2,
"avg_price" : {
"value" : 1150.0
}
},
{
"key" : "iphone",
"doc_count" : 1,
"avg_price" : {
"value" : 1200.0
}
}
]
}
}
}
3、指标聚合:指标聚合对文档中的数值字段执行统计操作,如求和、平均值、最大值、最小值等
比如:计算所有产品的平均价格。
bash
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"avg_price" : {
"value" : 716.0
}
}
}
比如:计算所有商品的最大价格
bash
GET /product/_search
{
"size": 0,
"aggs": {
"avg_price": {
"max": {
"field": "price"
}
}
}
}
4、写一个复杂的聚合查询,并配合query查询
比如我想筛出 category = Electronics 和Clothing 的商品,然后在这基础上对category分组,求分组后category的平均值及合计两个字段
bash
GET /product/_search
{
"size": 0, //size=0代表不需要返回query查询结果,仅仅返回aggs统计结果
"query": { //query查询category=Electronics 和Clothing的数据
"terms": {
"category": [
"Electronics",
"Clothing"
]
}
},
"aggs": { //开始对category字段聚合分组
"product_category": { //聚合名称
"terms": {
"field": "category"
},
"aggs": { //聚合名称
"avg_price": {
"avg": { // 指标聚合类型为avg
"field": "price"
}
},
"sum_price":{ //聚合名称
"sum": { //指标聚合类型为sum
"field": "price"
}
}
}
}
}
}
结果如下:
bash
{
"took" : 27,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"product_category" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Clothing",
"doc_count" : 2,
"avg_price" : {
"value" : 40.0
},
"sum_price" : {
"value" : 80.0
}
},
{
"key" : "Electronics",
"doc_count" : 2,
"avg_price" : {
"value" : 1150.0
},
"sum_price" : {
"value" : 2300.0
}
}
]
}
}
}
后续更新管道聚合