es 聚合操作(二)

书接上文,示例数据在上一篇,这里就不展示了

一、Pipeline Aggregation

支持对聚合分析的结果,再次进行聚合分析。

Pipeline 的分析结果会输出到原结果中,根据位置的不同,分为两类:

  • Sibling - 结果和现有分析结果同级
    • Max,min,Avg & Sum Bucket
    • Stats,Extended Status Bucket
    • Percentiles Bucket
  • Parent -结果内嵌到现有的聚合分析结果之中
    • Derivative(求导)
    • Cumultive Sum(累计求和)
    • Moving Function(移动平均值 )

min_bucket

在员工数最多的工种里,找出平均工资最低的工种

#在员工数最多的工种里,找出平均工资最低的工种
POST /employees/_search
{
  "size": 0, 
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "size": 10
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "min_salary_by_job": {
      "min_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}

结果:

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "jobs" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Java Programmer",
          "doc_count" : 7,
          "avg_salary" : {
            "value" : 25571.428571428572
          }
        },
        {
          "key" : "Javascript Programmer",
          "doc_count" : 4,
          "avg_salary" : {
            "value" : 19250.0
          }
        },
        {
          "key" : "QA",
          "doc_count" : 3,
          "avg_salary" : {
            "value" : 21000.0
          }
        },
        {
          "key" : "DBA",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : "Web Designer",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 20000.0
          }
        },
        {
          "key" : "Dev Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 50000.0
          }
        },
        {
          "key" : "Product Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 35000.0
          }
        }
      ]
    },
    "min_salary_by_job" : {
      "value" : 19250.0,
      "keys" : [
        "Javascript Programmer"
      ]
    }
  }
}

可以看到上面所有工种 里面 最低薪资如下:

  • min_salary_by_job 结果和 jobs 的聚合同级
  • min_bucket 求之前结果的最小值
  • 通过 bucket_path 关键字指定路径

stats_bucket

所有工种里面 薪资的统计数据

#所有工种里面 薪资的统计数据
POST /employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "size": 10
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "stats_salary_by_job": {
      "stats_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}

结果:

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "jobs" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Java Programmer",
          "doc_count" : 7,
          "avg_salary" : {
            "value" : 25571.428571428572
          }
        },
        {
          "key" : "Javascript Programmer",
          "doc_count" : 4,
          "avg_salary" : {
            "value" : 19250.0
          }
        },
        {
          "key" : "QA",
          "doc_count" : 3,
          "avg_salary" : {
            "value" : 21000.0
          }
        },
        {
          "key" : "DBA",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : "Web Designer",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 20000.0
          }
        },
        {
          "key" : "Dev Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 50000.0
          }
        },
        {
          "key" : "Product Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 35000.0
          }
        }
      ]
    },
    "stats_salary_by_job" : {
      "count" : 7,
      "min" : 19250.0,
      "max" : 50000.0,
      "avg" : 27974.48979591837,
      "sum" : 195821.42857142858
    }
  }
}

可以看到薪资的统计值:

percentiles

平均工资的百分位数

#平均工资的百分位数
POST /employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "size": 10
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "percentiles_salary_by_job": {
      "percentiles_bucket": {
        "buckets_path": "jobs>avg_salary"
      }
    }
  }
}

结果:

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "jobs" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Java Programmer",
          "doc_count" : 7,
          "avg_salary" : {
            "value" : 25571.428571428572
          }
        },
        {
          "key" : "Javascript Programmer",
          "doc_count" : 4,
          "avg_salary" : {
            "value" : 19250.0
          }
        },
        {
          "key" : "QA",
          "doc_count" : 3,
          "avg_salary" : {
            "value" : 21000.0
          }
        },
        {
          "key" : "DBA",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : "Web Designer",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 20000.0
          }
        },
        {
          "key" : "Dev Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 50000.0
          }
        },
        {
          "key" : "Product Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 35000.0
          }
        }
      ]
    },
    "percentiles_salary_by_job" : {
      "values" : {
        "1.0" : 19250.0,
        "5.0" : 19250.0,
        "25.0" : 21000.0,
        "50.0" : 25000.0,
        "75.0" : 35000.0,
        "95.0" : 50000.0,
        "99.0" : 50000.0
      }
    }
  }
}

Cumulative_sum

累计求和

# 累计求和
POST /employees/_search
{
  "size": 0, 
  "aggs": {
    "age": {
      "histogram": {
        "field": "age",
        "min_doc_count": 0, 
        "interval": 1
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        },
        "cumulative_salary": {
          "cumulative_sum": {
            "buckets_path": "avg_salary"
          }
        }
      }
    }
  }
}

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "age" : {
      "buckets" : [
        {
          "key" : 20.0,
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 9000.0
          },
          "cumulative_salary" : {
            "value" : 9000.0
          }
        },
        {
          "key" : 21.0,
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 16000.0
          },
          "cumulative_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : 22.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : 23.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : 24.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : 25.0,
          "doc_count" : 3,
          "avg_salary" : {
            "value" : 17333.333333333332
          },
          "cumulative_salary" : {
            "value" : 42333.33333333333
          }
        },
        {
          "key" : 26.0,
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 22000.0
          },
          "cumulative_salary" : {
            "value" : 64333.33333333333
          }
        },
        {
          "key" : 27.0,
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 20000.0
          },
          "cumulative_salary" : {
            "value" : 84333.33333333333
          }
        },
        {
          "key" : 28.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 84333.33333333333
          }
        },
        {
          "key" : 29.0,
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 20000.0
          },
          "cumulative_salary" : {
            "value" : 104333.33333333333
          }
        },
        {
          "key" : 30.0,
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 30000.0
          },
          "cumulative_salary" : {
            "value" : 134333.3333333333
          }
        },
        {
          "key" : 31.0,
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 28500.0
          },
          "cumulative_salary" : {
            "value" : 162833.3333333333
          }
        },
        {
          "key" : 32.0,
          "doc_count" : 3,
          "avg_salary" : {
            "value" : 27333.333333333332
          },
          "cumulative_salary" : {
            "value" : 190166.66666666666
          }
        },
        {
          "key" : 33.0,
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 28000.0
          },
          "cumulative_salary" : {
            "value" : 218166.66666666666
          }
        },
        {
          "key" : 34.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 218166.66666666666
          }
        },
        {
          "key" : 35.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 218166.66666666666
          }
        },
        {
          "key" : 36.0,
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 38000.0
          },
          "cumulative_salary" : {
            "value" : 256166.66666666666
          }
        },
        {
          "key" : 37.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 256166.66666666666
          }
        },
        {
          "key" : 38.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 256166.66666666666
          }
        },
        {
          "key" : 39.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 256166.66666666666
          }
        },
        {
          "key" : 40.0,
          "doc_count" : 0,
          "avg_salary" : {
            "value" : null
          },
          "cumulative_salary" : {
            "value" : 256166.66666666666
          }
        },
        {
          "key" : 41.0,
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 50000.0
          },
          "cumulative_salary" : {
            "value" : 306166.6666666666
          }
        }
      ]
    }
  }
}

二、聚合的作用范围

默认情况下,es 会对 query 中所有范围进行聚合操作

也可以指定范围:

Filter

找到所有工种,并计算年龄大于35的平均薪资

#找到所有工种,并计算年龄大于35的平均薪资
POST /employees/_search
{
  "size": 0, 
  "aggs": {
    "older_person": {
      "filter": {
        "range": {
          "age": {
            "gte": 35
          }
        }
      },
      "aggs": {
        "avg_salary": {
          "avg": {
            "field": "salary"
          }
        }
      }
    },
    "all_jobs": {
      "terms": {
        "field": "job.keyword"
      }
    }
  }
}

结果:

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "older_person" : {
      "doc_count" : 2,
      "avg_salary" : {
        "value" : 44000.0
      }
    },
    "all_jobs" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Java Programmer",
          "doc_count" : 7
        },
        {
          "key" : "Javascript Programmer",
          "doc_count" : 4
        },
        {
          "key" : "QA",
          "doc_count" : 3
        },
        {
          "key" : "DBA",
          "doc_count" : 2
        },
        {
          "key" : "Web Designer",
          "doc_count" : 2
        },
        {
          "key" : "Dev Manager",
          "doc_count" : 1
        },
        {
          "key" : "Product Manager",
          "doc_count" : 1
        }
      ]
    }
  }
}

Post field

找到所有工种,并匹配符合条件的工种

POST /employees/_search
{
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "size": 10
      }
    }
  },
  "post_filter": {
    "match": {
      "job.keyword": "Dev Manager"
    }
  }
}

结果:

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "employees",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "name" : "Underwood",
          "age" : 41,
          "job" : "Dev Manager",
          "gender" : "male",
          "salary" : 50000
        }
      }
    ]
  },
  "aggregations" : {
    "jobs" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Java Programmer",
          "doc_count" : 7
        },
        {
          "key" : "Javascript Programmer",
          "doc_count" : 4
        },
        {
          "key" : "QA",
          "doc_count" : 3
        },
        {
          "key" : "DBA",
          "doc_count" : 2
        },
        {
          "key" : "Web Designer",
          "doc_count" : 2
        },
        {
          "key" : "Dev Manager",
          "doc_count" : 1
        },
        {
          "key" : "Product Manager",
          "doc_count" : 1
        }
      ]
    }
  }
}

global

默认基于 query 条件聚合,可以使用 global 进行全部数据聚合

POST employees/_search
{
  "size": 0,
  "query": {
    "range": {
      "age": {
        "gte": 40
      }
    }
  },
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword"
      }
    },
    "all":{
      "global":{},
      "aggs":{
        "salary_avg":{
          "avg":{
            "field":"salary"
          }
        }
      }
    }
  }
}

可以看到 大于 40 的工种只有一个,但是平均薪资是计算的全部员工

三、排序

指定order,按照count和key进行排序:

  • 默认情况,按照count降序排序
  • 指定size,就能返回相应的桶

找到所有工种,先以 Count 升序,如果 count 一样,然后再根据 key 降序

POST /employees/_search
{
  "size": 0, 
  "query": {
    "range": {
      "age": {
        "gte": 20
      }
    }
  },
  "aggs": {
    "jobs": {
      "terms": {
        "field": "job.keyword",
        "order": [
          {"_count": "asc"},
          {"_key": "desc"}
        ]
      }
    }
  }
}

根据平均薪资降序,并统计平均薪资

POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword",
        "order":[  {
            "avg_salary":"desc"
          }]
        
        
      },
    "aggs": {
      "avg_salary": {
        "avg": {
          "field":"salary"
        }
      }
    }
    }
  }
}

结果:

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "jobs" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Dev Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 50000.0
          }
        },
        {
          "key" : "Product Manager",
          "doc_count" : 1,
          "avg_salary" : {
            "value" : 35000.0
          }
        },
        {
          "key" : "Java Programmer",
          "doc_count" : 7,
          "avg_salary" : {
            "value" : 25571.428571428572
          }
        },
        {
          "key" : "DBA",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 25000.0
          }
        },
        {
          "key" : "QA",
          "doc_count" : 3,
          "avg_salary" : {
            "value" : 21000.0
          }
        },
        {
          "key" : "Web Designer",
          "doc_count" : 2,
          "avg_salary" : {
            "value" : 20000.0
          }
        },
        {
          "key" : "Javascript Programmer",
          "doc_count" : 4,
          "avg_salary" : {
            "value" : 19250.0
          }
        }
      ]
    }
  }
}

对工种分类,并根据最低薪资降序,并统计薪资

POST employees/_search
{
  "size": 0,
  "aggs": {
    "jobs": {
      "terms": {
        "field":"job.keyword",
        "order":[  {
            "stats_salary.min":"desc"
          }]
        
        
      },
    "aggs": {
      "stats_salary": {
        "stats": {
          "field":"salary"
        }
      }
    }
    }
  }
}

结果:

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 20,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "jobs" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Dev Manager",
          "doc_count" : 1,
          "stats_salary" : {
            "count" : 1,
            "min" : 50000.0,
            "max" : 50000.0,
            "avg" : 50000.0,
            "sum" : 50000.0
          }
        },
        {
          "key" : "Product Manager",
          "doc_count" : 1,
          "stats_salary" : {
            "count" : 1,
            "min" : 35000.0,
            "max" : 35000.0,
            "avg" : 35000.0,
            "sum" : 35000.0
          }
        },
        {
          "key" : "DBA",
          "doc_count" : 2,
          "stats_salary" : {
            "count" : 2,
            "min" : 20000.0,
            "max" : 30000.0,
            "avg" : 25000.0,
            "sum" : 50000.0
          }
        },
        {
          "key" : "QA",
          "doc_count" : 3,
          "stats_salary" : {
            "count" : 3,
            "min" : 18000.0,
            "max" : 25000.0,
            "avg" : 21000.0,
            "sum" : 63000.0
          }
        },
        {
          "key" : "Web Designer",
          "doc_count" : 2,
          "stats_salary" : {
            "count" : 2,
            "min" : 18000.0,
            "max" : 22000.0,
            "avg" : 20000.0,
            "sum" : 40000.0
          }
        },
        {
          "key" : "Javascript Programmer",
          "doc_count" : 4,
          "stats_salary" : {
            "count" : 4,
            "min" : 16000.0,
            "max" : 25000.0,
            "avg" : 19250.0,
            "sum" : 77000.0
          }
        },
        {
          "key" : "Java Programmer",
          "doc_count" : 7,
          "stats_salary" : {
            "count" : 7,
            "min" : 9000.0,
            "max" : 38000.0,
            "avg" : 25571.428571428572,
            "sum" : 179000.0
          }
        }
      ]
    }
  }
}

感谢观看!!!感兴趣的小伙伴可以关注收藏,持续更新中~~~

相关推荐
zhixingheyi_tian4 小时前
Spark 之 Aggregate
大数据·分布式·spark
PersistJiao4 小时前
Spark 分布式计算中网络传输和序列化的关系(一)
大数据·网络·spark
宅小海7 小时前
scala String
大数据·开发语言·scala
小白的白是白痴的白7 小时前
11.17 Scala练习:梦想清单管理
大数据
java1234_小锋7 小时前
Elasticsearch是如何实现Master选举的?
大数据·elasticsearch·搜索引擎
AiFlutter9 小时前
Java实现简单的搜索引擎
java·搜索引擎·mybatis
Java 第一深情11 小时前
零基础入门Flink,掌握基本使用方法
大数据·flink·实时计算
MXsoft61811 小时前
华为服务器(iBMC)硬件监控指标解读
大数据·运维·数据库
PersistJiao12 小时前
Spark 分布式计算中网络传输和序列化的关系(二)
大数据·网络·spark·序列化·分布式计算
九河云12 小时前
如何对AWS进行节省
大数据·云计算·aws