ES——(三)DSL高级查询

5.1. DSL 概述

Query DSL概述: Domain Specific Language(领域专用语言),Elasticsearch提供了基于JSON的DSL来定义查询。

  • prefix 用得少,但是项目中偶尔会用到
  • must 计算得分,filter 不计算得分,性能好一点
  • agg 项目中也会用

建立索引库和文档

复制代码
PUT /my_index/_doc/1
{"id":1,"title":"华为笔记本电脑","category":"华为","images":"http://www.gulixueyuan.com/xm.jpg","price":5388}

PUT /my_index/_doc/2
{"id":2,"title":"华为手机","category":"华为","images":"http://www.gulixueyuan.com/xm.jpg","price":5500}

PUT /my_index/_doc/3
{"id":3,"title":"VIVO手机","category":"vivo","images":"http://www.gulixueyuan.com/xm.jpg","price":3600}
5.2. DSL 查询
5.2.1. 查询所有文档

match_all:

下面的 match_all 查询等价于不带请求体的:get /my_index/_search

复制代码
post /my_index/_search
{
  "query": {
    "match_all": {}
  }
}


{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 1,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 1,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      }
    ]
  }
}
5.2.2. 匹配查询

match:

复制代码
post /my_index/_search
{
  "query": {
    "match": {
      "title": "华为智能手机"
    }
  }
}


{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1.1239216,
    "hits": [
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1.1239216,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 0.5619608,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      },
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 0.35411233,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      }
    ]
  }
}
5.2.3. 多字段匹配

multi_match:

复制代码
post /my_index/_search
{
  "query": {
    "multi_match": {
      "query": "华为智能手机",
      "fields": ["title", "category"]
    }
  }
}

{
  "took": 26,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1.1239216,
    "hits": [
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1.1239216,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 0.5619608,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      },
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 0.35411233,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      }
    ]
  }
}
5.2.4. 关键字精确查询

term:关键字不会进行分词。

因为前面用该索引库进行过mapping测试,所以 category 可能被映射成了 text(category -> text,内容会被分词),此时用 term 精确查询"华为"匹配不到文档(如下面结果中命中数为 0);需要删除索引库,重新建立后再查询

复制代码
GET /my_index/_search
{
  "query": {
    "term": {
      "category": {
        "value": "华为"
      }
    }
  }
}


{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  }
}
5.2.5. 多关键字精确查询

terms

复制代码
GET /my_index/_search
{
  "query": {
    "terms": {
      "category": ["华为", "vivo"]
    }
  }
}

{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 1,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      }
    ]
  }
}
5.2.6. 范围查询

范围查询使用range。

  • gte: 大于等于

  • lte: 小于等于

  • gt: 大于

  • lt: 小于

    GET /my_index/_search
    {
    "query": {
    "range": {
    "price": {
    "gte": 3000,
    "lte": 5000
    }
    }
    }
    }

    {
    "took": 2,
    "timed_out": false,
    "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
    },
    "hits": {
    "total": {
    "value": 1,
    "relation": "eq"
    },
    "max_score": 1,
    "hits": [
    {
    "_index": "my_index",
    "_id": "3",
    "_score": 1,
    "_source": {
    "id": 3,
    "title": "VIVO手机",
    "category": "vivo",
    "images": "http://www.gulixueyuan.com/xm.jpg",
    "price": 3600
    }
    }
    ]
    }
    }

5.2.7. 指定返回字段

query 同级增加 _source 过滤

复制代码
get /my_index/_search
{
  "query": {
    "match": {
      "title": "手机"
    }
  },
  "_source": ["title", "price"]
}

{
  "took": 17,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 1.100845,
    "hits": [
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 1.100845,
        "_source": {
          "price": 3600,
          "title": "VIVO手机"
        }
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 0.9983525,
        "_source": {
          "price": 5500,
          "title": "华为手机"
        }
      }
    ]
  }
}
5.2.8. 组合查询

bool 各条件之间有and,or或not的关系

  • must: 各个条件都必须满足,所有条件是and的关系
  • should: 各个条件有一个满足即可,即各条件是or的关系
  • must_not: 各个条件都必须不满足,即各条件是not的关系
  • filter: 与must效果等同,但是它不计算得分,效率更高点。

只演示一个,其他不再演示

复制代码
get /my_index/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "title": "华为"
          }
        },
        {
          "range": {
            "price": {
              "gte": 3000,
              "lte": 5400
            }
          }
        }
      ]
    }
  }
}

filter:与must效果等同,但是它不计算得分,效率更高点。

使用 filter 时,_score 的分值为 0。在Elasticsearch中,_score 字段代表每个文档的相关性分数(relevance score)。

这个分数用于衡量一个文档与特定查询的匹配程度,它是基于搜索查询的条件和文档的内容来计算的。相关性分数越高,表示文档与查询的匹配度越高,排名也越靠前。

复制代码
get /my_index/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "match": {
            "title": "华为"
          }
        }
      ]
    }
  }
}

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0,
    "hits": [
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 0,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 0,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      }
    ]
  }
}
5.2.9. 聚合查询

聚合允许使用者对es文档进行统计分析,例如取最大值、平均值、分组等等。聚合分析主要又分为两种:

  • 指标聚合:是对数据集求最大、最小、平均值以及求和等指标的聚合。
  • 桶聚合:则和我们MySQL中的分组Group by类似,先对数据进行分组然后再进行指标聚合。
指标聚合

这里的结果看:aggregations

max(min、avg、sum类似)

复制代码
get /my_index/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "max_price": { # 这是个自定义的名称
      "max": {
        "field": "price"
      }
    }
  }
}

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 1,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 1,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      }
    ]
  },
  "aggregations": {
    "max_price": {
      "value": 5500
    }
  }
}

count

复制代码
get /my_index/_count
{
  "query": {
    "match_all": {}
  }
}

{
  "count": 3,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  }
}

stats

统计基础指标聚合集,ES中提供了一个stats,可以将某个字段的 count、max、min、avg、sum 一次性统一计算出来。

复制代码
get /my_index/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "stats_price": {
      "stats": {
        "field": "price"
      }
    }
  }
}

{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 1,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 1,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      }
    ]
  },
  "aggregations": {
    "stats_price": {
      "count": 3,
      "min": 3600,
      "max": 5500,
      "avg": 4829.333333333333,
      "sum": 14488
    }
  }
}
桶聚合

terms:

桶聚合相当于sql中的group by语句

必须重新映射(如果索引库已存在,需要先删除再执行下面的 PUT);如果自动映射,category -> text 无法进行聚合操作

复制代码
PUT /my_index
{
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "index": true,
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_max_word"
      },
      "category": {
        "type": "keyword",
        "index": true
      },
      "images": {
        "type": "keyword",
        "index": true
      },
      "price": {
        "type": "integer",
        "index": true
      }
    }
  }
}

PUT /my_index/_doc/1
{"id":1,"title":"华为笔记本电脑","category":"华为","images":"http://www.gulixueyuan.com/xm.jpg","price":5388}

PUT /my_index/_doc/2
{"id":2,"title":"华为手机","category":"华为","images":"http://www.gulixueyuan.com/xm.jpg","price":5500}

PUT /my_index/_doc/3
{"id":3,"title":"VIVO手机","category":"vivo","images":"http://www.gulixueyuan.com/xm.jpg","price":3600}

get /my_index/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groupby_category": {
      "terms": {
        "field": "category",
        "size": 10
      }
    }
  }
}

{
  "took": 25,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 1,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 1,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      }
    ]
  },
  "aggregations": {
    "groupby_category": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "华为",
          "doc_count": 2
        },
        {
          "key": "vivo",
          "doc_count": 1
        }
      ]
    }
  }
}

还可以聚合之后,再聚合,就是子聚合

复制代码
get /my_index/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "groupby_category": {
      "terms": {
        "field": "category",
        "size": 10
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}


{
  "took": 49,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 1,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        }
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 1,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        }
      }
    ]
  },
  "aggregations": {
    "groupby_category": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "华为",
          "doc_count": 2,
          "avg_price": {
            "value": 5444
          }
        },
        {
          "key": "vivo",
          "doc_count": 1,
          "avg_price": {
            "value": 3600
          }
        }
      ]
    }
  }
}
5.2.10. 排序

aggs、query、sort 都是同级,highlight 也是

复制代码
get /my_index/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "title": "华为"
          }
        }
      ]
    }
  },
  "sort": [
      {
        "price": {
          "order": "asc"
        }
      }
    ]
}


{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "my_index",
        "_id": "1",
        "_score": null,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        },
        "sort": [
          5388
        ]
      },
      {
        "_index": "my_index",
        "_id": "2",
        "_score": null,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        },
        "sort": [
          5500
        ]
      }
    ]
  }
}
5.2.11. 分页查询

分页的两个关键属性:from、size。

  • from: 当前页的起始索引,默认从0开始。 from = (pageNum - 1) * size

  • size: 每页显示多少条

    get /my_index/_search
    {
    "query": {
    "match_all": {}
    },
    "from": 0,
    "size": 2
    }

    {
    "took": 1,
    "timed_out": false,
    "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
    },
    "hits": {
    "total": {
    "value": 3,
    "relation": "eq"
    },
    "max_score": 1,
    "hits": [
    {
    "_index": "my_index",
    "_id": "1",
    "_score": 1,
    "_source": {
    "id": 1,
    "title": "华为笔记本电脑",
    "category": "华为",
    "images": "http://www.gulixueyuan.com/xm.jpg",
    "price": 5388
    }
    },
    {
    "_index": "my_index",
    "_id": "2",
    "_score": 1,
    "_source": {
    "id": 2,
    "title": "华为手机",
    "category": "华为",
    "images": "http://www.gulixueyuan.com/xm.jpg",
    "price": 5500
    }
    }
    ]
    }
    }

5.2.12. 高亮显示

无检索不高亮:

对哪个字段高亮,必须先进行检索才可以

结果中带有相关标签,就说明成功了

复制代码
GET /my_index/_search
{
  "query": {
    "match": {
      "title": "华为手机"
    }
  },
  "highlight": {
    "fields": {
      "title": {}
    },
    "pre_tags": ["<font style='color:red'>"],
    "post_tags": ["</font>"]
  }
}


{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1.1239216,
    "hits": [
      {
        "_index": "my_index",
        "_id": "2",
        "_score": 1.1239216,
        "_source": {
          "id": 2,
          "title": "华为手机",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5500
        },
        "highlight": {
          "title": [
            "<font style='color:red'>华为</font><font style='color:red'>手机</font>"
          ]
        }
      },
      {
        "_index": "my_index",
        "_id": "3",
        "_score": 0.5619608,
        "_source": {
          "id": 3,
          "title": "VIVO手机",
          "category": "vivo",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 3600
        },
        "highlight": {
          "title": [
            "VIVO<font style='color:red'>手机</font>"
          ]
        }
      },
      {
        "_index": "my_index",
        "_id": "1",
        "_score": 0.35411233,
        "_source": {
          "id": 1,
          "title": "华为笔记本电脑",
          "category": "华为",
          "images": "http://www.gulixueyuan.com/xm.jpg",
          "price": 5388
        },
        "highlight": {
          "title": [
            "<font style='color:red'>华为</font>笔记本电脑"
          ]
        }
      }
    ]
  }
}
相关推荐
AAA修煤气灶刘哥4 小时前
ES 高级玩法大揭秘:从算分骚操作到深度分页踩坑,后端 er 速进!
java·后端·elasticsearch
island13144 小时前
【C++框架#5】Elasticsearch 安装和使用
开发语言·c++·elasticsearch
Cachel wood5 小时前
信息检索、推荐系统模型排序质量指标:AP@K和MAP@K
windows·搜索引擎·json·推荐系统·搜索
阿里嘎多哈基米8 小时前
ES——(一)基本概念
elasticsearch·kibana·倒排索引·dsl·非结构化数据
树荫下的光斑10 小时前
搜索引擎收录网站带www和不带www有区别吗?
搜索引擎
橘子1310 小时前
C++实战:搜索引擎项目(二)
开发语言·c++·搜索引擎
C_V_Better13 小时前
Elasticsearch 创建索引别名的正确姿势
大数据·elasticsearch
shallwe小威1 天前
SpringBoot集成ElasticSearch
数据库·spring boot·elasticsearch
Elastic 中国社区官方博客1 天前
使用 LangExtract 和 Elasticsearch
大数据·人工智能·elasticsearch·搜索引擎·ai·信息可视化·全文检索