Elasticsearch的高阶玩法--拼音、同义词、自动补全、多字段按权重综合排序

介绍

基于 Elasticsearch 自定义扩展功能。我们将在其基础上构建拼音支持、同义词和自动补全,并演示多字段按权重综合排序。

安装扩展插件

cd elasticsearch-7.17.9/bin

# Pinyin plugin: supplies the "pinyin" tokenizer type used by the index settings.
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-pinyin/7.17.9

# IK plugin: supplies the ik_max_word / ik_smart tokenizers that the index
# settings also reference; index creation fails when they are unknown.
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-ik/7.17.9

# The synonym filter reads analysis/synonyms.txt relative to the config
# directory, so the file has to exist before the index is created.
# touch never overwrites an existing file. Add one rule per line,
# e.g. 西红柿,番茄 for the synonym search example.
mkdir -p ../config/analysis
touch ../config/analysis/synonyms.txt

# Restart Elasticsearch afterwards so the newly installed plugins are loaded.

创建索引

使用curl命令操作elasticsearch 建立索引

markup 复制代码
# Create test_index with three custom analyzers and multi-field mappings.
#   pinyin_analyzer : pinyin tokenizer (full pinyin + first letters, lowercased).
#   ik_max_syno     : ik_max_word tokenizer + synonym filter (index side).
#   ik_smart_syno   : ik_smart tokenizer + synonym filter (search side).
# name and remark are each indexed four ways: standard text, keyword,
# pinyin, IK with synonyms, plus a completion sub-field for suggestions.
#
# The request body is strict JSON (no inline comments, real booleans) so it
# can be validated or reused with any JSON tool.
#
# Optional: "updateable": true can be added to my_synonym to reload synonyms
# without reindexing, but Elasticsearch only allows updateable filters in
# search analyzers, and my_synonym is also used by the index analyzer here.
# NOTE(review): the Elasticsearch docs describe synonym_graph as intended for
# search analyzers; consider the plain "synonym" filter for ik_max_syno.
curl -X PUT "http://localhost:9200/test_index" -H 'Content-Type: application/json' -d '
{
    "settings": {
        "analysis": {
            "analyzer": {
                "pinyin_analyzer": {
                    "tokenizer": "my_pinyin"
                },
                "ik_max_syno": {
                    "tokenizer": "ik_max_word",
                    "filter": ["my_synonym"]
                },
                "ik_smart_syno": {
                    "tokenizer": "ik_smart",
                    "filter": ["my_synonym"]
                }
            },
            "tokenizer": {
                "my_pinyin": {
                    "type": "pinyin",
                    "keep_first_letter": true,
                    "keep_separate_first_letter": false,
                    "keep_full_pinyin": true,
                    "keep_original": false,
                    "limit_first_letter_length": 16,
                    "lowercase": true,
                    "remove_duplicated_term": true
                }
            },
            "filter": {
                "my_synonym": {
                    "type": "synonym_graph",
                    "synonyms_path": "analysis/synonyms.txt"
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "name": {
                "type": "text",
                "analyzer": "standard",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256,
                        "doc_values": false
                    },
                    "pinyin": {
                        "type": "text",
                        "analyzer": "pinyin_analyzer"
                    },
                    "ik": {
                        "type": "text",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno"
                    },
                    "suggest": {
                        "type": "completion",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno",
                        "preserve_separators": false,
                        "preserve_position_increments": true,
                        "max_input_length": 50
                    }
                }
            },
            "remark": {
                "type": "text",
                "analyzer": "standard",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256,
                        "doc_values": false
                    },
                    "pinyin": {
                        "type": "text",
                        "analyzer": "pinyin_analyzer"
                    },
                    "ik": {
                        "type": "text",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno"
                    },
                    "suggest": {
                        "type": "completion",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno",
                        "preserve_separators": false,
                        "preserve_position_increments": true,
                        "max_input_length": 50
                    }
                }
            },
            "category": {
                "type": "keyword",
                "doc_values": false
            },
            "tag": {
                "type": "keyword"
            },
            "rating": {
                "type": "scaled_float",
                "scaling_factor": 10
            },
            "times": {
                "type": "integer"
            }
        }
    }
}'

插入示例数据

markup 复制代码
# Load the six sample recipes. Each document is an action line followed by a
# source line (bulk NDJSON format). The values match the documents returned
# in the sample responses of this article (ratings, categories, tags and the
# sixth document), so the scores shown there can be reproduced.
curl -X POST "http://localhost:9200/test_index/_bulk" -H 'Content-Type: application/json' -d'
{ "index": { "_id": "1" } }
{ "name": "番茄炒蛋", "remark": "番茄, 鸡蛋", "category": "家常菜", "tag": ["简单", "营养"], "rating": 3.0, "times": 100 }
{ "index": { "_id": "2" } }
{ "name": "宫保鸡丁", "remark": "鸡胸肉, 干辣椒, 花生", "category": "川菜", "tag": ["辣", "下饭"], "rating": 3.4, "times": 80 }
{ "index": { "_id": "3" } }
{ "name": "蒸蛋羹", "remark": "水,盐,蛋,麻油,生抽", "category": "川菜", "tag": ["辣", "下饭"], "rating": 4.8, "times": 20 }
{ "index": { "_id": "4" } }
{ "name": "水煮蛋", "remark": "水,蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.9, "times": 30 }
{ "index": { "_id": "5" } }
{ "name": "酒酿蛋花年糕", "remark": "水,糖,蛋白,酒酿", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.8, "times": 50 }
{ "index": { "_id": "6" } }
{ "name": "牛奶麵包", "remark": "奶油,牛奶,糖,3颗蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.6, "times": 20 }
'

拼音搜索示例

markup 复制代码
# Pinyin search: match "fanqie" against the pinyin and IK sub-fields of name
# and remark, with per-field boosts (name.ik highest, remark.pinyin lowest).
# Hits are ordered by relevance score, then by times, both descending. Each
# sort key is its own array element so precedence does not depend on the key
# order inside a single JSON object.
# NOTE(review): the highlight targets the plain "name" field (standard
# analyzer), so a pinyin query string probably produces no fragments there;
# the sample response shows no highlight section. Confirm the intended field.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "fanqie",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    { "_score": { "order": "desc" } },
    { "times": { "order": "desc" } }
  ],
  "highlight": {
    "fields": {
      "name": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name": {
          "query": "fanqie"
        }
      }
    }
  }
}'

匹配结果:

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 13.421699,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          13.421699,
          100
        ]
      }
    ]
  }
}

同义词搜索示例

markup 复制代码
# Synonym search: "西红柿" is expected to reach documents containing "番茄"
# through the synonym filter on the IK sub-fields (requires a matching rule
# in analysis/synonyms.txt). Field boosts are the same as in the pinyin search.
# Hits are ordered by relevance score, then by times, both descending. Each
# sort key is its own array element so precedence does not depend on the key
# order inside a single JSON object.
# NOTE(review): the highlight targets the plain "name" field (standard
# analyzer, no synonyms), so it probably produces no fragments for this query;
# the sample response shows no highlight section. Confirm the intended field.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "西红柿",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    { "_score": { "order": "desc" } },
    { "times": { "order": "desc" } }
  ],
  "highlight": {
    "fields": {
      "name": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name": {
          "query": "西红柿"
        }
      }
    }
  }
}'


匹配结果:
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 8.388562,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          8.388562,
          100
        ]
      }
    ]
  }
}

自动补全搜索示例

根据输入的关键字给出关联的建议词

如下是根据输入的关键字匹配两个字段进行自动给出建议词汇

markup 复制代码
# Completion suggestions for the input "鸡", requested separately from the
# completion sub-fields of name and remark. Each named entry (nameSuggest,
# remarkSuggest) comes back under the same name in the response.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "suggest": {
    "nameSuggest":   { "text": "鸡", "completion": { "field": "name.suggest" } },
    "remarkSuggest": { "text": "鸡", "completion": { "field": "remark.suggest" } }
  }
}'
匹配的结果:
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "suggest": {
    "remarkSuggest": [
      {
        "text": "鸡",
        "offset": 0,
        "length": 1,
        "options": [
          {
            "text": "鸡胸肉, 干辣椒, 花生",
            "_index": "test_index",
            "_type": "_doc",
            "_id": "2",
            "_score": 1.0,
            "_source": {
              "name": "宫保鸡丁",
              "remark": "鸡胸肉, 干辣椒, 花生",
              "category": "川菜",
              "tag": [
                "辣",
                "下饭"
              ],
              "rating": 3.4,
              "times": 80
            }
          }
        ]
      }
    ],
    "nameSuggest": [
      {
        "text": "鸡",
        "offset": 0,
        "length": 1,
        "options": []
      }
    ]
  }
}

多字段按照权重综合排序

markup 复制代码
# Weighted ranking over two numeric fields with function_score.
#   rating : multiplied by factor 5; a missing value counts as 1.
#   times  : multiplied by factor 3; a missing value counts as 1.
# score_mode "sum" adds the two function results together, and boost_mode
# "multiply" multiplies that sum with the query score. No query is given, so
# in the sample response each score equals 5 * rating + 3 * times
# (e.g. 5 * 3.0 + 3 * 100 = 315).
# Note that with these factors times dominates the ranking, because its raw
# values are much larger than the ratings.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "function_score": {
      "functions": [
        {
          "field_value_factor": {
            "field": "rating",
            "factor": 5,
            "modifier": "none",
            "missing": 1
          }
        },
        {
          "field_value_factor": {
            "field": "times",
            "factor": 3,
            "modifier": "none",
            "missing": 1
          }
        }
      ],
      "score_mode": "sum",
      "boost_mode": "multiply"
    }
  },
  "size": 10,
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    }
  ]
}'

匹配结果:
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 6,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 315.0,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          315.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "2",
        "_score": 257.0,
        "_source": {
          "name": "宫保鸡丁",
          "remark": "鸡胸肉, 干辣椒, 花生",
          "category": "川菜",
          "tag": [
            "辣",
            "下饭"
          ],
          "rating": 3.4,
          "times": 80
        },
        "sort": [
          257.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "5",
        "_score": 174.0,
        "_source": {
          "name": "酒酿蛋花年糕",
          "remark": "水,糖,蛋白,酒酿",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.8,
          "times": 50
        },
        "sort": [
          174.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "4",
        "_score": 114.5,
        "_source": {
          "name": "水煮蛋",
          "remark": "水,蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.9,
          "times": 30
        },
        "sort": [
          114.5
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "3",
        "_score": 84.0,
        "_source": {
          "name": "蒸蛋羹",
          "remark": "水,盐,蛋,麻油,生抽",
          "category": "川菜",
          "tag": [
            "辣",
            "下饭"
          ],
          "rating": 4.8,
          "times": 20
        },
        "sort": [
          84.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "6",
        "_score": 83.0,
        "_source": {
          "name": "牛奶麵包",
          "remark": "奶油,牛奶,糖,3颗蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.6,
          "times": 20
        },
        "sort": [
          83.0
        ]
      }
    ]
  }
}
相关推荐
TDengine (老段)5 分钟前
TDengine 中的关联查询
大数据·javascript·网络·物联网·时序数据库·tdengine·iotdb
这个懒人2 小时前
深入解析Translog机制:Elasticsearch的数据守护者
数据库·elasticsearch·nosql·translog
直裾4 小时前
Mapreduce的使用
大数据·数据库·mapreduce
愿你天黑有灯下雨有伞6 小时前
Docker 安装 Elasticsearch 教程
运维·elasticsearch·docker
麻芝汤圆7 小时前
使用 MapReduce 进行高效数据清洗:从理论到实践
大数据·linux·服务器·网络·数据库·windows·mapreduce
树莓集团7 小时前
树莓集团海南落子:自贸港布局的底层逻辑
大数据
不剪发的Tony老师7 小时前
Hue:一个大数据查询工具
大数据
靠近彗星7 小时前
如何检查 HBase Master 是否已完成初始化?| 详细排查指南
大数据·数据库·分布式·hbase
墨染丶eye8 小时前
数据仓库项目启动与管理
大数据·数据仓库·spark
SelectDB8 小时前
Apache Doris 2025 Roadmap:构建 GenAI 时代实时高效统一的数据底座
大数据·数据库·aigc