介绍
本文基于 Elasticsearch 7.17.9 扩展自定义搜索功能:在其基础上实现拼音搜索、同义词搜索、自动补全,以及多字段按权重综合排序。
安装扩展插件
# The index created below uses the "pinyin" tokenizer and the "ik_max_word" /
# "ik_smart" tokenizers, so both analysis-pinyin and analysis-ik are required.
# The plugin version must match the Elasticsearch version (7.17.9 here).
cd elasticsearch-7.17.9/bin
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-pinyin/7.17.9
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-ik/7.17.9
# Restart Elasticsearch afterwards so that the newly installed plugins are loaded.
# The index settings below reference "analysis/synonyms.txt", which is resolved
# relative to the config directory (config/analysis/synonyms.txt). Create that file
# before creating the index, one synonym group per line, e.g.: 番茄,西红柿
创建索引
使用 curl 命令调用 Elasticsearch 创建索引:
          # Create test_index with pinyin, ik + synonym and completion sub-fields on name/remark.
          curl -X PUT "http://localhost:9200/test_index" -H 'Content-Type: application/json' -d '
{
    "settings":{
        "analysis":{
            "analyzer":{
                // Analyzer of the "pinyin" sub-fields.
                "pinyin_analyzer":{
                    "tokenizer":"my_pinyin"
                },
                // Index-time analyzer of the "ik" and "suggest" sub-fields.
                "ik_max_syno":{
                    "tokenizer":"ik_max_word",
                    "filter":["my_synonym"]
                },
                // Search-time analyzer of the "ik" and "suggest" sub-fields.
                "ik_smart_syno":{
                    "tokenizer":"ik_smart",
                    "filter":["my_synonym_graph"]
                }
            },
            "tokenizer":{
                "my_pinyin":{
                    "type":"pinyin",
                    "keep_first_letter":true,
                    "keep_separate_first_letter":false,
                    "keep_full_pinyin":true,
                    "keep_original":false,
                    "limit_first_letter_length":16,
                    "lowercase":true,
                    "remove_duplicated_term":true
                }
            },
            "filter":{
                // synonym_graph is meant for search-time analyzers only, so the
                // index-time analyzer uses the plain synonym filter on the same file.
                "my_synonym":{
                    "type":"synonym",
                    "synonyms_path":"analysis/synonyms.txt"
                },
                "my_synonym_graph":{
                    "type":"synonym_graph",
                    "synonyms_path":"analysis/synonyms.txt"
                    // Optional: enable the next line to allow reloading the synonyms
                    // file; this filter is referenced by the search analyzer only.
                    // ,"updateable": true
                }
            }
        }
    },
    "mappings":{
        "properties":{
            "name":{
                "type":"text",
                "analyzer":"standard",
                "fields":{
                    "keyword":{
                        "type":"keyword",
                        "ignore_above":256,
                        "doc_values":false
                    },
                    "pinyin":{
                        "type":"text",
                        "analyzer":"pinyin_analyzer"
                    },
                    "ik":{
                        "type":"text",
                        "analyzer":"ik_max_syno",
                        "search_analyzer":"ik_smart_syno"
                    },
                    "suggest":{
                        "type":"completion",
                        "analyzer":"ik_max_syno",
                        "search_analyzer":"ik_smart_syno",
                        "preserve_separators": false,
                        "preserve_position_increments": true,
                        "max_input_length": 50
                    }
                }
            },
            "remark":{
                "type":"text",
                "analyzer":"standard",
                "fields":{
                    "keyword":{
                        "type":"keyword",
                        "ignore_above":256,
                        "doc_values":false
                    },
                    "pinyin":{
                        "type":"text",
                        "analyzer":"pinyin_analyzer"
                    },
                    "ik":{
                        "type":"text",
                        "analyzer":"ik_max_syno",
                        "search_analyzer":"ik_smart_syno"
                    },
                    "suggest":{
                        "type":"completion",
                        "analyzer":"ik_max_syno",
                        "search_analyzer":"ik_smart_syno",
                        "preserve_separators": false,
                        "preserve_position_increments": true,
                        "max_input_length": 50
                    }
                }
            },
            "category":{
                "type":"keyword",
                "doc_values":false
            },
            "tag":{"type":"keyword"},
            "rating":{
              "type":"scaled_float",
              "scaling_factor": 10
            },
            "times":{
              "type":"integer"
            }
        }
    }
}'

插入示例数据
          # Sample documents. The values are aligned with the sample responses shown in
          # the search examples of this article (six documents; e.g. document 1 scores
          # 3.0 * 5 + 100 * 3 = 315 in the weighted-ranking example).
          # The bulk body is NDJSON: one action line followed by one source line per
          # document, and the body must end with a newline.
          curl -X POST "http://localhost:9200/test_index/_bulk" -H 'Content-Type: application/x-ndjson' --data-binary '
{ "index": { "_id": 1 } }
{ "name": "番茄炒蛋", "remark": "番茄, 鸡蛋", "category": "家常菜", "tag": ["简单", "营养"], "rating": 3.0, "times": 100 }
{ "index": { "_id": 2 } }
{ "name": "宫保鸡丁", "remark": "鸡胸肉, 干辣椒, 花生", "category": "川菜", "tag": ["辣", "下饭"], "rating": 3.4, "times": 80 }
{ "index": { "_id": 3 } }
{ "name": "蒸蛋羹", "remark": "水,盐,蛋,麻油,生抽", "category": "川菜", "tag": ["辣", "下饭"], "rating": 4.8, "times": 20 }
{ "index": { "_id": 4 } }
{ "name": "水煮蛋", "remark": "水,蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.9, "times": 30 }
{ "index": { "_id": 5 } }
{ "name": "酒酿蛋花年糕", "remark": "水,糖,蛋白,酒酿", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.8, "times": 50 }
{ "index": { "_id": 6 } }
{ "name": "牛奶麵包", "remark": "奶油,牛奶,糖,3颗蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.6, "times": 20 }
'

拼音搜索示例
          # Pinyin search: match the pinyin input "fanqie" against the pinyin and ik
          # sub-fields of name and remark, with per-field boosts.
          # Results are ordered by _score first and by times second; each sort key is
          # its own array element so the priority does not depend on JSON key order.
          # NOTE(review): the highlight section targets the standard-analyzed "name"
          # field, and the sample response contains no highlight entry - confirm
          # whether a sub-field such as name.pinyin was intended.
          curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "fanqie",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    },
    {
      "times": {
        "order": "desc"
      }
    }
  ],
  "highlight": {
    "fields": {
      "name": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name": {
          "query": "fanqie"
        }
      }
    }
  }
}'
匹配结果:
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 13.421699,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          13.421699,
          100
        ]
      }
    ]
  }
}

同义词搜索示例
          # Synonym search: query "西红柿" against the pinyin and ik sub-fields of name
          # and remark, with per-field boosts. The sample response returns the document
          # named "番茄炒蛋".
          # Results are ordered by _score first and by times second; each sort key is
          # its own array element so the priority does not depend on JSON key order.
          # NOTE(review): the highlight section targets the standard-analyzed "name"
          # field, and the sample response contains no highlight entry - confirm
          # whether a sub-field such as name.ik was intended.
          curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "西红柿",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    },
    {
      "times": {
        "order": "desc"
      }
    }
  ],
  "highlight": {
    "fields": {
      "name": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name": {
          "query": "西红柿"
        }
      }
    }
  }
}'
匹配结果:
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 8.388562,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          8.388562,
          100
        ]
      }
    ]
  }
}

自动补全搜索示例
根据输入的关键字给出关联的建议词。
下面的示例根据输入的关键字,同时在 name 和 remark 两个字段上进行匹配,并自动给出建议词:
          # Completion suggester: ask for suggestions for the input "鸡" on the
          # completion sub-fields of both name and remark in a single request.
          curl --request GET "http://localhost:9200/test_index/_search" \
               --header 'Content-Type: application/json' \
               --data '
{
  "suggest": {
    "nameSuggest": {
      "text": "鸡",
      "completion": {
        "field": "name.suggest"
      }
    },
    "remarkSuggest": {
      "text": "鸡",
      "completion": {
        "field": "remark.suggest"
      }
    }
  }
}'
匹配结果:
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "suggest": {
    "remarkSuggest": [
      {
        "text": "鸡",
        "offset": 0,
        "length": 1,
        "options": [
          {
            "text": "鸡胸肉, 干辣椒, 花生",
            "_index": "test_index",
            "_type": "_doc",
            "_id": "2",
            "_score": 1.0,
            "_source": {
              "name": "宫保鸡丁",
              "remark": "鸡胸肉, 干辣椒, 花生",
              "category": "川菜",
              "tag": [
                "辣",
                "下饭"
              ],
              "rating": 3.4,
              "times": 80
            }
          }
        ]
      }
    ],
    "nameSuggest": [
      {
        "text": "鸡",
        "offset": 0,
        "length": 1,
        "options": []
      }
    ]
  }
}

多字段按照权重综合排序
          # Weighted ranking: score every document as rating * 5 + times * 3
          # (e.g. 3.0 * 5 + 100 * 3 = 315 for document 1 in the sample response).
          curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "function_score": {
      // No text query is involved; match_all gives every document a base score of 1.
      "query": { "match_all": {} },
      "functions": [
        {
          "field_value_factor": {
            "field": "rating",
            "factor": 5,  // the rating field is multiplied by 5
            "modifier": "none",
            "missing": 1  // value used when a document has no rating
          }
        },
        {
          "field_value_factor": {
            "field": "times",
            "factor": 3,  // the times field is multiplied by 3
            "modifier": "none",
            "missing": 1  // value used when a document has no times
          }
        }
      ],
      "score_mode": "sum",  // add the two function scores together
      "boost_mode": "multiply"  // multiply the query score by the combined function score
    }
  },
  "size": 10,
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    }
  ]
}'
匹配结果:
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 6,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 315.0,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          315.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "2",
        "_score": 257.0,
        "_source": {
          "name": "宫保鸡丁",
          "remark": "鸡胸肉, 干辣椒, 花生",
          "category": "川菜",
          "tag": [
            "辣",
            "下饭"
          ],
          "rating": 3.4,
          "times": 80
        },
        "sort": [
          257.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "5",
        "_score": 174.0,
        "_source": {
          "name": "酒酿蛋花年糕",
          "remark": "水,糖,蛋白,酒酿",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.8,
          "times": 50
        },
        "sort": [
          174.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "4",
        "_score": 114.5,
        "_source": {
          "name": "水煮蛋",
          "remark": "水,蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.9,
          "times": 30
        },
        "sort": [
          114.5
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "3",
        "_score": 84.0,
        "_source": {
          "name": "蒸蛋羹",
          "remark": "水,盐,蛋,麻油,生抽",
          "category": "川菜",
          "tag": [
            "辣",
            "下饭"
          ],
          "rating": 4.8,
          "times": 20
        },
        "sort": [
          84.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "6",
        "_score": 83.0,
        "_source": {
          "name": "牛奶麵包",
          "remark": "奶油,牛奶,糖,3颗蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.6,
          "times": 20
        },
        "sort": [
          83.0
        ]
      }
    ]
  }
}