Elasticsearch的高阶玩法--拼音、同义词、自动补全、多字段按权重综合排序

介绍

基于 Elasticsearch 的自定义分析器扩展搜索功能：我们将在其基础上实现拼音搜索、同义词搜索、自动补全，以及多字段按权重综合排序。

安装扩展插件

cd elasticsearch-7.17.9/bin

# Pinyin analysis plugin: provides the "pinyin" tokenizer type used by the index settings.
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-pinyin/7.17.9

# IK analysis plugin: provides the ik_max_word / ik_smart tokenizers that the
# index settings also reference; without it, index creation fails.
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-ik/7.17.9

# Restart Elasticsearch after installing plugins so they are loaded.

创建索引

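使用 curl 命令操作 Elasticsearch 建立索引。注意：索引设置引用了 IK 分词器和同义词文件，创建索引前需确保已安装 analysis-ik 插件，并在每个节点的 config/analysis/synonyms.txt 中配置好同义词（例如一行 `西红柿,番茄`），否则创建索引会失败。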

markup 复制代码
# Create test_index with three custom analyzers:
#   pinyin_analyzer - "my_pinyin" tokenizer (full pinyin + first letters, lowercased)
#   ik_max_syno     - ik_max_word tokenizer + my_synonym filter (index-time analyzer)
#   ik_smart_syno   - ik_smart tokenizer + my_synonym filter (search-time analyzer)
# "name" and "remark" are each indexed four ways: standard text, keyword,
# pinyin, IK + synonyms, plus a completion sub-field for suggestions.
# synonyms_path is resolved relative to the Elasticsearch config directory.
# NOTE(review): "updateable": true may be added to my_synonym to allow reloading
# synonyms, but presumably only if the filter is used by search analyzers alone;
# ik_max_syno is used at index time here - confirm before enabling.
curl -X PUT "http://localhost:9200/test_index" -H 'Content-Type: application/json' -d '
{
    "settings": {
        "analysis": {
            "analyzer": {
                "pinyin_analyzer": {
                    "tokenizer": "my_pinyin"
                },
                "ik_max_syno": {
                    "tokenizer": "ik_max_word",
                    "filter": ["my_synonym"]
                },
                "ik_smart_syno": {
                    "tokenizer": "ik_smart",
                    "filter": ["my_synonym"]
                }
            },
            "tokenizer": {
                "my_pinyin": {
                    "type": "pinyin",
                    "keep_first_letter": true,
                    "keep_separate_first_letter": false,
                    "keep_full_pinyin": true,
                    "keep_original": false,
                    "limit_first_letter_length": 16,
                    "lowercase": true,
                    "remove_duplicated_term": true
                }
            },
            "filter": {
                "my_synonym": {
                    "type": "synonym_graph",
                    "synonyms_path": "analysis/synonyms.txt"
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "name": {
                "type": "text",
                "analyzer": "standard",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256,
                        "doc_values": false
                    },
                    "pinyin": {
                        "type": "text",
                        "analyzer": "pinyin_analyzer"
                    },
                    "ik": {
                        "type": "text",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno"
                    },
                    "suggest": {
                        "type": "completion",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno",
                        "preserve_separators": false,
                        "preserve_position_increments": true,
                        "max_input_length": 50
                    }
                }
            },
            "remark": {
                "type": "text",
                "analyzer": "standard",
                "fields": {
                    "keyword": {
                        "type": "keyword",
                        "ignore_above": 256,
                        "doc_values": false
                    },
                    "pinyin": {
                        "type": "text",
                        "analyzer": "pinyin_analyzer"
                    },
                    "ik": {
                        "type": "text",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno"
                    },
                    "suggest": {
                        "type": "completion",
                        "analyzer": "ik_max_syno",
                        "search_analyzer": "ik_smart_syno",
                        "preserve_separators": false,
                        "preserve_position_increments": true,
                        "max_input_length": 50
                    }
                }
            },
            "category": {
                "type": "keyword",
                "doc_values": false
            },
            "tag": {
                "type": "keyword"
            },
            "rating": {
                "type": "scaled_float",
                "scaling_factor": 10
            },
            "times": {
                "type": "integer"
            }
        }
    }
}'

插入示例数据

markup 复制代码
# Bulk-index the six sample recipes. The values match the search responses shown
# later in this article (ratings of docs 1 and 5, category/tags of doc 4, and
# doc 6, which appears in the function_score result).
# The body is NDJSON: one action line followed by one document line, and it
# must end with a newline.
curl -X POST "http://localhost:9200/test_index/_bulk" -H 'Content-Type: application/json' -d'
{ "index": { "_id": "1" } }
{ "name": "番茄炒蛋", "remark": "番茄, 鸡蛋", "category": "家常菜", "tag": ["简单", "营养"], "rating": 3.0, "times": 100 }
{ "index": { "_id": "2" } }
{ "name": "宫保鸡丁", "remark": "鸡胸肉, 干辣椒, 花生", "category": "川菜", "tag": ["辣", "下饭"], "rating": 3.4, "times": 80 }
{ "index": { "_id": "3" } }
{ "name": "蒸蛋羹", "remark": "水,盐,蛋,麻油,生抽", "category": "川菜", "tag": ["辣", "下饭"], "rating": 4.8, "times": 20 }
{ "index": { "_id": "4" } }
{ "name": "水煮蛋", "remark": "水,蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.9, "times": 30 }
{ "index": { "_id": "5" } }
{ "name": "酒酿蛋花年糕", "remark": "水,糖,蛋白,酒酿", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.8, "times": 50 }
{ "index": { "_id": "6" } }
{ "name": "牛奶麵包", "remark": "奶油,牛奶,糖,3颗蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.6, "times": 20 }
'

拼音搜索示例

markup 复制代码
# Pinyin search: match "fanqie" against the pinyin and IK sub-fields of name
# and remark, with per-field boosts (name.ik > name.pinyin > remark.ik > remark.pinyin).
# Results are ordered by relevance score first, then by "times" as a tie-breaker.
# Each sort key is its own array element so the priority does not depend on
# JSON object key order.
# NOTE(review): the highlight targets "name" (standard analyzer) with a
# match_phrase on the pinyin text, which presumably yields no fragments - the
# sample response contains no "highlight" section. Confirm whether
# "name.pinyin" was intended.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "fanqie",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    { "_score": { "order": "desc" } },
    { "times": { "order": "desc" } }
  ],
  "highlight": {
    "fields": {
      "name": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name": {
          "query": "fanqie"
        }
      }
    }
  }
}'

匹配结果:

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 13.421699,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          13.421699,
          100
        ]
      }
    ]
  }
}

同义词搜索示例

markup 复制代码
# Synonym search: the query text is expanded by the my_synonym filter of the
# ik_smart_syno search analyzer on the *.ik sub-fields, so documents indexed
# with a synonym of the query term can match.
# Results are ordered by relevance score first, then by "times" as a tie-breaker.
# Each sort key is its own array element so the priority does not depend on
# JSON object key order.
# NOTE(review): the highlight targets "name" (standard analyzer, no synonyms),
# which presumably yields no fragments for a synonym-only match - the sample
# response contains no "highlight" section. Confirm whether "name.ik" was intended.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "西红柿",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    { "_score": { "order": "desc" } },
    { "times": { "order": "desc" } }
  ],
  "highlight": {
    "fields": {
      "name": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name": {
          "query": "西红柿"
        }
      }
    }
  }
}'


匹配结果:
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 8.388562,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          8.388562,
          100
        ]
      }
    ]
  }
}

自动补全搜索示例

根据输入的关键字给出关联的建议词

如下是根据输入的关键字匹配两个字段进行自动给出建议词汇

markup 复制代码
# Completion suggester: run the same prefix text against the completion
# sub-fields of "name" and "remark"; each named suggestion comes back under
# its own key in the "suggest" section of the response.
curl --request GET "http://localhost:9200/test_index/_search" \
  --header 'Content-Type: application/json' \
  --data '
{
  "suggest": {
    "nameSuggest": {
      "text": "鸡",
      "completion": {
        "field": "name.suggest"
      }
    },
    "remarkSuggest": {
      "text": "鸡",
      "completion": {
        "field": "remark.suggest"
      }
    }
  }
}'
匹配的结果:
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "suggest": {
    "remarkSuggest": [
      {
        "text": "鸡",
        "offset": 0,
        "length": 1,
        "options": [
          {
            "text": "鸡胸肉, 干辣椒, 花生",
            "_index": "test_index",
            "_type": "_doc",
            "_id": "2",
            "_score": 1.0,
            "_source": {
              "name": "宫保鸡丁",
              "remark": "鸡胸肉, 干辣椒, 花生",
              "category": "川菜",
              "tag": [
                "辣",
                "下饭"
              ],
              "rating": 3.4,
              "times": 80
            }
          }
        ]
      }
    ],
    "nameSuggest": [
      {
        "text": "鸡",
        "offset": 0,
        "length": 1,
        "options": []
      }
    ]
  }
}

多字段按照权重综合排序

markup 复制代码
# Weighted multi-field ranking with function_score.
# No inner query is given, so every document is scored (see the sample response).
#   function 1: rating * 5   ("missing": 1 is used when a document has no rating)
#   function 2: times  * 3   ("missing": 1 is used when a document has no times)
#   score_mode "sum"       - add the two function results together
#   boost_mode "multiply"  - multiply the query score by that sum
# Example from the sample data: rating 3.0, times 100 -> 5*3.0 + 3*100 = 315.
# The explanations live here rather than inside the body because comments are
# not valid JSON.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "function_score": {
      "functions": [
        {
          "field_value_factor": {
            "field": "rating",
            "factor": 5,
            "modifier": "none",
            "missing": 1
          }
        },
        {
          "field_value_factor": {
            "field": "times",
            "factor": 3,
            "modifier": "none",
            "missing": 1
          }
        }
      ],
      "score_mode": "sum",
      "boost_mode": "multiply"
    }
  },
  "size": 10,
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    }
  ]
}'

匹配结果:
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 6,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 315.0,
        "_source": {
          "name": "番茄炒蛋",
          "remark": "番茄, 鸡蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养"
          ],
          "rating": 3.0,
          "times": 100
        },
        "sort": [
          315.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "2",
        "_score": 257.0,
        "_source": {
          "name": "宫保鸡丁",
          "remark": "鸡胸肉, 干辣椒, 花生",
          "category": "川菜",
          "tag": [
            "辣",
            "下饭"
          ],
          "rating": 3.4,
          "times": 80
        },
        "sort": [
          257.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "5",
        "_score": 174.0,
        "_source": {
          "name": "酒酿蛋花年糕",
          "remark": "水,糖,蛋白,酒酿",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.8,
          "times": 50
        },
        "sort": [
          174.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "4",
        "_score": 114.5,
        "_source": {
          "name": "水煮蛋",
          "remark": "水,蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.9,
          "times": 30
        },
        "sort": [
          114.5
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "3",
        "_score": 84.0,
        "_source": {
          "name": "蒸蛋羹",
          "remark": "水,盐,蛋,麻油,生抽",
          "category": "川菜",
          "tag": [
            "辣",
            "下饭"
          ],
          "rating": 4.8,
          "times": 20
        },
        "sort": [
          84.0
        ]
      },
      {
        "_index": "test_index",
        "_type": "_doc",
        "_id": "6",
        "_score": 83.0,
        "_source": {
          "name": "牛奶麵包",
          "remark": "奶油,牛奶,糖,3颗蛋",
          "category": "家常菜",
          "tag": [
            "简单",
            "营养",
            "快手菜"
          ],
          "rating": 4.6,
          "times": 20
        },
        "sort": [
          83.0
        ]
      }
    ]
  }
}
相关推荐
Coder_Boy_11 分钟前
基于SpringAI的在线考试系统设计-用户管理模块设计
java·大数据·人工智能·spring boot·spring cloud
虫小宝15 分钟前
天猫返利app搜索系统优化:基于Elasticsearch的商品导购引擎设计
大数据·elasticsearch·搜索引擎
:mnong17 分钟前
大语言模型提示词生成交互原型案例分享
大数据·数据库·人工智能
小北方城市网18 分钟前
第 5 课:服务网格(Istio)实战|大规模微服务的流量与安全治理体系
大数据·开发语言·人工智能·python·安全·微服务·istio
AC赳赳老秦18 分钟前
Go语言微服务文档自动化生成:基于DeepSeek的智能解析实践
大数据·开发语言·人工智能·微服务·golang·自动化·deepseek
证能量少女19 分钟前
2026 中专大数据与会计专业可考的会计相关证书有哪些
大数据
AIGC合规助手30 分钟前
最新I江苏算法、大模型备案攻略+补贴政策汇总
大数据·人工智能·安全·语言模型·aigc
wiss6633 分钟前
国产知识文档系统深度测评:功能、优势与选型指南
大数据·人工智能·企业知识管理·文件数据利用·电子文档管理系统
h***381834 分钟前
Java进阶(ElasticSearch的安装与使用)
java·elasticsearch·jenkins
龙亘川34 分钟前
深度解析智慧路灯大数据平台:物联网 + 大数据构建智慧城市感知底座
大数据·物联网·智慧城市·智慧路灯·智慧城管