介绍
基于 Elasticsearch 自定义扩展功能。我们将在其基础上构建拼音搜索、同义词搜索、自动补全，以及多字段按权重综合排序。
安装扩展插件
# Install the analysis plugins used by this tutorial. The plugin version must
# match the Elasticsearch version (7.17.9 here).
cd elasticsearch-7.17.9/bin
# Pinyin tokenizer, used by the "pinyin_analyzer" defined in the index settings.
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-pinyin/7.17.9
# IK tokenizers (ik_max_word / ik_smart), used by the synonym analyzers in the index settings.
./elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-ik/7.17.9
# The synonym filter reads config/analysis/synonyms.txt (path is relative to the
# config directory); the file must exist before the index is created.
mkdir -p ../config/analysis
echo "番茄,西红柿" >> ../config/analysis/synonyms.txt
# Restart Elasticsearch afterwards so the newly installed plugins are loaded.
创建索引
使用 curl 命令操作 Elasticsearch 建立索引（需要先安装 pinyin 与 IK 分词插件，并准备好 config/analysis/synonyms.txt 同义词文件）
markup
# Create "test_index".
#   Analyzers:
#     pinyin_analyzer - pinyin tokenizer (analysis-pinyin plugin)
#     ik_max_syno     - ik_max_word tokenizer + my_synonym filter (index time)
#     ik_smart_syno   - ik_smart tokenizer + my_synonym filter (search time)
#   Fields "name" and "remark" are each indexed five ways: standard text,
#   keyword, pinyin, IK + synonyms, and a completion sub-field for suggestions.
# Optional: "updateable": true can be added to my_synonym to reload synonyms
# without reindexing, but Elasticsearch only allows that for filters used
# exclusively in search analyzers; here the filter is also used at index time.
curl -X PUT "http://localhost:9200/test_index" -H 'Content-Type: application/json' -d '
{
  "settings": {
    "analysis": {
      "analyzer": {
        "pinyin_analyzer": {
          "tokenizer": "my_pinyin"
        },
        "ik_max_syno": {
          "tokenizer": "ik_max_word",
          "filter": ["my_synonym"]
        },
        "ik_smart_syno": {
          "tokenizer": "ik_smart",
          "filter": ["my_synonym"]
        }
      },
      "tokenizer": {
        "my_pinyin": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "keep_original": false,
          "limit_first_letter_length": 16,
          "lowercase": true,
          "remove_duplicated_term": true
        }
      },
      "filter": {
        "my_synonym": {
          "type": "synonym_graph",
          "synonyms_path": "analysis/synonyms.txt"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "standard",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256,
            "doc_values": false
          },
          "pinyin": {
            "type": "text",
            "analyzer": "pinyin_analyzer"
          },
          "ik": {
            "type": "text",
            "analyzer": "ik_max_syno",
            "search_analyzer": "ik_smart_syno"
          },
          "suggest": {
            "type": "completion",
            "analyzer": "ik_max_syno",
            "search_analyzer": "ik_smart_syno",
            "preserve_separators": false,
            "preserve_position_increments": true,
            "max_input_length": 50
          }
        }
      },
      "remark": {
        "type": "text",
        "analyzer": "standard",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256,
            "doc_values": false
          },
          "pinyin": {
            "type": "text",
            "analyzer": "pinyin_analyzer"
          },
          "ik": {
            "type": "text",
            "analyzer": "ik_max_syno",
            "search_analyzer": "ik_smart_syno"
          },
          "suggest": {
            "type": "completion",
            "analyzer": "ik_max_syno",
            "search_analyzer": "ik_smart_syno",
            "preserve_separators": false,
            "preserve_position_increments": true,
            "max_input_length": 50
          }
        }
      },
      "category": {
        "type": "keyword",
        "doc_values": false
      },
      "tag": {
        "type": "keyword"
      },
      "rating": {
        "type": "scaled_float",
        "scaling_factor": 10
      },
      "times": {
        "type": "integer"
      }
    }
  }
}'
插入示例数据
markup
# Bulk-index the six sample recipes that the search results in this document
# are based on. Each document takes two lines: the action line, then the source.
# refresh=true makes the documents searchable as soon as the request returns,
# so the example queries can be run immediately afterwards.
curl -X POST "http://localhost:9200/test_index/_bulk?refresh=true" -H 'Content-Type: application/json' -d'
{ "index": { "_id": 1 } }
{ "name": "番茄炒蛋", "remark": "番茄, 鸡蛋", "category": "家常菜", "tag": ["简单", "营养"], "rating": 3.0, "times": 100 }
{ "index": { "_id": 2 } }
{ "name": "宫保鸡丁", "remark": "鸡胸肉, 干辣椒, 花生", "category": "川菜", "tag": ["辣", "下饭"], "rating": 3.4, "times": 80 }
{ "index": { "_id": 3 } }
{ "name": "蒸蛋羹", "remark": "水,盐,蛋,麻油,生抽", "category": "川菜", "tag": ["辣", "下饭"], "rating": 4.8, "times": 20 }
{ "index": { "_id": 4 } }
{ "name": "水煮蛋", "remark": "水,蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.9, "times": 30 }
{ "index": { "_id": 5 } }
{ "name": "酒酿蛋花年糕", "remark": "水,糖,蛋白,酒酿", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.8, "times": 50 }
{ "index": { "_id": 6 } }
{ "name": "牛奶麵包", "remark": "奶油,牛奶,糖,3颗蛋", "category": "家常菜", "tag": ["简单", "营养", "快手菜"], "rating": 4.6, "times": 20 }
'
拼音搜索示例
markup
# Pinyin search: look up "fanqie" across the pinyin and IK sub-fields of
# "name" and "remark", with per-field boosts. Results are ordered by relevance
# score first, then by "times"; each sort key is its own array element so the
# priority does not depend on JSON object key order.
# NOTE(review): the highlight section targets "name" (standard analyzer), which
# never contains the pinyin term, so the response below carries no highlight
# section - confirm which field is meant to be highlighted.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "fanqie",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    { "_score": { "order": "desc" } },
    { "times": { "order": "desc" } }
  ],
  "highlight": {
    "fields": {
      "name": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name": {
          "query": "fanqie"
        }
      }
    }
  }
}'
匹配结果:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "_doc",
"_id": "1",
"_score": 13.421699,
"_source": {
"name": "番茄炒蛋",
"remark": "番茄, 鸡蛋",
"category": "家常菜",
"tag": [
"简单",
"营养"
],
"rating": 3.0,
"times": 100
},
"sort": [
13.421699,
100
]
}
]
}
}
同义词搜索示例
markup
# Synonym search: "西红柿" is not present in any document, but the synonym
# filter on the IK sub-fields lets it match documents containing "番茄".
# Results are ordered by relevance score first, then by "times"; each sort key
# is its own array element so the priority does not depend on JSON key order.
# Highlighting targets "name.ik" because that is the sub-field analyzed with
# synonyms; the plain "name" field (standard analyzer) never matches "西红柿".
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "multi_match": {
      "query": "西红柿",
      "fields": [
        "name.pinyin^4",
        "name.ik^5",
        "remark.pinyin^2",
        "remark.ik^3"
      ],
      "operator": "or"
    }
  },
  "size": 15,
  "sort": [
    { "_score": { "order": "desc" } },
    { "times": { "order": "desc" } }
  ],
  "highlight": {
    "fields": {
      "name.ik": {}
    },
    "highlight_query": {
      "match_phrase": {
        "name.ik": {
          "query": "西红柿"
        }
      }
    }
  }
}'
匹配结果:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "_doc",
"_id": "1",
"_score": 8.388562,
"_source": {
"name": "番茄炒蛋",
"remark": "番茄, 鸡蛋",
"category": "家常菜",
"tag": [
"简单",
"营养"
],
"rating": 3.0,
"times": 100
},
"sort": [
8.388562,
100
]
}
]
}
}
自动补全搜索示例
根据输入的关键字给出相关的建议词。
下面的示例根据输入的关键字，同时在 name 和 remark 两个字段的补全子字段上查询并给出建议词。
markup
# Auto-complete: run two completion suggesters for the input "鸡", one against
# the "name.suggest" sub-field and one against "remark.suggest".
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "suggest": {
    "nameSuggest": {
      "text": "鸡",
      "completion": { "field": "name.suggest" }
    },
    "remarkSuggest": {
      "text": "鸡",
      "completion": { "field": "remark.suggest" }
    }
  }
}'
匹配的结果:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"suggest": {
"remarkSuggest": [
{
"text": "鸡",
"offset": 0,
"length": 1,
"options": [
{
"text": "鸡胸肉, 干辣椒, 花生",
"_index": "test_index",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"name": "宫保鸡丁",
"remark": "鸡胸肉, 干辣椒, 花生",
"category": "川菜",
"tag": [
"辣",
"下饭"
],
"rating": 3.4,
"times": 80
}
}
]
}
],
"nameSuggest": [
{
"text": "鸡",
"offset": 0,
"length": 1,
"options": []
}
]
}
}
多字段按照权重综合排序
markup
# Weighted multi-field ranking with function_score (no "query" is given, so
# every document is scored):
#   - rating * 5 : rating carries the larger per-unit weight
#   - times  * 3 : times carries the smaller per-unit weight
#   - "missing": 1 is the value used when a document lacks the field
#   - "score_mode": "sum" adds the two function results together
#   - "boost_mode": "multiply" multiplies that sum by the query score
# Example: rating 3.0, times 100 -> 5 * 3.0 + 3 * 100 = 315.
curl -X GET "http://localhost:9200/test_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "function_score": {
      "functions": [
        {
          "field_value_factor": {
            "field": "rating",
            "factor": 5,
            "modifier": "none",
            "missing": 1
          }
        },
        {
          "field_value_factor": {
            "field": "times",
            "factor": 3,
            "modifier": "none",
            "missing": 1
          }
        }
      ],
      "score_mode": "sum",
      "boost_mode": "multiply"
    }
  },
  "size": 10,
  "sort": [
    { "_score": { "order": "desc" } }
  ]
}'
匹配结果:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 6,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "_doc",
"_id": "1",
"_score": 315.0,
"_source": {
"name": "番茄炒蛋",
"remark": "番茄, 鸡蛋",
"category": "家常菜",
"tag": [
"简单",
"营养"
],
"rating": 3.0,
"times": 100
},
"sort": [
315.0
]
},
{
"_index": "test_index",
"_type": "_doc",
"_id": "2",
"_score": 257.0,
"_source": {
"name": "宫保鸡丁",
"remark": "鸡胸肉, 干辣椒, 花生",
"category": "川菜",
"tag": [
"辣",
"下饭"
],
"rating": 3.4,
"times": 80
},
"sort": [
257.0
]
},
{
"_index": "test_index",
"_type": "_doc",
"_id": "5",
"_score": 174.0,
"_source": {
"name": "酒酿蛋花年糕",
"remark": "水,糖,蛋白,酒酿",
"category": "家常菜",
"tag": [
"简单",
"营养",
"快手菜"
],
"rating": 4.8,
"times": 50
},
"sort": [
174.0
]
},
{
"_index": "test_index",
"_type": "_doc",
"_id": "4",
"_score": 114.5,
"_source": {
"name": "水煮蛋",
"remark": "水,蛋",
"category": "家常菜",
"tag": [
"简单",
"营养",
"快手菜"
],
"rating": 4.9,
"times": 30
},
"sort": [
114.5
]
},
{
"_index": "test_index",
"_type": "_doc",
"_id": "3",
"_score": 84.0,
"_source": {
"name": "蒸蛋羹",
"remark": "水,盐,蛋,麻油,生抽",
"category": "川菜",
"tag": [
"辣",
"下饭"
],
"rating": 4.8,
"times": 20
},
"sort": [
84.0
]
},
{
"_index": "test_index",
"_type": "_doc",
"_id": "6",
"_score": 83.0,
"_source": {
"name": "牛奶麵包",
"remark": "奶油,牛奶,糖,3颗蛋",
"category": "家常菜",
"tag": [
"简单",
"营养",
"快手菜"
],
"rating": 4.6,
"times": 20
},
"sort": [
83.0
]
}
]
}
}