Elasticsearch基础操作

本文主要介绍Elasticsearch的基础操作（本文是基于Kibana的Dev Tools控制台编写的，也可以用Postman去调用对应的REST接口）。使用之前需要先在虚拟机上安装好ES、Kibana以及IK分词器，安装方式可以是在线也可以是离线；这篇文章不介绍安装配置相关的内容，主要是记录常见的操作。

一、基础概念

java 复制代码

#--------------------------基础概念--------------------------#
#索引（indices）-------------------Databases 数据库
#类型（type）----------------------Table 数据表（注意：type 自 7.x 起已废弃，8.x 已移除，一个索引只对应一种文档结构，默认为 _doc）
#文档（Document）---------------Row 行
#字段（Field）---------------------Columns 列

二、倒排索引

java 复制代码

#--------------------------倒排索引--------------------------#
#ES 倒排索引包含两个部分：
#单词词典 （Term Dictionary)，索引最小单位，记录所有文档的单词，记录单词到倒排列表的关联关系
#单词词典通常非常大，一般通过 B+ 树或 Hash 表等结构实现，以满足高性能的插入与查询

#倒排列表（Posting List)-由倒排索引项（Posting）组成
#文档 ID
#词频 TF，该单词在文档中出现的次数，用于相关性评分
#位置（Position)，单词在文档中分词的位置。用于语句搜索（phrase query)
#偏移（Offset)，记录单词的开始结束位置，实现高亮显示

#倒排列表的元数据结构：
#(DocID;TF;<POS>)
#其中：
#DocID：出现某单词的文档ID
#TF(词频)：单词在该文档中出现的次数
#POS：单词在文档中的位置

三、分词操作

java 复制代码

#--------------------------分词操作--------------------------#
# Default analyzer ("standard"): Chinese text is split into single characters.
POST _analyze
{ "analyzer": "standard", "text": "中华人民共和国" }

# ik_smart: coarsest-grained segmentation (needs the IK analysis plugin).
POST _analyze
{ "analyzer": "ik_smart", "text": "中华人民共和国" }

# ik_max_word: finest-grained segmentation (needs the IK analysis plugin).
POST _analyze
{ "analyzer": "ik_max_word", "text": "中华人民共和国" }

四、索引操作

java 复制代码

#--------------------------索引操作--------------------------#
# Basic information about the Elasticsearch instance
GET /

# List every index (?v adds the column headers)
GET /_cat/indices?v

# Node status
GET /_cat/nodes?v

# Definition of the "study" index.
# NOTE(review): "study" is not created anywhere in this section; it must already exist.
GET /study?pretty=true

# Create an index with an explicit mapping.
# All string fields are "text" with a "keyword" sub-field (ignore_above 256);
# CREATE_TIME is a date and id is a long.
PUT /new_index_data/
{
  "aliases": {},
  "mappings": {
    "properties": {
      "BIZ_ID":             { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } },
      "BIZ_TYPE_ID":        { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } },
      "CHANNEL":            { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } },
      "CREATE_TIME":        { "type": "date" },
      "ORGANIZE_CODE":      { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } },
      "TRANSACTION_ID":     { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } },
      "USER_ID":            { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } },
      "USE_ENCRYPT":        { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } },
      "id":                 { "type": "long" },
      "virtual_channel_id": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }
    }
  }
}

# Definition of new_index_data
GET /new_index_data?pretty=true

# Settings of new_index_data only
GET /new_index_data/_settings

# Every document in the index
GET /new_index_data/_search
{ "query": { "match_all": {} } }

# Number of documents in the index
GET /new_index_data/_count

# Delete the index itself
DELETE /new_index_data/

# Delete documents but keep the index; narrow the query to delete selectively.
# "index_name" is a placeholder for the target index.
POST /index_name/_delete_by_query
{ "query": { "match_all": {} } }

# Cluster health
GET /_cat/health?v

# Create my_index with 3 primary shards and 2 replicas per primary.
# Mapping: title and content are text, author is keyword, publish_date is date.
PUT my_index
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 2
  },
  "mappings": {
    "properties": {
      "title": { "type": "text" },
      "content": { "type": "text" },
      "author": { "type": "keyword" },
      "publish_date": { "type": "date" }
    }
  }
}

# Mapping of my_index
GET my_index/_mapping

# List every index
GET /_cat/indices?v

# Full definition of my_index
GET /my_index?pretty=true

# Settings of my_index
GET /my_index/_settings

# Every document in my_index
GET /my_index/_search
{
  "query": {
    "match_all": {}
  }
}

# NOTE(review): my_index1 is not created anywhere in this section, so this
# request returns index_not_found_exception unless you created it yourself.
GET /my_index1/_search
{
  "query": {
    "match_all": {}
  }
}

# Update index settings: number_of_replicas is dynamic and can be changed in
# place (number_of_shards cannot be changed on an existing index).
PUT my_index/_settings
{
  "number_of_replicas": 1
}

# Update the mapping.
# New fields are added in place by sending ONLY the additions; the existing
# mapping does not have to be resubmitted.
# The type of an existing field cannot be changed: create a new index with the
# desired mapping and copy the data over with the _reindex API.
PUT my_index/_mapping
{
  "properties": {
    "tags": { "type": "keyword" }
  }
}

# Delete the index.
# If you run this, the later "POST my_index/_doc" example re-creates my_index
# with a dynamic mapping instead of the explicit one above.
DELETE my_index

五、文档操作

java 复制代码

#--------------------------索引数据操作（文档）--------------------------#
# Index a new document; no _id is given in the URL, so one is generated.
POST my_index/_doc
{
  "title":        "My First Blog Post",
  "content":      "This is the content of my first blog post.",
  "author":       "zlj",
  "publish_date": "2024-05-13T12:00:00Z"
}

# Search by title
GET my_index/_search
{ "query": { "match": { "title": "blog" } } }

# Search by content
GET my_index/_search
{ "query": { "match": { "content": "blog" } } }

# "query" is the search object. It holds one query type (match_all, match,
# term, range, ...) plus that type's conditions; the syntax of the conditions
# differs per type.

# Update a document by ID. PUT on _doc/<id> stores the body as the complete
# document under that ID. Use an ID that exists in your own index.
PUT my_index/_doc/I4EFcY8BsiNjKcqvjXQ8
{
  "title":        "Updated Blog Post",
  "content":      "This is the updated content of my blog post.",
  "author":       "John Doe",
  "publish_date": "2024-04-03T12:00:00Z"
}

# Fetch a document by ID
GET my_index/_doc/I4EFcY8BsiNjKcqvjXQ8

# Delete a document by ID
DELETE my_index/_doc/I4EFcY8BsiNjKcqvjXQ8

六、批量操作

java 复制代码

#--------------------------批量操作--------------------------#
# Bulk-create documents with _bulk.
# The body is NDJSON: one action line, then (for create/index/update) one
# source line. "create" fails for an _id that already exists.
# "_type" is omitted from the action metadata: it is deprecated in 7.x and
# rejected by 8.x.
POST _bulk
{"create":{"_index":"article","_id":1}}
{"id":1,"title":"fox老师","content":"fox老师666","tags":["java", "面向对象"],"create_time":1554015482530}
{"create":{"_index":"article","_id":2}}
{"id":2,"title":"mark老师","content":"mark老师NB","tags":["java", "面向对象"],"create_time":1554015482530}

# Fetch documents by ID across different indices.
# Replace the my_index ID with one that exists in your own index.
GET _mget
{
  "docs": [
    { "_index": "my_index", "_id": "I4EFcY8BsiNjKcqvjXQ8" },
    { "_index": "article", "_id": 1 },
    { "_index": "article", "_id": 2 }
  ]
}

# Fetch several documents by ID from the article index
GET /article/_mget
{
  "docs": [
    { "_id": 1 },
    { "_id": 2 }
  ]
}

# Short form of the request above
GET /article/_mget
{
  "ids": ["1", "2"]
}

# Every document in the article index
GET /article/_search
{
  "query": {
    "match_all": {}
  }
}

# Bulk-delete documents (a delete action has no source line)
POST _bulk
{"delete":{"_index":"article","_id":1}}
{"delete":{"_index":"article","_id":2}}

# Mixed actions in one request: index, delete and partial update.
# Document 2 is indexed first because the previous request deleted it; an
# update on a missing document fails with document_missing_exception.
POST _bulk
{"index":{"_index":"article","_id":1}}
{"id":1,"title":"fox老师","content":"fox老师666","tags":["java", "面向对象"],"create_time":1554015482530}
{"index":{"_index":"article","_id":2}}
{"id":2,"title":"mark老师","content":"mark老师NB","tags":["java", "面向对象"],"create_time":1554015482530}
{"delete":{"_index":"article","_id":1}}
{"update":{"_index":"article","_id":2}}
{"doc":{"create_time":1554018421008}}

# Run several searches in one request with _msearch.
# The body is header/body line pairs; an empty header {} uses the index from the URL.
GET /article/_msearch
{}
{"query" : {"match_all" : {}}, "from" : 0, "size" : 2}
{"index" : "article"}
{"query" : {"match_all" : {}}}

七、高级查询-数据准备

java 复制代码

#--------------------------高级查询 数据准备--------------------------#
# Search request shape: GET /<index>/_search {JSON request body}
# The typed forms (/<index>/_doc/_search, /<index>/_doc/_bulk) are deprecated
# since 7.0 and removed in 8.0, so only the typeless endpoints are used here.

# 1. Create the index and its mapping.
#    title is a keyword (not analyzed); description is text (analyzed).
PUT /products/
{
  "mappings": {
    "properties": {
      "title":{
        "type": "keyword"
      },
      "price":{
        "type": "double"
      },
      "created_at":{
        "type":"date"
      },
      "description":{
        "type":"text"
      }
    }
  }
}

# 2. Load test data. The empty "index" action lets Elasticsearch generate each _id.
POST /products/_bulk
{"index":{}}
{"title":"iphone12 pro","price":8999,"created_at":"2020-10-23","description":"iPhone 12 Pro采用超瓷晶面板和亚光质感玻璃背板，搭配不锈钢边框，有银色、石墨色、金色、海蓝色四种颜色。宽度:71.5毫米，高度:146.7毫米，厚度:7.4毫米，重量：187克"}
{"index":{}}
{"title":"iphone12","price":4999,"created_at":"2020-10-23","description":"iPhone 12 高度：146.7毫米；宽度：71.5毫米；厚度：7.4毫米；重量：162克（5.73盎司） [5]  。iPhone 12设计采用了离子玻璃，以及7000系列铝金属外壳。"}
{"index":{}}
{"title":"iphone13","price":6000,"created_at":"2021-09-15","description":"iPhone 13屏幕采用6.1英寸OLED屏幕；高度约146.7毫米，宽度约71.5毫米，厚度约7.65毫米，重量约173克。"}
{"index":{}}
{"title":"iphone13 pro","price":8999,"created_at":"2021-09-15","description":"iPhone 13Pro搭载A15 Bionic芯片，拥有四种配色，支持5G。有128G、256G、512G、1T可选，售价为999美元起。"}

# Definition of the products index
GET /products?pretty=true

# Settings of the products index
GET /products/_settings

# Every document in the products index
GET /products/_search
{
  "query": {
    "match_all": {}
  }
}

# Number of documents in the products index
GET /products/_count

八、高级查询-查询操作

java 复制代码

#--------------------------高级查询 查询操作--------------------------#

# 1. match_all: return every document in the index
GET /products/_search
{ "query": { "match_all": {} } }

# 2. term: look up documents by an exact term
GET /products/_search
{
  "query": {
    "term": { "price": { "value": 4999 } }
  }
}
# NOTE 1: term queries show that the default analyzer is the standard analyzer:
#         English text is split into words, Chinese text into single characters.
# NOTE 2: term queries show that keyword, date, integer, long, double, boolean
#         and ip fields are not analyzed; only text fields are.

# 3. range: documents whose field value lies inside the given range
GET /products/_search
{
  "query": {
    "range": { "price": { "gte": 1400, "lte": 9999 } }
  }
}

# 4. prefix: documents containing a term that starts with the given prefix
GET /products/_search
{
  "query": {
    "prefix": { "title": { "value": "ipho" } }
  }
}

# 5. wildcard: "?" matches exactly one character, "*" matches any number of characters
GET /products/_search
{
  "query": {
    "wildcard": { "description": { "value": "iphon*" } }
  }
}

# 6. ids: takes an array of document IDs and returns the matching documents.
#    Replace the values with IDs generated in your own index.
GET /products/_search
{
  "query": {
    "ids": { "values": ["E7MbhY8BTmd2OsNDpakT", "FLMbhY8BTmd2OsNDpaka"] }
  }
}

# 7. fuzzy: match terms that are close to the given term
GET /products/_search
{
  "query": {
    "fuzzy": { "description": "iphooone" }
  }
}
# The maximum edit distance is between 0 and 2. By default it depends on the term length:
#   up to 2 characters   -> no edits allowed (exact match)
#   3 to 5 characters    -> one edit allowed
#   more than 5          -> at most two edits allowed
# 8. bool: combine several conditions into one query
#    must     -> AND, every clause has to match
#    should   -> OR, one matching clause is enough
#    must_not -> NOT, none of the clauses may match
GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        { "term": { "price": { "value": 4999 } } }
      ]
    }
  }
}

# 9. multi_match: run one query text against several fields
GET /products/_search
{
  "query": {
    "multi_match": {
      "query":  "iphone13 毫",
      "fields": ["title", "description"]
    }
  }
}
# Note: for an analyzed field the query text is analyzed before it is matched
# against that field; for a non-analyzed field it is matched as a whole.

# 10. query_string: analyzed search against a default field
GET /products/_search
{
  "query": {
    "query_string": {
      "default_field": "description",
      "query":         "屏幕真的非常不错"
    }
  }
}
# Note: if the target field is analyzed, the query text is analyzed too;
# otherwise the query text is used without analysis.

# 11. highlight
# (1) "highlight" marks the matching keywords in the returned documents
GET /products/_search
{
  "query": {
    "term": { "description": { "value": "iphone" } }
  },
  "highlight": {
    "fields": { "*": {} }
  }
}

# (2) Custom HTML tags: set pre_tags and post_tags inside "highlight"
GET /products/_search
{
  "query": {
    "term": { "description": { "value": "iphone" } }
  },
  "highlight": {
    "pre_tags":  ["<span style='color:red'>"],
    "post_tags": ["</span>"],
    "fields": { "*": {} }
  }
}

# (3) Multi-field highlighting: require_field_match set to false lets fields
#     other than the queried one be highlighted as well
GET /products/_search
{
  "query": {
    "term": { "description": { "value": "iphone" } }
  },
  "highlight": {
    "require_field_match": "false",
    "pre_tags":  ["<span style='color:red'>"],
    "post_tags": ["</span>"],
    "fields": { "*": {} }
  }
}

# 12. size: number of hits to return (10 when omitted)
GET /products/_search
{
  "query": { "match_all": {} },
  "size": 2
}

# 13. from: offset of the first hit; combine it with size for pagination
GET /products/_search
{
  "query": { "match_all": {} },
  "from": 0,
  "size": 2
}

# 14. sort: order the hits by a field
GET /products/_search
{
  "query": { "match_all": {} },
  "sort": [
    { "price": { "order": "desc" } }
  ]
}

# 15. _source: an array naming the fields to include in each hit
GET /products/_search
{
  "query": { "match_all": {} },
  "_source": ["title", "description"]
}