安装
sh
# Dedicated Docker network that the Elasticsearch container is attached to.
docker network create elastic
docker pull docker.elastic.co/elasticsearch/elasticsearch:8.10.4
# Raise the mmap count limit (Linux hosts only).
vim /etc/sysctl.conf # add the line: vm.max_map_count=262144
# Reload /etc/sysctl.conf so the new value takes effect without a reboot.
sysctl -p
# Verify: should print "vm.max_map_count = 262144".
sysctl vm.max_map_count
docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.10.4
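启动成功后会出现下图所示的输出(终端中会打印 elastic 用户的初始密码和 Kibana 的 enrollment token)。
请将密码保存好;如果没有保存,可以用下面的命令重置密码,并重新生成 Kibana 的 enrollment token: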
sh
# Reset the password of the built-in "elastic" user inside the es01 container.
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
# Create an enrollment token with scope "kibana" (-s kibana).
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
将 CA 证书从容器复制到当前目录,供客户端通过 SSL/TLS(HTTPS)访问时校验:
sh
# Copy the HTTP CA certificate out of the es01 container into the current directory.
docker cp es01:/usr/share/elasticsearch/config/certs/http_ca.crt .
使用
connect
py
from elasticsearch import Elasticsearch
# Candidate node URLs handed to the client.
# NOTE(review): the `docker run` command in the install section only publishes
# port 9200; 9201/9202 presumably belong to additional nodes -- confirm.
NODES = [
    "https://localhost:9200",
    "https://localhost:9201",
    "https://localhost:9202",
]
# Password for the 'elastic' user generated by Elasticsearch
ELASTIC_PASSWORD = "<password>"
# Create the client instance
client = Elasticsearch(
    NODES,
    # Path to the http_ca.crt file copied out of the container.
    ca_certs="/path/to/http_ca.crt",
    basic_auth=("elastic", ELASTIC_PASSWORD),
    # Several authentication methods are supported; alternatives:
    # api_key=("api_key.id", "api_key.api_key"),
    # bearer_auth="token-value",
    # ssl_assert_fingerprint=CERT_FINGERPRINT,
)
# The index / insert / query snippets refer to the client as `es`; bind that
# name here so they run as written.
es = client
# Successful response!
client.info()
index
py
# Index settings: a custom analyzer named "latex_analyzer" that uses the
# standard analyzer as its base with stop words disabled.
index_settings = {
    "analysis": {
        "analyzer": {
            "latex_analyzer": {
                "type": "standard",
                "stopwords": "_none_",
            }
        }
    }
}
# Field mappings: "question" is a text field analysed with the analyzer
# defined above.
index_mappings = {
    "properties": {
        "question": {
            "type": "text",
            "analyzer": "latex_analyzer",
        }
    }
}
mapping = {"settings": index_settings, "mappings": index_mappings}
# Create the index.
es.indices.create(index="questions_index", body=mapping)
insert
py
from elasticsearch import Elasticsearch, helpers
# Keep only the "question" column, as a one-column DataFrame.
insert_df = df[["question"]]
# Preview the first rows (the value is only shown in an interactive session).
insert_df.head()
def doc_generator(df, index_name):
    """Yield one bulk-index action per row of ``df``.

    Args:
        df: pandas DataFrame; every row becomes one document and the row's
            index label is used as the document ``_id``.
        index_name: name of the index the documents are written to.

    Yields:
        dict with the keys ``_index``, ``_id`` and ``_source``.
    """
    for row_id, row in df.iterrows():
        # A float NaN (pandas' missing-value marker) would be serialised as
        # the bare token ``NaN``, which is not valid JSON; send null instead.
        source = {
            field: None if isinstance(value, float) and value != value else value
            for field, value in row.to_dict().items()
        }
        yield {
            "_index": index_name,
            "_id": row_id,
            "_source": source,
        }
# Stream the generated actions to Elasticsearch through the bulk helper.
bulk_actions = doc_generator(insert_df, 'questions_index')
helpers.bulk(es, bulk_actions)
query
py
# Full-text "match" query against the "question" field.
question_text = "数轴上A、B两点所表示的有理数的和是"
search_query = {"query": {"match": {"question": question_text}}}
response = es.search(index="questions_index", body=search_query)
# Print the question text of every returned hit.
for matched in response['hits']['hits']:
    print(matched['_source']['question'])
py
import ujson
# Original query, kept as raw JSON text.
# Structure: bool.must -> bool.must -> bool.should with three multi_match
# clauses over the same query text (types cross_fields, phrase and
# phrase_prefix); minimum_should_match "1" requires at least one to match.
# NOTE(review): the sub-fields question.raw / question.search /
# question.autosuggest / question.english are not defined by the
# questions_index mapping shown in this file -- confirm they exist in the
# index this query is meant for.
query_string = '''{
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"bool": {
"should": [
{
"multi_match": {
"query": "数轴上A、B两点所表示的有理数的和是",
"fields": ["question^3", "question.raw^3", "question.search^1", "question.autosuggest^1", "question.english^1"],
"type": "cross_fields",
"operator": "and"
}
},
{
"multi_match": {
"query": "数轴上A、B两点所表示的有理数的和是",
"fields": ["question^3", "question.raw^3", "question.search^1", "question.autosuggest^1", "question.english^1"],
"type": "phrase",
"operator": "and"
}
},
{
"multi_match": {
"query": "数轴上A、B两点所表示的有理数的和是",
"fields": ["question^3", "question.raw^3", "question.english^1"],
"type": "phrase_prefix",
"operator": "and"
}
}
],
"minimum_should_match": "1"
}
}
]
}
}
]
}
}
}'''
# Parse the JSON text into the dict that is sent as the search body.
query_dict = ujson.loads(query_string)
response = es.search(index='questions_index', body=query_dict)
# Print the stored source document of every hit.
matched_docs = response['hits']['hits']
for doc in matched_docs:
    print(doc['_source'])