容器化部署 Elasticsearch 教程 + Python 操作 ES 数据库示例

1. 拉取镜像

bash 复制代码

# Download the elasticsearch image tagged 7.17.1; the container created in a
# later step is started from this same tag.
docker pull elasticsearch:7.17.1

2. 创建挂载目录和配置文件

bash 复制代码

# Create the host directories that are bind-mounted into the container
# (mkdir -p also creates the parent /home/elasticsearch).
mkdir -p /home/elasticsearch/{config,home,logs,data,plugins}
sudo chmod 777 /home/elasticsearch

# Add http.host only when the key is not already present. A plain ">>" would
# append the line again on every re-run, leaving a duplicated key in
# elasticsearch.yml, which Elasticsearch may refuse to load.
grep -q '^http\.host:' /home/elasticsearch/config/elasticsearch.yml 2>/dev/null \
  || printf '%s\n' 'http.host: 0.0.0.0' >> /home/elasticsearch/config/elasticsearch.yml

# Hand the writable directories to uid/gid 1000.
# NOTE(review): presumably the user the Elasticsearch process runs as inside the image - confirm.
chown -R 1000:1000 /home/elasticsearch/logs /home/elasticsearch/data

3. 创建容器

bash 复制代码

# Start a detached single-node Elasticsearch 7.17.1 container.
#   - host port 32140 -> container 9200, host port 32139 -> container 9300
#   - JVM heap fixed at 16 GB via ES_JAVA_OPTS; lower -Xms/-Xmx on smaller hosts
#   - security enabled, with ELASTIC_PASSWORD supplying the password used for
#     the "elastic" user by the later curl/Python examples
#   - config file, logs, data, home and plugins are bind-mounted from
#     /home/elasticsearch on the host
# "discovery.type=single-node" is passed once; the original command repeated it.
docker run -itd -p 32140:9200 -p 32139:9300 \
  --restart=always \
  --privileged=true \
  --name=elasticsearch-filebeat \
  -e "discovery.type=single-node" \
  -e ES_JAVA_OPTS="-Xms16g -Xmx16g" \
  -e "ELASTIC_PASSWORD=paas123" \
  -e "xpack.security.enabled=true" \
  -v /home/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml \
  -v /home/elasticsearch/logs:/usr/share/elasticsearch/logs \
  -v /home/elasticsearch/data:/usr/share/elasticsearch/data \
  -v /home/elasticsearch/home:/usr/share/elasticsearch/home \
  -v /home/elasticsearch/plugins:/usr/share/elasticsearch/plugins \
  elasticsearch:7.17.1

4. 验证服务

bash 复制代码

[root@localhost elasticsearch]#  curl -k 'http://localhost:32140/_cat/health?v' --user elastic:paas123
epoch      timestamp cluster       status node.total node.data shards pri relo init unassign pending_tasks max_task_wait_time active_shards_percent
1776329397 08:49:57  elasticsearch yellow          1         1      1   1    0    0        1             0                  -                 50.0%
[root@localhost elasticsearch]#

5. Web API 接口示例

bash 复制代码

# 查询数据（容器已开启 xpack 安全认证，请求时需携带 elastic 用户的用户名和密码）
http://10.132.1.126:32140/{index_name}/_search?from=0&size=100&timeout=1s

# 通过用例名称查询数据
http://10.132.1.126:32140/{index_name}/_search?q=case_name:用户查看云主机权限策略资源粒度下的告警日志生成&from=0&size=10&timeout=1s

6. python 操作ES示例脚本

python 复制代码

# -*- coding: utf-8 -*-
# @Time    : 2026/4/15 15:11
# @Software: PyCharm
# @Desc    :
import hashlib
import os
import warnings

from elasticsearch import Elasticsearch

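# Keep the Elasticsearch server version (7.17.1) and the Python `elasticsearch` client library version (7.17.5) close to each other; the client API differs between versions.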


# 精准匹配这个废弃警告，忽略它
warnings.filterwarnings(
    "ignore",
    category=DeprecationWarning,
    message=r"The 'body' parameter is deprecated for the 'search' API and will be removed in a future version.*"
)

# Elasticsearch connection settings.
# Each value can be overridden with an environment variable of the same name,
# so real credentials do not have to be written into this script. The literals
# are the defaults matching the deployment described in this tutorial.
ES_HOST = os.environ.get('ES_HOST', '10.132.1.126')
ES_PORT = int(os.environ.get('ES_PORT', '32140'))  # kept as int, like the original constant
ES_USER = os.environ.get('ES_USER', 'elastic')
ES_PWD = os.environ.get('ES_PWD', 'paas123')

# Index name and mapping definition.
index_name = "test_api_maps"


def _keyword_field(ignore_above=None):
    """Return a ``keyword`` field mapping, optionally capped with ``ignore_above``."""
    field = {"type": "keyword"}
    if ignore_above is not None:
        # Values longer than this many characters are not indexed.
        field["ignore_above"] = ignore_above
    return field


def _step_list_mapping():
    """Return the nested mapping shared by ``prefix_step`` and ``case_step``.

    Each step carries a ``step_name`` plus a nested ``details`` array whose
    entries hold ``method`` / ``url`` / ``status`` keywords. ``dynamic`` stays
    enabled so extra fields can still be added to a step.
    """
    return {
        "type": "nested",
        "dynamic": True,
        "properties": {
            "step_name": _keyword_field(),
            "details": {
                "type": "nested",
                "properties": {
                    "method": _keyword_field(),
                    "url": _keyword_field(),
                    "status": _keyword_field(),
                },
            },
        },
    }


mapping_body = {
    "mappings": {
        # Global template: map every dynamically added string field to
        # keyword (instead of text).
        "dynamic_templates": [
            {
                "strings_as_keyword": {
                    "match_mapping_type": "string",
                    "mapping": _keyword_field(ignore_above=256),
                }
            }
        ],
        "properties": {
            # --- basic information ---
            # Case path: used for exact lookup and de-duplication.
            "case_path": _keyword_field(ignore_above=256),
            # Case name.
            "case_name": _keyword_field(ignore_above=256),

            # --- core nested structure ---
            # Must be "nested" because api_tree is an array of objects.
            "api_tree": {
                "type": "nested",
                "properties": {
                    # 1. Pre-condition steps.
                    "prefix_step": _step_list_mapping(),
                    # 2. Test-case steps.
                    "case_step": _step_list_mapping(),
                    # 3. Flat list of the APIs a case calls; keyword so that
                    #    membership can be tested with an exact match.
                    "api_list": _keyword_field(),
                },
            },
        },
    }
}

# -----------------
# 存入数据格式
# -----------------
# data = {
#     'case_path': 'am.cases.scp.func.admin.network.topo.dvs.test_dvs.TestDvs#test_tc_topo_business_org_01_001',
#     'case_name': '租户经典网络创建交换机测试',
#     'api_tree': [
#         {
#             'prefix_step': [{
#                 'step_name': 'admin查询资源池',
#                 'details': [
#                     {'method': 'GET', 'url': '/login-info', 'status': '200'},
#                     {'method': 'POST', 'url': '/ticket', 'status': '200'},
#                     {'method': 'GET', 'url': '/admin/azs', 'status': '200'}
#                 ]
#             }]
#         },
#         {
#             'case_step': [
#                 {
#                     'step_name': 'admin创建运营管理员',
#                     'details': [
#                         {'method': 'GET', 'url': '/login-info', 'status': '200'},
#                         {'method': 'GET', 'url': '/admin/clusters', 'status': '200'},
#                         {'method': 'GET', 'url': '/admin/azs/{uuid}/overview', 'status': '200'},
#                         {'method': 'POST', 'url': '/admin/msps', 'status': '200'},
#                         {'method': 'GET', 'url': '/admin/ulogs/{uuid}', 'status': '200'}
#                     ]},
#                 {
#                     'step_name': 'admin创建租户',
#                     'details': [
#                         {'method': 'GET', 'url': '/login-info', 'status': '200'},
#                         {'method': 'POST', 'url': '/admin/projects', 'status': '200'},
#                         {'method': 'GET', 'url': '/admin/ulogs/{uuid}', 'status': '200'},
#                     ]
#                 },
#                 {
#                     'step_name': '登录账号',
#                     'details': []
#                 },
#             ]
#         },
# 
#         {
#             'api_list': [  # 存放用例调用的所有接口
#                 'GET::/summary', 'GET::/admin/dhs',
#                 'GET::/admin/clusters',
#                 'GET::/tenant/ulogs',
#                 'GET::/admin/azs/{uuid}/overview',
#                 'GET::/admin/network/vpc/topo',
#                 'GET::/admin/ulogs/{uuid}',
#                 'POST::/admin/projects',
#             ]
#         }
#     ]
# }


def calculate_str_md5(input_str: str, encoding: str = "utf-8") -> str:
    """
    Return the MD5 digest of a string.

    :param input_str: text to hash
    :param encoding: codec used to convert the text to bytes (default utf-8)
    :return: the digest as a 32-character hexadecimal string
    """
    raw_bytes = input_str.encode(encoding)
    return hashlib.md5(raw_bytes).hexdigest()


class EsClient:
    """Small convenience wrapper around an ``Elasticsearch`` client.

    The constructor builds a client for one HTTP node using basic auth and
    immediately runs :meth:`connect_check`. Every method prints a short status
    line; the methods that look something up also return their result so
    callers can use it programmatically.
    """

    def __init__(self, host=ES_HOST, port=ES_PORT, user=ES_USER, pwd=ES_PWD):
        """
        :param host: Elasticsearch host name or IP
        :param port: HTTP port of the node
        :param user: user name for basic auth
        :param pwd: password for basic auth
        """
        self.es = Elasticsearch(
            [{'host': host, 'port': port, 'scheme': 'http'}],
            http_auth=(user, pwd)
        )
        self.connect_check()

    def connect_check(self):
        """Ping the cluster and print basic cluster information.

        :return: True when ``ping()`` succeeded, False otherwise
        """
        if not self.es.ping():
            print("❌ ES连接失败：ping()返回False")
            return False

        print("✅ ES连接成功！集群信息：")
        cluster_info = self.es.info()
        print(f"   集群名称: {cluster_info['cluster_name']}")
        print(f"   ES版本: {cluster_info['version']['number']}")
        print(f"   节点名称: {cluster_info['name']}")
        return True

    def init_index(self, index_name):
        """Create ``index_name`` with the module-level ``mapping_body`` unless it already exists."""
        if self.es.indices.exists(index=index_name):
            print(f"ℹ️ 索引 {index_name} 已存在，跳过创建")
            return
        self.es.indices.create(index=index_name, body=mapping_body)
        # Report success, mirroring the message delete_index prints.
        print(f"✅ 索引 {index_name} 创建成功")

    def delete_index(self, index_name):
        """Delete ``index_name`` if it exists; otherwise only report that it is missing."""
        if self.es.indices.exists(index=index_name):
            self.es.indices.delete(index=index_name)
            print(f"✅ 索引 {index_name} 删除成功")
        else:
            print(f"ℹ️ 索引 {index_name} 不存在，跳过删除")

    def insert_data(self, index_name, data):
        """Index one document, using the MD5 of ``data['case_path']`` as its id.

        Because the id is derived from the case path, writing the same case
        again targets the same document id.

        :param index_name: target index
        :param data: document dict; must contain the key ``case_path``
        :return: the document id on success, ``None`` when the write failed
        """
        _id = calculate_str_md5(data['case_path'])
        try:
            res = self.es.index(index=index_name, id=_id, document=data)
            print(f"📝 写入成功 ID: {res['_id']}, 结果: {res['result']}")
        except Exception as e:
            print(f"❌ 写入失败: {e}")
            # Nothing was written, so there is nothing to refresh.
            return None

        # Refresh so the freshly written document is immediately searchable.
        self.es.indices.refresh(index=index_name)
        return _id

    def get_data(self, index_name, _id):
        """Fetch one document by id, print its ``_source`` and return it.

        Exceptions raised by the client's ``get`` call are not caught here.

        :return: the ``_source`` of the document
        """
        response = self.es.get(index=index_name, id=_id)
        source = response['_source']
        print("查询结果:", source)
        return source

    def search_case_by_api(self, index_name, target_api, size=10):
        """Find the cases whose ``api_tree.api_list`` contains ``target_api``.

        A ``term`` query inside a ``nested`` query on ``api_tree`` is used, so
        the value is matched exactly and special characters in it are not
        interpreted the way a ``q=...`` query string would be.

        :param index_name: index to search
        :param target_api: exact api_list entry, e.g. ``"GET::/admin/clusters"``
        :param size: maximum number of hits to fetch (default 10)
        :return: list of ``case_path`` values of the returned hits; an empty
                 list when the search raised an exception
        """
        query_body = {
            "query": {
                "nested": {
                    "path": "api_tree",  # the nested array field to search in
                    "query": {
                        "term": {
                            "api_tree.api_list": target_api  # full path of the field
                        }
                    }
                }
            },
            "_source": ["case_path"]  # return only the case_path field
        }

        try:
            response = self.es.search(index=index_name, body=query_body, size=size)

            hits = response['hits']['hits']
            total = response['hits']['total']['value']

            print(f"共找到 {total} 条包含接口 '{target_api}' 的用例：")
            case_paths = [hit['_source'].get('case_path') for hit in hits]
            for case_path in case_paths:
                print(f"- {case_path}")
            return case_paths

        except Exception as e:
            print(f"查询出错: {e}")
            return []


if __name__ == '__main__':
    # Building the client also runs its connection check.
    client = EsClient()

    # Optional one-off maintenance calls, left disabled:
    # client.delete_index(index_name)
    # client.init_index(index_name)

    # client.get_data(index_name, '61da0b051dd70e66bb0789e8ea8b68d5')

    # Print the cases whose api_list contains this exact entry.
    client.search_case_by_api(index_name, "GET::/admin/ulogs")