Elasticsearch学习

1.创建索引并导入示例数据

复制代码
from elasticsearch import Elasticsearch
import random
from datetime import datetime, timedelta

# Connect to Elasticsearch.
# Credentials are taken from the ES_USERNAME / ES_PASSWORD environment
# variables when they are set, so real secrets do not have to be written into
# this file; the original placeholder values remain the fallback.
import os

es = Elasticsearch(
    "https://localhost:9201",
    basic_auth=(
        os.environ.get("ES_USERNAME", "用户名"),
        os.environ.get("ES_PASSWORD", "密码"),
    ),
    # TLS certificate verification is switched off here.
    # NOTE(review): presumably for a local self-signed cluster - do not reuse
    # this setting against a shared or production cluster.
    verify_certs=False
)

# es.info() performs a request, so this line doubles as a connectivity check.
print(f"✅ 连接成功!ES版本: {es.info()['version']['number']}")

# 1. 创建更多商品数据
def create_sample_data(count=50):
    """Build a list of randomly generated product documents.

    Args:
        count: Number of products to generate. Defaults to 50, the size the
            function always produced before this parameter existed. A value
            of 0 (or less) yields an empty list.

    Returns:
        A list of dicts, one per product. IDs are the strings "1".."count".
        Prices are drawn from a range that depends on the product type;
        the remaining fields are chosen independently at random.
    """
    brands = ["Apple", "Samsung", "Huawei", "Xiaomi", "OPPO", "vivo", "Honor", "Google", "OnePlus", "Sony"]
    product_types = ["智能手机", "笔记本电脑", "平板电脑", "智能手表", "耳机", "显示器", "相机"]
    categories = ["电子产品", "家居用品", "运动户外", "图书音像", "服装鞋帽"]
    # Model suffixes used when composing the product name.
    phone_models = ["Pro", "Max", "Ultra", "Plus", "Lite", "SE", "Note", "Fold", "Flip"]
    all_tags = ["热销", "新品", "限时优惠", "旗舰", "性价比", "便携", "高清", "长续航", "快充", "防水"]
    blurbs = ['适合日常使用', '专业级设备', '性价比之选', '旗舰机型']

    # (low, high) price bounds per product type; types not listed here fall
    # back to default_price_range.
    price_ranges = {
        "智能手机": (1999, 8999),
        "笔记本电脑": (3999, 12999),
        "平板电脑": (1499, 5999),
        "智能手表": (499, 2999),
        "耳机": (99, 1999),
    }
    default_price_range = (299, 3999)

    # Taken once so every launch date is computed relative to the same instant.
    now = datetime.now()

    products = []
    # The order of the random draws below matches the original implementation,
    # so a seeded `random` produces the same documents as before.
    for i in range(1, count + 1):
        brand = random.choice(brands)
        product_type = random.choice(product_types)

        low, high = price_ranges.get(product_type, default_price_range)
        price = round(random.uniform(low, high), 2)

        # Rating between 3.0 and 5.0, one decimal place.
        rating = round(random.uniform(3.0, 5.0), 1)
        stock = random.randint(10, 1000)

        # Launch date somewhere within the last year.
        days_ago = random.randint(1, 365)
        launch_date = (now - timedelta(days=days_ago)).strftime("%Y-%m-%d")

        # One to four distinct tags.
        tags = random.sample(all_tags, random.randint(1, 4))

        products.append({
            "id": str(i),
            "brand": brand,
            "name": f"{brand} {product_type} {random.choice(phone_models)} {i}",
            "price": price,
            "price_sign": "¥",
            "currency": "CNY",
            "image_link": f"https://example.com/images/product_{i}.jpg",
            "description": f"这是一款{brand}的{product_type},性能强劲,品质保证。{random.choice(blurbs)}。",
            "rating": rating,
            "category": random.choice(categories),
            "product_type": product_type,
            "tag_list": tags,
            "stock": stock,
            "launch_date": launch_date,
            "is_available": random.choice([True, False]),
            "sales_volume": random.randint(0, 10000),
        })

    return products

# 2. 批量导入数据
def batch_insert(products):
    """Index every product into ``products-catalog`` through the bulk API.

    The previous version sent one index request per document; this one lets
    the client's bulk helper group the documents into bulk requests. The
    progress output and the per-document failure report are kept.

    Args:
        products: Iterable of product dicts. Each must have an ``"id"`` key,
            which is used as the document ID.

    Returns:
        The number of documents that were indexed successfully.
    """
    # Imported here so the function stays self-contained; `helpers` ships
    # with the elasticsearch package this file already depends on.
    from elasticsearch import helpers

    print("\n📥 开始批量导入数据...")

    # Lazily built so a large input is never copied in full.
    actions = (
        {"_index": "products-catalog", "_id": product["id"], "_source": product}
        for product in products
    )

    success = 0
    try:
        # raise_on_error / raise_on_exception are disabled so that a rejected
        # document is reported and skipped instead of aborting the whole run,
        # which is how the per-document loop used to behave.
        for ok, item in helpers.streaming_bulk(
            es, actions, raise_on_error=False, raise_on_exception=False
        ):
            if ok:
                success += 1
                if success % 10 == 0:
                    print(f"  已导入 {success} 条...")
            else:
                # `item` maps the operation type (e.g. "index") to its details.
                detail = next(iter(item.values()))
                print(f"  ❌ 导入失败 ID:{detail.get('_id')}: {detail.get('error')}")
    except Exception as e:
        # Anything the helper does not turn into a per-item result (for
        # instance the cluster being unreachable) ends the import; report it
        # rather than crash, as the original best-effort loop did.
        print(f"  ❌ 批量导入中断: {e}")

    print(f"✅ 导入完成!成功导入 {success} 条商品数据")
    return success

# 3. Generate the sample products and index them.
products = create_sample_data()
batch_insert(products)

# 4. Verify the data.
# Newly indexed documents only become visible to search and count after a
# refresh, so force one first; otherwise the total printed below can lag
# behind what was just written.
es.indices.refresh(index="products-catalog")
print("\n📊 当前索引统计:")
print(f"   商品总数: {es.count(index='products-catalog')['count']}")

2.查询操作

复制代码
from elasticsearch import Elasticsearch
import json

# Connect to Elasticsearch.
# Credentials are taken from the ES_USERNAME / ES_PASSWORD environment
# variables when they are set; the original (empty) values remain the fallback.
import os

es = Elasticsearch(
    "https://localhost:9201",
    basic_auth=(
        os.environ.get("ES_USERNAME", ""),
        os.environ.get("ES_PASSWORD", ""),
    ),
    # TLS certificate verification is switched off here.
    # NOTE(review): presumably for a local self-signed cluster - confirm
    # before pointing this at anything shared.
    verify_certs=False
)

def print_result(title, result):
    """Print a titled section followed by *result*.

    A 60-character ``=`` rule is printed above and below the title. A dict
    is rendered as indented JSON with non-ASCII characters kept readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)

    if not isinstance(result, dict):
        print(result)
        return
    print(json.dumps(result, indent=2, ensure_ascii=False))

print("\n🔍 基础查询练习 ==========")

# 1.1 match_all: fetch every document, returning only the first five hits.
query = {
    "query": {"match_all": {}},
    "size": 5
}
result = es.search(index="products-catalog", body=query)
hits = result['hits']
print_result("1.1 查询所有商品 (前5条)", {
    "total": hits['total']['value'],
    "samples": [doc['_source']['name'] for doc in hits['hits']]
})

# 1.2 term query: exact match on the brand's keyword sub-field.
query = {
    "query": {
        "term": {"brand.keyword": "Apple"}
    }
}
result = es.search(index="products-catalog", body=query)
hits = result['hits']
print_result("1.2 精确匹配:Apple品牌商品", {
    "count": hits['total']['value'],
    "products": [
        f"{src['name']} - ¥{src['price']}"
        for src in (doc['_source'] for doc in hits['hits'])
    ]
})

# 1.3 match query: full-text search on the description field.
query = {
    "query": {
        "match": {
            "description": "旗舰 性能"
        }
    }
}
result = es.search(index="products-catalog", body=query)
hits = result['hits']
# Only the first five returned hits are listed, each with its relevance score.
print_result("1.3 全文搜索:描述中包含'旗舰'或'性能'", {
    "count": hits['total']['value'],
    "results": [
        f"{doc['_source']['name']} (得分:{doc['_score']:.2f})"
        for doc in hits['hits'][:5]
    ]
})
复制代码
from elasticsearch import Elasticsearch
import json

# Credentials are taken from the ES_USERNAME / ES_PASSWORD environment
# variables when they are set; the original (empty) values remain the fallback.
import os

es = Elasticsearch(
    "https://localhost:9201",
    basic_auth=(
        os.environ.get("ES_USERNAME", ""),
        os.environ.get("ES_PASSWORD", ""),
    ),
    # TLS certificate verification is switched off here.
    # NOTE(review): presumably for a local self-signed cluster - confirm
    # before pointing this at anything shared.
    verify_certs=False
)

def print_result(title, result):
    """Print a titled section followed by *result*.

    A 60-character ``=`` rule is printed above and below the title. A dict
    is rendered as indented JSON with non-ASCII characters kept readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)

    if not isinstance(result, dict):
        print(result)
        return
    print(json.dumps(result, indent=2, ensure_ascii=False))

print("\n🔍 复合查询练习 ==========")

# 2.1 bool query: Apple products priced between 3000 and 6000.
# The brand condition sits in `must`, the price range in `filter`.
result = es.search(
    index="products-catalog",
    body={
        "query": {
            "bool": {
                "must": [
                    {"term": {"brand.keyword": "Apple"}}
                ],
                "filter": [
                    {"range": {"price": {"gte": 3000, "lte": 6000}}}
                ]
            }
        }
    }
)
print_result("2.1 布尔查询:Apple品牌 价格3000-6000", {
    "count": result['hits']['total']['value'],
    "products": [f"{hit['_source']['name']} - ¥{hit['_source']['price']}" 
                 for hit in result['hits']['hits']]
})

# 2.2 Several conditions combined: name matches "Pro", price <= 5000, and the
# brand is Xiaomi or Samsung.
# Because of `minimum_should_match: 1`, at least one `should` clause has to
# match, so the brand condition is a requirement, not just a ranking
# preference. The printed title used to say "优先" (prefer), which
# contradicted the query; it now states what the query really does.
result = es.search(
    index="products-catalog",
    body={
        "query": {
            "bool": {
                "must": [
                    {"match": {"name": "Pro"}},
                    {"range": {"price": {"lte": 5000}}}
                ],
                "should": [
                    {"term": {"brand.keyword": "Xiaomi"}},
                    {"term": {"brand.keyword": "Samsung"}}
                ],
                "minimum_should_match": 1
            }
        }
    }
)
print_result("2.2 多条件:名称含Pro,价格≤5000,品牌限小米/三星", {
    "count": result['hits']['total']['value'],
    "results": [f"{hit['_source']['brand']} {hit['_source']['name']} - ¥{hit['_source']['price']}" 
                for hit in result['hits']['hits']]
})

聚合查询

复制代码
from elasticsearch import Elasticsearch
import json

# Credentials are taken from the ES_USERNAME / ES_PASSWORD environment
# variables when they are set; the original placeholder values remain the
# fallback.
import os

es = Elasticsearch(
    "https://localhost:9201",
    basic_auth=(
        os.environ.get("ES_USERNAME", "xxx"),
        os.environ.get("ES_PASSWORD", "xxx"),
    ),
    # TLS certificate verification is switched off here.
    # NOTE(review): presumably for a local self-signed cluster - confirm
    # before pointing this at anything shared.
    verify_certs=False
)

def print_result(title, result):
    """Print a titled section followed by *result*.

    A 60-character ``=`` rule is printed above and below the title. A dict
    is rendered as indented JSON with non-ASCII characters kept readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)

    if not isinstance(result, dict):
        print(result)
        return
    print(json.dumps(result, indent=2, ensure_ascii=False))

print("\n🔍 聚合分析练习 ==========")


def _format_price(value):
    """Render a price as ``¥x.xx``, or ``N/A`` when the value is None.

    A metric aggregation reports null when no document contributed a value
    (for example on an empty index); applying ``:.2f`` to that would raise
    TypeError.
    """
    return "N/A" if value is None else f"¥{value:.2f}"


# 3.1 terms aggregation: number of products per brand (up to 10 brands).
# "size": 0 suppresses the hits; only the aggregation result is wanted.
result = es.search(
    index="products-catalog",
    body={
        "size": 0,
        "aggs": {
            "brand_count": {
                "terms": {"field": "brand.keyword", "size": 10}
            }
        }
    }
)
print_result("3.1 聚合:各品牌商品数量", {
    brand['key']: brand['doc_count'] 
    for brand in result['aggregations']['brand_count']['buckets']
})

# 3.2 stats aggregation over the price field.
result = es.search(
    index="products-catalog",
    body={
        "size": 0,
        "aggs": {
            "price_stats": {
                "stats": {"field": "price"}
            }
        }
    }
)
stats = result['aggregations']['price_stats']
print_result("3.2 价格统计", {
    "最低价": _format_price(stats['min']),
    "最高价": _format_price(stats['max']),
    "平均价": _format_price(stats['avg']),
    "总数": stats['count']
})

# 3.3 Nested aggregation: avg / max / min price inside each product-type bucket.
result = es.search(
    index="products-catalog",
    body={
        "size": 0,
        "aggs": {
            "by_type": {
                "terms": {"field": "product_type.keyword"},
                "aggs": {
                    "avg_price": {"avg": {"field": "price"}},
                    "max_price": {"max": {"field": "price"}},
                    "min_price": {"min": {"field": "price"}}
                }
            }
        }
    }
)
print_result("3.3 多层聚合:各类型价格统计", {
    bucket['key']: {
        "平均价": _format_price(bucket['avg_price']['value']),
        "最高价": _format_price(bucket['max_price']['value']),
        "最低价": _format_price(bucket['min_price']['value']),
        "数量": bucket['doc_count']
    }
    for bucket in result['aggregations']['by_type']['buckets']
})

更新和删除

复制代码
from elasticsearch import Elasticsearch
import json

# Credentials are taken from the ES_USERNAME / ES_PASSWORD environment
# variables when they are set; the original (empty) values remain the fallback.
import os

es = Elasticsearch(
    "https://localhost:9201",
    basic_auth=(
        os.environ.get("ES_USERNAME", ""),
        os.environ.get("ES_PASSWORD", ""),
    ),
    # TLS certificate verification is switched off here.
    # NOTE(review): presumably for a local self-signed cluster - confirm
    # before pointing this at anything shared.
    verify_certs=False
)

def print_result(title, result):
    """Print a titled section followed by *result*.

    A 60-character ``=`` rule is printed above and below the title. A dict
    is rendered as indented JSON with non-ASCII characters kept readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)

    if not isinstance(result, dict):
        print(result)
        return
    print(json.dumps(result, indent=2, ensure_ascii=False))

print("\n🔍 更新和删除练习 ==========")

# 5.1 Update a document
def update_example():
    """Partially update one Apple product and print it before and after.

    Searches for a single document whose brand is "Apple". If none is found
    the function returns without doing anything further. Otherwise it sets a
    new price and tag list on that document, reads the document back by ID
    and prints the old and new values.
    """
    print("\n📝 5.1 更新文档示例")

    index_name = "products-catalog"

    # Look up one matching document so there is something to update.
    found = es.search(
        index=index_name,
        body={"query": {"term": {"brand.keyword": "Apple"}}, "size": 1}
    )
    matches = found['hits']['hits']
    if not matches:
        return

    first = matches[0]
    doc_id = first['_id']
    source = first['_source']
    old_price = source['price']
    old_tags = source.get('tag_list', [])

    print(f"更新前 - ID:{doc_id}, 价格:{old_price}, 标签:{old_tags}")

    # Partial update: only the fields listed under "doc" are sent.
    changes = {"price": 9999, "tag_list": ["热销", "旗舰", "限时优惠"]}
    es.update(index=index_name, id=doc_id, body={"doc": changes})

    # Fetch the document again by ID to show the stored values.
    current = es.get(index=index_name, id=doc_id)['_source']
    new_price = current['price']
    new_tags = current['tag_list']

    print(f"更新后 - ID:{doc_id}, 价格:{new_price}, 标签:{new_tags}")

update_example()

# 5.2 Delete example (destructive, so the actual call is disabled by default)
def delete_example():
    """Print a notice about deleting documents; the deletion itself is disabled.

    The delete-by-query call is kept below as commented-out code so that
    running this function never removes anything.
    """
    header = "\n🗑️ 5.2 删除文档示例(已注释,安全)"
    footer = "(要实际执行删除,请取消代码注释)"

    print(header)
    # Uncomment the lines below to really run the deletion.
    # It removes every product priced below 500:
    #
    # result = es.delete_by_query(
    #     index="products-catalog",
    #     body={"query": {"range": {"price": {"lt": 500}}}}
    # )
    # print(f"删除了 {result['deleted']} 个价格低于500的商品")
    print(footer)

delete_example()

# 5.3 Create a document
def create_example():
    """Index one hand-written test product under ID "999" and read it back.

    Prints the ``result`` field of the index response, then fetches the
    document by ID and prints its name as confirmation.
    """
    print("\n➕ 5.3 新增文档示例")

    # Index name and document ID shared by the write and the read-back.
    target = {"index": "products-catalog", "id": "999"}

    new_product = {
        "id": "999",
        "brand": "TestBrand",
        "name": "测试商品-学习使用",
        "price": 2999.0,
        "price_sign": "¥",
        "currency": "CNY",
        "description": "这是一个用于学习ES的新增文档示例",
        "rating": 4.5,
        "category": "测试类别",
        "product_type": "测试类型",
        "tag_list": ["测试", "学习", "示例"],
        "stock": 100,
        "launch_date": "2024-01-01",
        "is_available": True,
        "sales_volume": 0
    }

    outcome = es.index(body=new_product, **target)
    print(f"新增文档结果: {outcome['result']}")

    # Confirm the write by fetching the document by its ID.
    stored = es.get(**target)
    print(f"验证新增: {stored['_source']['name']}")

create_example()

索引操作

复制代码
from elasticsearch import Elasticsearch
import json

# Credentials are taken from the ES_USERNAME / ES_PASSWORD environment
# variables when they are set, so the password does not have to live in this
# file; the original literal values remain the fallback.
import os

es = Elasticsearch(
    "https://localhost:9201",
    basic_auth=(
        os.environ.get("ES_USERNAME", "elastic"),
        os.environ.get("ES_PASSWORD", "123456"),
    ),
    # TLS certificate verification is switched off here.
    # NOTE(review): presumably for a local self-signed cluster - confirm
    # before pointing this at anything shared.
    verify_certs=False
)

def print_result(title, result):
    """Print a titled section followed by *result*.

    A 60-character ``=`` rule is printed above and below the title. A dict
    is rendered as indented JSON with non-ASCII characters kept readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)

    if not isinstance(result, dict):
        print(result)
        return
    print(json.dumps(result, indent=2, ensure_ascii=False))

print("\n🔍 索引管理练习 ==========")

# 6.1 List every index with its document count and store size.
# format="json" makes the cat API return structured rows instead of text.
result = es.cat.indices(format="json")
print_result("6.1 所有索引列表", [
    f"{idx['index']} (文档数:{idx['docs.count']}, 大小:{idx['store.size']})"
    for idx in result
])

# 6.2 Show the field names defined in the index mapping.
# Converted to a list so the output shows the names themselves rather than
# the raw `dict_keys([...])` view.
result = es.indices.get_mapping(index="products-catalog")
print_result("6.2 products-catalog索引的映射结构", 
    list(result['products-catalog']['mappings']['properties'].keys())
)

# 6.3 Show selected index settings.
result = es.indices.get_settings(index="products-catalog")
settings = result['products-catalog']['settings']['index']
print_result("6.3 索引设置", {
    "分片数": settings['number_of_shards'],
    "副本数": settings['number_of_replicas'],
    "创建时间": settings['creation_date']
})

# 6.4 Index statistics.
result = es.indices.stats(index="products-catalog")
stats = result['indices']['products-catalog']['total']
print_result("6.4 索引统计信息", {
    "文档总数": stats['docs']['count'],
    "已删除文档": stats['docs']['deleted'],
    "存储大小": stats['store']['size_in_bytes'],
    # The per-index "total" section has no "shards" entry, so the previous
    # `len(stats['shards'])` raised KeyError. The shard count for the request
    # is reported in the response's top-level "_shards" section instead.
    "分片数": result['_shards']['total']
})
相关推荐
liliangcsdn1 小时前
IMPALA强化学习算法的学习和解读
学习·算法
蒸蒸yyyyzwd1 小时前
os八股学习笔记
笔记·学习
野犬寒鸦2 小时前
Java8 ConcurrentHashMap 深度解析(底层数据结构详解及方法执行流程)
java·开发语言·数据库·后端·学习·算法·哈希算法
郝学胜-神的一滴2 小时前
在Vibe Coding时代,学习设计模式与软件架构
人工智能·学习·设计模式·架构·软件工程
科技林总2 小时前
【系统分析师】9.5 容灾与业务持续
学习
宇木灵2 小时前
C语言基础-六、指针
c语言·开发语言·学习·算法
山岚的运维笔记2 小时前
SQL Server笔记 -- 第69章:时态表
数据库·笔记·后端·sql·microsoft·sqlserver
春和景明3602 小时前
费曼学习法8
学习
Elasticsearch2 小时前
Elasticsearch:通过最小分数确保语义精度
elasticsearch