1.创建索引
复制代码
from elasticsearch import Elasticsearch
import random
from datetime import datetime, timedelta
# Connect to the local Elasticsearch node over HTTPS.
# NOTE: verify_certs=False turns off TLS certificate verification.
es = Elasticsearch(
    hosts="https://localhost:9201",
    verify_certs=False,
    basic_auth=("用户名", "密码"),
)

# Ask the server for its version to prove the connection works.
server_version = es.info()["version"]["number"]
print(f"✅ 连接成功!ES版本: {server_version}")
# 1. 创建更多商品数据
def create_sample_data(count=50):
    """Build a list of randomly generated product documents.

    Args:
        count: How many products to generate. Defaults to 50, which is what
            the function always produced before the parameter existed.
            Zero or a negative number yields an empty list.

    Returns:
        A list of dicts, one per product. IDs are the strings "1" .. str(count).
        Every other field is drawn from the module-level ``random`` generator,
        so seed it beforehand if reproducible data is needed.
    """
    # Candidate brands
    brands = ["Apple", "Samsung", "Huawei", "Xiaomi", "OPPO", "vivo", "Honor", "Google", "OnePlus", "Sony"]
    # Candidate product types
    product_types = ["智能手机", "笔记本电脑", "平板电脑", "智能手表", "耳机", "显示器", "相机"]
    # Candidate categories
    categories = ["电子产品", "家居用品", "运动户外", "图书音像", "服装鞋帽"]
    # Model suffixes used when composing the product name
    phone_models = ["Pro", "Max", "Ultra", "Plus", "Lite", "SE", "Note", "Fold", "Flip"]
    # Tag pool and description endings are loop-invariant, so build them once.
    all_tags = ["热销", "新品", "限时优惠", "旗舰", "性价比", "便携", "高清", "长续航", "快充", "防水"]
    usage_notes = ['适合日常使用', '专业级设备', '性价比之选', '旗舰机型']

    # (low, high) price bounds per product type; anything not listed here
    # uses the default range.
    price_ranges = {
        "智能手机": (1999, 8999),
        "笔记本电脑": (3999, 12999),
        "平板电脑": (1499, 5999),
        "智能手表": (499, 2999),
        "耳机": (99, 1999),
    }
    default_price_range = (299, 3999)

    products = []
    for i in range(1, count + 1):
        brand = random.choice(brands)
        product_type = random.choice(product_types)

        low, high = price_ranges.get(product_type, default_price_range)
        price = round(random.uniform(low, high), 2)

        # Rating between 3.0 and 5.0, one decimal place
        rating = round(random.uniform(3.0, 5.0), 1)
        stock = random.randint(10, 1000)

        # Launch date somewhere within the last year
        days_ago = random.randint(1, 365)
        launch_date = (datetime.now() - timedelta(days=days_ago)).strftime("%Y-%m-%d")

        # One to four distinct tags
        tags = random.sample(all_tags, random.randint(1, 4))

        products.append({
            "id": str(i),
            "brand": brand,
            "name": f"{brand} {product_type} {random.choice(phone_models)} {i}",
            "price": price,
            "price_sign": "¥",
            "currency": "CNY",
            "image_link": f"https://example.com/images/product_{i}.jpg",
            "description": f"这是一款{brand}的{product_type},性能强劲,品质保证。{random.choice(usage_notes)}。",
            "rating": rating,
            "category": random.choice(categories),
            "product_type": product_type,
            "tag_list": tags,
            "stock": stock,
            "launch_date": launch_date,
            "is_available": random.choice([True, False]),
            "sales_volume": random.randint(0, 10000),
        })
    return products
# 2. 批量导入数据
def batch_insert(products):
    """Index each product into ``products-catalog``, one request per document.

    Progress is printed after every tenth successful write. A document that
    fails is reported and skipped so the remaining ones are still attempted.
    """
    print("\n📥 开始批量导入数据...")
    imported = 0
    for item in products:
        try:
            es.index(index="products-catalog", id=item["id"], body=item)
            imported += 1
            if not imported % 10:
                print(f" 已导入 {imported} 条...")
        except Exception as err:
            print(f" ❌ 导入失败 ID:{item['id']}: {err}")
    print(f"✅ 导入完成!成功导入 {imported} 条商品数据")
# 3. Run the import
products = create_sample_data()
batch_insert(products)

# 4. Verify the data
# Freshly indexed documents only become visible to search/count after the
# index is refreshed, so force a refresh first; otherwise the count printed
# below can lag behind what was just written.
es.indices.refresh(index="products-catalog")
print("\n📊 当前索引统计:")
print(f" 商品总数: {es.count(index='products-catalog')['count']}")
2.查询操作
复制代码
from elasticsearch import Elasticsearch
import json
# Connect to the local Elasticsearch node over HTTPS.
# NOTE: verify_certs=False turns off TLS certificate verification.
es = Elasticsearch(
    hosts="https://localhost:9201",
    verify_certs=False,
    basic_auth=("", ""),
)
def print_result(title, result):
    """Print a titled banner followed by the result.

    A dict is rendered as indented JSON with non-ASCII text left readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)
    rendered = (
        json.dumps(result, indent=2, ensure_ascii=False)
        if isinstance(result, dict)
        else result
    )
    print(rendered)
print("\n🔍 基础查询练习 ==========")

# 1.1 Match every product, but only pull back the first five documents.
match_all_body = {"query": {"match_all": {}}, "size": 5}
result = es.search(index="products-catalog", body=match_all_body)
hits = result["hits"]
print_result(
    "1.1 查询所有商品 (前5条)",
    {
        "total": hits["total"]["value"],
        "samples": [doc["_source"]["name"] for doc in hits["hits"]],
    },
)

# 1.2 Exact (term) match on the keyword sub-field of the brand.
apple_term = {"term": {"brand.keyword": "Apple"}}
result = es.search(index="products-catalog", body={"query": apple_term})
hits = result["hits"]
apple_count = hits["total"]["value"]
apple_lines = []
for doc in hits["hits"]:
    source = doc["_source"]
    apple_lines.append(f"{source['name']} - ¥{source['price']}")
print_result(
    "1.2 精确匹配:Apple品牌商品",
    {"count": apple_count, "products": apple_lines},
)

# 1.3 Full-text match on the description; show at most five hits with scores.
description_match = {"match": {"description": "旗舰 性能"}}
result = es.search(index="products-catalog", body={"query": description_match})
hits = result["hits"]
matched_count = hits["total"]["value"]
scored_lines = []
for doc in hits["hits"][:5]:
    scored_lines.append(f"{doc['_source']['name']} (得分:{doc['_score']:.2f})")
print_result(
    "1.3 全文搜索:描述中包含'旗舰'或'性能'",
    {"count": matched_count, "results": scored_lines},
)
复制代码
from elasticsearch import Elasticsearch
import json
# Connect to the local Elasticsearch node over HTTPS.
# NOTE: verify_certs=False turns off TLS certificate verification.
es = Elasticsearch(
    hosts="https://localhost:9201",
    verify_certs=False,
    basic_auth=("", ""),
)
def print_result(title, result):
    """Print a titled banner followed by the result.

    A dict is rendered as indented JSON with non-ASCII text left readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)
    rendered = (
        json.dumps(result, indent=2, ensure_ascii=False)
        if isinstance(result, dict)
        else result
    )
    print(rendered)
print("\n🔍 复合查询练习 ==========")

# 2.1 Bool query: Apple products priced between 3000 and 6000.
# The brand goes in `must`; the price range goes in `filter`.
result = es.search(
    index="products-catalog",
    body={
        "query": {
            "bool": {
                "must": [
                    {"term": {"brand.keyword": "Apple"}}
                ],
                "filter": [
                    {"range": {"price": {"gte": 3000, "lte": 6000}}}
                ]
            }
        }
    }
)
print_result("2.1 布尔查询:Apple品牌 价格3000-6000", {
    "count": result['hits']['total']['value'],
    "products": [f"{hit['_source']['name']} - ¥{hit['_source']['price']}"
                 for hit in result['hits']['hits']]
})

# 2.2 Multi-condition query: name contains "Pro", price <= 5000, and the
# brand is Xiaomi or Samsung.
# Because minimum_should_match is 1, at least one `should` clause has to
# match, so the brand condition is a requirement rather than a mere
# preference. The printed title says so explicitly (it used to claim the
# brands were only "preferred").
result = es.search(
    index="products-catalog",
    body={
        "query": {
            "bool": {
                "must": [
                    {"match": {"name": "Pro"}},
                    {"range": {"price": {"lte": 5000}}}
                ],
                "should": [
                    {"term": {"brand.keyword": "Xiaomi"}},
                    {"term": {"brand.keyword": "Samsung"}}
                ],
                "minimum_should_match": 1
            }
        }
    }
)
print_result("2.2 多条件:名称含Pro,价格≤5000,品牌为小米或三星", {
    "count": result['hits']['total']['value'],
    "results": [f"{hit['_source']['brand']} {hit['_source']['name']} - ¥{hit['_source']['price']}"
                for hit in result['hits']['hits']]
})
聚合查询
复制代码
from elasticsearch import Elasticsearch
import json
# Connect to the local Elasticsearch node over HTTPS.
# NOTE: verify_certs=False turns off TLS certificate verification.
es = Elasticsearch(
    hosts="https://localhost:9201",
    verify_certs=False,
    basic_auth=("xxx", "xxx"),
)
def print_result(title, result):
    """Print a titled banner followed by the result.

    A dict is rendered as indented JSON with non-ASCII text left readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)
    rendered = (
        json.dumps(result, indent=2, ensure_ascii=False)
        if isinstance(result, dict)
        else result
    )
    print(rendered)
print("\n🔍 聚合分析练习 ==========")

# 3.1 Product count per brand (terms aggregation, 10 buckets requested).
# "size": 0 at the top level means no hits are returned, only aggregations.
brand_agg_body = {
    "size": 0,
    "aggs": {
        "brand_count": {"terms": {"field": "brand.keyword", "size": 10}},
    },
}
result = es.search(index="products-catalog", body=brand_agg_body)
brand_totals = {}
for entry in result["aggregations"]["brand_count"]["buckets"]:
    brand_totals[entry["key"]] = entry["doc_count"]
print_result("3.1 聚合:各品牌商品数量", brand_totals)

# 3.2 Summary statistics over the price field.
price_stats_body = {
    "size": 0,
    "aggs": {"price_stats": {"stats": {"field": "price"}}},
}
result = es.search(index="products-catalog", body=price_stats_body)
stats = result["aggregations"]["price_stats"]
price_summary = {}
price_summary["最低价"] = f"¥{stats['min']:.2f}"
price_summary["最高价"] = f"¥{stats['max']:.2f}"
price_summary["平均价"] = f"¥{stats['avg']:.2f}"
price_summary["总数"] = stats["count"]
print_result("3.2 价格统计", price_summary)

# 3.3 Nested aggregation: bucket by product type, then compute the
# average / maximum / minimum price inside each bucket.
per_type_metrics = {
    "avg_price": {"avg": {"field": "price"}},
    "max_price": {"max": {"field": "price"}},
    "min_price": {"min": {"field": "price"}},
}
type_agg_body = {
    "size": 0,
    "aggs": {
        "by_type": {
            "terms": {"field": "product_type.keyword"},
            "aggs": per_type_metrics,
        },
    },
}
result = es.search(index="products-catalog", body=type_agg_body)
type_summary = {}
for entry in result["aggregations"]["by_type"]["buckets"]:
    type_summary[entry["key"]] = {
        "平均价": f"¥{entry['avg_price']['value']:.2f}",
        "最高价": f"¥{entry['max_price']['value']:.2f}",
        "最低价": f"¥{entry['min_price']['value']:.2f}",
        "数量": entry["doc_count"],
    }
print_result("3.3 多层聚合:各类型价格统计", type_summary)
更新和删除
复制代码
from elasticsearch import Elasticsearch
import json
# Connect to the local Elasticsearch node over HTTPS.
# NOTE: verify_certs=False turns off TLS certificate verification.
es = Elasticsearch(
    hosts="https://localhost:9201",
    verify_certs=False,
    basic_auth=("", ""),
)
def print_result(title, result):
    """Print a titled banner followed by the result.

    A dict is rendered as indented JSON with non-ASCII text left readable;
    any other value is handed to ``print`` unchanged.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"📌 {title}")
    print(rule)
    rendered = (
        json.dumps(result, indent=2, ensure_ascii=False)
        if isinstance(result, dict)
        else result
    )
    print(rendered)
print("\n🔍 更新和删除练习 ==========")


# 5.1 Update a document
def update_example():
    """Partially update the first Apple product found and show before/after.

    Does nothing beyond printing the heading when the search returns no hit.
    """
    print("\n📝 5.1 更新文档示例")

    # Look up a single Apple product so there is a concrete document to edit.
    found = es.search(
        index="products-catalog",
        body={"query": {"term": {"brand.keyword": "Apple"}}, "size": 1},
    )
    matches = found['hits']['hits']
    if not matches:
        return

    first = matches[0]
    doc_id = first['_id']
    before = first['_source']
    old_price = before['price']
    old_tags = before.get('tag_list', [])
    print(f"更新前 - ID:{doc_id}, 价格:{old_price}, 标签:{old_tags}")

    # Partial update: only the price and the tag list are sent.
    changes = {"price": 9999, "tag_list": ["热销", "旗舰", "限时优惠"]}
    es.update(index="products-catalog", id=doc_id, body={"doc": changes})

    # Fetch the document again by ID to display the stored values.
    after = es.get(index="products-catalog", id=doc_id)['_source']
    new_price = after['price']
    new_tags = after['tag_list']
    print(f"更新后 - ID:{doc_id}, 价格:{new_price}, 标签:{new_tags}")


update_example()
# 5.2 Delete example (destructive, so the real call is left disabled)
def delete_example():
    """Announce the delete example; the actual delete stays commented out."""
    print("\n🗑️ 5.2 删除文档示例(已注释,安全)")

    # Uncomment the lines below to really run the delete.
    # They remove every product priced below 500:
    #
    # result = es.delete_by_query(
    #     index="products-catalog",
    #     body={"query": {"range": {"price": {"lt": 500}}}}
    # )
    # print(f"删除了 {result['deleted']} 个价格低于500的商品")

    print("(要实际执行删除,请取消代码注释)")


delete_example()
# 5.3 Create a new document
def create_example():
    """Index one hand-written test product under ID 999 and read it back."""
    print("\n➕ 5.3 新增文档示例")

    doc_id = "999"
    new_product = {
        "id": doc_id,
        "brand": "TestBrand",
        "name": "测试商品-学习使用",
        "price": 2999.0,
        "price_sign": "¥",
        "currency": "CNY",
        "description": "这是一个用于学习ES的新增文档示例",
        "rating": 4.5,
        "category": "测试类别",
        "product_type": "测试类型",
        "tag_list": ["测试", "学习", "示例"],
        "stock": 100,
        "launch_date": "2024-01-01",
        "is_available": True,
        "sales_volume": 0,
    }

    outcome = es.index(index="products-catalog", id=doc_id, body=new_product)
    print(f"新增文档结果: {outcome['result']}")

    # Confirm the write by fetching the document by its ID.
    stored = es.get(index="products-catalog", id=doc_id)
    print(f"验证新增: {stored['_source']['name']}")


create_example()
索引操作
复制代码
from elasticsearch import Elasticsearch
import json
# Connect to the local Elasticsearch node over HTTPS.
# NOTE: verify_certs=False turns off TLS certificate verification.
es = Elasticsearch(
    hosts="https://localhost:9201",
    verify_certs=False,
    basic_auth=("elastic", "123456"),
)
def print_result(title, result):
    """Print a titled banner followed by the result.

    Dicts, lists and tuples are rendered as indented JSON with non-ASCII
    text left readable. Dict key/value views are converted to lists first so
    they print the same way instead of as ``dict_keys([...])``. Any other
    value, or a container that cannot be serialized to JSON, is handed to
    ``print`` unchanged.

    Args:
        title: Heading shown between the two separator lines.
        result: The value to display.
    """
    # Local import: the surrounding script only imports json.
    from collections.abc import KeysView, ValuesView

    print(f"\n{'='*60}")
    print(f"📌 {title}")
    print('='*60)

    if isinstance(result, (KeysView, ValuesView)):
        result = list(result)

    if isinstance(result, (dict, list, tuple)):
        try:
            print(json.dumps(result, indent=2, ensure_ascii=False))
            return
        except (TypeError, ValueError):
            # Not JSON-serializable (or circular): fall back to plain print
            # rather than crash a display helper.
            pass
    print(result)
print("\n🔍 索引管理练习 ==========")

# 6.1 List every index with its document count and store size.
result = es.cat.indices(format="json")
print_result("6.1 所有索引列表", [
    f"{idx['index']} (文档数:{idx['docs.count']}, 大小:{idx['store.size']})"
    for idx in result
])

# 6.2 Show the field names defined in the index mapping.
result = es.indices.get_mapping(index="products-catalog")
print_result("6.2 products-catalog索引的映射结构",
             result['products-catalog']['mappings']['properties'].keys()
             )

# 6.3 Show a few index-level settings.
result = es.indices.get_settings(index="products-catalog")
settings = result['products-catalog']['settings']['index']
print_result("6.3 索引设置", {
    "分片数": settings['number_of_shards'],
    "副本数": settings['number_of_replicas'],
    "创建时间": settings['creation_date']
})

# 6.4 Index statistics.
result = es.indices.stats(index="products-catalog")
stats = result['indices']['products-catalog']['total']
print_result("6.4 索引统计信息", {
    "文档总数": stats['docs']['count'],
    "已删除文档": stats['docs']['deleted'],
    "存储大小": stats['store']['size_in_bytes'],
    # The per-index "total" stats carry no "shards" entry, so the previous
    # stats['shards'] lookup raised KeyError. The shard count for the
    # request is reported in the top-level "_shards" section instead.
    "分片数": result['_shards']['total']
})