【数据库】Elasticsearch实战：从入门到精通

引言

Elasticsearch是一个基于Lucene构建的分布式、RESTful风格的搜索和数据分析引擎，能够实现近乎实时的搜索、数据分析和全文检索功能。作为ELK（Elasticsearch、Logstash、Kibana）技术栈的核心组件，Elasticsearch在大数据处理、日志分析、全文搜索等领域有着广泛的应用。本文将从入门到精通，详细讲解Elasticsearch的核心概念、索引管理、搜索查询、性能优化等关键知识点。

一、Elasticsearch概述

1.1 什么是Elasticsearch

Elasticsearch是一个开源的分布式搜索引擎，具备以下核心特性：

分布式架构：支持水平扩展，处理PB级数据
实时性：数据写入后秒级可查询
高可用性：支持数据副本和故障自动转移
RESTful API：提供完整的HTTP API，易于集成
全文检索：基于Lucene强大的全文检索能力
多租户：支持通过Index和Type实现数据隔离

1.2 核心概念解析

概念	关系型数据库对比	说明
Cluster	数据库集群	由多个节点组成，共同存储数据
Node	数据库实例	集群中的单个服务器实例
Index	数据库	存储文档的逻辑命名空间
Shard	分表	索引的水平分片
Replica	主从复制	分片的数据副本
Document	行记录	可被索引的基本信息单元
Field	列	文档中的字段

json 复制代码

// Document 示例 - 一部电影信息
{
  "title": "流浪地球",
  "director": "郭帆",
  "year": 2019,
  "genre": ["科幻", "冒险", "灾难"],
  "rating": 8.5,
  "votes": 125000,
  "duration": 125,
  "actors": ["吴京", "屈楚萧", "李光洁"],
  "description": "未来时代，太阳即将毁灭，人类在地球表面建造出巨大的推进器...",
  "release_date": "2019-02-05",
  "box_office": 4600000000
}

二、索引与映射管理

2.1 创建索引

bash 复制代码

# 创建索引 - 基础配置
PUT /movies
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "chinese_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["stop_words"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "chinese_analyzer",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        }
      },
      "director": {
        "type": "keyword"
      },
      "year": {
        "type": "integer"
      },
      "genre": {
        "type": "keyword"
      },
      "rating": {
        "type": "float"
      },
      "votes": {
        "type": "long"
      },
      "duration": {
        "type": "integer"
      },
      "actors": {
        "type": "keyword"
      },
      "description": {
        "type": "text",
        "analyzer": "chinese_analyzer"
      },
      "release_date": {
        "type": "date",
        "format": "yyyy-MM-dd"
      },
      "box_office": {
        "type": "long"
      }
    }
  }
}

2.2 动态映射与模板

json 复制代码

// 动态模板示例 - 统一处理字符串字段
PUT /logs
{
  "mappings": {
    "dynamic_templates": [
      {
        "string_fields": {
          "match_mapping_type": "string",
          "match": "*",
          "mapping": {
            "type": "keyword"
          }
        }
      },
      {
        "message_as_text": {
          "match_mapping_type": "string",
          "match": "message",
          "mapping": {
            "type": "text",
            "analyzer": "standard"
          }
        }
      },
      {
        "long_as_long": {
          "match_mapping_type": "long",
          "mapping": {
            "type": "long"
          }
        }
      }
    ]
  }
}

2.3 索引别名与零停机维护

python 复制代码

# Python操作Elasticsearch - 索引别名管理
from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])

# 创建新索引
new_index = "products_v2"
es.indices.create(index=new_index, body={
    "settings": {"number_of_shards": 2, "number_of_replicas": 1},
    "mappings": {
        "properties": {
            "name": {"type": "text"},
            "price": {"type": "float"},
            "category": {"type": "keyword"},
            "stock": {"type": "integer"}
        }
    }
})

# 给新索引添加别名
es.indices.put_alias(index=new_index, name="products")

# 原子切换别名
actions = {
    "actions": [
        {"remove": {"index": "products_v1", "alias": "products"}},
        {"add": {"index": "products_v2", "alias": "products"}}
    ]
}
es.indices.update_aliases(body=actions)

# Reindex操作
es.reindex(body={
    "source": {"index": "old_index"},
    "dest": {"index": "new_index"},
    "script": {
        "source": "ctx._source.price *= 0.9",
        "lang": "painless"
    }
}, request_timeout=300)

三、文档操作

3.1 CRUD操作

python 复制代码

# 文档的增删改查
from elasticsearch import Elasticsearch
from datetime import datetime

es = Elasticsearch(["http://localhost:9200"])

# 创建文档 - 指定ID
doc = {
    "title": "流浪地球2",
    "director": "郭帆",
    "year": 2023,
    "genre": ["科幻", "冒险"],
    "rating": 8.0,
    "votes": 95000,
    "duration": 173,
    "actors": ["吴京", "刘德华", "沙溢"],
    "release_date": "2023-01-22"
}

result = es.index(index="movies", id="movie_1001", body=doc)
print(f"Indexed: {result['_id']}")

# 创建文档 - 自动生成ID
result = es.index(index="movies", body=doc)
print(f"Indexed with auto ID: {result['_id']}")

# 获取文档
doc = es.get(index="movies", id="movie_1001")
print(f"Title: {doc['_source']['title']}")
print(f"Director: {doc['_source']['director']}")

# 更新文档 - 部分更新
es.update(index="movies", id="movie_1001", body={
    "doc": {
        "rating": 8.3,
        "votes": 100000
    }
})

# 更新文档 - 脚本更新
es.update(index="movies", id="movie_1001", body={
    "script": {
        "source": "ctx._source.rating = params.new_rating; ctx._source.votes += params.delta",
        "lang": "painless",
        "params": {
            "new_rating": 8.5,
            "delta": 5000
        }
    }
})

# 删除文档
es.delete(index="movies", id="movie_1001")

# 批量操作
from elasticsearch.helpers import bulk

actions = [
    {
        "_index": "movies",
        "_id": "movie_1002",
        "_source": {
            "title": "满江红",
            "director": "张艺谋",
            "year": 2023,
            "genre": ["悬疑", "喜剧"],
            "rating": 7.2,
            "votes": 55000
        }
    },
    {
        "_index": "movies",
        "_id": "movie_1003",
        "_source": {
            "title": "狂飙",
            "director": "徐纪周",
            "year": 2023,
            "genre": ["犯罪", "剧情"],
            "rating": 8.5,
            "votes": 120000
        }
    }
]

success, failed = bulk(es, actions)
print(f"Success: {success}, Failed: {len(failed)}")

3.2 批量处理与Bulk API

python 复制代码

# Bulk API 批量处理
import json

# 构建bulk请求体
bulk_body = []

# 批量索引
operations = [
    {"index": {"_index": "products", "_id": "prod_001"}},
    {"name": "iPhone 15", "price": 7999.0, "category": "手机", "stock": 100},
    {"index": {"_index": "products", "_id": "prod_002"}},
    {"name": "MacBook Pro", "price": 19999.0, "category": "电脑", "stock": 50},
    {"index": {"_index": "products", "_id": "prod_003"}},
    {"name": "AirPods Pro", "price": 1899.0, "category": "耳机", "stock": 200},
]

# 批量删除
operations.extend([
    {"delete": {"_index": "products", "_id": "prod_old_001"}},
    {"delete": {"_index": "products", "_id": "prod_old_002"}}
])

# 执行bulk操作
response = es.bulk(body=operations, refresh=True)

if response.get("errors"):
    for item in response["items"]:
        if "error" in item.get("index", {}):
            print(f"Error: {item['index']['error']}")
else:
    print(f"Bulk operation completed successfully")

四、搜索查询详解

4.1 全文检索

python 复制代码

# 全文搜索查询
query = {
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        "title": {
                            "query": "地球 科幻",
                            "operator": "or",
                            "minimum_should_match": "50%"
                        }
                    }
                }
            ],
            "should": [
                {
                    "match": {
                        "description": {
                            "query": "太空 人类",
                            "boost": 1.5
                        }
                    }
                }
            ],
            "filter": [
                {"range": {"year": {"gte": 2010}}}
            ]
        }
    },
    "highlight": {
        "fields": {
            "title": {},
            "description": {"fragment_size": 150}
        },
        "pre_tags": ["<em>"],
        "post_tags": ["</em>"]
    },
    "sort": [
        {"_score": "desc"},
        {"year": "desc"},
        {"rating": "desc"}
    ],
    "from": 0,
    "size": 20
}

result = es.search(index="movies", body=query)

print(f"Total hits: {result['hits']['total']['value']}")
for hit in result['hits']['hits']:
    print(f"\nTitle: {hit['_source']['title']}")
    print(f"Score: {hit['_score']}")
    if 'highlight' in hit:
        print(f"Highlight: {hit['highlight']}")

4.2 高级查询

python 复制代码

# 多条件组合查询
query = {
    "query": {
        "bool": {
            "must": [
                {
                    "bool": {
                        "should": [
                            {"term": {"genre": "科幻"}},
                            {"term": {"genre": "冒险"}}
                        ],
                        "minimum_should_match": 1
                    }
                }
            ],
            "must_not": [
                {"term": {"director": "张艺谋"}}
            ],
            "filter": [
                {"term": {"year": 2023}},
                {"range": {"rating": {"gte": 7.0}}},
                {"range": {"votes": {"gte": 10000}}}
            ]
        }
    },
    "aggs": {
        "by_genre": {
            "terms": {"field": "genre", "size": 10},
            "aggs": {
                "avg_rating": {"avg": {"field": "rating"}}
            }
        },
        "rating_stats": {
            "stats": {"field": "rating"}
        },
        "year_histogram": {
            "histogram": {
                "field": "year",
                "interval": 5
            }
        }
    }
}

result = es.search(index="movies", body=query)

# 输出聚合结果
print("\n=== Aggregation Results ===")
for genre in result["aggregations"]["by_genre"]["buckets"]:
    print(f"{genre['key']}: {genre['doc_count']} movies, avg rating: {genre['avg_rating']['value']:.2f}")

print(f"\nRating stats: {result['aggregations']['rating_stats']}")

4.3 模糊搜索与纠错

python 复制代码

# 模糊搜索和建议
query = {
    "query": {
        "bool": {
            "should": [
                {
                    "match": {
                        "title": {
                            "query": "流浪地球",
                            "fuzziness": "AUTO",
                            "prefix_length": 1
                        }
                    }
                },
                {
                    "match_phrase": {
                        "title": {
                            "query": "流浪地球",
                            "slop": 1
                        }
                    }
                }
            ]
        }
    },
    "suggest": {
        "title_suggest": {
            "text": "流浪地球",
            "term": {
                "field": "title",
                "suggest_mode": "popular",
                "max_edits": 2,
                "prefix_length": 1
            }
        },
        "phrase_suggest": {
            "text": "流浪地球",
            "phrase": {
                "field": "title",
                "size": 3,
                "gram_size": 3,
                "direct_generator": [{
                    "field": "title",
                    "suggest_mode": "popular"
                }]
            }
        }
    }
}

result = es.search(index="movies", body=query)

# 处理建议结果
print("\n=== Suggestions ===")
for suggestion in result.get("suggest", {}).get("title_suggest", []):
    print(f"Original: {suggestion['text']}")
    for option in suggestion.get("options", []):
        print(f"  -> {option['text']} (score: {option['score']})")

五、聚合分析

5.1 聚合查询

python 复制代码

# 聚合分析示例 - 电影数据分析
query = {
    "size": 0,
    "aggs": {
        # 按类型分组统计
        "genres_overview": {
            "terms": {
                "field": "genre",
                "size": 15,
                "order": {"_count": "desc"}
            }
        },
        
        # 评分统计
        "rating_statistics": {
            "stats": {"field": "rating"}
        },
        
        # 评分分布
        "rating_distribution": {
            "histogram": {
                "field": "rating",
                "interval": 0.5,
                "min_doc_count": 1
            }
        },
        
        # 票房分析
        "box_office_stats": {
            "percentiles": {
                "field": "box_office",
                "percents": [25, 50, 75, 90, 99]
            }
        },
        
        # 导演作品统计
        "top_directors": {
            "terms": {
                "field": "director",
                "size": 10,
                "order": {"total_box_office": "desc"}
            },
            "aggs": {
                "total_box_office": {
                    "sum": {"field": "box_office"}
                },
                "avg_rating": {
                    "avg": {"field": "rating"}
                }
            }
        },
        
        # 年份趋势
        "yearly_trend": {
            "date_histogram": {
                "field": "release_date",
                "calendar_interval": "year",
                "format": "yyyy"
            },
            "aggs": {
                "avg_rating": {"avg": {"field": "rating"}},
                "total_votes": {"sum": {"field": "votes"}}
            }
        }
    }
}

result = es.search(index="movies", body=query)

print("=== Movie Analytics ===\n")
print(f"Total movies analyzed\n")

print("--- Rating Statistics ---")
stats = result["aggregations"]["rating_statistics"]
print(f"Min: {stats['min']:.1f}, Max: {stats['max']:.1f}")
print(f"Avg: {stats['avg']:.2f}, Total: {stats['sum']:.1f}")

print("\n--- Top 10 Directors ---")
for director in result["aggregations"]["top_directors"]["buckets"]:
    print(f"{director['key']}: {director['doc_count']} movies, "
          f"total box office: {director['total_box_office']['value']/100000000:.2f}B, "
          f"avg rating: {director['avg_rating']['value']:.2f}")

5.2 Pipeline聚合

python 复制代码

# Pipeline聚合 - 计算导演平均评分排名
query = {
    "size": 0,
    "aggs": {
        "directors": {
            "terms": {
                "field": "director",
                "size": 20,
                "order": {"avg_rating": "desc"}
            },
            "aggs": {
                "avg_rating": {
                    "avg": {"field": "rating"}
                },
                "min_rating": {
                    "min": {"field": "rating"}
                },
                "max_rating": {
                    "max": {"field": "rating"}
                }
            }
        },
        # Pipeline: 计算平均评分的百分位排名
        "rating_percentiles": {
            "percentiles_bucket": {
                "buckets_path": "directors>avg_rating",
                "percents": [25, 50, 75, 90]
            }
        },
        # Pipeline: 过滤高于平均的导演
        "directors_filtered": {
            "filter": {
                "range": {
                    "avg_rating": {"gte": 7.5}
                }
            },
            "aggs": {
                "top_directors": {
                    "terms": {
                        "field": "director",
                        "size": 10
                    }
                }
            }
        }
    }
}

六、性能优化与最佳实践

6.1 索引性能优化

python 复制代码

# 索引性能优化配置
settings = {
    "settings": {
        "number_of_shards": 5,
        "number_of_replicas": 1,
        "refresh_interval": "30s",  # 降低刷新频率，提升写入性能
        "translog": {
            "sync_interval": "10s",
            "durability": "async"  # 异步刷盘，提升性能
        },
        "indexing": {
            "slowlog": {
                "threshold": {
                    "index": "2s"
                }
            }
        },
        "refresh": {
            "blocks": {
                "write": False,
                "metadata": False,
                "read": False
            }
        }
    }
}

# 使用Bulk API批量写入
from elasticsearch.helpers import parallel_bulk
from concurrent.futures import ThreadPoolExecutor

def generate_movies():
    """模拟生成电影数据"""
    import random
    directors = ["张艺谋", "陈凯歌", "冯小刚", "徐克", "周星驰"]
    genres = ["动作", "喜剧", "爱情", "科幻", "悬疑", "动画"]
    
    for i in range(10000):
        yield {
            "_index": "movies",
            "_source": {
                "title": f"电影_{i}",
                "director": random.choice(directors),
                "year": random.randint(2000, 2024),
                "genre": random.sample(genres, k=random.randint(1, 3)),
                "rating": round(random.uniform(5.0, 9.5), 1),
                "votes": random.randint(1000, 1000000),
                "duration": random.randint(80, 180)
            }
        }

# 并行bulk写入
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = []
    for ok, result in parallel_bulk(es, generate_movies(), chunk_size=5000, raise_on_error=False):
        futures.append(ok)
    
    success_count = sum(1 for ok in futures if ok)
    print(f"Successfully indexed {success_count}/{len(futures)} documents")

6.2 查询性能优化

python 复制代码

# 查询性能优化技巧

# 1. 只返回需要的字段
query = {
    "_source": ["title", "year", "rating", "director"],
    "query": {"match_all": {}}
}

# 2. 使用filter缓存
query = {
    "query": {
        "bool": {
            "filter": [  # filter不计算评分，会被缓存
                {"term": {"year": 2023}},
                {"range": {"rating": {"gte": 7.0}}}
            ],
            "must": [  # must参与评分
                {"match": {"title": "科幻"}}
            ]
        }
    }
}

# 3. 使用search_after深度分页
query = {
    "query": {"match_all": {}},
    "sort": [
        {"year": "desc"},
        {"rating": "desc"},
        {"_id": "asc"}
    ],
    "size": 20
}

# 初始查询
result = es.search(index="movies", body=query)
if len(result["hits"]["hits"]) == 20:
    last_sort_values = result["hits"]["hits"][-1]["sort"]
    
    # 使用search_after获取下一页
    query["search_after"] = last_sort_values
    next_page = es.search(index="movies", body=query)

# 4. 使用聚合采样
query = {
    "size": 0,
    "aggs": {
        "sample": {
            "sample": {
                "shard_size": 10000,
                "max_docs_per_value": 100
            },
            "aggs": {
                "genre_distribution": {
                    "terms": {"field": "genre", "size": 20}
                }
            }
        }
    }
}

6.3 集群健康与监控

python 复制代码

# 集群健康检查与监控
from elasticsearch import Elasticsearch
import time

def check_cluster_health():
    health = es.cluster.health()
    print(f"Cluster: {health['cluster_name']}")
    print(f"Status: {health['status']}")  # green, yellow, red
    print(f"Number of nodes: {health['number_of_nodes']}")
    print(f"Active shards: {health['active_shards']}")
    print(f"Relocating shards: {health['relocating_shards']}")
    print(f"Initializing shards: {health['initializing_shards']}")
    print(f"Unassigned shards: {health['unassigned_shards']}")
    
    return health['status'] == 'green'

def monitor_index_stats(index_name):
    """监控索引统计信息"""
    stats = es.indices.stats(index=index_name)
    
    index_stats = stats['indices'][index_name]
    
    print(f"\n=== Index: {index_name} ===")
    
    # 存储统计
    store = index_stats['total']['store']
    print(f"Size: {store['size_in_bytes'] / 1024 / 1024:.2f} MB")
    print(f"Size in bytes: {store['size_in_bytes']}")
    
    # 文档统计
    docs = index_stats['total']['docs']
    print(f"Documents: {docs['count']}")
    print(f"Deleted documents: {docs['deleted']}")
    
    # 分片信息
    primaries = index_stats['primaries']
    print(f"\nPrimary shards: {primaries['segments']['count']}")
    print(f"Memory used by segments: {primaries['segments']['memory_in_bytes'] / 1024 / 1024:.2f} MB")
    
    # 索引和搜索统计
    indexing = primaries['indexing']
    print(f"\nIndexing:")
    print(f"  Total docs: {indexing['index_total']}")
    print(f"  Index time: {indexing['index_time_in_millis']}ms")
    print(f"  Throttle time: {indexing['index_throttle_time_in_millis']}ms")
    
    search = primaries['search']
    print(f"\nSearch:")
    print(f"  Total queries: {search['query_total']}")
    print(f"  Query time: {search['query_time_in_millis']}ms")
    print(f"  Avg query time: {search['query_time_in_millis'] / max(search['query_total'], 1):.2f}ms")

# 持续监控
while True:
    if check_cluster_health():
        print("Cluster health is GREEN")
    else:
        print("WARNING: Cluster health is not GREEN!")
    monitor_index_stats("movies")
    time.sleep(60)

七、实战案例：电商搜索系统

7.1 需求分析与设计

python 复制代码

# 电商搜索系统 - 商品索引设计
PUT /products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "product_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "asciifolding", "product_synonym"]
        },
        "pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "pinyin_filter"]
        }
      },
      "filter": {
        "product_synonym": {
          "type": "synonym",
          "synonyms": [
            "手机,手机终端,移动电话",
            "电脑,计算机,笔记本,台式机",
            "相机,摄像机,单反"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "product_id": {"type": "keyword"},
      "name": {
        "type": "text",
        "analyzer": "product_analyzer",
        "fields": {
          "pinyin": {"type": "text", "analyzer": "pinyin_analyzer"},
          "keyword": {"type": "keyword"}
        }
      },
      "category": {"type": "keyword"},
      "category_path": {"type": "keyword"},
      "brand": {"type": "keyword"},
      "price": {"type": "float"},
      "original_price": {"type": "float"},
      "stock": {"type": "integer"},
      "sales": {"type": "long"},
      "rating": {"type": "float"},
      "review_count": {"type": "integer"},
      "tags": {"type": "keyword"},
      "attributes": {
        "type": "nested",
        "properties": {
          "name": {"type": "keyword"},
          "value": {"type": "keyword"}
        }
      },
      "description": {"type": "text", "analyzer": "product_analyzer"},
      "images": {"type": "keyword"},
      "is_active": {"type": "boolean"},
      "created_at": {"type": "date"},
      "updated_at": {"type": "date"}
    }
  }
}

7.2 搜索功能实现

python 复制代码

# 电商搜索 - 搜索服务实现
class ProductSearchService:
    
    def __init__(self, es_client):
        self.es = es_client
        self.index = "products"
    
    def search(self, query_params):
        """
        综合搜索接口
        query_params: {
            'keyword': str,
            'category': str,
            'brand': List[str],
            'price_range': [min, max],
            'rating_min': float,
            'sort': str,  # 'relevance', 'price_asc', 'price_desc', 'sales', 'rating'
            'page': int,
            'size': int
        }
        """
        must_clauses = []
        filter_clauses = [{"term": {"is_active": True}}]
        
        # 关键词搜索
        if query_params.get('keyword'):
            keyword = query_params['keyword']
            must_clauses.append({
                "bool": {
                    "should": [
                        {
                            "match": {
                                "name": {
                                    "query": keyword,
                                    "boost": 3,
                                    "fuzziness": "AUTO"
                                }
                            }
                        },
                        {
                            "match": {
                                "name.pinyin": {
                                    "query": keyword,
                                    "boost": 1
                                }
                            }
                        },
                        {
                            "match": {
                                "description": {
                                    "query": keyword,
                                    "boost": 1
                                }
                            }
                        }
                    ],
                    "minimum_should_match": 1
                }
            })
        
        # 类目筛选
        if query_params.get('category'):
            filter_clauses.append({
                "term": {"category": query_params['category']}
            })
        
        # 品牌筛选
        if query_params.get('brands'):
            filter_clauses.append({
                "terms": {"brand": query_params['brands']}
            })
        
        # 价格区间
        if query_params.get('price_range'):
            price_range = query_params['price_range']
            filter_clauses.append({
                "range": {
                    "price": {
                        "gte": price_range[0],
                        "lte": price_range[1]
                    }
                }
            })
        
        # 评分筛选
        if query_params.get('rating_min'):
            filter_clauses.append({
                "range": {"rating": {"gte": query_params['rating_min']}}
            })
        
        # 排序
        sort_config = self._get_sort_config(query_params.get('sort', 'relevance'))
        
        # 构建查询
        query = {
            "bool": {
                "must": must_clauses if must_clauses else [{"match_all": {}}],
                "filter": filter_clauses
            }
        }
        
        # 聚合facets
        aggs = {
            "categories": {
                "terms": {"field": "category", "size": 50}
            },
            "brands": {
                "terms": {"field": "brand", "size": 100}
            },
            "price_stats": {
                "stats": {"field": "price"}
            },
            "price_ranges": {
                "range": {
                    "field": "price",
                    "ranges": [
                        {"to": 100},
                        {"from": 100, "to": 500},
                        {"from": 500, "to": 1000},
                        {"from": 1000, "to": 3000},
                        {"from": 3000}
                    ]
                }
            }
        }
        
        # 执行搜索
        body = {
            "query": query,
            "sort": sort_config,
            "from": (query_params.get('page', 1) - 1) * query_params.get('size', 20),
            "size": query_params.get('size', 20),
            "aggs": aggs,
            "highlight": {
                "fields": {
                    "name": {},
                    "description": {"fragment_size": 100}
                }
            }
        }
        
        return self.es.search(index=self.index, body=body)
    
    def _get_sort_config(self, sort_type):
        sort_mapping = {
            'relevance': [{"_score": "desc"}],
            'price_asc': [{"price": "asc"}],
            'price_desc': [{"price": "desc"}],
            'sales': [{"sales": "desc"}],
            'rating': [{"rating": "desc"}],
            'newest': [{"created_at": "desc"}]
        }
        return sort_mapping.get(sort_type, [{"_score": "desc"}])

# 使用示例
service = ProductSearchService(es)

result = service.search({
    'keyword': 'iPhone 手机',
    'category': '手机',
    'brand': ['Apple', '华为'],
    'price_range': [3000, 10000],
    'rating_min': 4.0,
    'sort': 'sales',
    'page': 1,
    'size': 20
})

print(f"Total hits: {result['hits']['total']['value']}")
print(f"\nFacets:")
print(f"Categories: {[(b['key'], b['doc_count']) for b in result['aggregations']['categories']['buckets']]}")
print(f"Price range distribution: {result['aggregations']['price_ranges']['buckets']}")

八、总结

Elasticsearch作为业界领先的搜索和数据分析引擎，提供了强大而灵活的能力。本文从基础概念出发，详细介绍了索引管理、文档操作、搜索查询、聚合分析和性能优化等核心知识点。

在实际应用中，需要注意以下几点：

合理的索引设计：根据业务需求设计合适的mapping和分片策略
查询优化：善用filter缓存、只返回必要字段、避免深度分页
监控与调优：持续监控集群健康和性能指标
数据安全：做好备份和权限控制

希望本文能够帮助读者全面掌握Elasticsearch，在实际项目中发挥其强大威力。