【架构实战】搜索引擎架构：ElasticSearch集群设计

一、ElasticSearch简介

ElasticSearch是一个基于Lucene的分布式搜索和分析引擎：

核心特点：

全文搜索
分布式实时分析
高可用
RESTful API
近实时（NRT）索引

典型使用场景：

全文检索
日志分析（ELK）
应用性能监控
安全分析
商品搜索

二、集群架构设计

1. 集群角色

复制代码

┌──────────────────────────────────────────────────────┐
│                     Client Node                       │
│            （协调节点，路由请求）                      │
└──────────────────────────────────────────────────────┘
                        │
        ┌───────────────┼───────────────┐
        ▼               ▼               ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│  Master Node │ │  Master Node │ │  Master Node │
│  （主节点）   │ │  （候选节点） │ │  （候选节点） │
└──────────────┘ └──────────────┘ └──────────────┘
        │               │               │
        └───────────────┼───────────────┘
                        ▼
┌──────────────────────────────────────────────────────┐
│                  Data Node（数据节点）                 │
│   ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐        │
│   │Shard 0 │ │Shard 1 │ │Shard 2 │ │Shard 3 │        │
│   │(Primary)│ │(Primary)│ │(Replica)│ │(Replica)│       │
│   └────────┘ └────────┘ └────────┘ └────────┘        │
└──────────────────────────────────────────────────────┘

2. 分片设计

yaml 复制代码

# elasticsearch.yml (Elasticsearch 8.x syntax)
# Cluster settings
cluster.name: order-search
node.name: node-1
# node.master / node.data no longer exist in 8.x; roles are declared via node.roles.
node.roles: [ master, data ]

# Discovery and cluster bootstrapping.
# discovery.zen.minimum_master_nodes no longer exists: since 7.x the cluster
# manages its own voting quorum to prevent split brain. List the master-eligible
# nodes instead (seed_hosts takes addresses, initial_master_nodes takes node names).
# NOTE(review): node-2 / node-3 are assumed peer names -- replace with the real ones.
discovery.seed_hosts: ["node-1", "node-2", "node-3"]
cluster.initial_master_nodes: ["node-1", "node-2", "node-3"]

# Shard settings
# Index-level settings are rejected in elasticsearch.yml. Set them per index
# (or through an index template) when the index is created, e.g.:
#   PUT /products
#   { "settings": { "number_of_shards": 3, "number_of_replicas": 1 } }

# Memory settings
bootstrap.memory_lock: true

# Thread pool settings
thread_pool:
  search:
    size: 20
    queue_size: 1000

3. Docker集群部署

yaml 复制代码

# docker-compose.yml
# Three dedicated master-eligible nodes plus two data nodes (Elasticsearch 8.x).
# - node.roles replaces the node.master / node.data flags, which no longer exist in 8.x.
# - Every node needs discovery.seed_hosts to find the cluster; only the
#   master-eligible nodes carry cluster.initial_master_nodes.
# - xpack.security.enabled=false keeps this a plain-HTTP local demo.
#   Do NOT disable security in production; configure TLS and credentials instead.
version: '3'
services:
  es-master-1:
    image: elasticsearch:8.11.0
    container_name: es-master-1
    environment:
      - node.name=master-1
      - cluster.name=order-search
      - node.roles=master
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - cluster.initial_master_nodes=master-1,master-2,master-3
      - xpack.security.enabled=false
    ports:
      - "9201:9200"
    volumes:
      - es-master-1-data:/usr/share/elasticsearch/data
    mem_limit: 1g

  es-master-2:
    image: elasticsearch:8.11.0
    container_name: es-master-2
    environment:
      - node.name=master-2
      - cluster.name=order-search
      - node.roles=master
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - cluster.initial_master_nodes=master-1,master-2,master-3
      - xpack.security.enabled=false
    volumes:
      - es-master-2-data:/usr/share/elasticsearch/data

  # Third master-eligible node: it was referenced by the discovery settings but
  # never defined, and three masters are needed to tolerate the loss of one.
  es-master-3:
    image: elasticsearch:8.11.0
    container_name: es-master-3
    environment:
      - node.name=master-3
      - cluster.name=order-search
      - node.roles=master
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - cluster.initial_master_nodes=master-1,master-2,master-3
      - xpack.security.enabled=false
    volumes:
      - es-master-3-data:/usr/share/elasticsearch/data

  es-data-1:
    image: elasticsearch:8.11.0
    container_name: es-data-1
    environment:
      - node.name=data-1
      - cluster.name=order-search
      - node.roles=data
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - xpack.security.enabled=false
    volumes:
      - es-data-1-data:/usr/share/elasticsearch/data

  es-data-2:
    image: elasticsearch:8.11.0
    container_name: es-data-2
    environment:
      - node.name=data-2
      - cluster.name=order-search
      - node.roles=data
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - xpack.security.enabled=false
    volumes:
      - es-data-2-data:/usr/share/elasticsearch/data

volumes:
  es-master-1-data:
  es-master-2-data:
  es-master-3-data:
  es-data-1-data:
  es-data-2-data:

三、索引设计

1. Mapping设计

json 复制代码

# Creates the products index with a custom IK-based analyzer.
# The "standard" token filter was removed in Elasticsearch 7.0, so only
# "lowercase" is applied after the ik_max_word tokenizer.
PUT /products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "ik_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "product_id": { "type": "long" },
      "product_name": {
        "type": "text",
        "analyzer": "ik_analyzer",
        "fields": {
          "keyword": { "type": "keyword" }
        }
      },
      "description": {
        "type": "text",
        "analyzer": "ik_analyzer"
      },
      "category_id": { "type": "long" },
      "category_path": { "type": "keyword" },
      "brand_id": { "type": "long" },
      "brand_name": { "type": "keyword" },
      "price": { "type": "double" },
      "stock": { "type": "integer" },
      "sales_count": { "type": "integer" },
      "tags": { "type": "keyword" },
      "specs": {
        "type": "nested",
        "properties": {
          "name": { "type": "keyword" },
          "value": { "type": "keyword" }
        }
      },
      "images": { "type": "keyword" },
      "create_time": { "type": "date" },
      "update_time": { "type": "date" }
    }
  }
}

2. 索引模板

json 复制代码

# Index template for product indices.
# Both "products-*" and "products_*" are matched so that underscore-named
# indices such as products_2024_01 also pick up these settings and mappings.
PUT /_index_template/product_template
{
  "index_patterns": ["products-*", "products_*"],
  "template": {
    "settings": {
      "number_of_shards": 3,
      "number_of_replicas": 1,
      "refresh_interval": "5s"
    },
    "mappings": {
      "properties": {
        "product_id": { "type": "long" },
        "product_name": { "type": "text", "analyzer": "ik_max_word" },
        "price": { "type": "double" },
        "category_id": { "type": "long" }
      }
    }
  }
}

3. 别名管理

json 复制代码

# Create the index, then attach the "products" alias to it
# ("{ ... }" below is a placeholder for the real settings and mappings)
PUT /products_2024_01
{
  "settings": { ... },
  "mappings": { ... }
}

POST /_aliases
{
  "actions": [
    { "add": { "index": "products_2024_01", "alias": "products" } }
  ]
}

# Switch the alias to the new index (zero downtime): the remove and the add
# are submitted together in a single _aliases request
POST /_aliases
{
  "actions": [
    { "remove": { "index": "products_2024_01", "alias": "products" } },
    { "add": { "index": "products_2024_02", "alias": "products" } }
  ]
}

四、Java客户端

1. Java API Client（elasticsearch-java，官方新版客户端，用于替代已弃用的 High-Level REST Client）

xml 复制代码

<!-- Elasticsearch Java client (co.elastic.clients:elasticsearch-java), version 8.11.0 -->
<dependency>
    <groupId>co.elastic.clients</groupId>
    <artifactId>elasticsearch-java</artifactId>
    <version>8.11.0</version>
</dependency>

java 复制代码

/**
 * Spring configuration that wires the Elasticsearch Java API client
 * against {@code localhost:9200}.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Low-level REST client that owns the HTTP connection pool.
     *
     * <p>It is exposed as its own bean so that Spring calls {@code close()} on
     * context shutdown; when it was created inline inside {@link #client()} it
     * was never closed.
     *
     * @return the shared low-level REST client
     */
    @Bean(destroyMethod = "close")
    public RestClient restClient() {
        return RestClient.builder(
            new HttpHost("localhost", 9200)
        ).build();
    }

    /**
     * Typed Elasticsearch client built on top of {@link #restClient()}, using
     * Jackson for JSON mapping.
     *
     * @return the Elasticsearch API client
     */
    @Bean
    public ElasticsearchClient client() {
        // Inside a @Configuration class this call resolves to the singleton bean.
        return new ElasticsearchClient(
            new RestClientTransport(restClient(), new JacksonJsonpMapper())
        );
    }
}

@Service
public class ProductSearchService {
    
    @Autowired
    private ElasticsearchClient client;
    
    // 搜索商品
    public SearchResult<Product> searchProducts(SearchRequest request) {
        try {
            SearchResponse<Product> response = client.search(s -> s
                .index("products")
                .query(q -> q
                    .bool(b -> {
                        // 关键词搜索
                        if (StringUtils.hasText(request.getKeyword())) {
                            b.must(m -> m
                                .multiMatch(mm -> mm
                                    .query(request.getKeyword())
                                    .fields("product_name^3", "description", "brand_name")
                                )
                            );
                        }
                        
                        // 分类过滤
                        if (request.getCategoryId() != null) {
                            b.filter(f -> f
                                .term(t -> t
                                    .field("category_id")
                                    .value(request.getCategoryId())
                                )
                            );
                        }
                        
                        // 价格区间
                        if (request.getMinPrice() != null || request.getMaxPrice() != null) {
                            b.filter(f -> f
                                .range(r -> {
                                    var range = r.field("price");
                                    if (request.getMinPrice() != null) {
                                        range.gte(request.getMinPrice());
                                    }
                                    if (request.getMaxPrice() != null) {
                                        range.lte(request.getMaxPrice());
                                    }
                                    return range;
                                })
                            );
                        }
                        
                        return b;
                    })
                )
                .from(request.getOffset())
                .size(request.getPageSize())
                .sort(so -> {
                    if ("price".equals(request.getSortField())) {
                        return so.field(f -> {
                            if ("desc".equals(request.getSortOrder())) {
                                return f.field("price").order(SortOrder.Desc);
                            }
                            return f.field("price").order(SortOrder.Asc);
                        });
                    }
                    return so.field(f -> f.field("_score").order(SortOrder.Desc));
                })
                .highlight(h -> h
                    .fields("product_name", f -> f
                        .preTags("<em>")
                        .postTags("</em>")
                    )
                ),
                Product.class
            );
            
            return SearchResult.<Product>builder()
                .total(response.hits().total().value())
                .items(response.hits().hits().stream()
                    .map(hit -> {
                        Product p = hit.source();
                        p.setHighlight(hit.highlight().get("product_name"));
                        return p;
                    })
                    .collect(Collectors.toList()))
                .build();
                
        } catch (IOException e) {
            throw new RuntimeException("搜索失败", e);
        }
    }
    
    // 索引文档
    public void indexProduct(Product product) {
        try {
            client.index(i -> i
                .index("products")
                .id(product.getProductId().toString())
                .document(product)
            );
        } catch (IOException e) {
            throw new RuntimeException("索引失败", e);
        }
    }
    
    // 批量索引
    public void bulkIndexProducts(List<Product> products) {
        try {
            BulkRequest.Builder br = new BulkRequest.Builder();
            
            for (Product p : products) {
                br.operations(op -> op
                    .index(idx -> idx
                        .index("products")
                        .id(p.getProductId().toString())
                        .document(p)
                    )
                );
            }
            
            client.bulk(br.build());
        } catch (IOException e) {
            throw new RuntimeException("批量索引失败", e);
        }
    }
}

2. 拼音搜索支持

json 复制代码

# Defines pinyin_analyzer (a custom tokenizer of type "pinyin" plus the lowercase
# filter) and attaches it to product_name as the "pinyin" sub-field; the main
# product_name field is analyzed with ik_max_word.
PUT /products
{
  "settings": {
    "analysis": {
      "analyzer": {
        "pinyin_analyzer": {
          "tokenizer": "pinyin_tokenizer",
          "filter": ["lowercase"]
        }
      },
      "tokenizer": {
        "pinyin_tokenizer": {
          "type": "pinyin",
          "lowercase": true,
          "keep_first_letter": true,
          "keep_full_pinyin": true,
          "keep_none_chinese": true
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "product_name": {
        "type": "text",
        "analyzer": "ik_max_word",
        "fields": {
          "pinyin": {
            "type": "text",
            "analyzer": "pinyin_analyzer"
          }
        }
      }
    }
  }
}

五、性能优化

1. 查询优化

java 复制代码

/**
 * Pages through all products with {@code search_after} instead of deep
 * {@code from}/{@code size} paging.
 *
 * <p>{@code search_after} only works together with an explicit sort, so the
 * query sorts on {@code product_id} ascending.
 *
 * @param request   search criteria (not used by this match-all example)
 * @param sortValue sort value of the last hit of the previous page, or
 *                  {@code null} to fetch the first page
 * @return the page of products plus the sort value of its last hit
 *         ({@code null} when the page is empty), to pass to the next call
 * @throws RuntimeException wrapping the {@link IOException} if the request fails
 */
public SearchResult<Product> searchAfter(SearchRequest request, String sortValue) {
    try {
        SearchResponse<Product> response = client.search(s -> {
            var builder = s.index("products")
                .query(q -> q.matchAll(m -> m))
                .sort(so -> so.field(f -> f.field("product_id").order(SortOrder.Asc)))
                .size(20);

            if (sortValue != null) {
                // Resume right after the last hit of the previous page
                builder.searchAfter(
                    co.elastic.clients.elasticsearch._types.FieldValue.of(sortValue));
            }

            return builder;
        }, Product.class);

        var hits = response.hits().hits();

        // Sort value of the last hit, handed back for the next query
        String lastSortValue = null;
        if (!hits.isEmpty()) {
            var lastSort = hits.get(hits.size() - 1).sort();
            if (!lastSort.isEmpty()) {
                lastSortValue = lastSort.get(0)._toJsonString();
            }
        }

        return SearchResult.<Product>builder()
            .items(hits.stream()
                .map(hit -> hit.source())
                .collect(Collectors.toList()))
            .lastSortValue(lastSortValue)
            .build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

2. 写入优化

yaml 复制代码

# Write-optimization settings
index:
  number_of_shards: 3
  number_of_replicas: 0  # set to 0 while writing, back to 1 once finished
  
  # Refresh interval (refreshing less often improves write performance)
  refresh_interval: 30s
  
  # Merge policy
  merge:
    scheduler.max_thread_count: 1

# Memory setting. NOTE(review): the original comment labelled this
# "asynchronous replica writes", but bootstrap.memory_lock is a memory
# option and does not appear to configure replica writes -- confirm.
bootstrap.memory_lock: true

3. 分片分配策略

json 复制代码

# Show shard allocation
GET /_cat/shards?v

# Manually move a shard (shard 0 of "products" from node-1 to node-2)
POST /_cluster/reroute
{
  "commands": [
    {
      "move": {
        "index": "products",
        "shard": 0,
        "from_node": "node-1",
        "to_node": "node-2"
      }
    }
  ]
}

# Set the number of replicas
PUT /products/_settings
{
  "number_of_replicas": 1
}

六、监控与运维

1. 集群健康

bash 复制代码

# Show cluster health
GET /_cluster/health

# Show node status
GET /_cat/nodes?v

# Show index status
GET /_cat/indices?v

2. 性能监控

json 复制代码

# Node statistics
GET /_nodes/stats

# Index statistics for "products"
GET /products/_stats

# Cluster statistics
GET /_cluster/stats

3. Curator管理

yaml 复制代码

# curator.yml
# Connection settings for the Elasticsearch endpoint
# NOTE(review): Curator normally reads client settings and actions from two
# separate files -- confirm this combined layout loads with the version in use.
client:
  hosts:
    - localhost
  port: 9200

# Action 1: delete indices whose creation date is more than 30 days old
actions:
  1:
    action: delete_indices
    description: 删除30天前的索引
    filters:
      - filtertype: age
        source: creation_date
        direction: older
        unit: days
        unit_count: 30

七、总结

ElasticSearch是强大的分布式搜索引擎：

集群架构：主节点 + 数据节点 + 协调节点
索引设计：合理分片 + Mapping + 别名
查询优化：避免深度分页 + Search After
写入优化：降低副本 + 批量写入

最佳实践：

根据数据量合理设置分片数
做好冷热数据分离
定期维护（合并、清理）
完善的监控告警

个人观点，仅供参考