一、ElasticSearch简介
ElasticSearch是一个基于Lucene的分布式搜索和分析引擎。
核心特点:
- 全文搜索
- 分布式实时分析
- 高可用
- RESTful API
- 近实时(NRT)索引
典型使用场景:
- 全文检索
- 日志分析(ELK)
- 应用性能监控
- 安全分析
- 商品搜索
二、集群架构设计
1. 集群角色
┌──────────────────────────────────────────────────────┐
│ Client Node │
│ (协调节点,路由请求) │
└──────────────────────────────────────────────────────┘
│
┌───────────────┼───────────────┐
▼ ▼ ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Master Node │ │ Master Node │ │ Master Node │
│ (主节点) │ │ (候选节点) │ │ (候选节点) │
└──────────────┘ └──────────────┘ └──────────────┘
│ │ │
└───────────────┼───────────────┘
▼
┌──────────────────────────────────────────────────────┐
│ Data Node(数据节点) │
│ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ │
│ │Shard 0 │ │Shard 1 │ │Shard 2 │ │Shard 3 │ │
│ │(Primary)│ │(Primary)│ │(Replica)│ │(Replica)│ │
│ └────────┘ └────────┘ └────────┘ └────────┘ │
└──────────────────────────────────────────────────────┘
2. 分片设计
yaml
# elasticsearch.yml (Elasticsearch 8.x)
# Cluster settings
cluster.name: order-search
node.name: node-1
# node.master / node.data were removed in 8.x; node roles are declared with node.roles.
node.roles: [ master, data ]
# Split-brain protection: discovery.zen.minimum_master_nodes no longer exists.
# Since 7.x the cluster maintains its own voting configuration; only the
# discovery seeds and the one-time bootstrap list have to be provided.
# NOTE(review): host and node names below are placeholders - adjust to the real cluster.
discovery.seed_hosts: ["node-1", "node-2", "node-3"]
cluster.initial_master_nodes: ["node-1", "node-2", "node-3"]
# Shard settings are index-level and are rejected in elasticsearch.yml;
# set them when creating the index or through an index template:
#   number_of_shards: 3     (primary shards)
#   number_of_replicas: 1   (replica shards)
# Memory settings
bootstrap.memory_lock: true
# Thread pool settings
thread_pool:
  search:
    size: 20
    queue_size: 1000
3. Docker集群部署
yaml
# docker-compose.yml
# Demo cluster: three dedicated master-eligible nodes plus two data nodes.
# NOTE(review): security is disabled so that the plain-HTTP examples work;
# enable TLS and authentication before using anything like this in production.
version: '3'
services:
  es-master-1:
    image: elasticsearch:8.11.0
    container_name: es-master-1
    environment:
      - node.name=master-1
      - cluster.name=order-search
      # 8.x removed node.master / node.data; roles are set through node.roles.
      - node.roles=master
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - cluster.initial_master_nodes=master-1,master-2,master-3
      - xpack.security.enabled=false
    ports:
      - "9201:9200"
    volumes:
      - es-master-1-data:/usr/share/elasticsearch/data
    mem_limit: 1g
  es-master-2:
    image: elasticsearch:8.11.0
    container_name: es-master-2
    environment:
      - node.name=master-2
      - cluster.name=order-search
      - node.roles=master
      # Every node needs the seed hosts, otherwise it cannot discover the cluster.
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - cluster.initial_master_nodes=master-1,master-2,master-3
      - xpack.security.enabled=false
    volumes:
      - es-master-2-data:/usr/share/elasticsearch/data
  # Third master-eligible node: it is referenced by discovery.seed_hosts and
  # cluster.initial_master_nodes and gives the cluster a fault-tolerant quorum.
  es-master-3:
    image: elasticsearch:8.11.0
    container_name: es-master-3
    environment:
      - node.name=master-3
      - cluster.name=order-search
      - node.roles=master
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - cluster.initial_master_nodes=master-1,master-2,master-3
      - xpack.security.enabled=false
    volumes:
      - es-master-3-data:/usr/share/elasticsearch/data
  es-data-1:
    image: elasticsearch:8.11.0
    container_name: es-data-1
    environment:
      - node.name=data-1
      - cluster.name=order-search
      - node.roles=data
      # Data nodes only need the seeds; cluster.initial_master_nodes is for master-eligible nodes.
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - xpack.security.enabled=false
    volumes:
      - es-data-1-data:/usr/share/elasticsearch/data
  es-data-2:
    image: elasticsearch:8.11.0
    container_name: es-data-2
    environment:
      - node.name=data-2
      - cluster.name=order-search
      - node.roles=data
      - discovery.seed_hosts=es-master-1,es-master-2,es-master-3
      - xpack.security.enabled=false
    volumes:
      - es-data-2-data:/usr/share/elasticsearch/data
volumes:
  es-master-1-data:
  es-master-2-data:
  es-master-3-data:
  es-data-1-data:
  es-data-2-data:
三、索引设计
1. Mapping设计
json
# Create the products index with an IK-based custom analyzer.
# The "standard" token filter was removed in Elasticsearch 7.0, so only "lowercase" is applied.
PUT /products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "ik_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "product_id": { "type": "long" },
      "product_name": {
        "type": "text",
        "analyzer": "ik_analyzer",
        "fields": {
          "keyword": { "type": "keyword" }
        }
      },
      "description": {
        "type": "text",
        "analyzer": "ik_analyzer"
      },
      "category_id": { "type": "long" },
      "category_path": { "type": "keyword" },
      "brand_id": { "type": "long" },
      "brand_name": { "type": "keyword" },
      "price": { "type": "double" },
      "stock": { "type": "integer" },
      "sales_count": { "type": "integer" },
      "tags": { "type": "keyword" },
      "specs": {
        "type": "nested",
        "properties": {
          "name": { "type": "keyword" },
          "value": { "type": "keyword" }
        }
      },
      "images": { "type": "keyword" },
      "create_time": { "type": "date" },
      "update_time": { "type": "date" }
    }
  }
}
2. 索引模板
json
PUT /_index_template/product_template
{
"index_patterns": ["products-*"],
"template": {
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"refresh_interval": "5s"
},
"mappings": {
"properties": {
"product_id": { "type": "long" },
"product_name": { "type": "text", "analyzer": "ik_max_word" },
"price": { "type": "double" },
"category_id": { "type": "long" }
}
}
}
}
3. 别名管理
json
# 创建索引并指定别名
PUT /products_2024_01
{
"settings": { ... },
"mappings": { ... }
}
POST /_aliases
{
"actions": [
{ "add": { "index": "products_2024_01", "alias": "products" } }
]
}
# 切换别名(零停机)
POST /_aliases
{
"actions": [
{ "remove": { "index": "products_2024_01", "alias": "products" } },
{ "add": { "index": "products_2024_02", "alias": "products" } }
]
}
四、Java客户端
1. Java API Client(官方新版客户端,已取代被废弃的 High-Level REST Client)
xml
<dependency>
<groupId>co.elastic.clients</groupId>
<artifactId>elasticsearch-java</artifactId>
<version>8.11.0</version>
</dependency>
java
/**
 * Spring configuration that wires the Elasticsearch Java API client.
 */
@Configuration
public class ElasticsearchConfig {

    /**
     * Low-level REST client, exposed as its own bean so that Spring closes it
     * (releasing its HTTP connections) when the application context shuts down.
     *
     * @return a REST client connected to {@code localhost:9200}
     */
    @Bean(destroyMethod = "close")
    public RestClient restClient() {
        return RestClient.builder(
                new HttpHost("localhost", 9200)
        ).build();
    }

    /**
     * Typed Elasticsearch client built on top of {@link #restClient()}.
     *
     * @return the API client using Jackson for JSON mapping
     */
    @Bean
    public ElasticsearchClient client() {
        // Inside a @Configuration class this call resolves to the singleton bean above.
        return new ElasticsearchClient(
                new RestClientTransport(restClient(), new JacksonJsonpMapper())
        );
    }
}
@Service
public class ProductSearchService {
@Autowired
private ElasticsearchClient client;
// 搜索商品
public SearchResult<Product> searchProducts(SearchRequest request) {
try {
SearchResponse<Product> response = client.search(s -> s
.index("products")
.query(q -> q
.bool(b -> {
// 关键词搜索
if (StringUtils.hasText(request.getKeyword())) {
b.must(m -> m
.multiMatch(mm -> mm
.query(request.getKeyword())
.fields("product_name^3", "description", "brand_name")
)
);
}
// 分类过滤
if (request.getCategoryId() != null) {
b.filter(f -> f
.term(t -> t
.field("category_id")
.value(request.getCategoryId())
)
);
}
// 价格区间
if (request.getMinPrice() != null || request.getMaxPrice() != null) {
b.filter(f -> f
.range(r -> {
var range = r.field("price");
if (request.getMinPrice() != null) {
range.gte(request.getMinPrice());
}
if (request.getMaxPrice() != null) {
range.lte(request.getMaxPrice());
}
return range;
})
);
}
return b;
})
)
.from(request.getOffset())
.size(request.getPageSize())
.sort(so -> {
if ("price".equals(request.getSortField())) {
return so.field(f -> {
if ("desc".equals(request.getSortOrder())) {
return f.field("price").order(SortOrder.Desc);
}
return f.field("price").order(SortOrder.Asc);
});
}
return so.field(f -> f.field("_score").order(SortOrder.Desc));
})
.highlight(h -> h
.fields("product_name", f -> f
.preTags("<em>")
.postTags("</em>")
)
),
Product.class
);
return SearchResult.<Product>builder()
.total(response.hits().total().value())
.items(response.hits().hits().stream()
.map(hit -> {
Product p = hit.source();
p.setHighlight(hit.highlight().get("product_name"));
return p;
})
.collect(Collectors.toList()))
.build();
} catch (IOException e) {
throw new RuntimeException("搜索失败", e);
}
}
// 索引文档
public void indexProduct(Product product) {
try {
client.index(i -> i
.index("products")
.id(product.getProductId().toString())
.document(product)
);
} catch (IOException e) {
throw new RuntimeException("索引失败", e);
}
}
// 批量索引
public void bulkIndexProducts(List<Product> products) {
try {
BulkRequest.Builder br = new BulkRequest.Builder();
for (Product p : products) {
br.operations(op -> op
.index(idx -> idx
.index("products")
.id(p.getProductId().toString())
.document(p)
)
);
}
client.bulk(br.build());
} catch (IOException e) {
throw new RuntimeException("批量索引失败", e);
}
}
}
2. 拼音搜索支持
json
PUT /products
{
"settings": {
"analysis": {
"analyzer": {
"pinyin_analyzer": {
"tokenizer": "pinyin_tokenizer",
"filter": ["lowercase"]
}
},
"tokenizer": {
"pinyin_tokenizer": {
"type": "pinyin",
"lowercase": true,
"keep_first_letter": true,
"keep_full_pinyin": true,
"keep_none_chinese": true
}
}
}
},
"mappings": {
"properties": {
"product_name": {
"type": "text",
"analyzer": "ik_max_word",
"fields": {
"pinyin": {
"type": "text",
"analyzer": "pinyin_analyzer"
}
}
}
}
}
}
五、性能优化
1. 查询优化
java
// 避免深度分页,使用Search After
/**
 * Fetches one page of products using {@code search_after} instead of deep {@code from/size} paging.
 *
 * <p>Results are sorted by {@code product_id} ascending, because {@code search_after}
 * needs a deterministic sort to resume from.
 *
 * @param request   search criteria; NOTE(review): not used yet, every call matches all documents
 * @param sortValue the {@code lastSortValue} returned by the previous page, or {@code null} for the first page
 * @return the page of products and the sort value of its last hit ({@code null} when the page is empty)
 * @throws RuntimeException if the request to Elasticsearch fails with an I/O error
 */
public SearchResult<Product> searchAfter(SearchRequest request, String sortValue) {
    try {
        SearchResponse<Product> response = client.search(s -> {
            var builder = s.index("products")
                    .query(q -> q.matchAll(m -> m))
                    .size(20)
                    // search_after is only valid together with an explicit sort.
                    .sort(so -> so.field(f -> f.field("product_id").order(SortOrder.Asc)));
            if (sortValue != null) {
                builder.searchAfter(
                        co.elastic.clients.elasticsearch._types.FieldValue.of(sortValue));
            }
            return builder;
        }, Product.class);

        var hits = response.hits().hits();

        // Sort value of the last hit, handed back so the caller can request the next page.
        String lastSortValue = null;
        if (!hits.isEmpty()) {
            var lastSort = hits.get(hits.size() - 1).sortValues();
            if (!lastSort.isEmpty()) {
                lastSortValue = lastSort.get(0)._toJsonString();
            }
        }

        return SearchResult.<Product>builder()
                .items(hits.stream()
                        .map(hit -> hit.source())
                        .filter(p -> p != null)
                        .collect(Collectors.toList()))
                .lastSortValue(lastSortValue)
                .build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
2. 写入优化
yaml
# 写入优化配置
index:
number_of_shards: 3
number_of_replicas: 0 # 写入时设为0,完成后设为1
# 刷新间隔(降低刷新频率提高写入性能)
refresh_interval: 30s
# 合并策略
merge:
scheduler.max_thread_count: 1
# 锁定进程内存,避免 JVM 堆被交换到磁盘(swap)
bootstrap.memory_lock: true
3. 分片分配策略
json
# 查看分片分配
GET /_cat/shards?v
# 手动移动分片
POST /_cluster/reroute
{
"commands": [
{
"move": {
"index": "products",
"shard": 0,
"from_node": "node-1",
"to_node": "node-2"
}
}
]
}
# 设置副本数量
PUT /products/_settings
{
"number_of_replicas": 1
}
六、监控与运维
1. 集群健康
bash
# 查看集群健康状态
GET /_cluster/health
# 查看节点状态
GET /_cat/nodes?v
# 查看索引状态
GET /_cat/indices?v
2. 性能监控
json
# 节点统计
GET /_nodes/stats
# 索引统计
GET /products/_stats
# 集群统计
GET /_cluster/stats
3. Curator管理
yaml
# curator.yml
client:
hosts:
- localhost
port: 9200
actions:
1:
action: delete_indices
description: 删除30天前的索引
filters:
- filtertype: age
source: creation_date
direction: older
unit: days
unit_count: 30
七、总结
ElasticSearch是强大的分布式搜索引擎:
- 集群架构:主节点 + 数据节点 + 协调节点
- 索引设计:合理分片 + Mapping + 别名
- 查询优化:避免深度分页 + Search After
- 写入优化:降低副本 + 批量写入
最佳实践:
- 根据数据量合理设置分片数
- 做好冷热数据分离
- 定期维护(合并、清理)
- 完善的监控告警
个人观点,仅供参考