Java与Elasticsearch集成详解
1. 环境配置
首先,你需要在Maven项目中添加Elasticsearch Java客户端依赖:
xml
<dependencies>
<!-- Elasticsearch Java Client -->
<dependency>
<groupId>co.elastic.clients</groupId>
<artifactId>elasticsearch-java</artifactId>
<version>8.10.0</version>
</dependency>
<!-- JSON 处理 -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.15.2</version>
</dependency>
<!-- Jakarta JSON 处理 -->
<dependency>
<groupId>jakarta.json</groupId>
<artifactId>jakarta.json-api</artifactId>
<version>2.1.1</version>
</dependency>
</dependencies>
2. 客户端初始化
Elasticsearch Java客户端有三层结构:
- 低级客户端:处理HTTP请求
- 传输层:处理序列化/反序列化
- API客户端:提供类型安全的API
java
// 创建低级客户端
RestClient restClient = RestClient.builder(
new HttpHost("localhost", 9200)).build();
// 创建传输层
ElasticsearchTransport transport = new RestClientTransport(
restClient, new JacksonJsonpMapper());
// 创建API客户端
ElasticsearchClient client = new ElasticsearchClient(transport);
3. 基础操作
创建索引
java
// 创建索引并设置映射
CreateIndexRequest requestWithMapping = new CreateIndexRequest.Builder()
.index("products")
.mappings(m -> m
.properties("name", p -> p.text(t -> t
.analyzer("ik_max_word")
.searchAnalyzer("ik_smart")))
.properties("price", p -> p.float_())
.properties("description", p -> p.text(t -> t
.analyzer("ik_max_word")))
.properties("createTime", p -> p.date())
)
.build();
client.indices().create(requestWithMapping);
添加文档
java
// 定义产品实体类
public class Product {
private String id;
private String name;
private float price;
private String description;
private Date createTime;
// getters and setters
}
// 添加文档
Product product = new Product();
product.setId("1");
product.setName("iPhone 13");
product.setPrice(5999.00f);
product.setDescription("Apple iPhone 13 128GB");
product.setCreateTime(new Date());
IndexRequest<Product> indexRequest = new IndexRequest.Builder<Product>()
.index("products")
.id(product.getId())
.document(product)
.build();
IndexResponse response = client.index(indexRequest);
查询文档
java
// 根据ID查询
GetRequest getRequest = new GetRequest.Builder()
.index("products")
.id("1")
.build();
GetResponse<Product> getResponse = client.get(getRequest, Product.class);
// 条件查询
SearchRequest searchRequest = new SearchRequest.Builder()
.index("products")
.query(q -> q
.match(m -> m
.field("name")
.query("iPhone")
)
)
.build();
SearchResponse<Product> searchResponse = client.search(searchRequest, Product.class);
4. 高级搜索
布尔查询
java
SearchRequest boolRequest = new SearchRequest.Builder()
.index("products")
.query(q -> q
.bool(b -> b
.must(m -> m
.match(t -> t
.field("name")
.query("iPhone")
)
)
.filter(f -> f
.range(r -> r
.field("price")
.gte(JsonData.of(5000))
.lte(JsonData.of(10000))
)
)
.should(s -> s
.match(t -> t
.field("description")
.query("Pro")
)
)
.minimumShouldMatch("1")
)
)
.build();
聚合查询
java
SearchRequest aggregationRequest = new SearchRequest.Builder()
.index("products")
.size(0) // 不返回文档,只返回聚合结果
.aggregations("price_stats", a -> a
.stats(s -> s
.field("price")
)
)
.build();
高亮搜索结果
java
SearchRequest highlightRequest = new SearchRequest.Builder()
.index("products")
.query(q -> q
.match(m -> m
.field("description")
.query("iPhone")
)
)
.highlight(h -> h
.fields("description", f -> f
.preTags("<em>")
.postTags("</em>")
.numberOfFragments(3)
)
)
.build();
5. 批量操作
批量操作可以大幅提高性能,减少网络请求次数:
java
// 准备批量数据
List<Product> products = new ArrayList<>();
for (int i = 1; i <= 100; i++) {
Product product = new Product();
product.setId(String.valueOf(i));
product.setName("Product " + i);
product.setPrice(1000 + i * 10.0f);
product.setDescription("Description for product " + i);
product.setCreateTime(new Date());
products.add(product);
}
// 批量索引
BulkRequest.Builder br = new BulkRequest.Builder();
for (Product product : products) {
br.operations(op -> op
.index(idx -> idx
.index("products")
.id(product.getId())
.document(product)
)
);
}
BulkRequest request = br.build();
BulkResponse response = client.bulk(request);
6. 最佳实践
客户端配置优化
java
RestClientBuilder builder = RestClient.builder(
new HttpHost("localhost", 9200))
.setRequestConfigCallback(requestConfigBuilder -> {
// 设置连接超时时间(默认1秒)
requestConfigBuilder.setConnectTimeout(5000);
// 设置套接字超时时间(默认30秒)
requestConfigBuilder.setSocketTimeout(60000);
return requestConfigBuilder;
})
.setHttpClientConfigCallback(httpClientBuilder -> {
// 设置最大连接数
httpClientBuilder.setMaxConnPerRoute(50);
httpClientBuilder.setMaxConnTotal(200);
return httpClientBuilder;
});
异常处理
java
try {
SearchRequest searchRequest = new SearchRequest.Builder()
.index("non_existent_index")
.query(q -> q.matchAll(m -> m))
.build();
SearchResponse<Product> response = client.search(searchRequest, Product.class);
} catch (ElasticsearchException e) {
if (e.status() == 404) {
System.err.println("索引不存在: " + e.getMessage());
} else if (e.status() == 400) {
System.err.println("请求错误: " + e.getMessage());
}
} catch (IOException e) {
System.err.println("IO 异常: " + e.getMessage());
}
封装服务类
java
@Service
public class ElasticsearchService<T> {
private final ElasticsearchClient client;
private final Class<T> entityClass;
private final String indexName;
// 构造函数、保存、查询、删除等方法...
// 条件查询
public List<T> search(Query query, int from, int size) throws IOException {
SearchRequest request = new SearchRequest.Builder()
.index(indexName)
.query(query)
.from(from)
.size(size)
.build();
SearchResponse<T> response = client.search(request, entityClass);
return response.hits().hits().stream()
.map(Hit::source)
.collect(Collectors.toList());
}
}
7. Spring Data Elasticsearch
如果你使用Spring Boot,可以通过Spring Data Elasticsearch更简单地集成:
xml
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
实体类定义:
java
@Document(indexName = "products")
public class Product {
@Id
private String id;
@Field(type = FieldType.Text, analyzer = "ik_max_word")
private String name;
@Field(type = FieldType.Float)
private float price;
// 其他字段...
}
Repository接口:
java
public interface ProductRepository extends ElasticsearchRepository<Product, String> {
// 根据名称查询
List<Product> findByName(String name);
// 根据价格范围查询
List<Product> findByPriceBetween(float minPrice, float maxPrice);
// 自定义查询
@Query("{\"bool\": {\"must\": [{\"match\": {\"name\": \"?0\"}}]}}")
Page<Product> findByNameCustomQuery(String name, Pageable pageable);
}
总结
本指南详细介绍了Java与Elasticsearch的集成方法,包括:
- 环境配置:添加依赖和初始化客户端
- 基础操作:索引创建、文档的增删改查
- 高级搜索:布尔查询、聚合、高亮等
- 批量操作:提高性能的批量处理
- 最佳实践:客户端优化、异常处理、服务封装
- Spring集成:使用Spring Data简化操作
通过这些内容,你应该能够在Java应用中有效地使用Elasticsearch,实现高性能的搜索和数据分析功能。
如果你有特定的使用场景或问题,可以参考官方文档或社区资源获取更多信息。