java
import com.xinren.cdc.XrDataCdcApplication;
import com.xinren.cdc.dao.domian.es.DenseVectorData;
import com.xinren.cdc.dao.index.DenseVectorIndex;
import org.dromara.easyes.core.conditions.select.LambdaEsQueryWrapper;
import org.dromara.easyes.core.conditions.update.LambdaEsUpdateWrapper;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScriptScoreQueryBuilder;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import javax.annotation.Resource;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
 * Tests that drive the {@link DenseVectorIndex} mapper: index creation, inserting one
 * sample document with a vector, scoring documents with a Painless script, and
 * deleting the sample document again.
 *
 * <p>NOTE(review): the tests presumably need a reachable Elasticsearch instance, and
 * {@code queryIndex} presumably expects {@code createIndex} and {@code addIndex} to have
 * been run beforehand; no execution order is declared here, so confirm before relying
 * on it.
 */
@SpringBootTest(classes = XrDataCdcApplication.class)
class XrDataCdcApplicationTests {

    /** Id of the sample document written by {@code addIndex} and removed by {@code delIndex}. */
    private static final String SAMPLE_ID = "1";

    /**
     * Painless source that turns the Manhattan (L1) distance between the stored
     * {@code vectors} field and {@code params.query_vector} into a similarity score.
     *
     * <p>The distance is mapped through {@code 1 / (1 + distance)} so that closer vectors
     * receive higher scores; returning the raw distance would rank the farthest
     * documents first, because script_score sorts by descending score.
     *
     * <p>Built-in vector functions that could be used as the script source instead
     * (the field-name argument form is the one documented for Elasticsearch 7.6+;
     * verify against the cluster version in use):
     * <ul>
     *   <li>cosine similarity: {@code cosineSimilarity(params.query_vector, 'vectors') + 1.0}</li>
     *   <li>dot product: {@code dotProduct(params.query_vector, 'vectors') + 1}</li>
     *   <li>Manhattan distance: {@code 1 / (1 + l1norm(params.query_vector, 'vectors'))}</li>
     *   <li>Euclidean distance: {@code 1 / (1 + l2norm(params.query_vector, 'vectors'))}</li>
     * </ul>
     */
    private static final String MANHATTAN_SIMILARITY_SCRIPT =
            "double sum = 0;"
            + "for (int i = 0; i < params.query_vector.length; i++) {"
            + " sum += Math.abs((doc['vectors'].vectorValue)[i] - params.query_vector[i]);"
            + "}"
            + "return 1 / (1 + sum);";

    @Resource
    private DenseVectorIndex denseVectorIndex;

    /**
     * Returns a fresh copy of the 5-dimensional sample vector used both as the stored
     * document vector and as the query vector.
     */
    private static double[] sampleVector() {
        return new double[]{0.1, 0.2, 0.3, 0.4, 0.5};
    }

    /** Creates the index through the mapper. */
    @Test
    void createIndex() {
        denseVectorIndex.createIndex();
    }

    /** Inserts the sample document (id, text and vector) into the index. */
    @Test
    void addIndex() {
        DenseVectorData data = new DenseVectorData();
        data.setId(SAMPLE_ID);
        data.setText("大家好,才是真的好");
        data.setVectors(sampleVector());
        denseVectorIndex.insert(data);
    }

    /**
     * Runs a script_score query over all documents, scoring each one with
     * {@link #MANHATTAN_SIMILARITY_SCRIPT}, and prints the returned documents.
     */
    @Test
    void queryIndex() {
        // The script reads the query vector from params.query_vector.
        Map<String, Object> params = Collections.singletonMap("query_vector", sampleVector());
        Script script = new Script(
                ScriptType.INLINE, Script.DEFAULT_SCRIPT_LANG, MANHATTAN_SIMILARITY_SCRIPT, params);

        // match_all selects every document; the script alone determines the score.
        ScriptScoreQueryBuilder query = new ScriptScoreQueryBuilder(new MatchAllQueryBuilder(), script);

        LambdaEsQueryWrapper<DenseVectorData> wrapper = new LambdaEsQueryWrapper<>();
        wrapper.setSearchSourceBuilder(new SearchSourceBuilder().query(query));

        List<DenseVectorData> hits = denseVectorIndex.selectList(wrapper);
        System.out.println(hits);
    }

    /** Deletes the sample document by matching on its id. */
    @Test
    void delIndex() {
        LambdaEsUpdateWrapper<DenseVectorData> wrapper = new LambdaEsUpdateWrapper<>();
        wrapper.eq(DenseVectorData::getId, SAMPLE_ID);
        denseVectorIndex.delete(wrapper);
    }
}
Elasticsearch 7.14 安装 Elastiknn 插件后，创建索引、写入文档并执行向量近邻查询：
javascript
# Create the index. The elastiknn_nearest_neighbors query only works on Elastiknn
# field types, so the vector field is declared as elastiknn_dense_float_vector
# (not the built-in dense_vector). "dims" is the vector dimension; the "exact"
# model stores vectors for exact (brute-force) search.
# NOTE(review): syntax follows the Elastiknn API docs; confirm against the plugin
# version installed on the 7.14 cluster.
PUT /my_index
{
  "mappings": {
    "properties": {
      "my_vector_field": {
        "type": "elastiknn_dense_float_vector",
        "elastiknn": {
          "dims": 3,
          "model": "exact"
        }
      }
    }
  }
}

# Index one document carrying a 3-dimensional vector plus ordinary text content.
POST /my_index/_doc
{
  "my_vector_field": { "values": [0.1, 0.2, 0.3] },
  "content": "这是文档的其他文本信息..."
}

# Exact nearest-neighbor search. "similarity" may be "l2", "l1" or "cosine";
# the number of returned neighbors (top-k) is set by the search-level "size".
GET /my_index/_search
{
  "size": 5,
  "query": {
    "elastiknn_nearest_neighbors": {
      "field": "my_vector_field",
      "vec": { "values": [0.1, 0.2, 0.3] },
      "model": "exact",
      "similarity": "l2"
    }
  }
}