go get github.com/elastic/go-elasticsearch/v8
文档:https://www.elastic.co/docs/reference/elasticsearch/clients/go/getting-started#_indexing_documents
下载解压 elasticsearch-8.10.4
bash
D:\dev\php\magook\trunk\server\elasticsearch-8.10.4\bin
elasticsearch.bat
bash
D:\dev\php\magook\trunk\server\elasticsearch-head
npm run start
如果有向量类型的字段,需要先定义 mappings。
还需要特别注意的是 embedding 维度要匹配:elasticsearch 中的 dense_vector 类型,在版本 8.0 -- 8.11 中,默认的最高维度是 2048,在 **8.12** 及之后是 4096,当然,这个值越高计算越慢。数据的维度必须与 mappings 中声明的 dims 一致,并且不能超过 es 能存储的最高维度,否则会报错。
此处采用火山引擎的模型doubao-embedding-large-text-250515,其维度是2048,参考 模型列表
go
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"log"
"os"
"strings"
"github.com/cloudwego/eino-ext/components/embedding/ark"
"github.com/elastic/go-elasticsearch/v8"
"github.com/elastic/go-elasticsearch/v8/esapi"
)
// main builds an Elasticsearch client for http://localhost:9200, creates the
// vector index, embeds one sample passage with the Ark embedder and stores the
// passage together with its vector as document "1".
//
// The Ark base URL, API key and model name are read from the ARK_BASE_URL,
// ARK_API_KEY and ARK_EMBEDDING_MODEL environment variables.
func main() {
	// Single source of truth for the index used by both createIndex and
	// indexDocument below.
	const indexName = "my_index_vector"

	client, err := elasticsearch.NewClient(elasticsearch.Config{
		Addresses: []string{"http://localhost:9200"},
	})
	if err != nil {
		log.Panicf("connect es8 failed, err=%v", err)
	}

	// The dense_vector mapping must be defined before any document that
	// carries an embedding is indexed.
	createIndex(client, indexName)

	ctx := context.Background()
	ebs, err := ark.NewEmbedder(ctx, &ark.EmbeddingConfig{
		BaseURL: os.Getenv("ARK_BASE_URL"),
		APIKey:  os.Getenv("ARK_API_KEY"),
		Model:   os.Getenv("ARK_EMBEDDING_MODEL"),
	})
	if err != nil {
		panic(err)
	}

	content := "Eino 旨在提供 Golang 语言的 AI 应用开发框架。 Eino 参考了开源社区中诸多优秀的 AI 应用开发框架,例如 LangChain、LangGraph、LlamaIndex 等,提供了更符合 Golang 编程习惯的 AI 应用开发框架。"
	res, err := ebs.EmbedStrings(ctx, []string{content})
	if err != nil {
		panic(err)
	}
	// res[0] is dereferenced below; fail with a readable message instead of a
	// bare index-out-of-range panic if the embedder returned nothing.
	if len(res) == 0 {
		log.Panicf("embedding returned no vectors")
	}
	fmt.Println("info: ", len(res), len(res[0]))

	indexDocument(client, indexName, "1", Document{
		Title:     "Eino是什么",
		Content:   content,
		Embedding: res[0],
	})
}
// createIndex creates the index indexName with a mapping that declares two
// text fields ("title", "content") and an indexed 2048-dimensional
// dense_vector field ("embedding") compared by cosine similarity.
//
// A transport-level failure terminates the process via log.Fatalf. An error
// response from Elasticsearch is only logged, and the function returns
// normally.
func createIndex(es *elasticsearch.Client, indexName string) {
	// The payload is kept verbatim; "dims" must equal the length of the
	// vectors that will be stored in "embedding".
	const payload = `{
"mappings": {
"properties": {
"title": { "type": "text" },
"content": { "type": "text" },
"embedding": {
"type": "dense_vector",
"dims": 2048,
"index": true,
"similarity": "cosine"
}
}
}
}`

	request := esapi.IndicesCreateRequest{
		Index: indexName,
		Body:  strings.NewReader(payload),
	}

	resp, err := request.Do(context.Background(), es)
	if err != nil {
		log.Fatalf("Error creating index: %s", err)
	}
	defer resp.Body.Close()

	if !resp.IsError() {
		fmt.Println("Index created successfully")
		return
	}
	log.Printf("Error: %s", resp.String())
}
// Document is the JSON body written to the vector index. Its JSON keys match
// the field names declared in the index mapping.
type Document struct {
	// Title is serialized under the "title" key.
	Title string `json:"title"`
	// Content is serialized under the "content" key.
	Content string `json:"content"`
	// Embedding is the vector for the document, serialized under the
	// "embedding" key.
	Embedding []float64 `json:"embedding"`
}
// indexDocument serializes doc to JSON and writes it to indexName under the
// document ID id, requesting a refresh so the write is searchable right away.
//
// If doc cannot be serialized (encoding/json rejects NaN and ±Inf floats,
// which can appear in an embedding), the error is logged and nothing is sent.
// A transport-level failure terminates the process via log.Fatalf. An error
// response from Elasticsearch is only logged.
func indexDocument(es *elasticsearch.Client, indexName, id string, doc Document) {
	data, err := json.Marshal(doc)
	if err != nil {
		// Previously this error was dropped and an empty body was sent.
		log.Printf("Error marshaling document %s: %s", id, err)
		return
	}

	req := esapi.IndexRequest{
		Index:      indexName,
		DocumentID: id,
		// Wrap the bytes directly; no need for a []byte -> string copy.
		Body:    bytes.NewReader(data),
		Refresh: "true",
	}
	res, err := req.Do(context.Background(), es)
	if err != nil {
		log.Fatalf("Error indexing document: %s", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		log.Printf("Error: %s", res.String())
		return
	}
	fmt.Printf("Document %s indexed\n", id)
}
输出
bash
Index created successfully
info: 1 2048
Document 1 indexed

knn向量检索
go
// knnSearch runs a kNN query against the "embedding" field of indexName and
// prints the score and title of every hit to standard output.
//
// queryVector is the vector to search with, and k is the number of nearest
// neighbours requested; num_candidates is set to k*10.
//
// If the query cannot be serialized (encoding/json rejects NaN and ±Inf
// floats), the error is logged and no request is sent. A transport failure or
// an undecodable response body terminates the process via log.Fatalf. An
// error response, or a response without the expected "hits" structure, is
// logged and the function returns.
func knnSearch(es *elasticsearch.Client, indexName string, queryVector []float32, k int) {
	query := map[string]interface{}{
		"knn": map[string]interface{}{
			"field":          "embedding",
			"query_vector":   queryVector,
			"k":              k,
			"num_candidates": k * 10, // candidate count (k*10 is the suggested value)
		},
	}
	body, err := json.Marshal(query)
	if err != nil {
		// Previously this error was dropped and an empty search body was sent.
		log.Printf("Error marshaling query: %s", err)
		return
	}

	req := esapi.SearchRequest{
		Index: []string{indexName},
		Body:  bytes.NewReader(body),
	}
	res, err := req.Do(context.Background(), es)
	if err != nil {
		log.Fatalf("Error searching: %s", err)
	}
	defer res.Body.Close()

	if res.IsError() {
		log.Printf("Search error: %s", res.String())
		return
	}

	var r map[string]interface{}
	if err := json.NewDecoder(res.Body).Decode(&r); err != nil {
		log.Fatalf("Error parsing response: %s", err)
	}

	// Walk hits.hits with checked assertions so an unexpected response shape
	// is reported instead of causing a panic.
	outer, ok := r["hits"].(map[string]interface{})
	if !ok {
		log.Printf("Search error: response has no \"hits\" object")
		return
	}
	hits, ok := outer["hits"].([]interface{})
	if !ok {
		log.Printf("Search error: response has no \"hits.hits\" array")
		return
	}

	// Print the results.
	for _, item := range hits {
		hit, ok := item.(map[string]interface{})
		if !ok {
			continue
		}
		// Fall back to an empty title when the hit carries no usable _source.
		var title interface{} = ""
		if source, ok := hit["_source"].(map[string]interface{}); ok {
			if t, present := source["title"]; present && t != nil {
				title = t
			}
		}
		fmt.Printf("Score: %f, Title: %s\n", hit["_score"], title)
	}
}