在nodejs中使用ElasticSearch（二）核心概念，应用

核心概念

1、索引（index）

索引在Elasticsearch7之后，相当于关系型数据库中的表，之前相当于关系型数据库中的数据库。

Haskell 复制代码

# 创建索引
PUT /my_index

# 获取指定索引
GET /my_index

# 获取所有索引
GET /_cat/indices?v

# 模糊匹配索引
POST *index*/_search

# 删除索引
DELETE /my_index

# 创建索引，同时指定索引结构
# number_of_shards:主分片数量，数据会被分割到这些分片中，创建后不可修改，需根据数据量预估，7.0 起默认1（7.0 之前默认5）
# number_of_replicas:每个主分片的副本数，用于故障转移和提升查询性能，可动态修改，默认1
PUT /student_index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "name":{
        "type": "text"
      },
      "age":{
        "type": "integer"
      },
      "enrolled_date":{
        "type": "date"
      }
    }
  }
}

# 修改索引设置
PUT /student_index/_settings
{
  "settings": {
    "number_of_replicas": 2
  }
}

# 添加索引mapping属性（只能新增字段；已存在字段的类型不能修改，例如 name 已是 text 时再设为 integer 会报错）
PUT /student_index/_mapping
{
  "properties":{
    "name":{
      "type":"integer"
    }
  }
}

#设置别名
#refresh_interval:指定了执行刷新操作的频率，默认值是 1s（秒）。
PUT my_index2
{
  "aliases": {
    "mi": {}
  },
  "settings": {
    "refresh_interval": "30s",
    "number_of_shards": 1,
    "number_of_replicas": 1
  }
}

# 多个索引可以有相同的别名，使用这个相同的别名的时候，这些索引的数据可以被一起查询
# 为索引添加别名
POST /_aliases 
{
  "actions":[
    {
      "add":{
        "index":"my_index2",
        "alias":"mi3"
      }
    }
  ]
}


# 批量创建索引增加数据：文档不存在则添加，已存在则报错
POST _bulk
{"create":{"_index":"my_index3","_id":1}}
{"id":1,"title":"hello3 1"}
{"create":{"_index":"my_index4","_id":1}}
{"id":2,"title":"hello4"}

# 批量创建索引增加数据：文档不存在则添加，已存在则覆盖更新
POST _bulk
{"index":{"_index":"my_index3","_id":1}}
{"id":1,"title":"hello3 1"}
{"index":{"_index":"my_index3","_id":2}}
{"id":1,"title":"hello3 2"}
{"index":{"_index":"my_index3","_id":3}}
{"id":1,"title":"hello3 3"}
{"index":{"_index":"my_index4","_id":4}}
{"id":2,"title":"hello4"}

2、映射（mapping）

类似于关系型数据库中的表结构，它决定了字段的类型、字段如何被索引和存储等。

Haskell 复制代码

# 设置content字段属性
POST /my_index/_mapping 
{
  "properties":{
    "content":{
      "type":"text",
      "analyzer":"ik_max_word",
      "search_analyzer":"ik_smart"
    }
  }
}

#"type": 指定该字段的类型。在本例中，content 字段的类型是 text。text 类型用于存储需要全文搜索的内容。

#"analyzer": 定义当索引文档时使用的分析器。这里设置的是 "ik_max_word" 分词器，它会将字段内容分词成尽可能多的词语。这在需要高召回率（recall）的搜索场景中特别有用。

#"search_analyzer": 定义在搜索字段时使用的分析器。这里设置的是 "ik_smart" 分词器，它会将查询内容分词成相对少的、但意义完整的词语。这在需要高精确度（precision）的搜索场景中特别有用。

3、文档（document）

文档是存储在索引中的基本单元，相当于关系型数据库中的行。每个文档是一个JSON格式的对象，包含一个或多个字段。

Haskell 复制代码

# 添加数据
POST /my_index/_create/1
{
  "content":"特朗普表示将对进口汽车征收 25% 左右的关税，这对全球汽车产业会带来哪些影响？我国车企该如何应对？"
}

# 查询数据
GET /my_index/_search
{
  "query": {
    "match": {
      "content": "特朗普对进口汽车征收关税"
    }
  }
}

# 获取10条数据
GET /my_index/_search
{
  "size": 10,
  "query": {
    "match_all": {}
  }
}

# 修改数据
POST /my_index/_update/1
{
  "doc": {
    "content": "梁文锋参与发表的 DeepSeek 新论文 NSA 注意力机制，有哪些信息值得关注？会带来哪些影响？"
  }
}

# 删除数据
DELETE /my_index/_doc/1

示例

Haskell 复制代码

# 获取所有索引
GET /_cat/indices?v

DELETE /employee

PUT /employee
{
  "settings": {
    "number_of_replicas": 1,
    "number_of_shards": 1
  },
  "mappings": {
    "properties": {
      "name":{
        "type": "keyword"
      },
      "sex":{
        "type": "integer"
      },
      "age":{
        "type": "integer"
      },
      "address":{
        "type": "text",
        "analyzer": "ik_max_word",
        "fields": {
          "keyword":{
            "type":"keyword"
          }
        }
      },
      "remark":{
        "type": "text",
        "analyzer": "ik_smart",
        "fields": {
          "keyword":{
            "type":"keyword"
          }
        }
      }
    }
  }
}

POST /employee/_bulk
{"index":{"_index":"employee","_id":"1"}}
{"name":"张三","sex":1,"age":25,"address":"广州天河公园","remark":"java developer"}
{"index":{"_index":"employee","_id":"2"}}
{"name":"李四","sex":1,"age":28,"address":"广州荔湾大厦","remark":"java assistant"}
{"index":{"_index":"employee","_id":"3"}}
{"name":"王五","sex":0,"age":20,"address":"广州白云山公园","remark":"php developer"}
{"index":{"_index":"employee","_id":"4"}}
{"name":"赵六","sex":0,"age":22,"address":"长沙橘子洲","remark":"python assistant"}
{"index":{"_index":"employee","_id":"5"}}
{"name":"张龙","sex":0,"age":19,"address":"长沙麓谷企业广场","remark":"java architect"}
{"index":{"_index":"employee","_id":"6"}}
{"name":"赵虎","sex":1,"age":32,"address":"长沙麓谷兴工国际产业园","remark":"java architect "}


# 获取所有数据
GET employee/_search

# 通过id查询
GET employee/_doc/1

# 通过多个id查询
GET employee/_mget
{
  "ids":[1,2,3]
}

#精确匹配
GET /employee/_search
{
  "query": {
    "term": {
      "name": {
        "value": "张三"
      }
    }
  }
}

# 分词检索
GET employee/_search 
{
  "query": {
    "match": {
      "address":"广州白云山"
    }
  }
}

# 范围查询
GET /employee/_search 
{
  "query": {
    "range": {
      "age": {
        "gte": 10,
        "lte": 20
      }
    }
  }
}

# 分页，4~6
GET /employee/_search 
{
  "from": 3,
  "size": 3
}

# 删除id
DELETE /employee/_doc/1

# 批量删除
POST _bulk
{"delete":{"_index":"employee","_id":2}}
{"delete":{"_index":"employee","_id":3}}

POST /employee/_bulk
{"delete":{"_id":4}}
{"delete":{"_id":5}}

# 通过条件删除
POST /employee/_delete_by_query
{
  "query": {
    "match": {
      "address": "广州"
    }
  }
}

# 更新一个字段
POST /employee/_update/1
{
  "doc":{
    "age":28
  }
}

# 批量更新
POST _bulk
{"update":{"_index":"employee","_id":3}}
{"doc":{"age":29}}
{"update":{"_index":"employee","_id":4}}
{"doc":{"age":27}}


# 查询更新
POST /employee/_update_by_query
{
  "query": {
    "term": {
      "name": "张三"
    }
  },
  "script": {
    "source": "ctx._source.age=30"
  }
}

# 保证线程安全更新
# 通过_id获取第一条数据，可查看_id，_seq_no
GET /employee/_doc/1

# 带条件写入：仅当文档当前的 _seq_no 与 _primary_term 都匹配时才更新，否则返回 409 版本冲突
# 可以先获取_seq_no，_primary_term，再修改
# if_seq_no=40：这是一个乐观并发控制（Optimistic Concurrency Control, OCC）参数，表示只有当文档的序列号（sequence number）为 40 时，才会执行该请求。这是为了防止在并发环境中对同一文档的冲突更新。
# if_primary_term=2：这是另一个乐观并发控制的参数，表示只有在文档的主分片任期号（primary term）为 2 时，才会执行请求。这两个参数结合使用，以确保对文档的更新是基于最新的状态。
POST /employee/_doc/1?if_seq_no=40&if_primary_term=2
{
 "name":"张三",
  "sex":1,
  "age":25
}

使用场景

1、微博、新闻、博客

2、按日期分割存储日志索引

在nodejs中使用ElasticSearch，博客文章搜索，无标准写法，自由发挥。

TypeScript 复制代码

import express, { response, request } from 'npm:express';
import fs from 'node:fs';
import { Client } from 'npm:@elastic/elasticsearch';

// Port passed to app.listen at the bottom of the file.
const port = 3000;

// Elasticsearch client for the node at http://localhost:9200.
const client = new Client({ node: 'http://localhost:9200' });

// Express application shared by every route below.
const app = express();

// Register the body parsers: JSON request bodies first, then URL-encoded
// form data.
app
  .use(express.json())
  .use(express.urlencoded({ extended: true }));

// Sample headline strings. The add and update handlers pick random entries
// from this list to use as demo article titles and contents.
const message: string[] = [
  '梁文锋参与发表的 DeepSeek 新论文 NSA 注意力机制，有哪些信息值得关注？会带来哪些影响？',
  '如何看待浙大转专业机械工程报录比超计算机，总报名人数第一？机械专业前途此前是否被低估了？',
  '京东宣布为外卖骑手缴纳五险一金，系行业首个，如何看待此举？是否会引发外卖行业新的变革？',
  '看完《哪吒之魔童闹海》，有个疑问：「元始天尊」出关后，将如何处置大师兄「无量仙翁」及其阐教弟子？',
  '混天绫、风火轮、乾坤圈、火尖枪...有哪些「仙家法宝」如今已经有了科技平替？我们还能还原哪些「神器」？',
  '广州一街道招聘环卫工人年龄不超过 35 岁，街道办回应系单位用工正常需求，这一年龄限制有必要吗？',
  '中国的本地的常见食用淡水鱼，为什么大都刺多？纯粹是因为自然条件不好吗？',
  '小米集团总裁称小米将在 2 月底正式发布米家中央空调，产品都有哪些亮点？对此你有哪些期待？',
  '如果职场上领导告诉你，有另外一个领导看中了你，想调你去他们部门，你会怎么回答？',
  '特朗普表示将对进口汽车征收 25% 左右的关税，这对全球汽车产业会带来哪些影响？我国车企该如何应对？',
  '阿莫林带曼联 13 场英超 14 分，阿莫林执教的曼联成绩怎么样？如何评价他执教理念和风格？',
  '宇树人形机器人距离成为真正意义上的家务机器人还有多少年？',
  '《哪吒 2》超过《头脑特工队 2》成为全球动画电影票房冠军，你觉得该纪录能保持多久？',
  '蒸蛋要怎么做才嫩滑好吃？',
  '美俄两国同意恢复驻华盛顿和莫斯科大使馆的人员配置，未来将在多领域合作，释放了哪些信号？',
  '如果你是面试官，遇到了家人陪着一起来的面试者，你会不会录取他 (她)？',
  '你玩《天国拯救 2》的时候，遭遇过什么令人感动或者震撼的细节或故事？',
  '健身隔天练一个小时，是不是不如每天练半个小时？',
  '如何评价格力专卖店改名为「董明珠健康家」？品牌强绑个人 IP 带来的是流量还是隐患？',
  '为了让孩子「哪怕再高一公分」，家长花几十万带孩子打生长激素，这种方式安全吗？如何理解家长的身高焦虑？',
];

// Root endpoint: replies with a fixed greeting.
app.get('/', (_request: request, reply: response) => {
  const greeting = 'Hello World!';
  reply.send(greeting);
});

// Add an article (demo): makes sure the index exists, derives the next
// numeric id from the current maximum, and indexes a document whose title and
// content are random entries of `message`.
app.get('/add/article', async (_req: request, res: response) => {
  // Placeholder steps a real endpoint would perform before writing:
  // - reject timed-out requests
  // - verify token, IP address and other non-critical user info
  // - verify cookie (HttpOnly, secure, bound to the token, encrypted marker)
  // - validate request parameters
  // - check user permissions
  // - open a DB transaction to avoid inconsistent data (mind the order in
  //   which other endpoints modify the tables, or deadlocks can occur)
  // - insert the article into the database
  // - insert the article/tag relations (many-to-many)

  const index = 'index_article';

  try {
    // Elasticsearch copy of the article, used for tokenized search.
    const exists = await client.indices.exists({ index });

    if (!exists) {
      await client.indices.create({
        index,
        aliases: {
          'article': {},
        },
        mappings: {
          properties: {
            id: { type: 'integer' },
            title: {
              type: 'text',
              analyzer: "ik_max_word",
              fields: { keyword: { type: 'keyword' } },
            },
            content: {
              type: 'text',
              analyzer: "ik_smart",
              fields: { keyword: { type: 'keyword' } },
            },
            like: { type: 'integer' },
            unlike: { type: 'integer' },
            follow: { type: 'integer' },
            createdAt: { type: 'date' },
            updatedAt: { type: 'date' },
          }
        },
        settings: {
          number_of_replicas: 1,
          number_of_shards: 1,
        },
      });
    }

    console.log(await client.count({ index }));

    // Only the aggregation result is needed, so no hits are requested.
    const maxIdResult = await client.search({
      index,
      size: 0,
      aggs: {
        max_id: {
          max: {
            field: 'id'
          }
        }
      },
    });

    // The max aggregation reports `value: null` when no document has an id.
    const currentMaxId =
      (maxIdResult.aggregations?.max_id as { value: number | null } | undefined)?.value;
    console.log(currentMaxId);

    // Start at 1 for an empty index. A long random digit string would not
    // fit the `integer` mapping of `id` and the write would be rejected.
    // The explicit number check also keeps a legitimate max id of 0 usable.
    const nextId = typeof currentMaxId === 'number' ? currentMaxId + 1 : 1;

    const now = new Date().toISOString();
    const data = {
      id: nextId,
      title: (message[Math.floor(Math.random() * message.length)]),
      content: (message[Math.floor(Math.random() * message.length)]),
      like: 0,
      unlike: 0,
      follow: 0,
      createdAt: now,
      updatedAt: now,
      aId: 1,
    };

    // `wait_for` makes the new document visible to search before responding,
    // so the next max-id lookup sees it instead of reusing the same id.
    // NOTE(review): concurrent requests can still compute the same id; a real
    // system should take the id from the database insert.
    console.log(await client.index({
      index,
      id: String(data.id),
      document: data,
      refresh: 'wait_for',
    }));

    // Placeholder follow-up steps:
    // - update the redis cache
    // - write logs (optionally via rabbitmq/kafka with manual acknowledgement)

    res.send('Hello World!');
  } catch (err) {
    // Without this, a rejected Elasticsearch call would leave the request
    // unanswered.
    console.error(err);
    res.status(500).send('failed to add article');
  }
});

// Search articles (demo): paginated query against `index_article`, matching
// on `title` when a title is supplied and returning all documents otherwise.
app.get('/search/article', async (req: request, res: response) => {
  // Placeholder steps a real endpoint would perform first:
  // - reject timed-out requests
  // - verify token, IP address and other non-critical user info
  // - verify cookie (HttpOnly, secure, bound to the token, encrypted marker)
  // - validate request parameters
  // - check user permissions
  // - look the result up in the redis cache

  // Query-string values are not guaranteed to be numeric; accept only plain
  // positive integers and fall back to the default otherwise, so `from` and
  // `size` can never become NaN or negative.
  const toPositiveInt = (raw: unknown, fallback: number): number => {
    if (typeof raw !== 'string' || !/^\d+$/.test(raw)) {
      return fallback;
    }
    const parsed = Number(raw);
    return parsed > 0 ? parsed : fallback;
  };

  const currentPage = toPositiveInt(req.query?.currentPage, 1);
  const pageSize = toPositiveInt(req.query?.pageSize, 5);
  // Only a non-empty string is usable as a match query.
  const rawTitle = req.query?.title;
  const title = typeof rawTitle === 'string' && rawTitle !== '' ? rawTitle : null;

  try {
    // Elasticsearch query; the `query` clause is added only when a title
    // was supplied.
    const list = await client.search({
      index: 'index_article',
      size: pageSize,
      from: (currentPage - 1) * pageSize,
      ...(title ? { query: { "match": { "title": title } } } : {}),
    });

    // Placeholder follow-up steps: database lookup, logging.

    res.send(list);
  } catch (err) {
    // Without this, a rejected Elasticsearch call would leave the request
    // unanswered.
    console.error(err);
    res.status(500).send('search failed');
  }
});

// Update an article (demo): overwrites title and content of the document
// whose `id` field equals the `id` query parameter (1 when omitted) with
// random entries of `message`.
app.get('/update/article', async (req: request, res: response) => {
  // Placeholder steps a real endpoint would perform first:
  // - reject timed-out requests
  // - verify token, IP address and other non-critical user info
  // - verify cookie (HttpOnly, secure, bound to the token, encrypted marker;
  //   keep the cookie content small)
  // - validate request parameters
  // - check user permissions
  // - update the database rows
  // - update the redis cache

  // Keep the demo default of 1 when no id is given, but reject anything that
  // is not a plain non-negative integer instead of sending it on to the term
  // query.
  const rawId = req.query?.id;
  let articleId = 1;
  if (rawId) {
    if (typeof rawId !== 'string' || !/^\d+$/.test(rawId)) {
      res.status(400).send('invalid id');
      return;
    }
    articleId = Number(rawId);
  }

  // Update the Elasticsearch copy.
  const data = {
    title: (message[Math.floor(Math.random() * message.length)]),
    content: (message[Math.floor(Math.random() * message.length)]),
    updatedAt: new Date().toISOString(),
  };

  try {
    const qr = await client.updateByQuery({
      index: 'index_article',
      refresh: true,
      "query": {
        "term": { // exact match
          "id": articleId,
        },
      },
      "script": {
        // Values are passed through `params` rather than interpolated into
        // the source: text containing quotes can no longer break or alter
        // the script, and the constant source can be compiled once and reused.
        "lang": 'painless',
        "source": 'ctx._source.title = params.title; ctx._source.content = params.content; ctx._source.updatedAt = params.updatedAt;',
        "params": data,
      }
    });

    console.log(qr);

    // Placeholder follow-up step: logging.

    res.send('hello world!');
  } catch (err) {
    // Without this, a rejected Elasticsearch call would leave the request
    // unanswered.
    console.error(err);
    res.status(500).send('failed to update article');
  }
});

// Delete an article (demo): removes every document in `index_article` whose
// `id` field equals the `id` query parameter (1 when omitted).
app.get('/delete/article', async (req: request, res: response) => {
  // Placeholder steps a real endpoint would perform first:
  // - reject timed-out requests
  // - verify token, IP address and other non-critical user info
  // - verify cookie (HttpOnly, secure, bound to the token, encrypted marker;
  //   keep the cookie content small)
  // - validate request parameters
  // - check user permissions
  // - open a DB transaction (mind the table modification order)
  // - delete the article's comments
  // - delete the article/tag relations
  // - delete the article record itself
  // - update the redis cache

  // Keep the demo default of 1 when no id is given, but reject anything that
  // is not a plain non-negative integer instead of sending it on to the term
  // query.
  // NOTE(review): defaulting a destructive call to id 1 is risky; consider
  // requiring the parameter.
  const rawId = req.query?.id;
  let articleId = 1;
  if (rawId) {
    if (typeof rawId !== 'string' || !/^\d+$/.test(rawId)) {
      res.status(400).send('invalid id');
      return;
    }
    articleId = Number(rawId);
  }

  try {
    // Remove the Elasticsearch copy.
    const qr = await client.deleteByQuery({
      index: 'index_article',
      "query": {
        "term": { // exact match
          "id": articleId,
        },
      },
    });
    console.log(qr);

    // Placeholder follow-up step: logging.

    res.send('hello world');
  } catch (err) {
    // Without this, a rejected Elasticsearch call would leave the request
    // unanswered.
    console.error(err);
    res.status(500).send('failed to delete article');
  }
});

// Start the server on `port`; the listen callback logs which port is in use.
const announceListening = () => {
  console.log(`app listening on port ${port}`);
};

app.listen(port, announceListening);