[学习笔记]IK分词器的学习

IK分词器有几种模式

复制代码
# Test the analyzer: run the sample text through the "standard" analyzer
POST /_analyze
{
  "text":"黑马程序员学习java太棒了",
  "analyzer": "standard"
}
复制代码
# Test the analyzer: run the sample text through the "ik_max_word" analyzer
POST /_analyze
{
  "text":"黑马程序员学习java太棒了",
  "analyzer": "ik_max_word"
}
复制代码
# Test the analyzer: run the sample text through the "ik_smart" analyzer
POST /_analyze
{
  "text":"黑马程序员学习java太棒了",
  "analyzer": "ik_smart"
}

总结

ik_max_word最细切分, 占用内存多,但是分词多

ik_smart最少切分,占用内存少,但是分词少

standard分词器对中文只能逐字切分,无法按词语分词,因此不适合中文

IK分词器如何拓展词条?如何停用词条?

在IK插件config目录下的IKAnalyzer.cfg.xml中配置你的ext.dic(扩展词典)和stopword.dic(停用词词典)

ext.dic

mapping属性

文档操作


分词规则总结

复制代码
-- Hotel table: one row per hotel, with `id` as the primary key.
-- Every character column uses utf8mb4 / utf8mb4_general_ci.
-- Note: `latitude` and `longitude` are stored as varchar(32) strings, not numeric types.
CREATE TABLE `tb_hotel`  (
  `id` bigint(20) NOT NULL COMMENT '酒店id',
  `name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店名称',
  `address` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店地址',
  `price` int(10) NOT NULL COMMENT '酒店价格',
  `score` int(2) NOT NULL COMMENT '酒店评分',
  `brand` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '酒店品牌',
  `city` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '所在城市',
  `star_name` varchar(16) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '酒店星级,1星到5星,1钻到5钻',
  `business` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '商圈',
  `latitude` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '纬度',
  `longitude` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '经度',
  `pic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '酒店图片',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Compact;
elasticsearch 复制代码
# Mapping for the hotel index.
# "name" and "business" are copied into the combined "all" field, which is analyzed with ik_max_word.
# "address" and "pic" are declared with "index": false.
PUT /hotel
{
  "mappings": {
      "properties": {
        "id":{
          "type": "keyword"
        },
        "name":{
          "type": "text",
          "analyzer": "ik_max_word",
          "copy_to": "all"
        },
        "address":{
          "type":"keyword",
          "index": false
        },
        "price":{
          "type":"integer"
        },
        "score":{
          "type":"integer"
        },
        "brand":{
          "type": "keyword"
        },
        "city":{
          "type":"keyword"
        },
        "star_name":{
          "type": "keyword"
        },
        "business":{
          "type": "keyword",
          "copy_to": "all"
        },
        "location":{
          "type":"geo_point"
        },
        "pic":{
          "type": "keyword",
          "index": false
        },
        "all":{
          "type": "text",
          "analyzer": "ik_max_word"
        }
      }
  }
}

Java的测试代码

java 复制代码
package cn.itcast.hotel.constants;

/**
 * Constants used when creating the {@code hotel} index.
 */
public class HotelIndexConstants {
    /**
     * JSON body of the create-index request for the hotel index.
     *
     * <p>{@code name} and {@code business} are copied into the combined
     * {@code all} field, which is analyzed with {@code ik_max_word};
     * {@code address} and {@code pic} are declared with {@code "index": false}.
     *
     * <p>The final {@code "}"} closes the root JSON object; without it the
     * document has one more opening brace than closing braces and is not
     * valid JSON.
     */
    public static final String MAPPING_TEMPLATE = "{\n" +
            "  \"mappings\": {\n" +
            "      \"properties\": {\n" +
            "        \"id\":{\n" +
            "          \"type\": \"keyword\"\n" +
            "        },\n" +
            "        \"name\":{\n" +
            "          \"type\": \"text\",\n" +
            "          \"analyzer\": \"ik_max_word\",\n" +
            "          \"copy_to\": \"all\"\n" +
            "        },\n" +
            "        \"address\":{\n" +
            "          \"type\":\"keyword\",\n" +
            "          \"index\": false\n" +
            "        },\n" +
            "        \"price\":{\n" +
            "          \"type\":\"integer\"\n" +
            "        },\n" +
            "        \"score\":{\n" +
            "          \"type\":\"integer\"\n" +
            "        },\n" +
            "        \"brand\":{\n" +
            "          \"type\": \"keyword\"\n" +
            "        },\n" +
            "        \"city\":{\n" +
            "          \"type\":\"keyword\"\n" +
            "        },\n" +
            "        \"star_name\":{\n" +
            "          \"type\": \"keyword\"\n" +
            "        },\n" +
            "        \"business\":{\n" +
            "          \"type\": \"keyword\",\n" +
            "          \"copy_to\": \"all\"\n" +
            "        },\n" +
            "        \"location\":{\n" +
            "          \"type\":\"geo_point\"\n" +
            "        },\n" +
            "        \"pic\":{\n" +
            "          \"type\": \"keyword\",\n" +
            "          \"index\": false\n" +
            "        },\n" +
            "        \"all\":{\n" +
            "          \"type\": \"text\",\n" +
            "          \"analyzer\": \"ik_max_word\"\n" +
            "        }\n" +
            "      }\n" +
            "  }\n" +
            "}";
}
java 复制代码
package cn.itcast.hotel;

import cn.itcast.hotel.pojo.Hotel;
import cn.itcast.hotel.pojo.HotelDoc;
import cn.itcast.hotel.service.impl.HotelService;
import com.alibaba.fastjson.JSON;
import org.apache.http.HttpHost;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.common.xcontent.XContentType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import java.io.IOException;

import static cn.itcast.hotel.constants.HotelIndexConstants.MAPPING_TEMPLATE;

/**
 * Tests that create and delete the {@code hotel} index through a
 * {@link RestHighLevelClient} pointed at {@code http://localhost:9200}.
 * A new client is built before each test and closed after it.
 *
 * @author lst
 * @date 2023-11-23 13:38
 */
@SpringBootTest
public class HotelT {
    /** Address of the Elasticsearch node the tests talk to. */
    private static final String ES_ADDRESS = "http://localhost:9200";
    /** Name of the index created and deleted by the tests. */
    private static final String INDEX_NAME = "hotel";

    private RestHighLevelClient restHighLevelClient;
    @Autowired
    HotelService hotelService;

    /** Builds the client used by the test that is about to run. */
    @BeforeEach
    public void before() {
        HttpHost esHost = HttpHost.create(ES_ADDRESS);
        restHighLevelClient = new RestHighLevelClient(RestClient.builder(esHost));
    }

    /** Closes the client created in {@link #before()}. */
    @AfterEach
    void tearDown() throws IOException {
        restHighLevelClient.close();
    }

    /** Sends a create-index request for the hotel index using {@code MAPPING_TEMPLATE} as its JSON source. */
    @Test
    public void testCreateIndex() throws IOException {
        CreateIndexRequest createRequest = new CreateIndexRequest(INDEX_NAME);
        createRequest.source(MAPPING_TEMPLATE, XContentType.JSON);
        restHighLevelClient.indices().create(createRequest, RequestOptions.DEFAULT);
    }

    /** Sends a delete-index request for the hotel index. */
    @Test
    public void testDeleteIndex() throws IOException {
        DeleteIndexRequest deleteRequest = new DeleteIndexRequest(INDEX_NAME);
        restHighLevelClient.indices().delete(deleteRequest, RequestOptions.DEFAULT);
    }
}
相关推荐
冷雨夜中漫步3 小时前
高级系统架构师笔记——系统质量属性与架构评估(1)软件系统质量属性
笔记·架构·系统架构
oe10193 小时前
好文与笔记分享 A Survey of Context Engineering for Large Language Models(中)
人工智能·笔记·语言模型·agent开发
嵌入式-老费4 小时前
自己动手写深度学习框架(快速学习python和关联库)
开发语言·python·学习
许长安4 小时前
C++中指针和引用的区别
c++·经验分享·笔记
摇滚侠4 小时前
Spring Boot3零基础教程,StreamAPI 介绍,笔记98
java·spring boot·笔记
执笔论英雄4 小时前
【大模型训练】zero 学习及deepseed实战
人工智能·深度学习·学习
摇滚侠5 小时前
Spring Boot3零基础教程,StreamAPI 的基本用法,笔记99
java·spring boot·笔记
岑梓铭5 小时前
《考研408数据结构》第六章(5.5树的应用)复习笔记
数据结构·笔记·考研·408·ds
前路不黑暗@6 小时前
Java:继承与多态
java·开发语言·windows·经验分享·笔记·学习·学习方法
2501_916007476 小时前
从零开始学习iOS App开发:Xcode、Swift和发布到App Store完整教程
android·学习·ios·小程序·uni-app·iphone·xcode