springboot实现热搜后端elk

需求描述:热搜方式的分词查询。

数据环境:mysql中存在已爬数据表名t_bj(id,title,content,publishtime),使用logstash同步到es中,springboot连接es实现rest接口给前端页面。

springboot

pom文件:

    <!-- Spring Boot starter for Spring Data Elasticsearch; the Java classes below use its
         repository and client support. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <!-- Elasticsearch core library pinned to an explicit version.
         NOTE(review): confirm 7.17.3 matches both the version managed by the Spring Boot
         parent and the Elasticsearch server actually deployed. -->
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>7.17.3</version>
    </dependency>

yml文件:

# Elasticsearch connection settings. RsConfig reads ${spring.elasticsearch.uris},
# so these keys must be nested under the "spring" root.
spring:
  elasticsearch:
    # host:port of the Elasticsearch HTTP endpoint (no scheme; passed as-is to connectedTo()).
    uris: 192.168.0.1:9200
    connection-timeout: 1s
    socket-timeout: 30s

java文件:

RsController.java

/**
 * REST entry point for the hot-search keyword query. All endpoints are served under {@code /rs}.
 */
@RestController
@RequestMapping("/rs")
public class RsController {

    @Autowired
    private TBjService tBjService;

    /**
     * Runs a paged keyword search.
     *
     * @param reqKey request body carrying the keywords, the page number and the page size
     * @return the page of matching {@link TBj} documents produced by {@link TBjService#getTBj}
     */
    @PostMapping("/bjKey")
    public PageInfo<TBj> getTLawEs(@RequestBody ReqKey reqKey) {
        // Delegate to the injected service; the original referenced an undeclared "tService".
        return tBjService.getTBj(reqKey.getKeyValue(), reqKey.getPageNum(), reqKey.getPageSize());
    }
}

ReqKey.java

/**
 * Request body of the keyword search endpoint.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data

public class ReqKey {

/** Search keywords; passed to the service as the text to match. */
private String keyValue;

/** Requested page number; the search helper computes the offset as (pageNum - 1) * pageSize. */
private int pageNum;

/** Number of hits per page. */
private int pageSize;

}

TBj.java

/**
 * Document stored in the {@code t_bj_index} Elasticsearch index.
 * The field names mirror the columns of the MySQL table {@code t_bj} shown later in this article.
 */
@Data

@Document(indexName = "t_bj_index")

public class TBj {

/** Document id; the search helper fills it from the hit id when the source did not provide one. */
private String id;

/** Article title; search results may carry highlight markup in this field. */
private String title;

/** Article address. */
private String url;

/** Article body; search results may carry highlight markup in this field. */
private String content;

/** Publish time, kept as text. */
private String publishtime;

/** Creation time, kept as text. */
private String createtime;

}

RsConfig.java

/**
 * Elasticsearch wiring: enables repository scanning for {@code cn.rs.elastic.repository}
 * and exposes the {@link RestHighLevelClient} used by the search helper.
 */
@Configuration
@EnableElasticsearchRepositories(basePackages = {"cn.rs.elastic.repository"})
public class RsConfig {

    /** Value of {@code spring.elasticsearch.uris}, handed unchanged to the client builder. */
    @Value("${spring.elasticsearch.uris}")
    private String hostAndPort;

    /**
     * Builds the high-level REST client.
     * Only the endpoint is configured here; no timeout settings are passed to the builder.
     *
     * @return a client connected to the configured endpoint
     */
    @Bean
    public RestHighLevelClient elasticsearchClient() {
        return RestClients
                .create(ClientConfiguration.builder().connectedTo(hostAndPort).build())
                .rest();
    }
}

TBjRepository.java

/**
 * Spring Data repository for {@link TBj} documents keyed by a {@code String} id.
 * Declares no query methods of its own; everything comes from {@code ElasticsearchRepository}.
 */
public interface TBjRepository extends ElasticsearchRepository<TBj, String> {

}

TBjService.java

/**
 * Search service for {@link TBj} documents stored in the {@code t_bj_index} index.
 */
@Service
public class TBjService {

    /** Page used when the caller passes {@code null} as page number. */
    private static final int DEFAULT_PAGE_NUM = 1;

    /** Page size used when the caller passes {@code null} as page size. */
    private static final int DEFAULT_PAGE_SIZE = 10;

    // Was declared with the non-existent type "TjRepository".
    private final TBjRepository tBjRepository;

    // Was declared as "RsUtils rsUtils" while the call site used "esUtils"; the helper class is EsUtils.
    @Autowired
    EsUtils esUtils;

    /**
     * @param tBjRepository repository for {@link TBj}; injected by Spring
     */
    @Autowired
    public TBjService(TBjRepository tBjRepository) {
        this.tBjRepository = tBjRepository;
    }

    /**
     * Searches {@code title} and {@code content} for the given keywords and returns one page of hits
     * with both fields highlighted.
     *
     * @param keywords text to match
     * @param pageNum  page number; {@code null} falls back to the first page
     * @param pageSize hits per page; {@code null} falls back to {@value #DEFAULT_PAGE_SIZE}
     * @return the requested page of matching documents
     */
    public PageInfo<TBj> getTBj(String keywords, Integer pageNum, Integer pageSize) {
        // Unboxing a null Integer would throw a NullPointerException, so resolve defaults first.
        int page = pageNum == null ? DEFAULT_PAGE_NUM : pageNum;
        int size = pageSize == null ? DEFAULT_PAGE_SIZE : pageSize;

        String[] queryFields = {"title"};
        // Multi-match over title plus content; content carries a boost of 2.
        QueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(keywords, queryFields)
                .field("content", 2F)
                .tieBreaker(0.3F);

        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(queryBuilder);

        // Highlight every searched field: the base fields plus content.
        List<String> highFields = ListUtil.toList(queryFields);
        highFields.add("content");

        return esUtils.page("t_bj_index", searchSourceBuilder, TBj.class, page, size, highFields);
    }

}

EsUtils.java

package cn.rs.elastic.utils;

import cn.hutool.core.collection.CollectionUtil;

import cn.hutool.json.JSONUtil;

import com.github.pagehelper.PageInfo;

import lombok.AllArgsConstructor;

import lombok.Data;

import lombok.SneakyThrows;

import lombok.extern.slf4j.Slf4j;

import org.elasticsearch.action.bulk.BulkRequest;

import org.elasticsearch.action.bulk.BulkResponse;

import org.elasticsearch.action.delete.DeleteRequest;

import org.elasticsearch.action.search.SearchRequest;

import org.elasticsearch.action.search.SearchResponse;

import org.elasticsearch.action.search.SearchScrollRequest;

import org.elasticsearch.action.support.WriteRequest;

import org.elasticsearch.client.RequestOptions;

import org.elasticsearch.client.RestHighLevelClient;

import org.elasticsearch.common.text.Text;

import org.elasticsearch.search.SearchHit;

import org.elasticsearch.search.builder.SearchSourceBuilder;

import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;

import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;

import org.springframework.stereotype.Component;

import org.springframework.util.CollectionUtils;

import org.springframework.util.ReflectionUtils;

import javax.annotation.Resource;

import java.io.IOException;

import java.lang.reflect.Field;

import java.lang.reflect.Method;

import java.util.ArrayList;

import java.util.Collection;

import java.util.List;

import java.util.Map;

/**
 * Search helper built on the Elasticsearch {@link RestHighLevelClient}: runs a paged query,
 * converts every hit into a bean and overlays highlighted fragments on the requested fields.
 */
@Slf4j
@Component
public class EsUtils {

    @Resource
    private RestHighLevelClient restHighLevelClient;

    /**
     * Executes a paged search against one index.
     *
     * @param index               name of the index to search
     * @param searchSourceBuilder query definition; paging and highlighting are added to it here
     * @param resultClass         bean type each hit's source JSON is converted to
     * @param currentPage         page number starting at 1; smaller values are treated as page 1
     * @param size                number of hits per page
     * @param highFields          fields to highlight; may be {@code null} or empty for none
     * @param <T>                 bean type of the hits
     * @return page holding the converted hits and the total hit count
     * @throws IllegalStateException if the request to Elasticsearch fails with an I/O error
     */
    public <T> PageInfo<T> page(String index, SearchSourceBuilder searchSourceBuilder, Class<T> resultClass,
                                int currentPage, int size, List<String> highFields) {
        // A page number below 1 would produce a negative "from", which the search builder rejects.
        int pageNo = Math.max(currentPage, 1);

        if (CollectionUtil.isNotEmpty(highFields)) {
            buildHighLight(searchSourceBuilder, highFields);
        }
        searchSourceBuilder.from((pageNo - 1) * size).size(size);

        SearchRequest request = new SearchRequest(index);
        request.source(searchSourceBuilder);

        SearchResponse response;
        try {
            response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            // Previously the error was printed and a null response reached the parser
            // (guarded only by an assert, which is disabled by default at runtime).
            throw new IllegalStateException("Elasticsearch search failed for index " + index, e);
        }
        return analysisResponse(response, resultClass, pageNo, size, highFields);
    }

    /**
     * Converts a search response into a {@link PageInfo}.
     *
     * @param response    response returned by the client
     * @param resultClass bean type of the hits
     * @param currentPage page number echoed into the result
     * @param size        page size echoed into the result
     * @param highFields  fields whose highlighted text replaces the plain value; may be empty
     * @param <T>         bean type of the hits
     * @return populated page object
     */
    private <T> PageInfo<T> analysisResponse(SearchResponse response, Class<T> resultClass, int currentPage,
                                             int size, List<String> highFields) {
        SearchHit[] searchHits = response.getHits().getHits();
        List<T> retList = new ArrayList<>(searchHits.length);
        for (SearchHit searchHit : searchHits) {
            T t = JSONUtil.toBean(searchHit.getSourceAsString(), resultClass);
            try {
                setId(resultClass, t, String.valueOf(searchHit.getId()));
            } catch (Exception e) {
                // Best effort: a bean without a usable id setter must not fail the whole search.
                log.warn("rs 查询数据设置主键id值异常", e);
            }
            if (!CollectionUtils.isEmpty(highFields)) {
                applyHighlights(searchHit, resultClass, t, highFields);
            }
            retList.add(t);
        }

        // Total hits are absent when the caller disabled total-hit tracking on the source builder.
        long totalNum = response.getHits().getTotalHits() == null
                ? 0L
                : response.getHits().getTotalHits().value;

        PageInfo<T> pageVo = new PageInfo<>();
        pageVo.setPageNum(currentPage);
        pageVo.setPageSize(size);
        pageVo.setTotal(totalNum);
        pageVo.setList(retList);
        return pageVo;
    }

    /**
     * Overwrites each highlighted field of the bean with the concatenation of its fragments.
     */
    private <T> void applyHighlights(SearchHit searchHit, Class<T> resultClass, T t, List<String> highFields) {
        Map<String, HighlightField> highlightFieldMap = searchHit.getHighlightFields();
        for (String field : highFields) {
            HighlightField highlightField = highlightFieldMap.get(field);
            if (highlightField == null) {
                continue;
            }
            StringBuilder builder = new StringBuilder();
            for (Text text : highlightField.getFragments()) {
                builder.append(text);
            }
            setValue(resultClass, t, builder.toString(), field);
        }
    }

    /**
     * Sets the bean's {@code id} through {@code setId(String)} when the bean declares an
     * {@code id} field whose current value is {@code null}.
     */
    @SneakyThrows
    private <T> void setId(Class<T> resultClass, T t, Object id) {
        Field field = ReflectionUtils.findField(resultClass, "id");
        if (null != field) {
            field.setAccessible(true);
            Object object = ReflectionUtils.getField(field, t);
            if (object == null) {
                Method method = resultClass.getMethod("setId", String.class);
                ReflectionUtils.invokeMethod(method, t, id);
            }
        }
    }

    /**
     * Sets a String property through its public setter, if the bean declares a field of that name.
     */
    @SneakyThrows
    private <T> void setValue(Class<T> resultClass, T t, Object fieldValue, String fieldName) {
        Field field = ReflectionUtils.findField(resultClass, fieldName);
        if (null != field) {
            field.setAccessible(true);
            String methodName = "set".concat(captureName(fieldName));
            Method method = resultClass.getMethod(methodName, String.class);
            ReflectionUtils.invokeMethod(method, t, fieldValue);
        }
    }

    /**
     * Upper-cases the first character of a name, e.g. {@code title -> Title}.
     * Subtracting 32 from the char (the previous approach) corrupted names that do not start
     * with a lower-case ASCII letter and failed on an empty string.
     */
    private String captureName(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        return Character.toUpperCase(str.charAt(0)) + str.substring(1);
    }

    /**
     * Registers the given fields for highlighting, wrapping matches in {@code <em>} tags.
     */
    private void buildHighLight(SearchSourceBuilder searchSourceBuilder, List<String> fields) {
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        fields.forEach(highlightBuilder::field);
        highlightBuilder.preTags("<em>");
        highlightBuilder.postTags("</em>");
        searchSourceBuilder.highlighter(highlightBuilder);
    }

    /**
     * Holder pairing a scroll id with one page of results. Not used inside this class.
     * NOTE(review): this is a non-static inner class; it could be made static if no caller
     * depends on the enclosing instance — confirm before changing.
     */
    @AllArgsConstructor
    @Data
    public class ScrollPageBean<T> {
        private String scrollId;
        private PageInfo<T> scrollPage;
    }

}

elasticSearch

作用:热搜引擎

1下载安装

2配置参数。config/elasticsearch.yml

# Single-node Elasticsearch configuration for the hot-search setup.
cluster.name: rs-single-node-cluster
node.name: rs-single-node
node.roles: ["master", "data"]
path.data: /path/to/data
path.logs: /path/to/logs
network.host: 0.0.0.0
http.port: 9200
# Must contain this node's node.name, otherwise the first start cannot bootstrap the cluster.
cluster.initial_master_nodes: ["rs-single-node"]
# Security is switched off here; the ssl sections below therefore stay disabled.
xpack.security.enabled: false
xpack.security.enrollment.enabled: false
# Sub-keys must be indented under their section; left flat they become duplicate top-level keys.
xpack.security.http.ssl:
  enabled: false
  keystore.path: certs/http.p12
xpack.security.transport.ssl:
  enabled: false
  verification_mode: certificate
  keystore.path: certs/transport.p12
  truststore.path: certs/transport.p12
http.host: 0.0.0.0
transport.port: 9300
xpack.ml.enabled: false
# CORS is open to every origin.
http.cors.enabled: true
http.cors.allow-origin: "*"

3启动。bin/elasticsearch.bat

4启动成功默认端口9200,可访问http://127.0.0.1:9200 查看启动参数

logstash

作用:同步mysql数据到es,可指定同步策略如间隔时间等

1下载安装。

2配置mysql和es连接。

主目录下创建目录和文件mysql_rs_conf/mysql_rs.conf。并将驱动文件拷贝至该目录下。配置文件内容:

# Pipeline: read table t_bj from MySQL on a schedule and index every row into Elasticsearch.
input {
  jdbc {
    jdbc_connection_string => "jdbc:mysql://192.168.1.1:3306/dbBj?useUnicode=true&useSSL=false&characterEncoding=utf-8&serverTimezone=Asia/Shanghai&rewriteBatchedStatements=true"
    jdbc_user => "bj"
    jdbc_password => "123456"
    # The driver jar lives in the mysql_rs_conf directory created for this pipeline.
    jdbc_driver_library => "D:/logstash/logstash-8.12.2/mysql_rs_conf/mysql-connector-j-8.0.33.jar"
    # Driver class name used by MySQL Connector/J 8.x.
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "10000"
    statement => "select * from t_bj"
    # Cron syntax: run once every minute.
    schedule => "* * * * *"
    # Tag each event with its source table; the output condition below tests this field.
    # Without it the condition never matches and nothing is written to Elasticsearch.
    add_field => { "table" => "t_bj" }
  }
}

output {
  if [table] == "t_bj" {
    elasticsearch {
      hosts => "192.168.1.36:9200"
      index => "t_bj_index"
      # Reuse the MySQL primary key as document id so repeated runs overwrite instead of duplicating.
      document_id => "%{id}"
    }
  }
  # Echo every event to the console for verification.
  stdout {
    codec => json_lines
  }
}

3启动。默认端口9600

可手创建启动文件lg.cmd。内容:

rem Switch the console code page to UTF-8 (65001).
chcp 65001

rem Start Logstash with the MySQL-to-Elasticsearch pipeline file.
rem NOTE(review): assumes "logstash" resolves from the current directory or PATH and that the
rem relative path is valid from where this script is run - confirm.
logstash -f ./mysql_rs_conf/mysql_rs.conf

4验证与访问。

kibana

作用:可查看同步数据、测试、监控

1下载安装。
2配置指向ES。
3启动。默认端口5601
4验证与访问。URL:127.0.0.1:5601/app/integrations/browse
mysql

作用:热搜的中间过渡存储,用于存储爬虫数据。

建表:

-- Staging table for crawled articles; Logstash reads it with "select * from t_bj".
CREATE TABLE t_bj (
-- Auto-increment row id; the Logstash output uses it as the Elasticsearch document id.
id int(11) NOT NULL AUTO_INCREMENT,
title varchar(1000) NOT NULL COMMENT '文章标题',
url varchar(1000) DEFAULT NULL COMMENT '文章地址',
content longtext COMMENT '文章内容',
-- Publish time is stored as text, not as a date type.
publishtime varchar(30) DEFAULT NULL COMMENT '发布时间',
createtime datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',

-- NOTE(review): composite key on (id, title); id alone looks sufficient as primary key - confirm.
PRIMARY KEY (id,title) USING BTREE

) ENGINE=InnoDB AUTO_INCREMENT=492 DEFAULT CHARSET=utf8 COMMENT='保健'

相关推荐
m0_748256144 小时前
SpringBoot
java·spring boot·后端
Mr.朱鹏4 小时前
针对Feign客户端请求体参数处理问题
java·jvm·spring boot·spring·spring cloud·maven·intellij-idea
多想和从前一样5 小时前
Django 创建表时 “__str__ ”方法的使用
后端·python·django
涛粒子6 小时前
Spring Bean 生命周期的执行流程
java·后端·spring
钝挫力PROGRAMER7 小时前
SpringBoot中Mybatis记录执行sql日志
spring boot·sql·mybatis
赵琳琅7 小时前
Java语言的云计算
开发语言·后端·golang
赵琳琅7 小时前
MDX语言的安全开发
开发语言·后端·golang
林林总肿8 小时前
Mybatis后端数据库查询多对多查询解决方案
数据库·spring boot·mybatis
夏梓蕙8 小时前
Elixir语言的软件开发工具
开发语言·后端·golang
夏梓蕙8 小时前
R语言的Web开发
开发语言·后端·golang