ElasticSearch仿京东搜索

一:爬取京东数据

复制代码
package com.esjd.Utils;

import lombok.SneakyThrows;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.MalformedURLException;
import java.net.URL;

/**
 * Stand-alone demo that downloads a JD search result page with jsoup and prints
 * the lazy-load image URL, price text and title text found in every {@code <li>}.
 */
public class HtmlParseUtil {
    /**
     * Fetches {@code https://search.jd.com/Search?keyword=java}, dumps the HTML of the
     * {@code J_goodsList} container and then prints the product fields of each
     * {@code <li>} element in the document. Requires network access.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        String url = "https://search.jd.com/Search?keyword=java";

        // Download and parse the page; the second argument is the timeout in milliseconds.
        Document document = Jsoup.parse(new URL(url), 30000);

        // getElementById returns null when the id is absent (for example when the site
        // answers with a different page), so guard before dereferencing it.
        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            System.out.println("Element with id J_goodsList was not found in the response");
            return;
        }
        System.out.println(element.html());

        // Walk every <li> in the document and print the fields of interest.
        Elements elements = document.getElementsByTag("li");
        for (Element item : elements) {
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();
            System.out.println("______________________________________--");
            System.out.println(img);
            System.out.println(price);
            System.out.println(title);
        }
    }
}
封装成工具类
复制代码
/**
 * Downloads the JD search result page for {@code keyword} and extracts one
 * {@link Content} per product entry.
 *
 * <p>An {@code <li>} counts as a product entry only when its {@code class} attribute
 * equals {@code gl-item} (compared case-insensitively). Requires network access; per the
 * original author's note, content loaded via AJAX is not available to this parser.
 *
 * @param keyword search keyword; it is URL-encoded as UTF-8 before being put in the query string
 * @return the extracted products in document order; empty when no entry matches
 */
@SneakyThrows
public List<Content> paresJD(String keyword){
    String urlKeywords = URLEncoder.encode(keyword, "UTF-8");
    String url ="https://search.jd.com/Search?keyword=" + urlKeywords + "&enc=utf-8";

    // Download and parse the page; the second argument is the timeout in milliseconds.
    Document document = Jsoup.parse(new URL(url),30000);

    List<Content> goodsList = new ArrayList<>();
    for (Element item : document.getElementsByTag("li")) {
        // Skip list items that are not product entries.
        if (!item.attr("class").equalsIgnoreCase("gl-item")) {
            continue;
        }
        String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
        String price = item.getElementsByClass("p-price").eq(0).text();
        String title = item.getElementsByClass("p-name").eq(0).text();

        Content content = new Content();
        content.setTitle(title);
        content.setPrice(price);
        content.setImg(img);
        goodsList.add(content);
    }
    return goodsList;
}
编写 POJO 类
复制代码
/**
 * Plain data holder with three string fields: {@code title}, {@code img} and {@code price}.
 * Accessors and the no-args / all-args constructors come from the Lombok annotations below.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor

public class Content {
    // Add further properties here as the business requires.
    private  String title;
    private String img;
    private String price;

}
解析数据并写入 Elasticsearch 中
复制代码
  @Autowired
// @Autowired cannot be used on its own here; injection needs the Spring container.
  private RestHighLevelClient restHighLevelClient;


  /**
   * Scrapes products for {@code keywords} via {@code HtmlParseUtil#paresJD} and bulk-indexes
   * each one as a JSON document into the {@code jd_goods} index.
   *
   * @param keywords search keyword passed to the scraper
   * @return {@code true} when at least one document was sent and the bulk response reports
   *         no failures; {@code false} when nothing was scraped or any item failed
   * @throws IOException if the bulk request cannot be executed
   */
  public Boolean  parseContent(String keywords) throws IOException {
      List<Content> contents = new HtmlParseUtil().paresJD(keywords);

      // A BulkRequest without any action fails request validation, so report
      // "nothing indexed" instead of sending an empty request.
      if (contents.isEmpty()) {
          return false;
      }

      BulkRequest bulkRequest = new BulkRequest();
      bulkRequest.timeout("2m");
      for (Content content : contents) {
          // Serialize once and reuse the result for both the console output and the request.
          String json = JSON.toJSONString(content);
          System.out.println(json);
          bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
      }

      BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
      // Success means no item in the bulk response failed.
      return  !bulk.hasFailures();

  }}
对应的 Controller 接口
复制代码
@Autowired
private  ContentService contentService;


/**
 * Crawl endpoint: hands the {@code keyword} path variable to
 * {@code ContentService#parseContent} and returns its result unchanged.
 *
 * @param keyword keyword taken from the request path
 * @return whatever {@code parseContent} reports for this keyword
 * @throws IOException propagated from the service call
 */
@GetMapping("/pares/{keyword}")
public Boolean pares(@PathVariable("keyword") String  keyword) throws IOException{
    final Boolean outcome = contentService.parseContent(keyword);
    return outcome;
}
二:前后端分离进行搜索实现

搜索实现和搜索高亮实现

新建前端模板,编写调用后端搜索接口的请求代码

复制代码
// Root Vue instance mounted on #app: holds the current keyword and the result list,
// and queries the backend highlight-search endpoint.
new Vue({
    el: "#app",
    data: {
        keyword: '',   // current search keyword
        results: []    // response data from the last successful search
    },
    methods: {
        /**
         * Requests page 1 with 10 hits per page from the highlight endpoint for the
         * current keyword and stores the response data in `results`.
         */
        searchKey() {
            var keyword = this.keyword;
            console.log(keyword);

            // An empty keyword would yield "/HighlightBuilder//1/10" with an empty
            // path segment, so do not send a request at all.
            if (!keyword) {
                return;
            }

            // The keyword becomes a URL path segment; percent-encode it so characters
            // such as spaces, "?", "#" or "%" cannot corrupt the request path.
            var encodedKeyword = encodeURIComponent(keyword);

            // The plain (non-highlighting) paged search lives at
            // "search/" + encodedKeyword + "/1/10" and returns the same shape.
            axios.get("/HighlightBuilder/" + encodedKeyword + "/1/10").then(response => {
                console.log(response);
                // Bind the returned hits to the view model.
                this.results = response.data;
            }).catch(error => {
                // Surface request failures instead of leaving an unhandled rejection.
                console.error(error);
            });
        }
    }
})
编写service层
复制代码
  //2. 获取这些数据实现搜索功能
    /**
     * Runs a term query on the {@code title} field of the {@code jd_goods} index and
     * returns one page of hits.
     *
     * @param keyword  term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page, in response order
     * @throws IOException if the search request cannot be executed
     */
    public List<Map<String ,Object>> searchPage(String keyword ,int  pageNo,int pageSize) throws IOException {
        int page = Math.max(pageNo, 1);

        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // "from" is a hit offset, not a page number: page 1 must start at offset 0,
        // page 2 at pageSize, and so on. Passing the page number directly skipped the
        // first hit and made consecutive pages overlap.
        sourceBuilder.from((page - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Execute the search.
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Collect the source of each hit.
        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }

    /**
     * Same term search as the plain paged search, but asks Elasticsearch to highlight
     * the {@code title} field and returns each hit with its title replaced by the
     * highlighted version when one is present.
     *
     * @param keyword  term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of every hit on the requested page; {@code title} holds the
     *         concatenated highlight fragments when the hit has a title highlight
     * @throws IOException if the search request cannot be executed
     */
    public List<Map<String ,Object>> searchHighlightBuilder(String keyword ,int  pageNo,int pageSize) throws IOException {
        int page = Math.max(pageNo, 1);

        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // "from" is a hit offset, not a page number: page 1 must start at offset 0.
        // Passing the page number directly skipped the first hit and made pages overlap.
        sourceBuilder.from((page - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        // Highlight matches in the title by wrapping them in a red <span>.
        // NOTE(review): the fragments are returned as HTML; if the frontend renders them
        // as raw HTML, confirm that scraped titles cannot carry unwanted markup.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        // Execute the search.
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        List<Map<String,Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();

            // Swap the stored title for the highlighted one when the hit has a highlight.
            HighlightField title = hit.getHighlightFields().get("title");
            if (title != null) {
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    highlighted.append(fragment);
                }
                sourceAsMap.put("title", highlighted.toString());
            }

            list.add(sourceAsMap);
        }
        return list;
    }
实现的接口controller
复制代码
/**
 * Paged search endpoint: forwards the three path variables to
 * {@code ContentService#searchPage} and returns its result unchanged.
 *
 * @param keyword  search keyword from the request path
 * @param pageNo   page number from the request path
 * @param pageSize page size from the request path
 * @return the list produced by the service
 * @throws IOException propagated from the service call
 */
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                       @PathVariable("pageNo") int pageNo,
                                       @PathVariable("pageSize")int pageSize) throws IOException {
    final List<Map<String, Object>> hits = contentService.searchPage(keyword, pageNo, pageSize);
    return hits;
}

/**
 * Highlighting search endpoint: forwards the three path variables to
 * {@code ContentService#searchHighlightBuilder} and returns its result unchanged.
 *
 * @param keyword  search keyword from the request path
 * @param pageNo   page number from the request path
 * @param pageSize page size from the request path
 * @return the list produced by the service
 * @throws IOException propagated from the service call
 */
@GetMapping("HighlightBuilder/{keyword}/{pageNo}/{pageSize}")
public List<Map<String,Object>> searchHighlightBuilder(@PathVariable("keyword") String keyword,
                                       @PathVariable("pageNo") int pageNo,
                                       @PathVariable("pageSize")int pageSize) throws IOException {
    final List<Map<String, Object>> hits = contentService.searchHighlightBuilder(keyword, pageNo, pageSize);
    return hits;
}

null){

Text[] fragments = title.fragments();

StringBuilder n_title = new StringBuilder();

for (Text text : fragments) {

n_title.append(text);

}

sourceAsMap.put("title", n_title.toString());

复制代码
        }

// if (title!= null){

// Text[] fragments = title.fragments();

// String n_title = "";

// for (Text text : fragments) {

// n_title+= text;

// }

// sourceAsMap.put("title",n_title);

//

//

// }

复制代码
        //把所有结果遍历出来然后封装到list集合里面
        list.add(sourceAsMap);

    }


    return  list;
}


#### 实现的接口controller

//对数据进行分页

@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")

public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,

@PathVariable("pageNo") int pageNo,

@PathVariable("pageSize")int pageSize) throws IOException {

复制代码
return  contentService.searchPage(keyword, pageNo, pageSize);

}

//高亮

@GetMapping("HighlightBuilder/{keyword}/{pageNo}/{pageSize}")

public List<Map<String,Object>> searchHighlightBuilder(@PathVariable("keyword") String keyword,

@PathVariable("pageNo") int pageNo,

@PathVariable("pageSize")int pageSize) throws IOException {

复制代码
return  contentService.searchHighlightBuilder(keyword, pageNo, pageSize);

}

复制代码
相关推荐
vvilkim12 分钟前
Uniapp H5端SEO优化全攻略:提升搜索引擎排名与流量
搜索引擎·uni-app
明似水15 分钟前
Perplexity AI:对话式搜索引擎的革新者与未来认知操作系统
人工智能·搜索引擎
格格步入2 小时前
Redis 如何模糊搜索 Scan 😂😂😂
redis·后端·搜索引擎
终不悔2 小时前
Elasticsearch新手入门与性能优化实战
elasticsearch
Elastic 中国社区官方博客3 小时前
通过 AIOps 、生成式 AI 和机器学习实现更智能的可观测性
大数据·人工智能·elasticsearch·机器学习·搜索引擎·ai·可用性测试
静听山水3 小时前
Hologres 使用 FDW
大数据
Edingbrugh.南空4 小时前
Flink Connector Kafka深度剖析与进阶实践指南
大数据·flink·kafka
测试专家4 小时前
ARINC653分区调度算法的研究与改进
大数据·运维·网络·安全
远方16094 小时前
61-Oracle SQL Monitor-实操
大数据·数据库·sql·oracle·database
云云3214 小时前
Subway Surfers Blast × 亚矩阵云手机:手游矩阵运营的终极变现方案
大数据·人工智能·线性代数·智能手机·矩阵·架构