ElasticSearch_仿京东搜索

前端之家收集整理的这篇文章主要介绍了ElasticSearch_仿京东搜索,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

仿京东搜索

博客内容根据狂神说整理

https://www.bilibili.com/video/BV17a4y1x7zq

1. 爬虫

首先明确数据从哪里来

爬取数据 : 获取请求返回的页面信息,筛选出我们想要的信息就可以了!

导入依赖 JSoup,解析网页 ==> 爬电影,音乐,用tika

  1. <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
  2. <dependency>
  3. <groupId>org.jsoup</groupId>
  4. <artifactId>jsoup</artifactId>
  5. <version>1.13.1</version>
  6. </dependency>

1. 测试使用爬虫

  1. package com.wang.wangesjd.utils;
  2. import org.jsoup.Jsoup;
  3. import org.jsoup.nodes.Document;
  4. import org.jsoup.nodes.Element;
  5. import org.jsoup.select.Elements;
  6. import java.io.IOException;
  7. import java.net.URL;
  8. public class HtmlParseUtil {
  9. public static void main(String[] args) throws IOException {
  10. //获取请求 https://search.jd.com/Search?keyword=java
  11. //前提: 需要联网,而且不能获取到AJAX!
  12. String url = "https://search.jd.com/Search?keyword=java";
  13. //设置超时时间 30S
  14. int timeOut = 30000;
  15. //解析网页 ==> Document就是浏览器的Document对象
  16. Document document = Jsoup.parse(new URL(url),timeOut);
  17. //所有你在JS中可以使用的方法,这里都能用!
  18. Element element = document.getElementById("J_goodsList");
  19. // System.out.println(element.html());
  20. //获取所有的li元素
  21. Elements elements = element.getElementsByTag("li");
  22. //获取元素中的内容,这里的el就是每一个li标签
  23. for (Element el : elements) {
  24. //关于这种图片特别多的网站,所有的图片都是延迟加载的!
  25. //JD 放在了这个class data-lazy-img
  26. String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
  27. String price = el.getElementsByClass("p-price").eq(0).text();
  28. String title = el.getElementsByClass("p-name").eq(0).text();
  29. System.out.println("===================================================");
  30. System.out.println(img);
  31. System.out.println(price);
  32. System.out.println(title);
  33. }
  34. }
  35. }
  • 注意
    • JD 貌似图片使用了反爬虫技术,要获取的属性名和我们在前端调试时看到的不一样...

2. 提取工具类

  1. package com.wang.wangesjd.utils;
  2. import com.wang.wangesjd.pojo.Content;
  3. import org.jsoup.Jsoup;
  4. import org.jsoup.nodes.Document;
  5. import org.jsoup.nodes.Element;
  6. import org.jsoup.select.Elements;
  7. import java.io.IOException;
  8. import java.net.URL;
  9. import java.net.URLEncoder;
  10. import java.nio.charset.StandardCharsets;
  11. import java.util.ArrayList;
  12. import java.util.List;
  13. public class HtmlParseUtil {
  14. public static void main(String[] args) throws IOException {
  15. //URL会将符号转义!
  16. // HtmlParseUtil.parseJD("C%2B%2B").forEach(System.out::println);
  17. //查询中文需要URL转码
  18. // HtmlParseUtil.parseJD("心理学").forEach(System.out::println);
  19. HtmlParseUtil.parseJD("C++").forEach(System.out::println);
  20. }
  21. public static List<Content> parseJD(String keywords) throws IOException {
  22. //URL会对符号和汉字转码
  23. //要先转码再拼接,否则URL无法解析 (因为会将url中的符号也一起转码,无法识别)
  24. String urlKeywords = URLEncoder.encode(keywords,"UTF-8");
  25. //获取请求 https://search.jd.com/Search?keyword=java
  26. //前提: 需要联网,而且不能获取到AJAX!
  27. String url ="https://search.jd.com/Search?keyword=" + urlKeywords + "&enc=utf-8";
  28. //设置超时时间 30S
  29. int timeOut = 30000;
  30. //解析网页 ==> Document就是浏览器的Document对象
  31. Document document = Jsoup.parse(new URL(url),这里都能用!
  32. Element element = document.getElementById("J_goodsList");
  33. //获取所有的li元素
  34. Elements elements = element.getElementsByTag("li");
  35. List<Content> goodsList = new ArrayList<>();
  36. //获取元素中的内容,所有的图片都是延迟加载的!
  37. //JD 放在了这个class data-lazy-img
  38. String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
  39. String price = el.getElementsByClass("p-price").eq(0).text();
  40. String title = el.getElementsByClass("p-name").eq(0).text();
  41. Content content = new Content();
  42. content.setImg(img)
  43. .setPrice(price)
  44. .setTitle(title);
  45. goodsList.add(content);
  46. }
  47. return goodsList;
  48. }
  49. }
  • 注意
    • URL解析时会转义符号和中文,因此如果我们想传递中文或者符号的关键字,需要先转义
    • 不能将拼接后的url转义,这样会导致URL中正常的符号也被转义,导致无法识别,正确的做法是先将被拼接的转义,再拼接即可

3. 编写实体类和业务层

1. 实体类

  1. package com.wang.wangesjd.pojo;
  2. import lombok.AllArgsConstructor;
  3. import lombok.Data;
  4. import lombok.NoArgsConstructor;
  5. import lombok.experimental.Accessors;
  6. @Data
  7. @AllArgsConstructor
  8. @NoArgsConstructor
  9. @Accessors(chain = true)
  10. public class Content {
  11. private String img;
  12. private String price;
  13. private String title;
  14. }

2. 业务层

这里有个小坑 ==> 工具类交给SpringBoot接管后,是通过自动装配注入实例来使用的;如果方法是静态方法,就无法通过自动装配的方式来使用它

  1. package com.wang.wangesjd.service;
  2. import com.alibaba.fastjson.JSON;
  3. import com.wang.wangesjd.pojo.Content;
  4. import com.wang.wangesjd.utils.HtmlParseUtil;
  5. import org.elasticsearch.action.bulk.BulkRequest;
  6. import org.elasticsearch.action.bulk.BulkResponse;
  7. import org.elasticsearch.action.index.IndexRequest;
  8. import org.elasticsearch.action.search.SearchRequest;
  9. import org.elasticsearch.action.search.SearchResponse;
  10. import org.elasticsearch.client.RequestOptions;
  11. import org.elasticsearch.client.RestHighLevelClient;
  12. import org.elasticsearch.common.text.Text;
  13. import org.elasticsearch.common.unit.TimeValue;
  14. import org.elasticsearch.common.xcontent.XContentType;
  15. import org.elasticsearch.index.query.MatchQueryBuilder;
  16. import org.elasticsearch.index.query.QueryBuilders;
  17. import org.elasticsearch.index.query.TermQueryBuilder;
  18. import org.elasticsearch.search.SearchHit;
  19. import org.elasticsearch.search.builder.SearchSourceBuilder;
  20. import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
  21. import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
  22. import org.springframework.beans.factory.annotation.Autowired;
  23. import org.springframework.beans.factory.annotation.Qualifier;
  24. import org.springframework.stereotype.Service;
  25. import java.io.IOException;
  26. import java.util.ArrayList;
  27. import java.util.List;
  28. import java.util.Map;
  29. @Service
  30. public class ContentService {
  31. @Autowired
  32. @Qualifier("restHighLevelClient")
  33. private RestHighLevelClient rest;
  34. @Autowired
  35. private HtmlParseUtil htmlParseUtil;
  36. //解析数据,放入ES索引中
  37. public Boolean parseContent(String keywords) throws IOException {
  38. List<Content> contents = htmlParseUtil.parseJD(keywords);
  39. //把查询的数据放入ES中
  40. BulkRequest bulkRequest = new BulkRequest();
  41. bulkRequest.timeout(TimeValue.timeValueMinutes(2L));
  42. for (int i = 0; i < contents.size(); i++) {
  43. bulkRequest.add(new IndexRequest("jd_goods")
  44. .source(JSON.toJSONString(contents.get(i)),XContentType.JSON));
  45. }
  46. BulkResponse bulk = rest.bulk(bulkRequest,RequestOptions.DEFAULT);
  47. return !bulk.hasFailures();
  48. }
  49. //获取这些数据(从ES索引中),实现搜索功能
  50. // public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
  51. // if (pageNo <= 1) {
  52. // pageNo = 1;
  53. // }
  54. // //条件搜索
  55. // SearchRequest searchRequest = new SearchRequest("jd_goods");
  56. // SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
  57. // //精准匹配
  58. // MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title",keyword);
  59. // sourceBuilder.query(matchQueryBuilder)
  60. // .timeout(TimeValue.timeValueMinutes(1L));
  61. // //分页
  62. // sourceBuilder.from(pageNo)
  63. // .size(pageSize);
  64. // //执行搜索
  65. // searchRequest.source(sourceBuilder);
  66. // SearchResponse searchResponse = rest.search(searchRequest,RequestOptions.DEFAULT);
  67. // //解析结果
  68. // List<Map<String,Object>> list = new ArrayList<>();
  69. // for (SearchHit documentFields : searchResponse.getHits()) {
  70. // list.add(documentFields.getSourceAsMap());
  71. // }
  72. // return list;
  73. // }
  74. //实现搜索高亮
  75. public List<Map<String,int pageSize) throws IOException {
  76. if (pageNo <= 1) {
  77. pageNo = 1;
  78. }
  79. //条件搜索
  80. SearchRequest searchRequest = new SearchRequest("jd_goods");
  81. SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
  82. //精准匹配
  83. MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("title",keyword);
  84. sourceBuilder.query(matchQueryBuilder)
  85. .timeout(TimeValue.timeValueMinutes(1L));
  86. //高亮
  87. HighlightBuilder highlightBuilder = new HighlightBuilder();
  88. //定义要高亮的标签和样式
  89. highlightBuilder.field("title")
  90. .preTags("<span style='color:red'>")
  91. .postTags("</span>")
  92. .requireFieldMatch(false); //是否需要高亮多个字段
  93. sourceBuilder.Highlighter(highlightBuilder);
  94. //分页
  95. sourceBuilder.from(pageNo)
  96. .size(pageSize);
  97. //执行搜索
  98. searchRequest.source(sourceBuilder);
  99. SearchResponse searchResponse = rest.search(searchRequest,RequestOptions.DEFAULT);
  100. //解析结果
  101. List<Map<String,Object>> list = new ArrayList<>();
  102. for (SearchHit documentFields : searchResponse.getHits()) {
  103. //解析高亮的字段
  104. Map<String,HighlightField> highlightFields = documentFields.getHighlightFields();
  105. HighlightField title = highlightFields.get("title");
  106. Map<String,Object> sourceAsMap = documentFields.getSourceAsMap(); //这里是原来的结果(不含高亮)
  107. if(title != null) {
  108. Text[] fragments = title.fragments();
  109. String highlightTitle = "";
  110. for (Text text : fragments) {
  111. highlightTitle += text;
  112. }
  113. //将高亮字段替换没有高亮的字段
  114. sourceAsMap.put("title",highlightTitle);
  115. }
  116. list.add(sourceAsMap);
  117. }
  118. return list;
  119. }
  120. }
4. 页面跳转
  1. package com.wang.wangesjd.controller;
  2. import com.wang.wangesjd.service.ContentService;
  3. import org.springframework.beans.factory.annotation.Autowired;
  4. import org.springframework.web.bind.annotation.GetMapping;
  5. import org.springframework.web.bind.annotation.PathVariable;
  6. import org.springframework.web.bind.annotation.RestController;
  7. import java.io.IOException;
  8. import java.util.List;
  9. import java.util.Map;
  10. @RestController
  11. public class ContentController {
  12. @Autowired
  13. private ContentService contentService;
  14. @GetMapping("/parse/{keyword}")
  15. public Boolean parse(@PathVariable String keyword) throws IOException {
  16. return contentService.parseContent(keyword);
  17. }
  18. @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
  19. public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,@PathVariable("pageNo") int pageNo,@PathVariable("pageSize") int pageSize) throws IOException {
  20. return contentService.searchPage(keyword,pageNo,pageSize);
  21. }
  22. }

使用RESTful风格

5. 前端

  1. <!DOCTYPE html>
  2. <html xmlns:th="http://www.thymeleaf.org">
  3. <head>
  4. <meta charset="utf-8"/>
  5. <title>ES仿京东实战</title>
  6. <link rel="stylesheet" th:href="@{/css/style.css}"/>
  7. <script src="https://unpkg.com/axios/dist/axios.min.js"></script>
  8. <script src="https://cdn.jsdelivr.net/npm/vue/dist/vue.js"></script>
  9. </head>
  10. <body class="pg">
  11. <div class="page" id="app">
  12. <div id="mallPage" class=" mallist tmall- page-not-market ">
  13. <!-- 头部搜索 -->
  14. <div id="header" class=" header-list-app">
  15. <div class="headerLayout">
  16. <div class="headerCon ">
  17. <!-- logo-->
  18. <h1 id="malllogo">
  19. <img th:src="@{/images/jdlogo.png}" alt="">
  20. </h1>
  21. <div class="header-extra">
  22. <!--搜索-->
  23. <div id="mallSearch" class="mall-search">
  24. <form name="searchTop" class="mallSearch-form clearfix">
  25. <fieldset>
  26. <legend>天猫搜索</legend>
  27. <div class="mallSearch-input clearfix">
  28. <div class="s-comboBox" id="s-comboBox-685">
  29. <div class="s-comboBox-input-wrap">
  30. <input v-model="keyword" type="text" autocomplete="off" value="dd"
  31. id="mq"
  32. class="s-comboBox-input" aria-haspopup="true">
  33. </div>
  34. </div>
  35. <button type="submit" @click.prevent="searchKey" id="searchbtn">搜索</button>
  36. </div>
  37. </fieldset>
  38. </form>
  39. <ul class="relKeyTop">
  40. <li><a>狂神说Java</a></li>
  41. <li><a>狂神说前端</a></li>
  42. <li><a>狂神说Linux</a></li>
  43. <li><a>狂神说大数据</a></li>
  44. <li><a>狂神聊理财</a></li>
  45. </ul>
  46. </div>
  47. </div>
  48. </div>
  49. </div>
  50. </div>
  51. <!-- 商品详情页面 -->
  52. <div id="content">
  53. <div class="main">
  54. <!-- 品牌分类 -->
  55. <form class="navAttrsForm">
  56. <div class="attrs j_NavAttrs" style="display:block">
  57. <div class="brandAttr j_nav_brand">
  58. <div class="j_Brand attr">
  59. <div class="attrKey">
  60. 品牌
  61. </div>
  62. <div class="attrValues">
  63. <ul class="av-collapse row-2">
  64. <li><a href="#"> 狂神说 </a></li>
  65. <li><a href="#"> Java </a></li>
  66. </ul>
  67. </div>
  68. </div>
  69. </div>
  70. </div>
  71. </form>
  72. <!-- 排序规则 -->
  73. <div class="filter clearfix">
  74. <a class="fSort fSort-cur">综合<i class="f-ico-arrow-d"></i></a>
  75. <a class="fSort">人气<i class="f-ico-arrow-d"></i></a>
  76. <a class="fSort">新品<i class="f-ico-arrow-d"></i></a>
  77. <a class="fSort">销量<i class="f-ico-arrow-d"></i></a>
  78. <a class="fSort">价格<i class="f-ico-triangle-mt"></i><i class="f-ico-triangle-mb"></i></a>
  79. </div>
  80. <!-- 商品详情 -->
  81. <div class="view grid-nosku">
  82. <div class="product" v-for="result in results">
  83. <div class="product-iWrap">
  84. <!--商品封面-->
  85. <div class="productImg-wrap">
  86. <a class="productImg">
  87. <img :src="result.img">
  88. </a>
  89. </div>
  90. <!--价格-->
  91. <p class="productPrice">
  92. <em><b>¥</b>{{result.price}}</em>
  93. </p>
  94. <!--标题,我们传递的是一个html-->
  95. <p class="productTitle">
  96. <a v-html="result.title"></a>
  97. </p>
  98. <!-- 店铺名 -->
  99. <div class="productShop">
  100. <span>店铺: 狂神说Java </span>
  101. </div>
  102. <!-- 成交信息 -->
  103. <p class="productStatus">
  104. <span>月成交<em>999笔</em></span>
  105. <span>评价 <a>3</a></span>
  106. </p>
  107. </div>
  108. </div>
  109. </div>
  110. </div>
  111. </div>
  112. </div>
  113. </div>
  114. <!--前端使用Vue,实现前后端分离-->
  115. <script>
  // Vue instance bound to #app: keeps the typed keyword and the list of hits,
  // and loads the hits from the backend search endpoint.
  new Vue({
      el: '#app',
      data: {
          keyword: '', // keyword typed into the search box
          results: []  // hits returned by the backend
      },
      methods: {
          // Click handler of the search button.
          searchKey() {
              const keyword = this.keyword.trim();
              // A blank keyword would produce the path "search//..." which no route matches.
              if (!keyword) {
                  return;
              }
              // The keyword is a path segment, so it must be encoded; otherwise characters
              // such as "/", "?" or "#" change the meaning of the URL.
              // The page shows the first page with 10 hits.
              axios.get('search/' + encodeURIComponent(keyword) + '/1/10')
                  .then(response => {
                      this.results = response.data; // bind the data
                  })
                  .catch(error => {
                      // Keep the previous results and make the failure visible in the console.
                      console.error(error);
                  });
          }
      }
  })
  132. </script>
  133. </body>
  134. </html>
  • 注意
    • 由于后端高亮传回来的是一段html,需要按html渲染,不能单纯地用 {{ }} 文本插值(那样会被当作纯文本显示),因此用 v-html

猜你在找的Elasticsearch相关文章