【C++|Linux|计网】构建Boost站内搜索引擎的技术实践与探索

#include <iostream>
#include <string>
#include <vector>
#include<boost/filesystem.hpp>
#include"util.hpp"

const std::string src_path = "data/input/";        // directory that holds the original HTML documents
const std::string output = "data/raw_html/raw.txt";// file that receives the cleaned, tag-free documents


// One parsed HTML document as produced by the parser stage.
struct DocInfo
{
    // Title of the document.
    std::string title;
    // Body text of the document.
    std::string contnt;
    // URL of this document on the official site.
    std::string url;
};

// Alias used throughout the parser.
using DocInfo_t = DocInfo;

// Parameter conventions used by the parser:
//   const & : input
//   *       : output
//   &       : input/output
// Collects the HTML file paths found under src_path into files_list.
bool EnumFile(const std::string &src_path, std::vector<std::string>*files_list);
// Parses every file named in files_list and appends the parsed documents to results.
bool ParseHtml(const std::vector<std::string> &files_list,std::vector<DocInfo_t> *results);
// Writes the parsed documents to the file named by output.
bool SaveHtml(const std::vector<DocInfo_t> &results, const std::string &output);

// Parser entry point: enumerate the HTML files, parse them, and save the result.
// Returns 0 on success, or 1 / 2 / 3 when the enumerate / parse / save step fails.
int main()
{
    // Step 1: collect the path of every HTML file under src_path so the
    // files can be read one by one afterwards.
    std::vector<std::string> files_list;
    if (!EnumFile(src_path, &files_list))
    {
        std::cerr << "enum file name error!" << std::endl;
        return 1;
    }

    // Step 2: read and parse each file listed in files_list.
    std::vector<DocInfo_t> results;
    if (!ParseHtml(files_list, &results))
    {
        std::cerr << "parse html error" << std::endl;
        return 2;
    }

    // Step 3: write the parsed documents to output.
    if (!SaveHtml(results, output))
    {
        std::cerr << "save html error" << std::endl;
        return 3;
    }

    return 0;
}

EnumFile函数的实现：

cpp 复制代码

// Recursively walks src_path and appends the path of every regular file whose
// extension is ".html" to files_list.
//
// src_path:   root directory to scan
// files_list: output; matching paths are appended (existing entries are kept)
// Returns false when src_path does not exist, true otherwise.
bool EnumFile(const std::string &src_path, std::vector<std::string>*files_list)
{
    namespace fs = boost::filesystem;
    fs::path root_path(src_path);
    // Nothing to scan if the root is missing.
    if(!fs::exists(root_path))
    {
        std::cerr << src_path << " does not exist" << std::endl;
        return false;
    }

    // A default-constructed iterator marks the end of the recursive walk.
    fs::recursive_directory_iterator end;
    for(fs::recursive_directory_iterator iter(root_path); iter != end; ++iter)
    {
        // Skip directories and other non-regular entries.
        if(!fs::is_regular_file(*iter))
        {
            continue;
        }
        // Only keep files whose extension is exactly ".html".
        if(iter->path().extension() != ".html")
        {
            continue;
        }
        // Store the path (including its directory part) for the parsing step.
        files_list->push_back(iter->path().string());
    }
    return true;
}

EnumFile测试结果

如下，可以把所有的.html网页输出出来了

我们提取网页中的title和content都比较简单。

提取title是直接在网页内容中查找<title>,然后进行字符串的截取即可。

cpp 复制代码

// Extracts the text between the first "<title>" and the next "</title>".
//
// file:  full content of an HTML file
// title: output; receives the title text (may be empty)
// Returns false when either tag is missing; *title is left untouched then.
bool ParseTitle(const std::string &file, std::string *title)
{
    const std::string open_tag = "<title>";
    const std::string close_tag = "</title>";

    const std::size_t open_pos = file.find(open_tag);
    if(open_pos == std::string::npos)
    {
        return false;
    }
    const std::size_t begin = open_pos + open_tag.size();

    // Look for the closing tag only after the opening tag, so a stray
    // "</title>" earlier in the file cannot hide a valid title.
    const std::size_t end = file.find(close_tag, begin);
    if(end == std::string::npos)
    {
        return false;
    }

    *title = file.substr(begin, end - begin);
    return true;
}

提取content就是一个去标签的过程，我们这里采用的是基于简单的状态机进行去标签。

cpp 复制代码

// Strips markup from file and appends the remaining text to content.
//
// Scanning starts in "inside a tag" state, so everything up to and including
// the first '>' is dropped. Outside of tags every character is appended, with
// '\n' replaced by a space because '\n' is reserved as the delimiter of the
// parsed output. Always returns true.
bool ParseContent(const std::string &file, std::string *content)
{
    bool inside_tag = true;
    for(std::size_t i = 0; i < file.size(); ++i)
    {
        const char ch = file[i];
        if(inside_tag)
        {
            // A '>' closes the current tag; anything else in a tag is ignored.
            if(ch == '>')
            {
                inside_tag = false;
            }
            continue;
        }
        if(ch == '<')
        {
            inside_tag = true;
            continue;
        }
        content->push_back(ch == '\n' ? ' ' : ch);
    }

    return true;
}

如何提取网页的url呢？

boost库的官方文档，和我们下载下来的文档，是有路径的对应关系的

官网URL样例:

https://www.boost.org/doc/libs/1_86_0/doc/html/accumulators.html

我们下载下来的url样例:boost/1_86_0/doc/html/accumulators.html

我们拷贝到我们项目中的样例:data/input/accumulators.html //我们把下载下来的boost库doc/html/* copy data/input/

url_head = "https://www.boost.org/doc/libs/1_86_0/doc/html/";

url_tail = [data/input/](删除)accumulators.html -> url_tail = accumulators.html
url = url_head + url_tail; 相当于形成了一个官网链接！

cpp 复制代码

// Builds the official-site URL for a local HTML file.
//
// file_path: path of the local file; it must start with src_path
// url:       output; url_head followed by the part of file_path after src_path
// Returns false (leaving *url untouched) when file_path does not start with
// src_path, instead of throwing from substr or producing a wrong URL.
bool ParseUrl(const std::string &file_path ,std::string *url)
{
    const std::string url_head = "https://www.boost.org/doc/libs/1_86_0/doc/html/";
    // compare() is safe even when file_path is shorter than src_path.
    if(file_path.compare(0, src_path.size(), src_path) != 0)
    {
        return false;
    }
    *url = url_head + file_path.substr(src_path.size());
    return true;
}

将解析内容写入文件中：

cpp 复制代码

// Writes every parsed document to output, one record per line:
//   title '\3' content '\3' url '\n'
//
// results: parsed documents to store
// output:  path of the file to create or overwrite
// Returns false when the file cannot be opened or a write fails.
bool SaveHtml(const std::vector<DocInfo_t> &results, const std::string &output)
{
    // Separator between the fields of one record.
    const char sep = '\3';

    // Binary mode so the bytes are written exactly as built.
    std::ofstream out (output, std::ios::out | std::ios::binary);
    if(!out.is_open())
    {
        std::cerr << "open " << output << " failed!" << std::endl;
        return false;
    }

    for(const auto &item : results)
    {
        std::string out_string;
        out_string.reserve(item.title.size() + item.contnt.size() + item.url.size() + 3);
        out_string += item.title;
        out_string += sep;
        out_string += item.contnt;
        out_string += sep;
        out_string += item.url;
        out_string += '\n';
        out.write(out_string.c_str(), static_cast<std::streamsize>(out_string.size()));
        // Stop at the first failed write instead of reporting success.
        if(!out)
        {
            std::cerr << "write " << output << " failed!" << std::endl;
            return false;
        }
    }

    // Flush so that a failure of the final buffered write is reported too.
    out.flush();
    if(!out)
    {
        std::cerr << "write " << output << " failed!" << std::endl;
        return false;
    }
    return true;
}

测试解析网页title，content，url是否正确？

vim data/input/mpi/history.html

在自己下载的文件里面进行验证，发现正确，没问题！

在网站中验证，也没问题！

最后将解析结果写入到raw.txt

6.编写建立索引的模块index

6.1.index模块的基本框架：

cpp 复制代码

#pragma once

#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <mutex>
#include <unordered_map>
#include "util.hpp"

namespace ns_index
{
    // One indexed document.
    struct DocInfo
    {
        std::string title;   // document title
        std::string content; // document text after tag removal
        std::string url;     // URL of the document on the official site
        uint64_t doc_id = 0; // document ID; zero-initialized so it is never read uninitialized
    };

    // One entry of an inverted list: a word's occurrence in one document.
    struct InvertedElem
    {
        uint64_t doc_id = 0; // ID of the document containing the word
        std::string word;    // the keyword
        int weight = 0;      // relevance weight of the word in this document
    };

    // Inverted list: all InvertedElem entries that belong to one keyword.
    typedef std::vector<InvertedElem> InvertedList;

    class Index
    {
    private:
        // Forward index: stored in a vector so that an element's position is its document ID.
        std::vector<DocInfo> forward_index; // forward index
        // Inverted index: maps each keyword to its inverted list.
        std::unordered_map<std::string, InvertedList> inverted_index; // inverted index

    private:
        // Singleton: construction is private and copying is disabled.
        Index() {} // must have a body (cannot be deleted) because GetInstance constructs the object
        Index(const Index &) = delete;
        Index &operator=(const Index &) = delete;

        // NOTE(review): these statics need out-of-class definitions, which are not shown here.
        static Index *instance;
        static std::mutex mtx;

    public:
        ~Index() {}

    public:
        // Returns the process-wide Index, creating it on first use.
        //
        // The mutex is taken on every call: `instance` is a plain pointer, so
        // reading it without the lock while another thread writes it would be
        // a data race. lock_guard also releases the mutex if `new` throws.
        static Index *GetInstance()
        {
            std::lock_guard<std::mutex> guard(mtx);
            if (nullptr == instance)
            {
                instance = new Index();
            }
            return instance;
        }

    public:
        // 根据doc_id找到文档内容
        // Looks up a document in the forward index by its ID.
        // Returns a pointer to the stored DocInfo, or nullptr (after logging
        // to stderr) when doc_id is not a valid position in the forward index.
        DocInfo *GetForwardIndex(uint64_t doc_id)
        {
            const bool in_range = doc_id < forward_index.size();
            if (in_range)
            {
                return &forward_index[doc_id];
            }
            std::cerr << "doc_id out range, error!" << std::endl;
            return nullptr;
        }
        // 根据关键字string，获得倒排拉链
        // Looks up the inverted list stored for a keyword.
        // Returns a pointer to the list, or nullptr (after logging to stderr)
        // when the keyword is not present in the inverted index.
        InvertedList *GetInvertedList(const std::string &word)
        {
            const auto hit = inverted_index.find(word);
            if (hit != inverted_index.end())
            {
                return &(hit->second);
            }
            std::cerr << word << " have no InvertedList" << std::endl;
            return nullptr;
        }
        // 根据去标签，格式化之后的文档，构建正排和倒排索引
        // Builds the forward and inverted indexes from the parser's output file.
        //
        // input: path of the file to read; each line is handed to
        //        BuildForwardIndex as one record
        // Returns false when the file cannot be opened. Lines that
        // BuildForwardIndex rejects are logged and skipped.
        bool BuildIndex(const std::string &input)
        {
            std::ifstream in(input, std::ios::in | std::ios::binary);
            if (!in.is_open())
            {
                std::cerr << "sorry, " << input << " open error" << std::endl;
                return false;
            }

            int indexed = 0; // number of documents indexed so far
            for (std::string record; std::getline(in, record);)
            {
                DocInfo *doc = BuildForwardIndex(record);
                if (doc == nullptr)
                {
                    std::cerr << "build " << record << " error" << std::endl; // for debug
                    continue;
                }

                BuildInvertedIndex(*doc);
                ++indexed;
                // Report progress every 50 documents.
                if (indexed % 50 != 0)
                {
                    continue;
                }
                std::cout <<"当前已经建立的索引文档: " << indexed <<std::endl;
            }
            return true;
        }

6.2.建立正排索引：

cpp 复制代码

// Parses one record line and appends the document to the forward index.
//
// line: one record whose fields are separated by '\3'; it must split into
//       exactly three fields (title, content, url)
// Returns a pointer to the newly stored DocInfo, or nullptr when the line does
// not split into exactly three fields. The pointer refers to an element of
// forward_index, so it is only valid until the next insertion.
DocInfo *BuildForwardIndex(const std::string &line)
        {
            // 1. Split the line into its fields.
            std::vector<std::string> results;
            const std::string sep = "\3"; // field separator inside a line
            ns_util::StringUtil::Split(line, &results, sep);
            if (results.size() != 3)
            {
                return nullptr;
            }
            // 2. Fill a DocInfo. The fields are moved rather than copied
            //    because `results` is not used afterwards.
            DocInfo doc;
            doc.title = std::move(results[0]);
            doc.content = std::move(results[1]);
            doc.url = std::move(results[2]);
            // Take the ID before inserting: it equals the position the
            // document is about to occupy in the vector.
            doc.doc_id = forward_index.size();
            // 3. Append to the forward index.
            forward_index.push_back(std::move(doc));
            return &forward_index.back();
        }

这里正排索引在切分字符串的时候，我采用了boost库中的split函数

cpp 复制代码

    // String helpers.
    class StringUtil
    {
    public:
        // Splits target at any character contained in sep and stores the
        // pieces in *out, using boost::split with token_compress_on.
        static void Split(const std::string &target, std::vector<std::string> *out, const std::string &sep)
        {
            const auto is_separator = boost::is_any_of(sep);
            boost::split(*out, target, is_separator, boost::token_compress_on);
        }
    };

split()函数具体使用说明：

boost 库中split函数用来字符串的切割

引用的头文件 <boost/algorithm/string.hpp>

boost::split()函数用于切割string字符串，将切割之后的字符串放到一个std::vector<std::string> 之中；

有4个参数：

以boost::split(type, select_list, boost::is_any_of(","), boost::token_compress_on);

(1)、type类型是std::vector<std::string>，用于存放切割之后的字符串

(2)、select_list：传入的字符串，可以为空。

(3)、boost::is_any_of(",")：设定切割符为,(逗号)

(4)、 boost::token_compress_on：将连续出现的多个分隔符压缩成一个来处理。该选项默认是关闭的（token_compress_off），使用时一般需要显式打开。

测试代码：

最后输出就是三部分，没有空格！

6.3.建立倒排索引：

需要对 title && content都要先分词 --使⽤jieba分词，并且搜索内容不区分大小写，统一变成小写。

使用jieba的时候有一个坑，需要我们手动将limonp这个头文件拷贝到include头文件当中，不然编译会报错！

cpp 复制代码

        // Adds one document to the inverted index.
        //
        // The title and the content are tokenized, every token is lower-cased
        // so matching is case-insensitive, and occurrences are counted per
        // word. Each distinct word gets one InvertedElem whose weight is
        //   title_weight * (count in title) + content_weight * (count in content).
        // Always returns true.
        bool BuildInvertedIndex(const DocInfo &doc)
        {
            // Local constants instead of macros so the names do not leak out
            // of this function.
            const int title_weight = 10;
            const int content_weight = 1;

            // Per-word occurrence counters.
            struct word_cnt
            {
                int title_cnt;
                int content_cnt;

                word_cnt() : title_cnt(0), content_cnt(0) {}
            };
            std::unordered_map<std::string, word_cnt> word_map; // word -> counters

            // Tokenize and count the title.
            std::vector<std::string> title_words;
            ns_util::JiebaUtil::CutString(doc.title, &title_words);
            for (std::string &s : title_words)
            {
                boost::to_lower(s);      // normalize to lower case
                word_map[s].title_cnt++; // operator[] creates the entry on first use
            }

            // Tokenize and count the content.
            std::vector<std::string> content_words;
            ns_util::JiebaUtil::CutString(doc.content, &content_words);
            for (std::string &s : content_words)
            {
                boost::to_lower(s);
                word_map[s].content_cnt++;
            }

            // Emit one inverted element per distinct word.
            for (const auto &word_pair : word_map)
            {
                InvertedElem item;
                item.doc_id = doc.doc_id;
                item.word = word_pair.first;
                item.weight = title_weight * word_pair.second.title_cnt
                            + content_weight * word_pair.second.content_cnt; // relevance
                inverted_index[word_pair.first].push_back(std::move(item));
            }

            return true;
        }
    };

7.编写搜索引擎模块searcher

7.1.基本代码框架：

cpp 复制代码

#include "index.hpp"
namespace ns_searcher{
    // Skeleton of the search module.
    class Searcher{
    private:
        // Index used to answer queries; null until InitSearcher sets it.
        ns_index::Index *index = nullptr;
    public:
        Searcher(){}
        ~Searcher(){}
    public:
        // input: path of the data file the index is built from
        void InitSearcher(const std::string &input)
        {
            // 1. Obtain (or create) the Index object.
            // 2. Build the index from `input`.
        }
        // query:       the search keywords
        // json_string: output; the search result returned to the user's browser
        void Search(const std::string &query, std::string *json_string)
        {
            // 1. [Tokenize]  split the query into words.
            // 2. [Look up]   query the index with each word.
            // 3. [Merge]     combine the hits and sort by weight, descending.
            // 4. [Serialize] build the JSON string from the hits (jsoncpp).
        }
    };
}

7.2.建立摘要

为什么要建立摘要？

因为我们正常在搜索引擎搜到的内容，是不可能将网页的一整个内容显示给客户的，一定要将网页的摘要返回给客户，相当于提炼出主旨，那我们怎么实现呢？

找到word在html_content中的首次出现，然后往前找50字节(如果没有，从begin开始)，往后找100字节(如果没有，到end就可以的)

注意定义start和end双指针的时候，要注意size_t类型与int类型的符号比较，很容易出错！

由于size_t是无符号类型，如果使用不当（比如使用负数做运算），可能会导致意想不到的结果。例如，将负数赋值给size_t会导致它变成一个很大的正数。

代码：

cpp 复制代码

// Builds a short summary of html_content around the first occurrence of word.
//
// The match is case-insensitive. The summary starts up to 50 bytes before the
// match and ends up to 100 bytes after the start of the match, clamped to the
// bounds of the content, and is followed by "...".
// Returns "None1" when word does not occur in html_content.
std::string GetDesc(const std::string &html_content, const std::string &word)
            {
                const std::size_t prev_step = 50;
                const std::size_t next_step = 100;

                // 1. Find the first occurrence. std::string::find cannot be
                //    used because the comparison must ignore case. The bytes
                //    are taken as unsigned char: passing a negative char to
                //    std::tolower is undefined behavior.
                auto iter = std::search(html_content.begin(), html_content.end(), word.begin(), word.end(),
                        [](unsigned char x, unsigned char y){
                            return std::tolower(x) == std::tolower(y);
                        });
                if(iter == html_content.end()){
                    return "None1";
                }
                const std::size_t pos = static_cast<std::size_t>(std::distance(html_content.begin(), iter));

                // 2. Compute the half-open range [start, end). Unsigned
                //    arithmetic is safe here: each subtraction is guarded.
                const std::size_t start = (pos > prev_step) ? pos - prev_step : 0;
                std::size_t end = html_content.size(); // one past the last byte
                if(end - pos > next_step) end = pos + next_step;

                // 3. Cut the summary out; the last byte of the content is
                //    included when the range reaches the end.
                std::string desc = html_content.substr(start, end - start);
                desc += "...";
                return desc;
            }

问题：搜索结果出现重复文档的问题

比如我们在搜索"你是一个好人"时，jieba会将该语句分解为你/一个/好人/一个好人，这些词各自的倒排拉链可能会指向同一个文档，导致我们在搜索的时候会出现重复的结果。

现象：

我们将一个boost库中的文档修改内容为"你是一个好人"，我们在搜索你是一个好人的时候就会出现重复结果：

所以我们要做去重操作，如何判断相同呢？直接看文档id即可。并且要将权值修改，我们应该将搜索到的相同内容进行权值的累加，作为该文档的真正权值！

去重之后的效果：

7.3.修改后去重的代码：

cpp 复制代码

#pragma once

#include "index.hpp"
#include "util.hpp"
#include "log.hpp"
#include <algorithm>
#include <unordered_map>
#include <jsoncpp/json/json.h>

namespace ns_searcher{

    // Aggregated hit for one document: the summed weight of every matching
    // query word and the list of those words.
    struct InvertedElemPrint
    {
        uint64_t doc_id;                // ID of the matching document
        int weight;                     // accumulated weight over all matching words
        std::vector<std::string> words; // query words that matched this document

        InvertedElemPrint()
            : doc_id(0),
              weight(0)
        {
        }
    };

    class Searcher
    {
        private:
            ns_index::Index *index; // index used to answer queries; assigned in InitSearcher
        public:
            Searcher(){}
            ~Searcher(){}
        public:
            // Obtains the Index singleton and builds its indexes from input.
            //
            // input: path of the data file handed to Index::BuildIndex
            // A failed build is reported on stderr instead of being logged as
            // a success.
            void InitSearcher(const std::string &input)
            {
                // 1. Obtain (or create) the Index object.
                index = ns_index::Index::GetInstance();
                std::cout << "获取index单例成功..." << std::endl;
                //LOG(NORMAL, "获取index单例成功...");

                // 2. Build the indexes and check the result.
                if(!index->BuildIndex(input)){
                    std::cerr << "建立正排和倒排索引失败: " << input << std::endl;
                    return;
                }
                std::cout << "建立正排和倒排索引成功..." << std::endl;
                //LOG(NORMAL, "建立正排和倒排索引成功...");
            }
            //query: 搜索关键字
            //json_string: 返回给用户浏览器的搜索结果
            void Search(const std::string &query, std::string *json_string)
            {
                //1.[分词]:对我们的query进行按照searcher的要求进行分词
                std::vector<std::string> words;
                ns_util::JiebaUtil::CutString(query, &words);
                //2.[触发]:就是根据分词的各个"词"，进行index查找,建立index是忽略大小写，所以搜索，关键字也需要
                //ns_index::InvertedList inverted_list_all; //内部InvertedElem
                std::vector<InvertedElemPrint> inverted_list_all;

                std::unordered_map<uint64_t, InvertedElemPrint> tokens_map;

                for(std::string word : words){
                    boost::to_lower(word);

                    ns_index::InvertedList *inverted_list = index->GetInvertedList(word);
                    if(nullptr == inverted_list){
                        continue;
                    }
                    //不完美的地方： 你/是/一个/好人 100
                    //inverted_list_all.insert(inverted_list_all.end(), inverted_list->begin(), inverted_list->end());
                    for(const auto &elem : *inverted_list){
                        auto &item = tokens_map[elem.doc_id]; //[]:如果存在直接获取，如果不存在新建
                        //item一定是doc_id相同的print节点
                        item.doc_id = elem.doc_id;
                        item.weight += elem.weight;
                        item.words.push_back(elem.word);
                    }
                }
                for(const auto &item : tokens_map){
                    inverted_list_all.push_back(std::move(item.second));
                }

                //3.[合并排序]：汇总查找结果，按照相关性(weight)降序排序
                //std::sort(inverted_list_all.begin(), inverted_list_all.end(),\
                //      [](const ns_index::InvertedElem &e1, const ns_index::InvertedElem &e2){
                //        return e1.weight > e2.weight;
                //        });
                  std::sort(inverted_list_all.begin(), inverted_list_all.end(),\
                          [](const InvertedElemPrint &e1, const InvertedElemPrint &e2){
                          return e1.weight > e2.weight;
                          });
                //4.[构建]:根据查找出来的结果，构建json串 -- jsoncpp --通过jsoncpp完成序列化&&反序列化
                Json::Value root;
                for(auto &item : inverted_list_all){
                    ns_index::DocInfo * doc = index->GetForwardIndex(item.doc_id);
                    if(nullptr == doc){
                        continue;
                    }
                    Json::Value elem;
                    elem["title"] = doc->title;
                    elem["desc"] = GetDesc(doc->content, item.words[0]); //content是文档的去标签的结果，但是不是我们想要的，我们要的是一部分 TODO
                    elem["url"]  = doc->url;
                    //for deubg, for delete
                    elem["id"] = (int)item.doc_id;
                    elem["weight"] = item.weight; //int->string

                    root.append(elem);
                }

                Json::StyledWriter writer;
                //Json::FastWriter writer;
                *json_string = writer.write(root);
            }

             // Builds a short summary of html_content around the first occurrence of word.
             //
             // The match is case-insensitive. The summary starts up to 50 bytes before
             // the match and ends up to 100 bytes after the start of the match, clamped
             // to the bounds of the content, and is followed by "...".
             // Returns "None1" when word does not occur in html_content.
             std::string GetDesc(const std::string &html_content, const std::string &word)
            {
                const std::size_t prev_step = 50;
                const std::size_t next_step = 100;

                // 1. Find the first occurrence. std::string::find cannot be
                //    used because the comparison must ignore case. The bytes
                //    are taken as unsigned char: passing a negative char to
                //    std::tolower is undefined behavior.
                auto iter = std::search(html_content.begin(), html_content.end(), word.begin(), word.end(),
                        [](unsigned char x, unsigned char y){
                            return std::tolower(x) == std::tolower(y);
                        });
                if(iter == html_content.end()){
                    return "None1";
                }
                const std::size_t pos = static_cast<std::size_t>(std::distance(html_content.begin(), iter));

                // 2. Compute the half-open range [start, end). Unsigned
                //    arithmetic is safe here: each subtraction is guarded.
                const std::size_t start = (pos > prev_step) ? pos - prev_step : 0;
                std::size_t end = html_content.size(); // one past the last byte
                if(end - pos > next_step) end = pos + next_step;

                // 3. Cut the summary out; the last byte of the content is
                //    included when the range reaches the end.
                std::string desc = html_content.substr(start, end - start);
                desc += "...";
                return desc;
            }
        
    };
}

7.4.测试：

打出来的是不是按权值进行排序的呢？我们可以将weight打印出来看看

最大是16 ，最小是1，我们打开网站自己验证一下

这是16的，在文章内容中一共出现了16次，下面是1次的

一共出现1次正确！！！

8.编写 http_server 模块

我们这里不用自己造轮子，直接使用开源的cpp-httplib库即可搭建网络通信。

httpserver的基本测试代码：

cpp 复制代码

#include"httplib.h"

// Minimal cpp-httplib smoke test: answers GET /hi with a greeting.
// Returns 1 when the server cannot listen on 0.0.0.0:8085, so a bind failure
// is visible to the caller; returns 0 after a normal shutdown.
int main()
{
    httplib::Server svr;
    svr.Get("/hi", [](const httplib::Request &, httplib::Response &rsp){
        rsp.set_content("你好,世界!", "text/plain; charset=utf-8");
        });
    return svr.listen("0.0.0.0",8085) ? 0 : 1;
}

没问题！

所以我们只要会使用基本的接口即可

9.简单的日志系统

cpp 复制代码

#pragma once

#include <iostream>
#include <string>
#include <ctime>

// Log level identifiers. LOG() stringifies the level argument with #LEVEL,
// so the level's name is printed, not the numeric value defined here.
#define NORMAL  1
#define WARNING 2
#define DEBUG   3
#define FATAL   4

// Forwards to log() with the level name as text and the call site's file and line.
#define LOG(LEVEL, MESSAGE) log(#LEVEL, MESSAGE, __FILE__, __LINE__)

// Prints one log line to stdout in the form
//   [level][unix time][message][file : line]
//
// Declared inline because it is defined in a header: without inline, every
// translation unit that includes the header would emit its own definition and
// the link would fail. The strings are taken by const reference to avoid copies.
inline void log(const std::string &level, const std::string &message, const std::string &file, int line)
{
    std::cout << "[" << level << "]" << "[" << time(nullptr) << "]" << "[" << message << "]" << "[" << file << " : " << line << "]" << std::endl;
}

10.前端代码

因为我们的重点主要在于后端，所以前端的代码不讲解。

源码：

html 复制代码

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <script src="http://code.jquery.com/jquery-2.1.1.min.js"></script>

    <title>boost 搜索引擎</title>
    <style>
        /* 去掉网页中的所有的默认内外边距，html的盒子模型 */
        * {
            /* 设置外边距 */
            margin: 0;
            /* 设置内边距 */
            padding: 0;
        }
        /* 将我们的body内的内容100%和html的呈现吻合 */
        html,
        body {
            height: 100%;
        }
        /* 类选择器.container */
        .container {
            /* 设置div的宽度 */
            width: 800px;
            /* 通过设置外边距达到居中对齐的目的 */
            margin: 0px auto;
            /* 设置外边距的上边距，保持元素和网页的上部距离 */
            margin-top: 15px;
        }
        /* 复合选择器，选中container 下的 search */
        .container .search {
            /* 宽度与父标签保持一致 */
            width: 100%;
            /* 高度设置为52px */
            height: 52px;
        }
        /* 先选中input标签， 直接设置标签的属性，先要选中， input：标签选择器*/
        /* input在进行高度设置的时候，没有考虑边框的问题 */
        .container .search input {
            /* 设置left浮动 */
            float: left;
            width: 600px;
            height: 50px;
            /* 设置边框属性：边框的宽度，样式，颜色 */
            border: 1px solid black;
            /* 去掉input输入框的有边框 */
            border-right: none;
            /* 设置内边距，默认文字不要和左侧边框紧挨着 */
            padding-left: 10px;
            /* 设置input内部的字体的颜色和样式 */
            color: #CCC;
            font-size: 14px;
        }
        /* 先选中button标签， 直接设置标签的属性，先要选中， button：标签选择器*/
        .container .search button {
            /* 设置left浮动 */
            float: left;
            width: 150px;
            height: 52px;
            /* 设置button的背景颜色，#4e6ef2 */
            background-color: #4e6ef2;
            /* 设置button中的字体颜色 */
            color: #FFF;
            /* 设置字体的大小 */
            font-size: 19px;
            font-family:Georgia, 'Times New Roman', Times, serif;
        }
        .container .result {
            width: 100%;
        }
        .container .result .item {
            margin-top: 15px;
        }

        .container .result .item a {
            /* 设置为块级元素，单独站一行 */
            display: block;
            /* a标签的下划线去掉 */
            text-decoration: none;
            /* 设置a标签中的文字的字体大小 */
            font-size: 20px;
            /* 设置字体的颜色 */
            color: #4e6ef2;
        }
        .container .result .item a:hover {
            text-decoration: underline;
        }
        .container .result .item p {
            margin-top: 5px;
            font-size: 16px;
            font-family:'Lucida Sans', 'Lucida Sans Regular', 'Lucida Grande', 'Lucida Sans Unicode', Geneva, Verdana, sans-serif;
        }

        .container .result .item i{
            /* 设置为块级元素，单独站一行 */
            display: block;
            /* 取消斜体风格 */
            font-style: normal;
            color: green;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="search">
            <input type="text" value="请输入搜索关键字">
            <button onclick="Search()">搜索一下</button>
        </div>
        <div class="result">
            <!-- 动态生成网页内容 -->
            <!-- <div class="item">
                <a href="#">这是标题</a>
                <p>这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要</p>
                <i>https://search.gitee.com/?skin=rec&type=repository&q=cpp-httplib</i>
            </div>
            <div class="item">
                <a href="#">这是标题</a>
                <p>这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要</p>
                <i>https://search.gitee.com/?skin=rec&type=repository&q=cpp-httplib</i>
            </div>
            <div class="item">
                <a href="#">这是标题</a>
                <p>这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要</p>
                <i>https://search.gitee.com/?skin=rec&type=repository&q=cpp-httplib</i>
            </div>
            <div class="item">
                <a href="#">这是标题</a>
                <p>这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要</p>
                <i>https://search.gitee.com/?skin=rec&type=repository&q=cpp-httplib</i>
            </div>
            <div class="item">
                <a href="#">这是标题</a>
                <p>这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要这是摘要</p>
                <i>https://search.gitee.com/?skin=rec&type=repository&q=cpp-httplib</i>
            </div> -->
        </div>
    </div>
    <script>
        // Reads the query from the input box and sends it to the back end;
        // the response is rendered by BuildHtml.
        function Search(){
            // 1. Read the query text ($ is jQuery).
            let query = $(".container .search input").val();
            console.log("query = " + query); // visible in the browser console

            // 2. Send the HTTP request with jQuery's ajax. The query is
            //    URL-encoded so that characters such as '&', '#', '+' or
            //    spaces cannot break or truncate the "word" parameter.
            $.ajax({
                type: "GET",
                url: "/s?word=" + encodeURIComponent(query),
                success: function(data){
                    console.log(data);
                    BuildHtml(data);
                }
            });
        }

        // Replaces the content of the result area with one ".item" block per
        // element of data; each block shows the title as a link, the summary
        // and the URL.
        function BuildHtml(data){
            const resultBox = $(".container .result");
            // Drop the results of the previous search.
            resultBox.empty();

            for (const entry of data) {
                const item = $("<div>", { class: "item" });

                // Title link; opens in a new tab.
                $("<a>", {
                    text: entry.title,
                    href: entry.url,
                    target: "_blank"
                }).appendTo(item);
                // Summary text.
                $("<p>", { text: entry.desc }).appendTo(item);
                // Plain-text URL.
                $("<i>", { text: entry.url }).appendTo(item);

                item.appendTo(resultBox);
            }
        }
    </script>
</body>
</html>