Java实现简单爬虫——爬取疫情数据

1.项目准备

在项目中使用到了jsoup和fastjson。jsoup用于创建一个连接(会话),用于获取和解析HTML页面,

而fastjson用于对返回的JSON数据进行解析和格式化。

在pom.xml导入坐标

XML 复制代码
    <!-- Third-party libraries used by the crawler class (QQData). -->
    <dependencies>
        <!-- fastjson: supplies the com.alibaba.fastjson JSON / JSONObject / JSONArray classes
             used to parse the response body. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.15.graal</version>
        </dependency>

        <!-- jsoup: supplies org.jsoup.Jsoup, used to open the HTTP connection and read the response body. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.15.3</version>
        </dependency>
    </dependencies>

在爬取数据之前需要先找到对应的数据接口:https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list?modules=localCityNCOVDataList,diseaseh5Shelf

接口返回的是JSON数据,建议使用JSON格式化工具查看,方便后面提取数据。

JSON在线解析,JSON格式化,JSON解析,JSON 校验(SO JSON)

2.实现

java 复制代码
package com.czxy;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.jsoup.Jsoup;

import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

/**
 * Downloads epidemic statistics from the Tencent news JSON endpoint and reshapes the
 * response into nested {@code Map<String, Object>} structures.
 *
 * <p>The class also prints the raw province and city JSON objects to standard output
 * while it walks the response.
 *
 * @author 陶然同学
 * @version 1.0
 * @date 2022/10/15 10:41
 */
public class QQData {

    /** Endpoint queried by {@link #getAllData()}; it answers with a JSON document. */
    private static final String DATA_URL =
            "https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list?modules=localCityNCOVDataList,diseaseh5Shelf";

    public static void main(String[] args) throws IOException {
        getAllData();
    }

    /**
     * Fetches the endpoint and collects every section of the response.
     *
     * <p>The returned map has four entries:
     * <ul>
     *   <li>{@code "provincesMap"} - province name to province statistics (see
     *       {@link #getCityValues(JSONObject)}); each province additionally carries a
     *       {@code "citysMap"} entry mapping city name to city statistics</li>
     *   <li>{@code "chinaMap"} - nationwide totals</li>
     *   <li>{@code "highCitysMap"} - city name to statistics of the cities listed in
     *       {@code localCityNCOVDataList}</li>
     *   <li>{@code "lastUpdateTime"} - the timestamp reported by the endpoint (may be {@code null})</li>
     * </ul>
     *
     * @return the collected data; never {@code null}
     * @throws IOException if the request fails or the response has no {@code "data"} object
     */
    public static Map<String,Object> getAllData() throws IOException {
        // 1. Send the request. The endpoint returns JSON, so jsoup must be told not to
        //    reject the response because of its content type.
        String resultBody = Jsoup.connect(DATA_URL)
                .ignoreContentType(true)
                .execute().body();
        // 2. Parse the body and 3. pick the "data" section.
        JSONObject jsonObject = JSON.parseObject(resultBody);
        JSONObject data = jsonObject == null ? null : jsonObject.getJSONObject("data");
        if (data == null) {
            throw new IOException("Response does not contain a \"data\" object");
        }

        // 4. Cities listed in localCityNCOVDataList.
        Map<String,Object> highCitysMap = collectHighRiskCities(data.getJSONArray("localCityNCOVDataList"));

        // 5. Every province together with its cities.
        JSONObject diseaseh5Shelf = orEmpty(data.getJSONObject("diseaseh5Shelf"));
        Map<String,Object> provincesMap = collectProvinces(diseaseh5Shelf.getJSONArray("areaTree"));

        // 6. Nationwide totals. These are computed once, after ALL provinces were visited;
        //    doing this inside the province loop would end the method after the first province.
        Map<String,Object> chinaMap = collectChinaTotal(orEmpty(diseaseh5Shelf.getJSONObject("chinaTotal")));

        // Data cut-off time.
        Date lastUpdateTime = diseaseh5Shelf.getDate("lastUpdateTime");

        Map<String,Object> resultMap = new HashMap<>();
        resultMap.put("provincesMap",provincesMap);
        resultMap.put("chinaMap",chinaMap);
        resultMap.put("highCitysMap",highCitysMap);
        resultMap.put("lastUpdateTime",lastUpdateTime);
        return resultMap;
    }

    /**
     * Extracts the statistics of one province or city node.
     *
     * <p>Missing {@code "today"} / {@code "total"} sections are treated as empty, so the
     * corresponding counters come out as {@code 0}.
     *
     * @param province province (or city) JSON node
     * @return map with the node's name and its daily / cumulative counters
     */
    public static Map<String,Object> getCityValues(JSONObject province){
        // Node name.
        String name = province.getString("name");
        // Figures for today.
        JSONObject today = orEmpty(province.getJSONObject("today"));
        // Newly confirmed.
        int todayConfirm = today.getIntValue("confirm");
        // Newly reported local asymptomatic.
        int wzz_add = today.getIntValue("wzz_add");
        // Newly confirmed local.
        int local_confirm_add = today.getIntValue("local_confirm_add");
        // Cumulative figures.
        JSONObject total = orEmpty(province.getJSONObject("total"));
        // Cumulative confirmed.
        int confirm = total.getIntValue("confirm");
        // Currently confirmed.
        int nowConfirm = total.getIntValue("nowConfirm");
        // Asymptomatic.
        int wzz = total.getIntValue("wzz");
        // Number of medium-risk areas.
        int mediumRiskAreaNum = total.getIntValue("mediumRiskAreaNum");
        // Number of high-risk areas.
        int highRiskAreaNum = total.getIntValue("highRiskAreaNum");
        // Cumulative healed.
        int heal = total.getIntValue("heal");
        // Cumulative deaths (must be read from "dead", not "confirm").
        int dead = total.getIntValue("dead");

        Map<String,Object> provinceMap = new HashMap<>();
        provinceMap.put("name",name);
        provinceMap.put("todayConfirm",todayConfirm);
        provinceMap.put("wzz_add",wzz_add);
        provinceMap.put("local_confirm_add",local_confirm_add);
        provinceMap.put("confirm",confirm);
        provinceMap.put("nowConfirm",nowConfirm);
        // Legacy key kept so existing readers of "newConfirm" keep working.
        provinceMap.put("newConfirm",nowConfirm);
        provinceMap.put("wzz",wzz);
        provinceMap.put("mediumRiskAreaNum",mediumRiskAreaNum);
        provinceMap.put("highRiskAreaNum",highRiskAreaNum);
        provinceMap.put("heal",heal);
        provinceMap.put("dead",dead);
        return provinceMap;
    }

    /** Builds the city-name to statistics map from the {@code localCityNCOVDataList} array. */
    private static Map<String,Object> collectHighRiskCities(JSONArray localCityNCOVDataList) {
        Map<String,Object> highCitysMap = new HashMap<>();
        System.out.println("高风险疫情地区数据");
        if (localCityNCOVDataList == null) {
            return highCitysMap;
        }
        for (int i = 0; i < localCityNCOVDataList.size(); i++) {
            JSONObject highCity = localCityNCOVDataList.getJSONObject(i);
            String city = highCity.getString("city");

            Map<String,Object> highCityMap = new HashMap<>();
            highCityMap.put("city",city);
            highCityMap.put("province",highCity.getString("province"));
            // Newly confirmed local cases.
            highCityMap.put("local_confirm_add",highCity.getIntValue("local_confirm_add"));
            // Newly reported asymptomatic cases.
            highCityMap.put("local_wzz_add",highCity.getIntValue("local_wzz_add"));
            // Number of high-risk areas.
            highCityMap.put("highRiskAreaNum",highCity.getIntValue("highRiskAreaNum"));
            // Number of medium-risk areas.
            highCityMap.put("mediumRiskAreaNum",highCity.getIntValue("mediumRiskAreaNum"));

            // Register the city in the OUTER map. Storing the map inside itself would leave
            // the outer map empty and make toString()/hashCode() recurse without end.
            highCitysMap.put(city,highCityMap);
        }
        return highCitysMap;
    }

    /**
     * Builds the province-name to statistics map from the {@code areaTree} array, whose first
     * element holds the provinces under {@code "children"}.
     */
    private static Map<String,Object> collectProvinces(JSONArray areaTree) {
        Map<String,Object> provincesMap = new HashMap<>();
        System.out.println("各省份疫情数据");
        if (areaTree == null || areaTree.isEmpty()) {
            return provincesMap;
        }
        JSONArray provinces = areaTree.getJSONObject(0).getJSONArray("children");
        if (provinces == null) {
            return provincesMap;
        }
        for (int i = 0; i < provinces.size(); i++) {
            JSONObject province = provinces.getJSONObject(i);
            Map<String, Object> provinceMap = getCityValues(province);
            // Print the province data.
            System.out.println((String) provinceMap.get("name") + province + "\n\t城市数据");
            provincesMap.put((String)provinceMap.get("name"),provinceMap);

            // Cities below the province.
            Map<String,Object> citysMap = new HashMap<>();
            JSONArray citys = province.getJSONArray("children");
            if (citys != null) {
                for (int j = 0; j < citys.size(); j++) {
                    JSONObject city = citys.getJSONObject(j);
                    Map<String, Object> cityMap = getCityValues(city);
                    // Print the city data.
                    System.out.println("\t" +(String)cityMap.get("name") + city);
                    citysMap.put((String)cityMap.get("name"),cityMap);
                }
            }
            // Attach the cities to their province so the collected data is not thrown away.
            provinceMap.put("citysMap",citysMap);
        }
        return provincesMap;
    }

    /** Copies the nationwide counters of the {@code chinaTotal} object into a map. */
    private static Map<String,Object> collectChinaTotal(JSONObject chinaTotal) {
        Map<String,Object> chinaMap = new HashMap<>();
        // Healed.
        chinaMap.put("heal",chinaTotal.getIntValue("heal"));
        // Cumulative deaths.
        chinaMap.put("dead",chinaTotal.getIntValue("dead"));
        // Newly reported asymptomatic.
        chinaMap.put("localWzzAdd",chinaTotal.getIntValue("localWzzAdd"));
        // Cumulative confirmed (all cases).
        chinaMap.put("confirm",chinaTotal.getIntValue("confirm"));
        // Newly confirmed.
        chinaMap.put("confirmAdd",chinaTotal.getIntValue("confirmAdd"));
        // Currently confirmed.
        chinaMap.put("nowConfirm",chinaTotal.getIntValue("nowConfirm"));
        // Local cases.
        chinaMap.put("localConfirm",chinaTotal.getIntValue("localConfirm"));
        // New deaths.
        chinaMap.put("deadAdd",chinaTotal.getIntValue("deadAdd"));
        // Newly confirmed local cases.
        chinaMap.put("localConfirmAdd",chinaTotal.getIntValue("localConfirmAdd"));
        // Number of medium-risk areas.
        chinaMap.put("mediumRiskAreaNum",chinaTotal.getIntValue("mediumRiskAreaNum"));
        // Number of high-risk areas.
        chinaMap.put("highRiskAreaNum",chinaTotal.getIntValue("highRiskAreaNum"));
        return chinaMap;
    }

    /** Returns {@code object}, or an empty JSON object when it is {@code null}. */
    private static JSONObject orEmpty(JSONObject object) {
        return object == null ? new JSONObject() : object;
    }
}
相关推荐
Java学长-kirito11 分钟前
springboot/ssm养老院管理系统Java代码编写web社区养老服务项目
java·spring boot·spring
GzlAndy1 小时前
JVM对象创建过程
java
乔木剑衣1 小时前
JVM学习:CMS和G1收集器浅析
java·jvm·学习·垃圾收集
找了一圈尾巴2 小时前
Wend看源码-Java-Collections 工具集学习
java·开发语言·学习
广而不精zhu小白5 小时前
CentOS Stream 9 安装 JDK
java·linux·centos
程序员云帆哥5 小时前
【玩转23种Java设计模式】行为型模式篇:命令模式
java·设计模式·命令模式
赵谨言5 小时前
基于 Java 大数据的旅游推荐系统的设计与实现
java·经验分享·毕业设计
亦可呀6 小时前
HTML-CSS-常见标签与样式
前端·css·html
NHuan^_^6 小时前
RabbitMQ基础篇之Java客户端 Topic交换机
java·rabbitmq·java-rabbitmq
月眠老师6 小时前
网络爬虫的详细步骤及实现方法
爬虫