JAVA爬虫系列

准备工作

导入依赖

XML 复制代码
 <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>

yml

XML 复制代码
logging:
  level:
    root: info
    com.lrm: debug

1.入门程序(获取到静态页面)

java 复制代码
package com.itheima.reggie.utils;


import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * @Author lpc
 **/
public class CrawlerFirst {
    public static void main(String[] args) throws Exception {

        //1.打开浏览器,创建Httpclient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();

       //2.输入网址,发起get请求创建HttpGet对象
        HttpGet httpGet = new HttpGet("http://www.itcast.cn");

        //3.按回车,发起请求,返回响应,使用Httpclient对象发起请求
        CloseableHttpResponse response = httpClient.execute(httpGet);
        //4.解析响应,获取数据
        //判斯状态码是否是200
        if (response.getStatusLine().getStatusCode()==200){
            HttpEntity httpEntity = response.getEntity();
            //获取前端静态页面
            String content = EntityUtils.toString(httpEntity,"utf8");
            System.out.println(content);
        }


    }
}

2.HttpClient---Get

java 复制代码
package com.itheima.reggie.utils;


import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * @Author lpc
 * @Date 2024 03 12 00 23
 **/
public class CrawlerFirst {
    public static void main(String[] args){

        //1.打开浏览器,创建Httpclient对象
        CloseableHttpClient httpClient = HttpClients.createDefault();

       //2.输入网址,发起get请求创建HttpGet对象
        HttpGet httpGet = new HttpGet("http://www.itcast.cn");

        //3.按回车,发起请求,返回响应,使用Httpclient对象发起请求
        CloseableHttpResponse response = null;

        try {
            response = httpClient.execute(httpGet);
            //4.解析响应,获取数据
            //判斯状态码是否是200
            if (response.getStatusLine().getStatusCode()==200){
                HttpEntity httpEntity = response.getEntity();
                //获取前端静态页面
                String content = EntityUtils.toString(httpEntity,"utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }finally {
            try {
                //关闭response
                response.close();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            try {
                //关闭浏览器
                httpClient.close();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }


    }
}
相关推荐
嫂子的姐夫13 分钟前
042-spiderbuf第C7题
爬虫·python·逆向
波波00730 分钟前
每日一题:.NET 中的“反射”是什么?
开发语言·.net
鬼蛟1 小时前
Spring————事务
android·java·spring
qq_461489331 小时前
C++与Qt图形开发
开发语言·c++·算法
Evand J1 小时前
【三维飞行器】RRT路径规划与TOA定位仿真系统,MATLAB例程,路径起终点、障碍物、TOA锚点等均可设置。附下载链接
开发语言·matlab·无人机·定位·rrt·toa·三维航迹规划
froginwe111 小时前
Rust 异步编程
开发语言
zayzy1 小时前
前端八股总结
开发语言·前端·javascript
今天减肥吗1 小时前
前端面试题
开发语言·前端·javascript
西门吹-禅1 小时前
【sap fiori cds up error】
java·服务器·sap cap cds
码码哈哈0.02 小时前
LangChain 快速入门(从0到可用)
开发语言·python·langchain