JAVA爬虫系列

准备工作

导入依赖

XML 复制代码
 <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>

yml

YAML 复制代码
logging:
  level:
    root: info
    com.lrm: debug

1.入门程序(获取到静态页面)

java 复制代码
package com.itheima.reggie.utils;


import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * Entry-level crawler example: downloads one static page with Apache HttpClient
 * and prints its HTML to standard output.
 *
 * @Author lpc
 **/
public class CrawlerFirst {

    /** Page fetched by this example. */
    private static final String TARGET_URL = "http://www.itcast.cn";

    /** HTTP status code that marks a successful response. */
    private static final int STATUS_OK = 200;

    /**
     * Sends a GET request to {@link #TARGET_URL} and prints the response body
     * when the server answers with status 200. Any other status prints nothing.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // 1. "Open the browser": create the HttpClient.
        //    try-with-resources guarantees the client (and its connection pool)
        //    is closed even when the request or the body decoding throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {

            // 2. "Type the address": build the GET request.
            HttpGet httpGet = new HttpGet(TARGET_URL);

            // 3. "Press enter": execute the request. The response is also closed
            //    automatically so the underlying connection is released.
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {

                // 4. Parse the response: only read the body on status 200.
                if (response.getStatusLine().getStatusCode() == STATUS_OK) {
                    HttpEntity httpEntity = response.getEntity();
                    // Decode the static page as UTF-8 text.
                    String content = EntityUtils.toString(httpEntity, "utf8");
                    System.out.println(content);
                }
            }
        }
    }
}

2.HttpClient---Get

java 复制代码
package com.itheima.reggie.utils;


import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * HttpClient GET example: fetches one static page and prints the length of its
 * HTML, releasing the response and the client in every outcome.
 *
 * @Author lpc
 * @Date 2024 03 12 00 23
 **/
public class CrawlerFirst {

    /** Page fetched by this example. */
    private static final String TARGET_URL = "http://www.itcast.cn";

    /** HTTP status code that marks a successful response. */
    private static final int STATUS_OK = 200;

    /**
     * Sends a GET request to {@link #TARGET_URL} and prints the character count
     * of the response body when the server answers with status 200.
     *
     * @param args command-line arguments (unused)
     * @throws RuntimeException wrapping the {@link IOException} raised while
     *         executing the request, reading the body, or closing a resource
     */
    public static void main(String[] args) {

        // 2. "Type the address": build the GET request up front; it holds no
        //    resource that needs closing.
        HttpGet httpGet = new HttpGet(TARGET_URL);

        // 1. + 3. "Open the browser" and "press enter".
        // Both resources are declared in try-with-resources, so they are closed
        // in reverse order (response first, then client). If execute() throws,
        // the response was never assigned and no close is attempted on it, so
        // there is no null dereference that could hide the original failure.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {

            // 4. Parse the response: only read the body on status 200.
            if (response.getStatusLine().getStatusCode() == STATUS_OK) {
                HttpEntity httpEntity = response.getEntity();
                // Decode the static page as UTF-8 text.
                String content = EntityUtils.toString(httpEntity, "utf8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            // Keep the original contract: surface I/O failures unchecked,
            // with the cause preserved.
            throw new RuntimeException(e);
        }
    }
}
相关推荐
唐青枫12 小时前
Java JDBC 实战指南:从 Connection 到事务和连接池
java
一个做软件开发的牛马13 小时前
MyBatis-Plus 从零实战:完整搭建可运行 Demo,BaseMapper 零 SQL、Wrapper 条件构造、分页插件与代码生成器详解
java·后端
用户37215742613513 小时前
Java 处理 PDF 图片:提取 PDF 中的图片,并压缩 PDF 图片体积
java
用户37215742613513 小时前
Java 打印 Word 文档:从基础打印到高级设置
java
用户3521802454751 天前
当 Prompt 学会"热更新":Spring Boot × Nacos3 AI 实战
java·spring boot·ai编程
东坡白菜1 天前
破局全栈:一个前端开发的Java入门实战记录(1)
java·全栈
唐青枫1 天前
Java Tomcat 实战指南:从 Servlet 容器到 Spring Boot 部署
java
wsaaaqqq1 天前
roudan:自由选择实体、灵活操作数据、快速写入数据库的 Java 框架
java
太岁又沐风1 天前
复现并修掉ART hook框架 Pine 调用原方法时的偶发 SIGSEGV
爬虫
plainGeekDev2 天前
null 判断 → Kotlin 可空类型
android·java·kotlin