maven pom
<dependencies>
    <!--HTML解析库(选择器语法类似前端jQuery)-->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.16.1</version>
    </dependency>
    <!--http工具-->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpcore</artifactId>
        <version>4.4.16</version>
    </dependency>
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.14</version>
    </dependency>
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.13.0</version>
    </dependency>
</dependencies>
====================================
遍历网站内容爬取网站网址
package com.xiaocao;
import com.sun.org.apache.bcel.internal.generic.NEW;
import com.sun.org.apache.regexp.internal.RE;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class UrlPool {
    /**
     * Program entry point: starts the link crawl from the site's home page.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Home page address the crawl starts from.
        final String homePage = "https://www.nipic.com/";
        getUrl(homePage);
    }
    /**
     * Crawls the site that {@code baseUrl} belongs to and prints every link that was collected.
     *
     * <p>The scheme-and-host prefix of {@code baseUrl} is extracted first; it is handed to
     * {@code crawlLinks}, which uses it both to resolve relative links and to keep the crawl
     * on the same site.
     *
     * @param baseUrl absolute URL of the page the crawl starts from
     */
    private static void getUrl(String baseUrl) {
        Map<String, Boolean> oldMap = new LinkedHashMap<>();

        // Prefix used to resolve relative paths. The character class excludes '/' so the match
        // stops before the path: "https://www.nipic.com/" yields "https://www.nipic.com".
        // Matching up to the first whitespace instead would swallow the path and the trailing
        // slash, producing "host//path" when a root-relative link is appended.
        String oldLinkHost = "";
        Matcher m = Pattern.compile("(https?://)?[^/\\s]*").matcher(baseUrl);
        if (m.find()) {
            oldLinkHost = m.group();
        }

        // Discovered links are stored without a trailing slash, so store the seed the same way;
        // otherwise the start page is queued a second time under its slash-less spelling.
        String seed = baseUrl;
        if (seed.endsWith("/")) {
            seed = seed.substring(0, seed.length() - 1);
        }
        oldMap.put(seed, false);

        oldMap = crawlLinks(oldLinkHost, oldMap);
        for (String link : oldMap.keySet()) {
            System.out.println("连接:" + link);
        }
    }
    /**
     * Crawls every unvisited URL in {@code oldMap} and keeps going, round by round, until a
     * round discovers no new same-site link.
     *
     * <p>Each page is fetched with a GET request and scanned line by line for
     * {@code <a ... href="...">} tags. A failure on a single page is reported on
     * {@code System.err} and the crawl moves on; the page is still marked as visited so it is
     * not retried.
     *
     * @param oldLinkHost prefix prepended to relative hrefs; only links starting with it are kept
     * @param oldMap URL to visited-flag map holding the pages known so far; updated in place
     * @return {@code oldMap}, with every discovered link added and every entry marked visited
     */
    private static Map<String, Boolean> crawlLinks(String oldLinkHost, Map<String, Boolean> oldMap) {
        // Compiled once per crawl rather than once per fetched page.
        Pattern anchorPattern = Pattern.compile("<a\\b[^>]+\\bhref=\"([^\"]*)\"[^>]*>([\\s\\S]*?)</a>");

        // A loop instead of recursion: the call depth no longer grows with the crawl depth.
        Map<String, Boolean> newMap;
        do {
            newMap = new LinkedHashMap<>();
            for (Map.Entry<String, Boolean> mapping : oldMap.entrySet()) {
                if (mapping.getValue()) {
                    continue;
                }
                String oldLink = mapping.getKey();
                System.out.println(oldLink + "连接有参数:" + oldLink);
                try {
                    collectLinks(oldLink, oldLinkHost, anchorPattern, oldMap, newMap);
                } catch (Exception e) {
                    // Best-effort crawl: one broken page must not stop the run, but the
                    // failure should be visible instead of silently dropped.
                    System.err.println("Failed to crawl " + oldLink + ": " + e);
                } finally {
                    // Updating the value through the entry is not a structural change, so it
                    // is safe while iterating over the map.
                    mapping.setValue(true);
                }
            }
            // New keys are added only after the iteration above has finished.
            oldMap.putAll(newMap);
        } while (!newMap.isEmpty());
        return oldMap;
    }

    /**
     * Fetches one page and records every same-site link on it that is not already known.
     *
     * @param pageUrl URL of the page to fetch
     * @param oldLinkHost prefix used to resolve relative hrefs and to filter foreign links
     * @param anchorPattern pattern whose group 1 captures the href of an anchor tag
     * @param oldMap links known before this round; only read here
     * @param newMap receives the newly discovered links, flagged as unvisited
     * @throws java.io.IOException if the URL is malformed or the request or read fails
     */
    private static void collectLinks(String pageUrl, String oldLinkHost, Pattern anchorPattern,
            Map<String, Boolean> oldMap, Map<String, Boolean> newMap) throws java.io.IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(pageUrl).openConnection();
        try {
            connection.setRequestMethod("GET");
            // The defaults wait forever; bound both phases so one stalled server cannot hang the crawl.
            connection.setConnectTimeout(10_000);
            connection.setReadTimeout(10_000);
            if (connection.getResponseCode() != 200) {
                return;
            }
            // Explicit charset so decoding does not depend on the platform default.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    Matcher matcher = anchorPattern.matcher(line);
                    // "while", not "if": a single line frequently holds several anchors.
                    while (matcher.find()) {
                        String newLink = normalizeLink(oldLinkHost, matcher.group(1));
                        if (newLink.startsWith(oldLinkHost)
                                && !oldMap.containsKey(newLink)
                                && !newMap.containsKey(newLink)) {
                            newMap.put(newLink, false);
                        }
                    }
                }
            }
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Turns an href into the form stored in the link map: relative paths are prefixed with
     * {@code oldLinkHost} and one trailing slash is removed.
     *
     * <p>NOTE(review): any href that does not start with "http" is treated as a relative path,
     * which includes values such as {@code javascript:...} or {@code #anchor}.
     */
    private static String normalizeLink(String oldLinkHost, String href) {
        String link = href;
        if (!link.startsWith("http")) {
            link = link.startsWith("/") ? oldLinkHost + link : oldLinkHost + "/" + link;
        }
        if (link.endsWith("/")) {
            link = link.substring(0, link.length() - 1);
        }
        return link;
    }
}
==============
下载网站内容
package com.xiaocao; import org.apache.commons.io.FileUtils; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import sun.net.www.http.HttpClient; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.HttpCookie; public class ImageCraw { private static String url = "https://xxx"; public static void main(String[] args) { // apacheHttpClient(); try { Document document = Jsoup.connect(url).get(); Elements select = document.select(".newdetail-skin #J_worksImg"); try { Connection.Response src = Jsoup.connect("https:"+select.attr("src")) .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0") .ignoreContentType(true) .execute(); String name = select.attr("alt"); System.out.println(name); ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(src.bodyAsBytes()); FileUtils.copyInputStreamToFile(byteArrayInputStream,new File("F:\\filetest\\"+name+".jpg")); }catch (Exception e){ e.printStackTrace(); } // for (int i = 0; i < select.size(); i++) { // Elements img = select.get(i).select(".newdetail-skin #J_worksImg"); // // try { // Connection.Response src = Jsoup.connect("https:"+img.attr("src")) // .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0") // .ignoreContentType(true) // .execute(); // // String name = img.attr("alt"); // System.out.println(name); // ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(src.bodyAsBytes()); // 
FileUtils.copyInputStreamToFile(byteArrayInputStream,new File("F:\\filetest\\"+name+".jpg")); // }catch (Exception e){ // e.printStackTrace(); // } // // } } catch (IOException e) { e.printStackTrace(); } } private static void apacheHttpClient() { CloseableHttpClient client = HttpClients.createDefault(); HttpGet httpGet = new HttpGet(url); /*伪装浏览器*/ httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0"); try { CloseableHttpResponse execute = client.execute(httpGet); HttpEntity entity = execute.getEntity(); String s = EntityUtils.toString(entity); System.out.println(s); } catch (IOException e) { e.printStackTrace(); } } }