Java爬取哔哩哔哩视频(可视化)

链接:我的讲解视频 https://www.bilibili.com/video/BV14e411Q7oG/

本文仅供学术用途

先上图

代码

爬虫核心

JAVA 复制代码
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.gargoylesoftware.htmlunit.*;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.Executor;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.io.IOUtils;

import java.io.*;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads every part of a bilibili video page and merges each part's
 * separate video and audio streams into one MP4 file by invoking
 * {@code ffmpeg}.
 *
 * <p>The page HTML is scraped with regular expressions, so any change in the
 * site's markup surfaces as an {@link IOException} naming the missing piece.
 */
class Spider {
    /** Captures the JSON assigned to {@code window.__INITIAL_STATE__}. */
    private static final Pattern INITIAL_STATE_PATTERN =
            Pattern.compile("<script>window.__INITIAL_STATE__=(.*?);\\(function\\(\\)");
    /** Captures the collection title from the {@code og:title} meta tag. */
    private static final Pattern DIRECTORY_TITLE_PATTERN =
            Pattern.compile("<meta data-vue-meta=\"true\" property=\"og:title\" content=\"(.*?)_哔哩哔哩_bilibili\">");
    /** Captures the JSON assigned to {@code window.__playinfo__}. */
    private static final Pattern PLAY_INFO_PATTERN =
            Pattern.compile("<script>window.__playinfo__=(.*?)</script>");
    /** Captures the title of a single part from the {@code <title>} tag. */
    private static final Pattern VIDEO_TITLE_PATTERN =
            Pattern.compile("<title data-vue-meta=\"true\">(.*?)_哔哩哔哩_bilibili</title>");
    /**
     * Characters stripped from titles before they are used as file names:
     * the original set ({@code / & * _ , 《 》 +} and whitespace) plus the
     * remaining characters Windows rejects in file names.
     */
    private static final Pattern UNSAFE_NAME_CHARS =
            Pattern.compile("[/&*_,《》\\s+\\\\:?\"<>|]");

    /**
     * Downloads all parts of the video at {@code url} into a sub-directory of
     * {@code addr} named after the video's title.
     *
     * @param url  video page URL; {@code "?p=N"} is appended to reach part N
     * @param addr directory under which the per-video directory is created
     * @throws IOException if a page cannot be fetched or parsed, the target
     *                     directory cannot be created, a download fails, or
     *                     ffmpeg cannot be run or exits with an error
     */
    public void catchvideo(String url, String addr) throws IOException {
        // WebClient is AutoCloseable; closing it releases its windows and connections.
        try (WebClient webClient = new WebClient()) {
            webClient.getOptions().setJavaScriptEnabled(false);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(true);
            webClient.getOptions().setThrowExceptionOnScriptError(true);
            webClient.addRequestHeader("Referer", "https://www.bilibili.com/index.html");
            webClient.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.40");

            String landingPage = fetchText(webClient, url);

            // The number of parts lives in the page's embedded initial state.
            JSONObject state = JSON.parseObject(
                    firstGroup(INITIAL_STATE_PATTERN, landingPage, "window.__INITIAL_STATE__", url));
            JSONObject videoData = state == null ? null : state.getJSONObject("videoData");
            if (videoData == null) {
                throw new IOException("No videoData in the initial state of " + url);
            }
            int videonum = videoData.getIntValue("videos");

            // The directory is named after the sanitized collection title.
            String content = sanitize(firstGroup(DIRECTORY_TITLE_PATTERN, landingPage, "og:title meta tag", url));
            File directory = new File(addr, content);
            if (!directory.isDirectory() && !directory.mkdirs()) {
                throw new IOException("Could not create directory " + directory);
            }

            for (int i = 1; i <= videonum; i++) {
                downloadPart(webClient, url, directory, i);
            }
        }
    }

    /** Fetches part {@code index}, downloads its two streams and merges them. */
    private static void downloadPart(WebClient webClient, String url, File directory, int index)
            throws IOException {
        String partUrl = url + "?p=" + index;
        String html = fetchText(webClient, partUrl);

        // Parse the play info once and read both stream URLs from it.
        JSONObject playInfo = JSON.parseObject(
                firstGroup(PLAY_INFO_PATTERN, html, "window.__playinfo__", partUrl));
        JSONObject data = playInfo == null ? null : playInfo.getJSONObject("data");
        JSONObject dash = data == null ? null : data.getJSONObject("dash");
        if (dash == null) {
            throw new IOException("No data.dash section in the play info of " + partUrl);
        }
        String videolink = firstBaseUrl(dash, "video", partUrl);
        String audiolink = firstBaseUrl(dash, "audio", partUrl);

        String videoname = sanitize(firstGroup(VIDEO_TITLE_PATTERN, html, "<title> tag", partUrl));
        System.out.println(index + "_________________________" + videoname);

        File videofile = new File(directory, "tmp_" + videoname + ".mp4");
        File audiofile = new File(directory, "tmp_" + videoname + ".mp3");
        File merged = new File(directory, index + "_" + videoname + ".mp4");
        try {
            download(webClient, videolink, videofile);
            download(webClient, audiolink, audiofile);
            merge(videofile, audiofile, merged);
        } finally {
            // Remove the intermediate files even when a download or ffmpeg fails.
            audiofile.delete();
            videofile.delete();
        }
    }

    /** Issues a GET request and returns the response body as text. */
    private static String fetchText(WebClient webClient, String pageUrl) throws IOException {
        Page page = webClient.getPage(new WebRequest(new URL(pageUrl), HttpMethod.GET));
        return page.getWebResponse().getContentAsString();
    }

    /** Returns capture group 1 of the first match, or fails with a message naming what was sought. */
    private static String firstGroup(Pattern pattern, String html, String what, String pageUrl)
            throws IOException {
        Matcher matcher = pattern.matcher(html);
        if (!matcher.find()) {
            throw new IOException("Could not find " + what + " in " + pageUrl);
        }
        return matcher.group(1);
    }

    /** Removes characters that must not appear in a file or directory name. */
    private static String sanitize(String title) {
        return UNSAFE_NAME_CHARS.matcher(title).replaceAll("");
    }

    /** Reads {@code baseUrl} of the first entry in the {@code kind} array of the dash section. */
    private static String firstBaseUrl(JSONObject dash, String kind, String pageUrl) throws IOException {
        if (dash.getJSONArray(kind) == null || dash.getJSONArray(kind).isEmpty()) {
            throw new IOException("No " + kind + " stream listed for " + pageUrl);
        }
        String baseUrl = dash.getJSONArray(kind).getJSONObject(0).getString("baseUrl");
        if (baseUrl == null) {
            throw new IOException("The first " + kind + " stream of " + pageUrl + " has no baseUrl");
        }
        return baseUrl;
    }

    /** Downloads {@code link} into {@code target}, closing both streams on every path. */
    private static void download(WebClient webClient, String link, File target) throws IOException {
        Page page = webClient.getPage(new WebRequest(new URL(link), HttpMethod.GET));
        try (InputStream inputStream = page.getWebResponse().getContentAsStream();
             OutputStream outputStream = new FileOutputStream(target)) {
            IOUtils.copy(inputStream, outputStream);
        }
    }

    /**
     * Runs ffmpeg to combine the video and audio files into {@code output}.
     * Paths are added as individual, unquoted arguments so that a directory
     * containing spaces is not split into several arguments.
     */
    private static void merge(File video, File audio, File output) throws IOException {
        CommandLine commandLine = new CommandLine("ffmpeg");
        commandLine.addArgument("-i");
        commandLine.addArgument(video.getPath(), false);
        commandLine.addArgument("-i");
        commandLine.addArgument(audio.getPath(), false);
        commandLine.addArguments(new String[] {"-c:v", "copy", "-c:a", "aac", "-strict", "experimental"});
        commandLine.addArgument(output.getPath(), false);

        Executor executor = new DefaultExecutor();
        // Forward ffmpeg's standard output and error to the console.
        executor.setStreamHandler(new PumpStreamHandler(System.out, System.err));
        executor.execute(commandLine);
    }
}

可视化代码

JAVA 复制代码
import javax.swing.*;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;

/**
 * Small Swing front end for {@code Spider}: one row for the video URL with a
 * download button, one row for the save directory with a folder chooser.
 */
public class SwingDemo {
    /** Builds the window on the event dispatch thread, as Swing requires. */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(SwingDemo::createAndShowUi);
    }

    /** Creates all components, wires the two buttons and shows the frame. */
    private static void createAndShowUi() {
        JFrame jFrame = new JFrame("Swing frame");
        // Closing the window exits the program.
        jFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        JPanel panel = new JPanel();
        jFrame.setContentPane(panel);
        panel.setLayout(new FlowLayout());

        JLabel jLabel = new JLabel("下载地址");
        JTextField jTextField = new JTextField(20);
        jTextField.setToolTipText("下载地址");
        JButton download = new JButton("下载");

        panel.add(jLabel);
        panel.add(jTextField);
        panel.add(download);

        JLabel jLabel1 = new JLabel("文件保存位置");
        JTextField jTextField1 = new JTextField(20);
        jTextField1.setText("D:\\videos\\");
        jTextField1.setToolTipText("文件保存位置");
        JButton fileaddr = new JButton("选择文件夹");

        panel.add(jLabel1);
        panel.add(jTextField1);
        panel.add(fileaddr);

        fileaddr.addActionListener(e -> {
            JFileChooser fileChooser = new JFileChooser();
            fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
            int returnValue = fileChooser.showOpenDialog(jFrame);
            if (returnValue == JFileChooser.APPROVE_OPTION) {
                File selectedFile = fileChooser.getSelectedFile();
                jTextField1.setText(selectedFile.getAbsolutePath());
            }
        });

        download.addActionListener(e -> {
            // Read the fields here, on the event dispatch thread, before handing off.
            String url = normalizeUrl(jTextField.getText());
            String fileAddr = jTextField1.getText();
            System.out.println(url);
            System.out.println(fileAddr);

            // The download blocks for a long time, so it runs on its own thread
            // and the button stays disabled until it finishes.
            download.setEnabled(false);
            Thread worker = new Thread(() -> {
                try {
                    Spider spider = new Spider();
                    spider.catchvideo(url, fileAddr);
                } catch (IOException | RuntimeException failure) {
                    failure.printStackTrace();
                    SwingUtilities.invokeLater(() -> JOptionPane.showMessageDialog(
                            jFrame, "下载失败: " + failure.getMessage(), "下载", JOptionPane.ERROR_MESSAGE));
                } finally {
                    SwingUtilities.invokeLater(() -> download.setEnabled(true));
                }
            }, "bilibili-download");
            worker.start();
        });

        // Size the frame to its contents first; centring needs the final size.
        jFrame.pack();
        jFrame.setLocationRelativeTo(null);
        jFrame.setVisible(true);
    }

    /**
     * Turns the text typed by the user into the form passed to the spider:
     * surrounding whitespace, any query string or fragment and trailing
     * slashes are removed, then exactly one {@code "/"} is appended.
     */
    private static String normalizeUrl(String input) {
        String url = input.trim();
        int cut = url.indexOf('?');
        int fragment = url.indexOf('#');
        if (fragment >= 0 && (cut < 0 || fragment < cut)) {
            cut = fragment;
        }
        if (cut >= 0) {
            url = url.substring(0, cut);
        }
        while (url.endsWith("/")) {
            url = url.substring(0, url.length() - 1);
        }
        return url + "/";
    }
}
相关推荐
考虑考虑12 小时前
Jpa使用union all
java·spring boot·后端
用户37215742613512 小时前
Java 实现 Excel 与 TXT 文本高效互转
java
浮游本尊13 小时前
Java学习第22天 - 云原生与容器化
java
渣哥15 小时前
原来 Java 里线程安全集合有这么多种
java
间彧15 小时前
Spring Boot集成Spring Security完整指南
java
间彧15 小时前
Spring Secutiy基本原理及工作流程
java
Java水解16 小时前
JAVA经典面试题附答案(持续更新版)
java·后端·面试
洛小豆18 小时前
在Java中,Integer.parseInt和Integer.valueOf有什么区别
java·后端·面试
前端小张同学19 小时前
服务器上如何搭建jenkins 服务CI/CD😎😎
java·后端
ytadpole19 小时前
Spring Cloud Gateway:一次不规范 URL 引发的路由转发404问题排查
java·后端