我的讲解视频链接:https://www.bilibili.com/video/BV14e411Q7oG/
本文仅供学术用途
先上图
代码
爬虫核心
JAVA
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.gargoylesoftware.htmlunit.*;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.Executor;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.io.IOUtils;
import java.io.*;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Crawler that downloads every part of a bilibili video page and merges each part's
 * separately served video and audio streams into one MP4 file by invoking ffmpeg.
 *
 * <p>The {@code ffmpeg} executable must be resolvable on the PATH of the running process.
 */
class Spider {
    /** Captures the JSON assigned to {@code window.__INITIAL_STATE__} (holds the part count). */
    private static final Pattern INITIAL_STATE =
            Pattern.compile("<script>window.__INITIAL_STATE__=(.*?);\\(function\\(\\)");
    /** Captures the collection title from the {@code og:title} meta tag. */
    private static final Pattern COLLECTION_TITLE =
            Pattern.compile("<meta data-vue-meta=\"true\" property=\"og:title\" content=\"(.*?)_哔哩哔哩_bilibili\">");
    /** Captures the JSON assigned to {@code window.__playinfo__} (holds the stream URLs). */
    private static final Pattern PLAY_INFO =
            Pattern.compile("<script>window.__playinfo__=(.*?)</script>");
    /** Captures the title of a single part from the {@code <title>} tag. */
    private static final Pattern PART_TITLE =
            Pattern.compile("<title data-vue-meta=\"true\">(.*?)_哔哩哔哩_bilibili</title>");
    /**
     * Characters stripped from titles before they are used as file or directory names:
     * the original set (/ &amp; * _ , 《 》 + and whitespace) plus the remaining characters
     * that Windows rejects in file names (\ : ? " &lt; &gt; |).
     */
    private static final Pattern UNSAFE_NAME_CHARS =
            Pattern.compile("[\\\\/:*?\"<>|&_,《》\\s+]");

    /**
     * Downloads all parts of the video at {@code url} into a sub-directory of {@code addr}
     * named after the (sanitised) video title. Each part ends up as
     * {@code <index>_<part title>.mp4}.
     *
     * @param url  video page URL; {@code "?p=<index>"} is appended to address each part
     * @param addr directory under which the per-video directory is created
     * @throws IOException if a page or stream cannot be fetched, the expected markup or JSON
     *                     is missing from a page, the target directory cannot be created,
     *                     or ffmpeg fails
     */
    public void catchvideo(String url, String addr) throws IOException {
        // try-with-resources: the client owns connections and must be released even on failure.
        try (WebClient webClient = createClient()) {
            String landingHtml = fetchHtml(webClient, url);

            // Number of parts in this video.
            JSONObject initialState =
                    JSON.parseObject(firstGroup(INITIAL_STATE, landingHtml, "window.__INITIAL_STATE__"));
            JSONObject videoData = initialState == null ? null : initialState.getJSONObject("videoData");
            if (videoData == null) {
                throw new IOException("videoData missing from page state of " + url);
            }
            int videonum = videoData.getIntValue("videos");

            // Target directory named after the sanitised collection title.
            String content = sanitize(firstGroup(COLLECTION_TITLE, landingHtml, "og:title meta tag"));
            File directory = new File(addr, content);
            if (!directory.isDirectory() && !directory.mkdirs()) {
                throw new IOException("Cannot create directory " + directory);
            }

            for (int i = 1; i <= videonum; i++) {
                downloadPart(webClient, url, i, directory);
            }
        }
    }

    /** Builds the HTTP client with scripting and CSS disabled and browser-like headers. */
    private static WebClient createClient() {
        WebClient webClient = new WebClient();
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(true);
        webClient.getOptions().setThrowExceptionOnScriptError(true);
        webClient.addRequestHeader("Referer", "https://www.bilibili.com/index.html");
        webClient.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.40");
        return webClient;
    }

    /** Fetches one part's page, downloads its video and audio streams and merges them. */
    private void downloadPart(WebClient webClient, String url, int index, File directory) throws IOException {
        String html = fetchHtml(webClient, url + "?p=" + index);

        // Parse the play info once and read both stream URLs from it.
        JSONObject playInfo = JSON.parseObject(firstGroup(PLAY_INFO, html, "window.__playinfo__"));
        JSONObject data = playInfo == null ? null : playInfo.getJSONObject("data");
        JSONObject dash = data == null ? null : data.getJSONObject("dash");
        if (dash == null) {
            throw new IOException("No dash stream info for part " + index + " of " + url);
        }
        String videolink = dash.getJSONArray("video").getJSONObject(0).getString("baseUrl");
        String audiolink = dash.getJSONArray("audio").getJSONObject(0).getString("baseUrl");

        String videoname = sanitize(firstGroup(PART_TITLE, html, "title tag"));
        System.out.println(index + "_________________________" + videoname);

        File videofile = new File(directory, "tmp_" + videoname + ".mp4");
        File audiofile = new File(directory, "tmp_" + videoname + ".mp3");
        File merged = new File(directory, index + "_" + videoname + ".mp4");
        try {
            download(webClient, videolink, videofile);
            download(webClient, audiolink, audiofile);
            merge(videofile, audiofile, merged);
        } finally {
            // Best-effort cleanup; runs even when a download or ffmpeg fails.
            audiofile.delete();
            videofile.delete();
        }
    }

    /** Performs a GET request and returns the response body as text. */
    private static String fetchHtml(WebClient webClient, String address) throws IOException {
        Page page = webClient.getPage(new WebRequest(new URL(address), HttpMethod.GET));
        return page.getWebResponse().getContentAsString();
    }

    /** Streams the resource at {@code link} into {@code target}, closing both ends on any outcome. */
    private static void download(WebClient webClient, String link, File target) throws IOException {
        Page page = webClient.getPage(new WebRequest(new URL(link), HttpMethod.GET));
        try (InputStream in = page.getWebResponse().getContentAsStream();
             OutputStream out = new FileOutputStream(target)) {
            IOUtils.copy(in, out);
        }
    }

    /** Runs ffmpeg to mux {@code video} and {@code audio} into {@code output}. */
    private static void merge(File video, File audio, File output) throws IOException {
        // Each argument is added separately (no quoting/re-parsing) so that paths
        // containing spaces are passed to ffmpeg intact.
        CommandLine commandLine = new CommandLine("ffmpeg");
        commandLine.addArgument("-i");
        commandLine.addArgument(video.getPath(), false);
        commandLine.addArgument("-i");
        commandLine.addArgument(audio.getPath(), false);
        commandLine.addArgument("-c:v");
        commandLine.addArgument("copy");
        commandLine.addArgument("-c:a");
        commandLine.addArgument("aac");
        commandLine.addArgument("-strict");
        commandLine.addArgument("experimental");
        commandLine.addArgument(output.getPath(), false);

        Executor executor = new DefaultExecutor();
        // Forward ffmpeg's stdout/stderr to the console.
        executor.setStreamHandler(new PumpStreamHandler(System.out, System.err));
        executor.execute(commandLine);
    }

    /**
     * Returns capture group 1 of the first match of {@code pattern} in {@code text}.
     *
     * @throws IOException if the pattern does not match, naming {@code what} was expected
     */
    private static String firstGroup(Pattern pattern, String text, String what) throws IOException {
        Matcher matcher = pattern.matcher(text);
        if (!matcher.find()) {
            throw new IOException("Could not find " + what + " in the page source");
        }
        return matcher.group(1);
    }

    /** Removes characters that are unsafe in file and directory names. */
    private static String sanitize(String title) {
        return UNSAFE_NAME_CHARS.matcher(title).replaceAll("");
    }
}
可视化代码
JAVA
import javax.swing.*;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;
/**
 * Minimal Swing front end for {@code Spider}: one field for the video page URL, one for
 * the target directory (with a folder chooser), and a button that starts the download.
 */
public class SwingDemo {
    /** Entry point; builds the UI on the event dispatch thread as Swing requires. */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(SwingDemo::createAndShowGui);
    }

    /** Creates the frame, wires up the listeners and shows the window. */
    private static void createAndShowGui() {
        JFrame jFrame = new JFrame("Swing frame");
        // Closing the window exits the program.
        jFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        JPanel panel = new JPanel();
        jFrame.setContentPane(panel);
        panel.setLayout(new FlowLayout());

        JLabel jLabel = new JLabel("下载地址");
        JTextField jTextField = new JTextField(20);
        jTextField.setToolTipText("下载地址");
        JButton download = new JButton("下载");
        panel.add(jLabel);
        panel.add(jTextField);
        panel.add(download);

        JLabel jLabel1 = new JLabel("文件保存位置");
        JTextField jTextField1 = new JTextField(20);
        jTextField1.setText("D:\\videos\\");
        jTextField1.setToolTipText("文件保存位置");
        JButton fileaddr = new JButton("选择文件夹");
        panel.add(jLabel1);
        panel.add(jTextField1);
        panel.add(fileaddr);

        fileaddr.addActionListener(e -> {
            JFileChooser fileChooser = new JFileChooser();
            fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
            int returnValue = fileChooser.showOpenDialog(null);
            if (returnValue == JFileChooser.APPROVE_OPTION) {
                File selectedFile = fileChooser.getSelectedFile();
                jTextField1.setText(selectedFile.getAbsolutePath());
            }
        });

        download.addActionListener(e -> {
            String url = normalizeUrl(jTextField.getText());
            String fileAddr = jTextField1.getText();
            System.out.println(url);
            System.out.println(fileAddr);

            // The download (network I/O plus ffmpeg) can take minutes, so it runs on a
            // worker thread; running it in the listener would freeze the whole UI.
            download.setEnabled(false);
            Thread worker = new Thread(() -> {
                try {
                    Spider spider = new Spider();
                    spider.catchvideo(url, fileAddr);
                } catch (IOException | RuntimeException failure) {
                    failure.printStackTrace();
                    // Swing components may only be touched from the event dispatch thread.
                    SwingUtilities.invokeLater(() -> JOptionPane.showMessageDialog(
                            jFrame, "下载失败: " + failure, "错误", JOptionPane.ERROR_MESSAGE));
                } finally {
                    SwingUtilities.invokeLater(() -> download.setEnabled(true));
                }
            }, "video-download");
            worker.start();
        });

        // Size the frame to its contents first, then centre it: centring before pack()
        // would position a zero-sized window.
        jFrame.pack();
        jFrame.setLocationRelativeTo(null);
        jFrame.setVisible(true);
    }

    /**
     * Normalises the URL typed by the user: trims whitespace, drops any query string or
     * fragment (the crawler appends its own {@code ?p=N}), and ensures exactly one
     * trailing slash.
     */
    private static String normalizeUrl(String raw) {
        String url = raw.trim();
        int query = url.indexOf('?');
        int fragment = url.indexOf('#');
        int cut = query < 0 ? fragment : (fragment < 0 ? query : Math.min(query, fragment));
        if (cut >= 0) {
            url = url.substring(0, cut);
        }
        while (url.endsWith("/")) {
            url = url.substring(0, url.length() - 1);
        }
        return url + "/";
    }
}