第三方库安装指令:
python
pip install requests -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install BeautifulSoup4 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install lxml -i https://pypi.tuna.tsinghua.edu.cn/simple
运行代码:
python
#这个代码并不完整,有很大的问题,但目前不知道怎么改,就先这样吧!
import requests
from bs4 import BeautifulSoup
# Request headers sent with every page fetch; only a desktop-Chrome
# User-Agent string is set.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
}

# Ask for the first and last page number, in that order. Each answer is
# converted with int() as soon as it is read, so a non-numeric first answer
# raises ValueError before the second prompt is shown.
begin, end = (
    int(input(prompt))
    for prompt in ("请输入开始页码:", "请输入结束页码:")
)
def save(titles):
    """Write the string form of *titles* to ``1.html`` in the working directory.

    Args:
        titles: Object to persist. It is converted with ``str()`` before
            being written, so any value with a string representation works.

    The file is opened in ``'w'`` mode with UTF-8 encoding, so an existing
    ``1.html`` is overwritten on every call.
    """
    with open("1.html", 'w', encoding='utf-8') as file:
        file.write(str(titles))
# Accumulators covering every fetched page: headline strings, and the <p>
# elements themselves (the elements are stored, not their text).
title_ls = []
content_ls = []
# Headline text -> paired <p> element; a repeated headline keeps the last one.
titles = {}

for page in range(begin, end + 1):
    url = f"https://www.autohome.com.cn/news/{page}/#liststart"
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        rsp = requests.get(url, headers=header, timeout=10)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        rsp.raise_for_status()
    except requests.RequestException as exc:
        # Skip this page so that pages already collected are still saved.
        print(f"第{page}页请求失败,已跳过:{exc}")
        continue

    # NOTE: features='lxml' requires the third-party lxml package.
    soup = BeautifulSoup(rsp.text, features='lxml')

    # The first <h3> and the first three <p> elements are dropped; presumably
    # they are page chrome rather than news entries -- verify against the site.
    page_titles = [h3.text for h3 in soup.find_all("h3")[1:]]
    page_contents = soup.find_all("p")[3:]

    title_ls.extend(page_titles)
    content_ls.extend(page_contents)

    # Pair headlines with paragraphs page by page, so that a count mismatch
    # on one page cannot shift the pairing on every later page.
    titles.update(zip(page_titles, page_contents))

save(titles)
print("爬取完毕")