版本环境
- win11
- python 3.12.4
目标:爬取 https://gitee.com/explore 页面中的项目列表内容,并写入 txt 文本文件
效果
开始
1.安装依赖
```bash
pip install requests beautifulsoup4
```
2.编写代码,如下,详见注释
python
import requests
from bs4 import BeautifulSoup
def get_url_nfo(web_url):
    """Fetch a page and save the text of its project-path elements to a file.

    Requests ``web_url``. On HTTP 200 the HTML is parsed, the text of every
    element with class ``project-namespace-path`` is collected and printed,
    and the entries are passed, one per line, to ``write_content_into_txt``.
    The page's ``<title>`` tag is printed afterwards. For any other status
    code the code is printed and nothing is written.

    Args:
        web_url: URL of the page to scrape.

    Returns:
        None.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(web_url, timeout=10)
    if response.status_code != 200:
        print('请求失败,状态码:', response.status_code)
        return

    # Set the response encoding explicitly to avoid garbled text.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, "html.parser")

    # Find every element whose class is project-namespace-path.
    class_items = soup.find_all(class_='project-namespace-path')
    # Extract the stripped text content of each element.
    nav_item_texts = [item.get_text(strip=True) for item in class_items]
    print('nav_item_texts', nav_item_texts)

    # One entry per line. Joining the items (rather than replacing commas in
    # the list's repr) avoids stray brackets/quotes and keeps entries that
    # themselves contain a comma intact.
    write_str = '\n'.join(nav_item_texts)
    print('已获取内容,处理格式-----------')
    print('write_str', write_str)
    write_content_into_txt(write_str)

    title = soup.title
    print(f'页面标题: {title}')
def write_content_into_txt(content):
    """Write ``content`` to ``python爬取gitee.txt``, replacing earlier output.

    The file name is relative, so the file is created in the current working
    directory; it is written as UTF-8. Failures are not raised: any exception
    is caught and reported on stdout.

    Args:
        content: Text to write to the file.

    Returns:
        None.
    """
    target = 'python爬取gitee.txt'
    try:
        # Mode 'w' truncates, so each run overwrites the previous result.
        with open(target, mode='w', encoding='utf-8') as out:
            out.write(content)
        print('内容已写入文件')
    except Exception as e:
        # Best-effort: report the problem instead of propagating it.
        print(f'写入文件时发生错误: {e}')
# URL of the page to request.
url = "https://gitee.com/explore"
# Runs when the module is executed or imported: fetches the page and writes
# the extracted text to the output file.
get_url_nfo(url)