步骤:
1. 导入需要使用的包
2. 定位正确的 URL 地址
3. 发送请求
4. 获取响应
5. 解析响应的内容
6. 将获取的 XPath 语法转换成 bs4 语法
7. 下载图片
python
import os
import urllib.request
from urllib.parse import urljoin

from bs4 import BeautifulSoup
# Scrape the McDonald's China burger menu page and save every product image
# to disk, naming each file after the product's display name.

# Page that lists the products.
url = "https://www.mcdonalds.com.cn/index/Food/menu/burger"
# Directory the downloaded images are written to.
save_dir = "./image/1/"

# Send the request and read the response. The context manager closes the
# connection even if reading or decoding raises.
with urllib.request.urlopen(url=url) as response:
    content = response.read().decode("utf-8")

# Parse the HTML.
soup = BeautifulSoup(content, "lxml")

# XPath equivalents of the CSS selectors used below:
#   names:  //div[@class="col-md-3 col-sm-4 col-xs-6"]//span/text()
#   images: //div[@class="pic"]/img/@src
name_list = soup.select("div[class='col-md-3 col-sm-4 col-xs-6'] span")
img_list = soup.select(".pic>img")

# urlretrieve does not create missing directories, so make sure the target
# directory exists before the first download.
os.makedirs(save_dir, exist_ok=True)

# Pair names and images by position. zip avoids the per-item list.index()
# scan (which returns the first *equal* element and so mislabels duplicate
# tags) and stops cleanly if the two lists differ in length.
for name_tag, img in zip(name_list, img_list):
    src = img.get("src")
    if not src:
        # An <img> without a src attribute has nothing to download.
        continue
    # Resolve relative or protocol-relative addresses against the page URL.
    img_src = urljoin(url, src)
    # The product name becomes the file name: trim surrounding whitespace and
    # replace path separators so it cannot point into another directory.
    name = name_tag.get_text().strip().replace("/", "_").replace("\\", "_")
    urllib.request.urlretrieve(url=img_src, filename=save_dir + name + ".jpg")
    print(f"图片{name}下载了")