爬虫实战-房天下(bengbu.zu.fang.com/)数据爬取

详细代码链接:https://flowus.cn/hbzx/3c42674d-8e6f-42e3-a3f6-bc1258034676

import requests

from lxml import etree #xpath解析库

def 源代码(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request is sent with a fixed set of cookies and browser-like headers
    (a Chrome 124 user agent on Windows, with a ``sh.zu.fang.com`` referer).

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The page source as a ``str`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    cookies = {
        'global_cookie': 'xeqnmumh38dvpj96uzseftwdr20lvkwkfb9',
        'otherid': 'b44a1837638234f1a0a15e37877e0685',
        'g_sourcepage': 'zf_fy%5Elb_pc',
        '__utma': '147393320.818863681.1714391725.1714391725.1714391725.1',
        '__utmc': '147393320',
        '__utmz': '147393320.1714391725.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
        '__utmt_t0': '1',
        '__utmt_t1': '1',
        '__utmt_t2': '1',
        'keyWord_recenthousebengbu': '%5b%7b%22name%22%3a%22%e9%be%99%e5%ad%90%e6%b9%96%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a011914%2fs31%2f%22%2c%22sort%22%3a1%7d%5d',
        'city': 'sh',
        'ASP.NET_SessionId': '4fpr5u3w5zqqzitrnwafk3cr',
        'zf_csrfcookie': '1cCMHJcsaY7XgtGVMdiMdsydBeGKPxx7G1pYnsK0yn4vRI361O_aeBQfC7SAKi4gktL0kQ2',
        'unique_cookie': 'U_xeqnmumh38dvpj96uzseftwdr20lvkwkfb9*6',
        '__utmb': '147393320.18.10.1714391725',
    }

    # NOTE(review): the 'cookie' header below carries the same values as the
    # ``cookies`` dict above; one of the two is probably redundant -- confirm
    # before removing either.
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'global_cookie=xeqnmumh38dvpj96uzseftwdr20lvkwkfb9; otherid=b44a1837638234f1a0a15e37877e0685; g_sourcepage=zf_fy%5Elb_pc; __utma=147393320.818863681.1714391725.1714391725.1714391725.1; __utmc=147393320; __utmz=147393320.1714391725.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmt_t0=1; __utmt_t1=1; __utmt_t2=1; keyWord_recenthousebengbu=%5b%7b%22name%22%3a%22%e9%be%99%e5%ad%90%e6%b9%96%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a011914%2fs31%2f%22%2c%22sort%22%3a1%7d%5d; city=sh; ASP.NET_SessionId=4fpr5u3w5zqqzitrnwafk3cr; zf_csrfcookie=1cCMHJcsaY7XgtGVMdiMdsydBeGKPxx7G1pYnsK0yn4vRI361O_aeBQfC7SAKi4gktL0kQ2; unique_cookie=U_xeqnmumh38dvpj96uzseftwdr20lvkwkfb9*6; __utmb=147393320.18.10.1714391725',
        'priority': 'u=0, i',
        'referer': 'https://sh.zu.fang.com/house/i33/',
        'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    }

    # ``.text`` is the decoded page source.
    response = requests.get(
        url, cookies=cookies, headers=headers, timeout=timeout
    ).text
    return response

# Script entry point: fetch listing pages 1-9 and print one line per listing
# with its title, the text of the second <p> in the entry, and the text of
# the <span> in the entry's second <div> (presumably the price -- confirm
# against the live page).
if __name__ == '__main__':
    for fan in range(1, 10):
        # The page number is appended after "i3" (page 3 -> ".../house/i33/").
        # The f-prefix is required; without it the literal text "{fan}" is
        # requested and every iteration hits the same URL.
        url = f'https://sh.zu.fang.com/house/i3{fan}/'
        res = 源代码(url)
        res = etree.HTML(res)  # parse the page source into an element tree

        # Title anchors carry ids with a zero-padded two-digit index, e.g.
        #   //*[@id="rentid_D09_01_02"]/a
        #   //*[@id="rentid_D09_02_02"]/a
        #   //*[@id="rentid_D09_31_02"]/a
        #   //*[@id="rentid_D09_60_02"]/a
        # (these samples omit the trailing /text() that the code appends).
        for i in range(1, 61):
            titles = res.xpath(f'//*[@id="rentid_D09_{i:02d}_02"]/a/text()')
            if not titles:
                # The page has no listing in this slot (short last page or a
                # changed layout); skip it instead of failing with IndexError.
                continue
            title = titles[0]

            # Sample: //*[@id="listBox"]/div[3]/dl[60]/dd/p[2]
            details = res.xpath(
                f'//*[@id="listBox"]/div[3]/dl[{i}]/dd/p[2]/text()'
            )
            p = details[0].strip() if details else ''

            # Sample: //*[@id="listBox"]/div[3]/dl[60]/dd/div[2]/p/span
            prices = res.xpath(
                f'//*[@id="listBox"]/div[3]/dl[{i}]/dd/div[2]/p/span/text()'
            )
            jg = prices[0] if prices else ''

            # Space-separated fields, one listing per line.
            print(title, p, jg)

相关推荐
Amo Xiang 9 小时前
JavaScript逆向与爬虫实战——基础篇(css反爬之动态字体实现原理及绕过)
爬虫·python·js逆向·动态字体
电商API_18007905247 17 小时前
微店常用API:获取商品详情接口|关键字搜索商品接口|获取快递费接口-打通商品运营与用户体验的技术桥梁
大数据·服务器·人工智能·爬虫·数据挖掘
绒绒毛毛雨 1 天前
爬虫前奏--基于macos的ip代理池构建
爬虫·tcp/ip·macos
B站_计算机毕业设计之家 1 天前
基于大数据的短视频数据分析系统 Spark哔哩哔哩视频数据分析可视化系统 Hadoop大数据技术 情感分析 舆情分析 爬虫 推荐系统 协同过滤推荐算法 ✅
大数据·hadoop·爬虫·spark·音视频·短视频·1024程序员节
一晌小贪欢 2 天前
Python爬虫第10课:分布式爬虫架构与Scrapy-Redis
分布式·爬虫·python·网络爬虫·python爬虫·python3
疏狂难除 2 天前
关于spiderdemo第二题的奇思妙想
javascript·爬虫
麦麦大数据 2 天前
D030知识图谱科研文献论文推荐系统vue+django+Neo4j的知识图谱|论文本文相似度推荐|协同过滤
vue.js·爬虫·django·知识图谱·科研·论文文献·相似度推荐
Serendipity_Carl 3 天前
爬虫数据清洗可视化案例之全球灾害数据
爬虫·python·pycharm·数据可视化·数据清洗
B站_计算机毕业设计之家 4 天前
spark实战:python股票数据分析可视化系统 Flask框架 金融数据分析 Echarts可视化 大数据技术 ✅
大数据·爬虫·python·金融·数据分析·spark·股票
深蓝电商API 4 天前
反爬升级:WAF、行为检测、指纹追踪,我们该如何应对?
爬虫·waf·反爬