Python 爬虫实战

python 复制代码
import json

import pymysql
from requests import get
class baidu:
    """Download Baidu migration JSONP data and store it in a local MySQL server.

    Instantiating the class runs the whole pipeline: ``__init__`` requests
    three hard-coded ``huiyan.baidu.com/migration`` endpoints (one
    ``historycurve`` and two ``cityrank`` URLs, ``move_in`` and ``move_out``),
    extracts the value of the ``list`` key from every response and writes the
    results into the ``百度地图`` database.
    """

    # Cookies sent with every request.
    # NOTE(review): these look like values captured from one browser session;
    # presumably they expire and must be refreshed -- confirm.
    cookies = {
        'BAIDUID_BFESS': 'E695E9B2AF2F6BFFED9BD684584A8956:FG=1',
        'BIDUPSID': 'E695E9B2AF2F6BFFED9BD684584A8956',
        'PSTM': '1712380467',
        'ZFY': 'chbmO0bdpF7bbm3HXNhicZ5O5VMDmhHCB:Avy72gQwyE:C',
        'BAIDU_WISE_UID': 'wapp_1712447420718_476',
        'RT': '"z=1&dm=baidu.com&si=50f27fa9-37fb-4712-bdc9-55bca99e3155&ss=luor0zji&sl=d&tt=8hp&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=4dhs&ul=4q99&hd=4qa1"',
        'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
        'H_PS_PSSID': '40373_40366_40416_40298_40466_40505_40397_40445_60023_60037_60047_40510',
        'PHPSESSID': 'ph9pu9rp8dgtq0o2c68fs0ek44',
        'ab_sr': '1.0.1_MjRkMTcxMDY3ODZlMTZiOGM0YjVkMDc5YjA0NTY2YjUwYzE5YTczNjc5M2JjZTE5MzdmZDQzY2M5ZGE0MDBiZGFiN2U1YzFlNTIwNGRmYmJkNTliODAzZmZkNWFmNzYxOWY5NDZlNmQxODRkYjE3NWNkOWUxN2U5ZTVjYzk5NTA3NjE5Y2ZiNmE2Mzc2NmNhNjRmOGQzNjYzYThjMWQ5MA==',
    }
    # Request headers sent with every request (the cookies are passed
    # separately through ``cookies`` above).
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://qianxi.baidu.com/',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    def __init__(self):
        """Fetch the three endpoints, parse them and store the results."""
        urls = [
            'https://huiyan.baidu.com/migration/historycurve.jsonp?dt=country&id=0&type=move_in&callback=jsonp_1712742016499_9375183',
            'https://huiyan.baidu.com/migration/cityrank.jsonp?dt=country&id=0&type=move_in&date=20240409&callback=jsonp_1712740241816_6607068',
            'https://huiyan.baidu.com/migration/cityrank.jsonp?dt=country&id=0&type=move_out&date=20240409&callback=jsonp_1712740241816_6607068',
        ]
        responses = [self.发送请求(url) for url in urls]
        self.解析数据(responses)

    def 发送请求(self, url, timeout=10):
        """Send a GET request and return the response body as text.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the script forever.

        Returns:
            The decoded response body.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = get(
            url, cookies=self.cookies, headers=self.headers, timeout=timeout
        )
        # Fail here with a clear HTTP error instead of later in the parser.
        response.raise_for_status()
        return response.text

    @staticmethod
    def _extract_list(text):
        """Return the decoded JSON value that follows the first ``list`` key.

        The value is located textually (first ``list``, then the next ``:``)
        and decoded with the JSON parser, which stops at the end of that value
        and ignores the trailing JSONP wrapper. Nothing is evaluated as Python
        code, so a hostile or malformed response cannot execute anything.

        Raises:
            ValueError: If no ``list`` key is present or its value is not
                valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        """
        marker = text.find('list')
        colon = text.find(':', marker) if marker != -1 else -1
        if colon == -1:
            raise ValueError("response does not contain a 'list' entry")
        value, _ = json.JSONDecoder().raw_decode(text[colon + 1:].lstrip())
        return value

    def 解析数据(self, ls):
        """Parse the three raw responses and pass the results to ``存储数据``.

        Args:
            ls: Sequence of three response texts; index 0 is stored as
                date/value pairs, indexes 1 and 2 as city records.
        """
        s0 = self._extract_list(ls[0])
        s1 = self.解析s2(ls[1])
        s2 = self.解析s2(ls[2])
        self.存储数据(s0, s1, s2)

    def 解析s2(self, ls):
        """Return the ``list`` payload of one response text.

        Args:
            ls: Raw response text (despite the name, a single string).

        Raises:
            ValueError: If the payload cannot be located or decoded.
        """
        return self._extract_list(ls)

    def 存储数据(self, *ls):
        """Write the parsed data into the ``百度地图`` MySQL database.

        Args:
            *ls: Three positional values. ``ls[0]`` is a mapping of date to
                value and is upserted into ``your_table_name``; ``ls[1]`` and
                ``ls[2]`` are lists of dicts with ``city_name``,
                ``province_name`` and ``value`` keys and are appended to
                ``city_data``.
        """
        # NOTE(review): host and credentials are hard-coded; consider reading
        # them from configuration.
        db = pymysql.connect(
            host='localhost',
            user='root',
            password='root',
            port=3306,
            # The database name and the city names are Chinese text, so do not
            # rely on the driver/server default character set.
            charset='utf8mb4',
        )
        try:
            with db.cursor() as cursor:
                # IF NOT EXISTS lets the script run more than once.
                cursor.execute('create database if not exists 百度地图;')
                cursor.execute('use 百度地图;')
                cursor.execute('''
            CREATE TABLE IF NOT EXISTS your_table_name (
                date DATE PRIMARY KEY,
                value FLOAT
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
            ''')
                # Values are passed as parameters so the driver escapes them;
                # scraped text is never spliced into the SQL statement.
                upsert_sql = (
                    "INSERT INTO your_table_name (date, value) VALUES (%s, %s) "
                    "ON DUPLICATE KEY UPDATE value=%s"
                )
                for date, value in ls[0].items():
                    cursor.execute(upsert_sql, (date, value, value))
                db.commit()

                # Create the city table.
                cursor.execute('''
        CREATE TABLE IF NOT EXISTS city_data (
            id INT AUTO_INCREMENT PRIMARY KEY,
            city_name VARCHAR(255) NOT NULL,
            province_name VARCHAR(255) NOT NULL,
            value FLOAT NOT NULL
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
        ''')
                rows = [
                    (item['city_name'], item['province_name'], item['value'])
                    for item in ls[1] + ls[2]
                ]
                cursor.executemany(
                    "INSERT INTO city_data (city_name, province_name, value) "
                    "VALUES (%s, %s, %s)",
                    rows,
                )
                # Commit the transaction.
                db.commit()
        finally:
            # Release the connection even when a statement fails.
            db.close()
                
        
        
# Script entry point: constructing the object runs the whole
# download / parse / store pipeline from its constructor.
x = baidu()
相关推荐
Caco_D2 小时前
一行代码抓遍全网 20 个热榜!Aneiang.Pa 4.0 发布 — 极简 .NET 爬虫库
爬虫·.net
太岁又沐风5 天前
复现并修掉ART hook框架 Pine 调用原方法时的偶发 SIGSEGV
爬虫
隔窗听雨眠6 天前
大模型加爬虫上篇:技术融合与架构革新
爬虫·架构
Super Scraper6 天前
如何批量抓取 TikTok 数据而不被封锁?完整指南
爬虫·ai·自动化·抖音·tiktok·ai agent
深蓝电商API6 天前
自动化录屏 + 截图:打造爬虫调试的上帝视角
爬虫
tang777896 天前
市场调研自动化采集架构:基于住宅IP轮换的APP数据抓取与反风控方案
爬虫·动态代理ip·爬虫代理ip·爬虫动态ip·住宅代理ip·动态住宅ip
数据知道6 天前
指纹浏览器环境的导入、导出、快照与云端同步机制
爬虫·数据采集·指纹浏览器
星川皆无恙6 天前
大数据k-means聚类算法:基于k-means聚类算法+NLP微博舆情数据爬虫可视化分析推荐系统(新版)
大数据·人工智能·爬虫·算法·机器学习·自然语言处理·kmeans
小二·6 天前
Rust 爬虫与数据处理实战:大规模并发抓取 + 流式处理
开发语言·爬虫·rust