Python 爬虫实战：抓取百度迁徙数据并存入 MySQL

示例代码（Python）如下：
import json

import pymysql
from requests import get
class baidu:
    """Scraper for Baidu Qianxi (百度迁徙) migration data.

    Fetches three JSONP endpoints (nationwide history curve, city move-in
    ranking, city move-out ranking), extracts the JSON payloads and stores
    them in a local MySQL database named 百度地图.
    """

    # Session cookies copied from a logged-in browser; they expire, so
    # refresh them if the requests start failing.
    cookies = {
        'BAIDUID_BFESS': 'E695E9B2AF2F6BFFED9BD684584A8956:FG=1',
        'BIDUPSID': 'E695E9B2AF2F6BFFED9BD684584A8956',
        'PSTM': '1712380467',
        'ZFY': 'chbmO0bdpF7bbm3HXNhicZ5O5VMDmhHCB:Avy72gQwyE:C',
        'BAIDU_WISE_UID': 'wapp_1712447420718_476',
        'RT': '"z=1&dm=baidu.com&si=50f27fa9-37fb-4712-bdc9-55bca99e3155&ss=luor0zji&sl=d&tt=8hp&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=4dhs&ul=4q99&hd=4qa1"',
        'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
        'H_PS_PSSID': '40373_40366_40416_40298_40466_40505_40397_40445_60023_60037_60047_40510',
        'PHPSESSID': 'ph9pu9rp8dgtq0o2c68fs0ek44',
        'ab_sr': '1.0.1_MjRkMTcxMDY3ODZlMTZiOGM0YjVkMDc5YjA0NTY2YjUwYzE5YTczNjc5M2JjZTE5MzdmZDQzY2M5ZGE0MDBiZGFiN2U1YzFlNTIwNGRmYmJkNTliODAzZmZkNWFmNzYxOWY5NDZlNmQxODRkYjE3NWNkOWUxN2U5ZTVjYzk5NTA3NjE5Y2ZiNmE2Mzc2NmNhNjRmOGQzNjYzYThjMWQ5MA==',
    }
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://qianxi.baidu.com/',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    def __init__(self):
        """Fetch, parse and persist all three endpoints."""
        urls = [
            'https://huiyan.baidu.com/migration/historycurve.jsonp?dt=country&id=0&type=move_in&callback=jsonp_1712742016499_9375183',
            'https://huiyan.baidu.com/migration/cityrank.jsonp?dt=country&id=0&type=move_in&date=20240409&callback=jsonp_1712740241816_6607068',
            'https://huiyan.baidu.com/migration/cityrank.jsonp?dt=country&id=0&type=move_out&date=20240409&callback=jsonp_1712740241816_6607068',
        ]
        responses = [self.发送请求(url) for url in urls]
        self.解析数据(responses)

    def 发送请求(self, url):
        """GET *url* with the stored cookies/headers and return the body text."""
        return get(url, cookies=self.cookies, headers=self.headers).text

    def 解析数据(self, ls):
        """Extract the JSON payloads from the three raw JSONP responses.

        ls[0] is the history-curve response (contains a {date: value} dict),
        ls[1]/ls[2] are the move-in / move-out city rankings (lists of dicts).
        """
        # Drop everything up to and including the '"list"' key, strip the
        # trailing jsonp wrapper characters, leaving a bare JSON object.
        raw = ''.join(ls[0].split('list')[1:])
        # json.loads instead of eval(): never execute text fetched from the
        # network as Python code.
        s0 = json.loads(':'.join(raw.split(':')[1:]).rstrip(')')[0:-2])
        s1 = self.解析s2(ls[1])
        s2 = self.解析s2(ls[2])
        self.存储数据(s0, s1, s2)

    def 解析s2(self, ls):
        """Extract the city-ranking list (list of dicts) from one cityrank response."""
        payload = ':'.join(ls.split('list')[1].split(':')[1:]).rstrip(')').rstrip('}')
        # json.loads instead of eval(): the payload is plain JSON and must
        # not be interpreted as Python code.
        return json.loads(payload)

    def 存储数据(self, *ls):
        """Persist the parsed data into MySQL (database 百度地图).

        ls[0]: {date: value} history curve -> table your_table_name
        ls[1], ls[2]: city ranking lists    -> table city_data
        """
        db = pymysql.connect(host='localhost', user='root', password='root', port=3306)
        try:
            cursor = db.cursor()
            # IF NOT EXISTS: a second run must not crash on the existing DB.
            cursor.execute('create database if not exists 百度地图;')
            cursor.execute('use 百度地图;')
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS your_table_name (
                    date DATE PRIMARY KEY,
                    value FLOAT
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
                ''')
            # Parameterized queries: never interpolate scraped values into SQL.
            history_sql = ('INSERT INTO your_table_name (date, value) VALUES (%s, %s) '
                           'ON DUPLICATE KEY UPDATE value=%s;')
            for date, value in ls[0].items():
                cursor.execute(history_sql, (date, value, value))
            db.commit()

            cursor.execute('''
                CREATE TABLE IF NOT EXISTS city_data (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    city_name VARCHAR(255) NOT NULL,
                    province_name VARCHAR(255) NOT NULL,
                    value FLOAT NOT NULL
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
                ''')
            city_sql = ('INSERT INTO city_data (city_name, province_name, value) '
                        'VALUES (%s, %s, %s)')
            for item in ls[1] + ls[2]:
                cursor.execute(city_sql,
                               (item['city_name'], item['province_name'], item['value']))
            # Commit once all rows are staged.
            db.commit()
        finally:
            db.close()
                
        
        
if __name__ == "__main__":
    # Run the scraper only when executed as a script, not on import.
    x = baidu()
相关推荐
Amo Xiang11 小时前
2024最新版JavaScript逆向爬虫教程-------基础篇之Chrome开发者工具学习
javascript·chrome·爬虫·js逆向
小爬虫程序猿16 小时前
Python爬虫精准获取京东(JD)商品SKU信息
开发语言·爬虫·python
好看资源平台17 小时前
Python网络爬虫与数据采集实战——网络协议与HTTP
爬虫·python·网络协议
闲人编程18 小时前
爬虫反爬机制和解决方案
开发语言·c++·爬虫·python·验证码
chusheng18401 天前
Python 如何通过 cron 或 schedule 实现爬虫的自动定时运行
java·爬虫·python
易辰君1 天前
【Python爬虫实战】轻量级爬虫利器:DrissionPage之SessionPage与WebPage模块详解
开发语言·爬虫·python
亿牛云爬虫专家1 天前
如何在Puppeteer中实现表单自动填写与提交:问卷调查
javascript·爬虫·爬虫代理·puppeteer·问卷调查·代理ip·表单
鱼灯几许1 天前
Python爬虫
爬虫·python·numpy
B站计算机毕业设计超人1 天前
计算机毕业设计Python+大模型斗鱼直播可视化 直播预测 直播爬虫 直播数据分析 直播大数据 大数据毕业设计 机器学习 深度学习
爬虫·python·深度学习·机器学习·数据分析·课程设计·数据可视化
J不A秃V头A1 天前
Python爬虫:获取国家货币编码、货币名称
开发语言·爬虫·python