selenium webdriver使用

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import Select

import time

import requests

from urllib.parse import urlparse

import os

from lxml import etree

from urllib.parse import urljoin

def get_pdf(cur_url):

proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',\

'referer':'https://endeavor.dragonforms.com/'}

d=webdriver.Chrome()

d.get(cur_url)

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

time.sleep(10)

frame_id=d.find_element(By.XPATH,'//div[@class="embed center fullWidth" or @class="embed fullWidth"]//iframe').get_attribute('id')

iframe = d.find_element(By.ID, frame_id)

d.switch_to.frame(iframe)

d.find_element(By.XPATH,'//input[@id="id13"]').send_keys('cdg19880415@gmaill.com')

d.find_element(By.XPATH,'//input[@id="id1"]').send_keys('chen')

d.find_element(By.XPATH,'//input[@id="id2"]').send_keys('chen')

d.find_element(By.XPATH,'//input[@id="id10"]').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input[@id="id4"]').send_keys('chen')

except Exception as e:

pass

d.find_element(By.XPATH,'//input[@id="id3"]').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input[@id="id6"]').send_keys('beijing')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input[@id="id9"]').send_keys('101300')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input[@id="id11"]').send_keys('18518076020')

except Exception as e:

pass

#####多选框城市和

select_element = d.find_element(By.ID, "id7")

select = Select(select_element)

#select.select_by_index(2)

select.select_by_visible_text("CHINA")

select_element = d.find_element(By.ID, "id8")

select = Select(select_element)

select.select_by_visible_text("FOREIGN")

try:

select_element = d.find_element(By.ID, "id5082617")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

try:

select_element = d.find_element(By.ID, "id5082616")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

time.sleep(5)

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

d.switch_to.frame(iframe)

d.find_element(By.ID,"custombtn").click()

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2-600);")

d.switch_to.frame(iframe)

url=d.find_element(By.XPATH,'//div[@class="downloadReport-btn"]/a').get_attribute('href')

parsed_url = urlparse(url)

pdf_name = os.path.basename(parsed_url.path)

f=open(pdf_name,'wb+')

f.write(requests.get(url,proxies=proxies,headers=headers).content)

f.close()

if name=="main":

#headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'}

#proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

d=webdriver.Chrome()

d.get("https://www.militaryaerospace.com/white-papers")

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

eles=d.find_elements(By.XPATH,'//div[@class="items-wrapper"]//a[@class="title-wrapper"]')

#html=etree.HTML(requests.get('https://www.militaryaerospace.com/white-papers',headers=headers,proxies=proxies).text)

#url_list=html.xpath('//div[@class="items-wrapper"]//a[@class="title-wrapper"]/@href')

for ele in eles:

for i in range(3):

try:

cur_url=ele.get_attribute('href')

#cur_url=urljoin('https://www.militaryaerospace.com/white-papers',cur_url)

get_pdf(cur_url)

break

except Exception as e:

continue

#cur_url='https://www.militaryaerospace.com/white-papers/whitepaper/55129368/next-level-testing-the-role-of-lvdt-rvdt-resolver-simulation'

#get_pdf(cur_url)

相关推荐
AI+程序员在路上2 小时前
CANopen 协议:介绍、调试命令与应用
linux·c语言·开发语言·网络
爱编码的小八嘎2 小时前
C语言完美演绎4-4
c语言
Book思议-4 小时前
【数据结构实战】线性表的应用
c语言·数据结构·算法·链表
计算机安禾7 小时前
【C语言程序设计】第35篇:文件的打开、关闭与读写操作
c语言·开发语言·c++·vscode·算法·visual studio code·visual studio
shughui8 小时前
Fiddler下载、安装、使用、汉化,详细图文教程(2026附安装包)
前端·测试工具·fiddler
CODE_RabbitV8 小时前
【3min 解决】keil5 编译stm32 出现一堆 core_cm3.c 报错问题
c语言·stm32·嵌入式硬件
weixin_537590458 小时前
《C程序语言设计》练习答案(练习1-3)
c语言·开发语言
爱编码的小八嘎9 小时前
C语言完美演绎4-10
c语言
智者知已应修善业11 小时前
【51单片机用两个定时计数器级联实现定时】2023-04-12
c语言·经验分享·笔记·算法·51单片机
vx-程序开发11 小时前
springboot在线装修管理系统-计算机毕业设计源码56278
java·c语言·spring boot·python·spring·django·php