Selenium WebDriver 使用

import logging
import os
import time
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

_log = logging.getLogger(__name__)

# Proxy and headers used only for the final PDF download via ``requests``;
# the Chrome session itself is started without any proxy configuration.
_DOWNLOAD_PROXIES = {'http': '192.168.1.122:1080', 'https': '192.168.1.122:1080'}
_DOWNLOAD_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',
    'referer': 'https://endeavor.dragonforms.com/',
}

# Locates the embedded registration form's iframe on a white-paper page.
_FORM_IFRAME_XPATH = '//div[@class="embed center fullWidth" or @class="embed fullWidth"]//iframe'

# Locates the download link that is read after the form has been submitted.
_DOWNLOAD_LINK_XPATH = '//div[@class="downloadReport-btn"]/a'

# (input id, value to type, required) in the order the form is filled.
# Fields flagged as not required are skipped when they cannot be filled.
# NOTE(review): the ids are opaque; which one is e-mail / name / city / zip /
# phone is only guessable from the values -- confirm against the live form.
_TEXT_FIELDS = (
    ('id13', 'cdg19880415@gmaill.com', True),
    ('id1', 'chen', True),
    ('id2', 'chen', True),
    ('id10', 'beijing', True),
    ('id4', 'chen', False),
    ('id3', 'beijing', True),
    ('id6', 'beijing', False),
    ('id9', '101300', False),
    ('id11', '18518076020', False),
)

# (select id, visible option text, required) for the drop-down boxes.
# NOTE(review): presumably id7/id8 are country and state/region -- confirm.
_DROPDOWNS = (
    ('id7', 'CHINA', True),
    ('id8', 'FOREIGN', True),
    ('id5082617', 'No', False),
    ('id5082616', 'No', False),
)


def _fill_text_field(driver, field_id, value, required):
    """Type *value* into the ``<input>`` whose id is *field_id*.

    A Selenium error is re-raised for a required field; for an optional
    field it is logged and ignored.
    """
    try:
        driver.find_element(By.XPATH, '//input[@id="%s"]' % field_id).send_keys(value)
    except WebDriverException:
        if required:
            raise
        _log.debug("optional input %s not filled", field_id, exc_info=True)


def _choose_option(driver, select_id, visible_text, required):
    """Pick the option labelled *visible_text* in the ``<select>`` *select_id*.

    A Selenium error is re-raised for a required drop-down; for an optional
    one it is logged and ignored.
    """
    try:
        Select(driver.find_element(By.ID, select_id)).select_by_visible_text(visible_text)
    except WebDriverException:
        if required:
            raise
        _log.debug("optional select %s not set", select_id, exc_info=True)


def get_pdf(cur_url):
    """Submit the registration form on a white-paper page and save its PDF.

    Opens *cur_url* in a fresh Chrome session, fills and submits the form
    inside the embedded iframe, reads the download link that is shown
    afterwards and downloads it into the current working directory, named
    after the last path component of the download URL.

    Args:
        cur_url: URL of the white-paper page containing the embedded form.

    Returns:
        The file name the PDF was written to.

    Raises:
        selenium.common.exceptions.WebDriverException: a required page
            element is missing or the download link never appears.
        ValueError: the download link has no usable URL or file name.
        requests.RequestException: the download fails or the server
            answers with an HTTP error status.
    """
    d = webdriver.Chrome()
    try:
        d.get(cur_url)
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")
        time.sleep(10)  # wait for the embedded form to load

        # Switch into the iframe element directly; looking it up again by its
        # id would fail for an iframe that carries no id attribute.
        iframe = d.find_element(By.XPATH, _FORM_IFRAME_XPATH)
        d.switch_to.frame(iframe)

        for field_id, value, required in _TEXT_FIELDS:
            _fill_text_field(d, field_id, value, required)
        for select_id, visible_text, required in _DROPDOWNS:
            _choose_option(d, select_id, visible_text, required)
        time.sleep(5)

        # Scrolling is done on the top-level document, so leave the iframe
        # for each scroll and re-enter it afterwards.
        d.switch_to.default_content()
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")
        d.switch_to.frame(iframe)
        d.find_element(By.ID, "custombtn").click()

        d.switch_to.default_content()
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2-600);")
        d.switch_to.frame(iframe)

        # Poll for the link instead of reading it immediately after the click.
        # WebDriverWait ignores NoSuchElementException while polling and
        # returns at once if the link is already present.
        link = WebDriverWait(d, 15).until(
            lambda drv: drv.find_element(By.XPATH, _DOWNLOAD_LINK_XPATH)
        )
        url = link.get_attribute('href')
    finally:
        # Always release the browser, also when a retrying caller sees an error.
        d.quit()

    if not url:
        raise ValueError("download link on %s has no href" % cur_url)
    pdf_name = os.path.basename(urlparse(url).path)
    if not pdf_name:
        raise ValueError("cannot derive a file name from %s" % url)

    # Fetch before opening the file so a failed request leaves nothing behind.
    response = requests.get(
        url, proxies=_DOWNLOAD_PROXIES, headers=_DOWNLOAD_HEADERS, timeout=60
    )
    response.raise_for_status()
    with open(pdf_name, 'wb') as f:
        f.write(response.content)
    return pdf_name

def _collect_white_paper_urls(listing_url, scroll_rounds=9, pause=2):
    """Return the hrefs of all white-paper title links on *listing_url*.

    The page is scrolled to the bottom *scroll_rounds* times, pausing *pause*
    seconds after each scroll, before the links are read. Links without an
    href are dropped. The browser is closed before returning.
    """
    d = webdriver.Chrome()
    try:
        d.get(listing_url)
        for _ in range(scroll_rounds):
            d.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(pause)
        links = d.find_elements(
            By.XPATH, '//div[@class="items-wrapper"]//a[@class="title-wrapper"]'
        )
        # Read the hrefs while the session is alive; the elements cannot be
        # used once the driver has quit.
        hrefs = [a.get_attribute('href') for a in links]
        return [href for href in hrefs if href]
    finally:
        d.quit()


def main():
    """Download the PDF of every white paper on the listing page.

    Each page is attempted up to three times; a page that keeps failing is
    logged and skipped so the remaining ones are still processed.
    """
    log = logging.getLogger(__name__)
    urls = _collect_white_paper_urls("https://www.militaryaerospace.com/white-papers")
    for cur_url in urls:
        for attempt in range(1, 4):
            try:
                get_pdf(cur_url)
                break
            except Exception:
                # Top-level boundary: one bad page must not stop the batch,
                # but the failure is recorded instead of silently dropped.
                log.warning("attempt %d/3 failed for %s", attempt, cur_url, exc_info=True)
        else:
            log.error("giving up on %s", cur_url)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()

相关推荐
祈安_1 天前
C语言内存函数
c语言·后端
norlan_jame3 天前
C-PHY与D-PHY差异
c语言·开发语言
czy87874753 天前
除了结构体之外,C语言中还有哪些其他方式可以模拟C++的面向对象编程特性
c语言
m0_531237173 天前
C语言-数组练习进阶
c语言·开发语言·算法
Z9fish3 天前
sse哈工大C语言编程练习23
c语言·数据结构·算法
代码无bug抓狂人3 天前
C语言之单词方阵——深搜(很好的深搜例题)
c语言·开发语言·算法·深度优先
CodeJourney_J3 天前
从“Hello World“ 开始 C++
c语言·c++·学习
枫叶丹43 天前
【Qt开发】Qt界面优化(七)-> Qt样式表(QSS) 样式属性
c语言·开发语言·c++·qt
with-the-flow3 天前
从数学底层的底层原理来讲 random 的函数是怎么实现的
c语言·python·算法
se-tester3 天前
JMeter、Postman 和 SoapUI 在做接口测试上的优势和缺点
测试工具·jmeter·接口测试·postman·soapui