selenium webdriver使用

import logging
import os
import time
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

_LOG = logging.getLogger(__name__)

# Proxy and request headers used for the final PDF download (the browser
# session itself is not configured with them).
_PROXIES = {'http': '192.168.1.122:1080', 'https': '192.168.1.122:1080'}
_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',
    'referer': 'https://endeavor.dragonforms.com/',
}

# (input id, text to type, required?) in the order the form is filled.
# NOTE(review): the meaning of each id (e-mail, name, city, postal code,
# phone, ...) is inferred from the values only -- confirm against the form.
_FORM_INPUTS = (
    ('id13', 'cdg19880415@gmaill.com', True),
    ('id1', 'chen', True),
    ('id2', 'chen', True),
    ('id10', 'beijing', True),
    ('id4', 'chen', False),
    ('id3', 'beijing', True),
    ('id6', 'beijing', False),
    ('id9', '101300', False),
    ('id11', '18518076020', False),
)

# (select id, visible option text, required?) for the drop-down fields.
_FORM_SELECTS = (
    ('id7', 'CHINA', True),
    ('id8', 'FOREIGN', True),
    ('id5082617', 'No', False),
    ('id5082616', 'No', False),
)

_SCROLL_TO_FORM = "window.scrollTo(0, document.body.scrollHeight/2+200);"
_SCROLL_TO_RESULT = "window.scrollTo(0, document.body.scrollHeight/2-600);"


def _fill_input(driver, element_id, text, required=True):
    """Type *text* into the ``<input>`` whose id is *element_id*.

    A failure on an optional field is logged and ignored, because not every
    white-paper form contains every field; a failure on a required field
    propagates to the caller.
    """
    try:
        driver.find_element(By.XPATH, f'//input[@id="{element_id}"]').send_keys(text)
    except WebDriverException:
        if required:
            raise
        _LOG.debug("optional input %s not filled", element_id, exc_info=True)


def _choose_option(driver, element_id, label, required=True):
    """Pick the option with visible text *label* in the ``<select>`` *element_id*.

    Optional selects that are missing or lack the option are logged and
    skipped; required ones propagate the Selenium error.
    """
    try:
        Select(driver.find_element(By.ID, element_id)).select_by_visible_text(label)
    except WebDriverException:
        if required:
            raise
        _LOG.debug("optional select %s not set", element_id, exc_info=True)


def _scroll_host_page(driver, iframe, script):
    """Run a scroll *script* on the top-level page, then re-enter *iframe*."""
    driver.switch_to.default_content()
    driver.execute_script(script)
    driver.switch_to.frame(iframe)


def get_pdf(cur_url):
    """Fill in the gated form on a white-paper page and download its PDF.

    Opens *cur_url* in a fresh Chrome session, completes the registration
    form embedded in an iframe, submits it, reads the download link that
    appears, and saves the linked file into the current working directory
    under the basename of the link's URL path.

    Args:
        cur_url: URL of the white-paper detail page.

    Raises:
        selenium.common.exceptions.WebDriverException: a required page
            element could not be found or used, or the download link did not
            appear in time.
        requests.RequestException: the PDF download failed or returned an
            HTTP error status.
        ValueError: no usable download URL / file name could be derived.
    """
    d = webdriver.Chrome()
    try:
        d.get(cur_url)
        d.execute_script(_SCROLL_TO_FORM)
        # Give the embedded form time to load.
        time.sleep(10)

        # The XPath predicates must be bracketed; use the located element
        # directly instead of round-tripping through its id attribute.
        iframe = d.find_element(
            By.XPATH,
            '//div[@class="embed center fullWidth" or @class="embed fullWidth"]//iframe',
        )
        d.switch_to.frame(iframe)

        for element_id, text, required in _FORM_INPUTS:
            _fill_input(d, element_id, text, required)

        # Drop-down selections.
        for element_id, label, required in _FORM_SELECTS:
            _choose_option(d, element_id, label, required)

        time.sleep(5)

        # Bring the submit button into view, then submit the form.
        _scroll_host_page(d, iframe, _SCROLL_TO_FORM)
        d.find_element(By.ID, "custombtn").click()

        _scroll_host_page(d, iframe, _SCROLL_TO_RESULT)
        # The download link only exists once the submission has been
        # processed, so poll for it rather than looking it up immediately.
        link = WebDriverWait(d, 30).until(
            lambda drv: drv.find_element(By.XPATH, '//div[@class="downloadReport-btn"]/a')
        )
        url = link.get_attribute('href')
    finally:
        # Always release the browser, even on failure; the caller retries
        # and would otherwise accumulate orphaned Chrome processes.
        d.quit()

    if not url:
        raise ValueError(f"no download link found on {cur_url}")
    pdf_name = os.path.basename(urlparse(url).path)
    if not pdf_name:
        raise ValueError(f"cannot derive a file name from {url}")

    # Download first, write second: a failed request must not leave an empty
    # or truncated file behind.
    response = requests.get(url, proxies=_PROXIES, headers=_HEADERS, timeout=60)
    response.raise_for_status()
    with open(pdf_name, 'wb') as f:
        f.write(response.content)

if __name__ == "__main__":
    # Script entry point: collect every white-paper link from the listing
    # page, then download each paper's PDF via get_pdf().
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    d = webdriver.Chrome()
    try:
        d.get("https://www.militaryaerospace.com/white-papers")
        # Scroll to the bottom repeatedly, pausing so further items can load
        # between scrolls -- presumably the listing is lazy-loaded.
        for _ in range(9):
            d.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        eles = d.find_elements(
            By.XPATH, '//div[@class="items-wrapper"]//a[@class="title-wrapper"]'
        )
        # Read the hrefs while the listing session is still alive, dropping
        # missing values and duplicates but keeping page order.
        hrefs = (ele.get_attribute('href') for ele in eles)
        paper_urls = list(dict.fromkeys(href for href in hrefs if href))
    finally:
        d.quit()

    for cur_url in paper_urls:
        # Each paper gets up to three attempts.
        for attempt in range(1, 4):
            try:
                get_pdf(cur_url)
                break
            except Exception:
                # Best-effort batch job: record the failure and retry / move
                # on instead of aborting the whole run.
                log.warning("attempt %d/3 failed for %s", attempt, cur_url, exc_info=True)

相关推荐
apocelipes21 小时前
常用编程语言和库的正则表达式性能对比
c语言·c++·python·性能优化·golang·开发工具和环境
ClouGence1 天前
Selenium、Playwright、CueCast 深度对比:Web 自动化测试工具怎么选
selenium·测试
LDR00616 天前
Type-C 快充全面升级!LDR6601 赋能个人护理便携电机,重塑剃须刀 / 理发器新体验
c语言·开发语言
Luminous.16 天前
C语言--day30
c语言·开发语言
玖玥拾16 天前
C/C++ 数据结构(七)栈、容器适配器
c语言·数据结构·c++··容器适配器
謓泽16 天前
C语言不是语法,是通往机器的地图。
c语言·开发语言
不会C语言的男孩16 天前
Linux 系统编程 · 第 8 章:进程基础
linux·c语言
2601_9516438816 天前
C语言长文整理,关键字和数据类型
c语言·数据类型·关键字·嵌入式开发·格式化输出
程序员小远16 天前
自动化测试基础知识总结
自动化测试·软件测试·python·selenium·测试工具·职场和发展·测试用例
m0_5474866616 天前
《C#语言程序设计与实践》 全套PPT课件
c语言·c#·c语言程序设计