selenium webdriver使用

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import Select

import time

import requests

from urllib.parse import urlparse

import os

from lxml import etree

from urllib.parse import urljoin

def get_pdf(cur_url):

proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',\

'referer':'https://endeavor.dragonforms.com/'}

d=webdriver.Chrome()

d.get(cur_url)

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

time.sleep(10)

frame_id=d.find_element(By.XPATH,'//div@class="embed center fullWidth" or @class="embed fullWidth"//iframe').get_attribute('id')

iframe = d.find_element(By.ID, frame_id)

d.switch_to.frame(iframe)

d.find_element(By.XPATH,'//input@id="id13"').send_keys('cdg19880415@gmaill.com')

d.find_element(By.XPATH,'//input@id="id1"').send_keys('chen')

d.find_element(By.XPATH,'//input@id="id2"').send_keys('chen')

d.find_element(By.XPATH,'//input@id="id10"').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input@id="id4"').send_keys('chen')

except Exception as e:

pass

d.find_element(By.XPATH,'//input@id="id3"').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input@id="id6"').send_keys('beijing')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input@id="id9"').send_keys('101300')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input@id="id11"').send_keys('18518076020')

except Exception as e:

pass

#####多选框城市和

select_element = d.find_element(By.ID, "id7")

select = Select(select_element)

#select.select_by_index(2)

select.select_by_visible_text("CHINA")

select_element = d.find_element(By.ID, "id8")

select = Select(select_element)

select.select_by_visible_text("FOREIGN")

try:

select_element = d.find_element(By.ID, "id5082617")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

try:

select_element = d.find_element(By.ID, "id5082616")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

time.sleep(5)

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

d.switch_to.frame(iframe)

d.find_element(By.ID,"custombtn").click()

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2-600);")

d.switch_to.frame(iframe)

url=d.find_element(By.XPATH,'//div@class="downloadReport-btn"/a').get_attribute('href')

parsed_url = urlparse(url)

pdf_name = os.path.basename(parsed_url.path)

f=open(pdf_name,'wb+')

f.write(requests.get(url,proxies=proxies,headers=headers).content)

f.close()

if name=="main":

#headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'}

#proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

d=webdriver.Chrome()

d.get("https://www.militaryaerospace.com/white-papers")

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

eles=d.find_elements(By.XPATH,'//div@class="items-wrapper"//a@class="title-wrapper"')

#html=etree.HTML(requests.get('https://www.militaryaerospace.com/white-papers',headers=headers,proxies=proxies).text)

#url_list=html.xpath('//div@class="items-wrapper"//a@class="title-wrapper"/@href')

for ele in eles:

for i in range(3):

try:

cur_url=ele.get_attribute('href')

#cur_url=urljoin('https://www.militaryaerospace.com/white-papers',cur_url)

get_pdf(cur_url)

break

except Exception as e:

continue

#cur_url='https://www.militaryaerospace.com/white-papers/whitepaper/55129368/next-level-testing-the-role-of-lvdt-rvdt-resolver-simulation'

#get_pdf(cur_url)

相关推荐
十月的皮皮10 分钟前
C语言学习笔记20260612-菱形图案打印(两种写法)
c语言·笔记·学习
cfm_291411 分钟前
JVM垃圾收集算法与收集器深度解析
jvm·测试工具·算法·性能优化
AI科技星15 分钟前
第三卷:质数王朝志(全卷定稿)
c语言·开发语言·汇编·electron·概率论
Luminbox紫创测控1 小时前
金属卤素灯工作原理与汽车零部件老化测试应用
测试工具·汽车·安全性测试·测试标准
2601_951645742 小时前
C语言基础语法,分支语句
c语言·运算符·if语句·switch语句·分支语句
dtq04242 小时前
C语言刷题函数1-判断素数(分支语句,函数两种方法)
c语言·开发语言·学习
AI科技星2 小时前
第四卷:橡皮泥江湖(拓扑学)
c语言·开发语言·网络·量子计算·agi·拓扑学
2601_951643882 小时前
关于C语言中getchar()的详细使用
c语言·输入输出·getchar()·eof·文件结束符
小七在进步2 小时前
数据结构:线性表之单链表
c语言·数据结构
鱼子星_3 小时前
【数据结构】排序的拓展——快速排序的生态多样性与归并排序沾染文件操作
c语言·数据结构·算法