Selenium WebDriver 使用

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import Select

import time

import requests

from urllib.parse import urlparse

import os

from lxml import etree

from urllib.parse import urljoin

def get_pdf(cur_url):
    """Submit the gated form on a white-paper page and download the file.

    Opens *cur_url* in a fresh Chrome session, switches into the embedded
    form iframe, fills in the registration fields, submits the form, and
    then downloads the target of the ``downloadReport-btn`` link through
    ``requests`` (using the hard-coded proxy and headers below). The file is
    written to the current working directory under the basename of the
    download URL's path.

    Args:
        cur_url: URL of the white-paper detail page.

    Raises:
        selenium.common.exceptions.WebDriverException: if a required page
            element (iframe, required field, submit button, download link)
            cannot be found or used.
        requests.RequestException: if the download fails, times out, or
            returns an HTTP error status.
    """
    # Imported locally so this function stays self-contained.
    from selenium.common.exceptions import WebDriverException

    proxies = {'http': '192.168.1.122:1080', 'https': '192.168.1.122:1080'}
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',
        'referer': 'https://endeavor.dragonforms.com/',
    }

    # (input id, value to type, required?) in the order the fields are filled.
    # Optional fields are not present on every form variant.
    text_fields = (
        ('id13', 'cdg19880415@gmaill.com', True),
        ('id1', 'chen', True),
        ('id2', 'chen', True),
        ('id10', 'beijing', True),
        ('id4', 'chen', False),
        ('id3', 'beijing', True),
        ('id6', 'beijing', False),
        ('id9', '101300', False),
        ('id11', '18518076020', False),
    )
    # (select id, visible option text, required?)
    select_fields = (
        ('id7', 'CHINA', True),
        ('id8', 'FOREIGN', True),
        ('id5082617', 'No', False),
        ('id5082616', 'No', False),
    )

    d = webdriver.Chrome()
    try:
        d.get(cur_url)
        # Scroll towards the middle of the page, then wait for the embedded
        # form to load.
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")
        time.sleep(10)

        # Use the located iframe element directly instead of re-finding it by
        # its id attribute, which would fail for an iframe without an id.
        iframe = d.find_element(
            By.XPATH,
            '//div[@class="embed center fullWidth" or @class="embed fullWidth"]//iframe',
        )
        d.switch_to.frame(iframe)

        for field_id, value, required in text_fields:
            try:
                d.find_element(By.XPATH, f'//input[@id="{field_id}"]').send_keys(value)
            except WebDriverException:
                # Best effort for optional fields; a missing required field
                # is a real failure.
                if required:
                    raise

        for select_id, option_text, required in select_fields:
            try:
                Select(d.find_element(By.ID, select_id)).select_by_visible_text(option_text)
            except WebDriverException:
                if required:
                    raise

        time.sleep(5)

        # Scrolling is issued from the top-level document, so leave the
        # iframe, scroll, and re-enter it before clicking submit.
        d.switch_to.default_content()
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")
        d.switch_to.frame(iframe)
        d.find_element(By.ID, "custombtn").click()

        d.switch_to.default_content()
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2-600);")
        d.switch_to.frame(iframe)
        url = d.find_element(
            By.XPATH, '//div[@class="downloadReport-btn"]/a'
        ).get_attribute('href')
    finally:
        # Always release the browser, including on failure, so retries by the
        # caller do not pile up Chrome processes.
        d.quit()

    pdf_name = os.path.basename(urlparse(url).path)

    # Fetch before opening the output file so a failed request does not leave
    # an empty or partial file behind; reject HTTP error pages.
    response = requests.get(url, proxies=proxies, headers=headers, timeout=60)
    response.raise_for_status()
    with open(pdf_name, 'wb') as f:
        f.write(response.content)

if __name__ == "__main__":
    # Script entry point: open the white-papers listing, scroll repeatedly,
    # then run get_pdf() on every listed paper.
    d = webdriver.Chrome()
    try:
        d.get("https://www.militaryaerospace.com/white-papers")

        # Scroll to the bottom nine times with a 2-second pause between
        # scrolls (presumably to trigger lazy loading of more entries).
        scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"
        for _ in range(8):
            d.execute_script(scroll_to_bottom)
            time.sleep(2)
        d.execute_script(scroll_to_bottom)

        eles = d.find_elements(
            By.XPATH, '//div[@class="items-wrapper"]//a[@class="title-wrapper"]'
        )

        for ele in eles:
            # Up to three attempts per paper; every attempt opens its own
            # browser session inside get_pdf().
            for attempt in range(1, 4):
                try:
                    cur_url = ele.get_attribute('href')
                    if not cur_url:
                        # Nothing to download for a link without an href.
                        break
                    get_pdf(cur_url)
                    break
                except Exception as e:
                    # Keep the best-effort retry, but report the failure
                    # instead of swallowing it silently.
                    print(f"get_pdf attempt {attempt}/3 failed: {e!r}")
    finally:
        # Release the listing browser even if the run is interrupted.
        d.quit()

    # To debug a single paper, call get_pdf() directly with its URL, e.g.
    # get_pdf('https://www.militaryaerospace.com/white-papers/whitepaper/55129368/next-level-testing-the-role-of-lvdt-rvdt-resolver-simulation')

相关推荐
_F_y24 分钟前
MySQL用C/C++连接
c语言·c++·mysql
BackCatK Chen1 小时前
C语言学习栏目目录
c语言·保姆级教程·c语言入门·c语言学习栏目目录
极客数模2 小时前
【2026美赛赛题初步翻译F题】2026_ICM_Problem_F
大数据·c语言·python·数学建模·matlab
请注意这个女生叫小美6 小时前
C语言 斐波那契而数列
c语言
Legendary_0086 小时前
Type-C 一拖二快充线:突破单口限制的技术逻辑
c语言·开发语言
智者知已应修善业6 小时前
【查找字符最大下标以*符号分割以**结束】2024-12-24
c语言·c++·经验分享·笔记·算法
91刘仁德7 小时前
c++类和对象(下)
c语言·jvm·c++·经验分享·笔记·算法
Wpa.wk7 小时前
容器编排 - 了解K8s(pod, deployment,service,lable等概念)
经验分享·测试工具·docker·云原生·容器·kubernetes
橘颂TA8 小时前
【测试】自动化测试函数介绍——web 测试
python·功能测试·selenium·测试工具·dubbo
No0d1es8 小时前
电子学会青少年软件编程(C语言)等级考试试卷(四级)2025年12月
c语言·青少年编程·电子学会·四级·2025年