selenium webdriver使用

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import Select

import time

import requests

from urllib.parse import urlparse

import os

from lxml import etree

from urllib.parse import urljoin

def get_pdf(cur_url):

proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',\

'referer':'https://endeavor.dragonforms.com/'}

d=webdriver.Chrome()

d.get(cur_url)

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

time.sleep(10)

frame_id=d.find_element(By.XPATH,'//div[@class="embed center fullWidth" or @class="embed fullWidth"]//iframe').get_attribute('id')

iframe = d.find_element(By.ID, frame_id)

d.switch_to.frame(iframe)

d.find_element(By.XPATH,'//input[@id="id13"]').send_keys('cdg19880415@gmaill.com')

d.find_element(By.XPATH,'//input[@id="id1"]').send_keys('chen')

d.find_element(By.XPATH,'//input[@id="id2"]').send_keys('chen')

d.find_element(By.XPATH,'//input[@id="id10"]').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input[@id="id4"]').send_keys('chen')

except Exception as e:

pass

d.find_element(By.XPATH,'//input[@id="id3"]').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input[@id="id6"]').send_keys('beijing')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input[@id="id9"]').send_keys('101300')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input[@id="id11"]').send_keys('18518076020')

except Exception as e:

pass

#####多选框城市和

select_element = d.find_element(By.ID, "id7")

select = Select(select_element)

#select.select_by_index(2)

select.select_by_visible_text("CHINA")

select_element = d.find_element(By.ID, "id8")

select = Select(select_element)

select.select_by_visible_text("FOREIGN")

try:

select_element = d.find_element(By.ID, "id5082617")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

try:

select_element = d.find_element(By.ID, "id5082616")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

time.sleep(5)

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

d.switch_to.frame(iframe)

d.find_element(By.ID,"custombtn").click()

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2-600);")

d.switch_to.frame(iframe)

url=d.find_element(By.XPATH,'//div[@class="downloadReport-btn"]/a').get_attribute('href')

parsed_url = urlparse(url)

pdf_name = os.path.basename(parsed_url.path)

f=open(pdf_name,'wb+')

f.write(requests.get(url,proxies=proxies,headers=headers).content)

f.close()

if name=="main":

#headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'}

#proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

d=webdriver.Chrome()

d.get("https://www.militaryaerospace.com/white-papers")

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

eles=d.find_elements(By.XPATH,'//div[@class="items-wrapper"]//a[@class="title-wrapper"]')

#html=etree.HTML(requests.get('https://www.militaryaerospace.com/white-papers',headers=headers,proxies=proxies).text)

#url_list=html.xpath('//div[@class="items-wrapper"]//a[@class="title-wrapper"]/@href')

for ele in eles:

for i in range(3):

try:

cur_url=ele.get_attribute('href')

#cur_url=urljoin('https://www.militaryaerospace.com/white-papers',cur_url)

get_pdf(cur_url)

break

except Exception as e:

continue

#cur_url='https://www.militaryaerospace.com/white-papers/whitepaper/55129368/next-level-testing-the-role-of-lvdt-rvdt-resolver-simulation'

#get_pdf(cur_url)

相关推荐
GanGuaGua13 小时前
Linux系统:线程的互斥和安全
linux·运维·服务器·c语言·c++·安全
别来无恙14914 小时前
使用Python和Selenium进行Web自动化测试:从入门到实践
selenium·测试工具
Python大数据分析@14 小时前
python用selenium怎么规避检测?
开发语言·python·selenium·网络爬虫
ThreeAu.14 小时前
Miniconda3搭建Selenium的python虚拟环境全攻略
开发语言·python·selenium·minicoda·python环境配置
神里流~霜灭16 小时前
(C++)数据结构初阶(顺序表的实现)
linux·c语言·数据结构·c++·算法·顺序表·单链表
草莓熊Lotso17 小时前
【C++】递归与迭代:两种编程范式的对比与实践
c语言·开发语言·c++·经验分享·笔记·其他
鹿鹿学长1 天前
2025年全国大学生数学建模竞赛(C题) 建模解析|婴儿染色体数学建模|小鹿学长带队指引全代码文章与思路
c语言·开发语言·数学建模
伴杯猫1 天前
【ESP32-IDF】基础外设开发2:系统中断矩阵
c语言·单片机·嵌入式硬件·mcu·物联网·github
测试老哥1 天前
6个步骤实现Postman接口压力测试
自动化测试·软件测试·测试工具·测试用例·接口测试·压力测试·postman