selenium webdriver使用

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import Select

import time

import requests

from urllib.parse import urlparse

import os

from lxml import etree

from urllib.parse import urljoin

def get_pdf(cur_url):

proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',\

'referer':'https://endeavor.dragonforms.com/'}

d=webdriver.Chrome()

d.get(cur_url)

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

time.sleep(10)

frame_id=d.find_element(By.XPATH,'//div[@class="embed center fullWidth" or @class="embed fullWidth"]//iframe').get_attribute('id')

iframe = d.find_element(By.ID, frame_id)

d.switch_to.frame(iframe)

d.find_element(By.XPATH,'//input[@id="id13"]').send_keys('cdg19880415@gmaill.com')

d.find_element(By.XPATH,'//input[@id="id1"]').send_keys('chen')

d.find_element(By.XPATH,'//input[@id="id2"]').send_keys('chen')

d.find_element(By.XPATH,'//input[@id="id10"]').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input[@id="id4"]').send_keys('chen')

except Exception as e:

pass

d.find_element(By.XPATH,'//input[@id="id3"]').send_keys('beijing')

try:

d.find_element(By.XPATH,'//input[@id="id6"]').send_keys('beijing')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input[@id="id9"]').send_keys('101300')

except Exception as e:

pass

try:

d.find_element(By.XPATH,'//input[@id="id11"]').send_keys('18518076020')

except Exception as e:

pass

#####多选框城市和

select_element = d.find_element(By.ID, "id7")

select = Select(select_element)

#select.select_by_index(2)

select.select_by_visible_text("CHINA")

select_element = d.find_element(By.ID, "id8")

select = Select(select_element)

select.select_by_visible_text("FOREIGN")

try:

select_element = d.find_element(By.ID, "id5082617")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

try:

select_element = d.find_element(By.ID, "id5082616")

select = Select(select_element)

select.select_by_visible_text("No")

except Exception as e:

pass

time.sleep(5)

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")

d.switch_to.frame(iframe)

d.find_element(By.ID,"custombtn").click()

d.switch_to.default_content()

d.execute_script("window.scrollTo(0, document.body.scrollHeight/2-600);")

d.switch_to.frame(iframe)

url=d.find_element(By.XPATH,'//div[@class="downloadReport-btn"]/a').get_attribute('href')

parsed_url = urlparse(url)

pdf_name = os.path.basename(parsed_url.path)

f=open(pdf_name,'wb+')

f.write(requests.get(url,proxies=proxies,headers=headers).content)

f.close()

if name=="main":

#headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'}

#proxies={'http':'192.168.1.122:1080','https':'192.168.1.122:1080'}

d=webdriver.Chrome()

d.get("https://www.militaryaerospace.com/white-papers")

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

time.sleep(2)

d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

eles=d.find_elements(By.XPATH,'//div[@class="items-wrapper"]//a[@class="title-wrapper"]')

#html=etree.HTML(requests.get('https://www.militaryaerospace.com/white-papers',headers=headers,proxies=proxies).text)

#url_list=html.xpath('//div[@class="items-wrapper"]//a[@class="title-wrapper"]/@href')

for ele in eles:

for i in range(3):

try:

cur_url=ele.get_attribute('href')

#cur_url=urljoin('https://www.militaryaerospace.com/white-papers',cur_url)

get_pdf(cur_url)

break

except Exception as e:

continue

#cur_url='https://www.militaryaerospace.com/white-papers/whitepaper/55129368/next-level-testing-the-role-of-lvdt-rvdt-resolver-simulation'

#get_pdf(cur_url)

相关推荐
代码中介商11 小时前
银行管理系统的业务血肉 —— 流程、状态机、输入校验与持久化(下篇)
c语言·算法
爱编码的小八嘎14 小时前
C语言完美演绎9-12
c语言
武帝为此14 小时前
【Selenium 屏幕截图】
python·selenium·测试工具
Navigator_Z16 小时前
LeetCode //C - 1031. Maximum Sum of Two Non-Overlapping Subarrays
c语言·算法·leetcode
leoufung21 小时前
LeetCode 30:Substring with Concatenation of All Words 题解(含 C 语言 uthash 实现)
c语言·leetcode·c#
爱编码的小八嘎21 小时前
C语言完美演绎9-6
c语言
武帝为此21 小时前
【Selenium 执行 JavaScript】
javascript·selenium·测试工具
SunnyByte1 天前
线性表——单链表的增删查改操作
c语言·单链表
SunnyByte1 天前
线性表——双向链表
c语言·链表
jimy11 天前
C 语言的 static 关键字作用
c语言·开发语言·算法