1.csdn登录
存在多个内部框架,学习使用driver.switch_to.default_content()
python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time
# Log in to CSDN through its iframe-hosted login dialog, then hover over the
# toolbar avatar and print the nickname that only shows up on hover.
# NOTE(review): the account and password below are hard-coded in the script;
# consider loading them from the environment or a local config file instead.
driver = webdriver.Chrome()
try:
    driver.get('https://www.csdn.net/')
    time.sleep(3)
    # Click the link in the toolbar that opens the login dialog.
    driver.find_element(By.XPATH, '//*[@id="csdn-toolbar"]/div/div/div[3]/div/div[1]/a').click()
    time.sleep(3)

    # The dialog is rendered inside an iframe, so switch into it before
    # locating anything inside the dialog.
    driver.switch_to.frame(driver.find_element(By.XPATH, '//*[@id="passportbox"]/iframe'))
    # Presumably selects the account/password tab of the dialog.
    driver.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div[2]/div[1]/div[1]/span[4]').click()
    time.sleep(3)

    # Fill in the account and password fields.
    driver.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[1]/div/input').send_keys("18734256736")
    driver.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[2]/div/input').send_keys("123456")
    # Presumably the agreement checkbox that must be ticked before submitting
    # -- TODO confirm against the live page.
    driver.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[2]/div/i').click()
    time.sleep(2)
    # Submit the login form.
    driver.find_element(By.XPATH, '/html/body/div/div/div/div[2]/div[2]/div/div[2]/div[1]/div[4]/button').click()
    time.sleep(10)

    # Leave the login iframe and return to the top-level document.
    driver.switch_to.default_content()
    # A second iframe appears after login; enter it, click its button, and
    # return to the top-level document again.
    driver.switch_to.frame(driver.find_element(By.XPATH, '//*[@id="csdn-cert-iframe"]'))
    time.sleep(3)
    driver.find_element(By.XPATH, '//*[@id="app"]/div/div[1]/button').click()
    driver.switch_to.default_content()
    time.sleep(3)

    # Locate the avatar image that reveals the profile panel on hover.
    hover_element = driver.find_element(By.XPATH, '//*[@id="csdn-toolbar"]/div/div/div[3]/div/div[1]/a/img')
    # Move the mouse over it with ActionChains to trigger the hover state.
    actions = ActionChains(driver)
    actions.move_to_element(hover_element).perform()
    time.sleep(5)

    # Read the nickname element, which is only present after hovering.
    hovered_elment = driver.find_element(By.XPATH, '//p[@class="csdn-profile-nickName"]').text
    print(hovered_elment)
    time.sleep(10)
finally:
    # Always shut the browser down, even when a locator above fails, so no
    # orphaned Chrome/chromedriver process is left running.
    driver.quit()
2.豆瓣滑动验证码
方法一:使用ddddocr实现
python
import time
import ddddocr
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import random
# Log in to Douban, then solve the slider captcha: screenshot the background
# and the puzzle piece, let ddddocr locate the gap, and drag the slider there
# in small random steps.
url = 'https://www.douban.com/'
driver = webdriver.Chrome()
try:
    driver.get(url)

    # Log in. The login form is hosted in an iframe on the home page.
    driver.switch_to.frame(driver.find_element(By.XPATH, '//*[@id="anony-reg-new"]/div/div[1]/iframe'))
    driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/ul[1]/li[2]').click()
    time.sleep(3)
    driver.find_element(By.XPATH, '//*[@id="username"]').send_keys('18356734521')
    driver.find_element(By.XPATH, '//*[@id="password"]').send_keys('123456')
    driver.find_element(By.XPATH, '/html/body/div[1]/div[2]/div[1]/div[5]/a').click()
    time.sleep(5)

    # Handle the captcha, which is rendered in its own nested iframe.
    # 1. Save a screenshot of the whole background image.
    driver.switch_to.frame(driver.find_element(By.XPATH, '//*[@id="tcaptcha_iframe_dy"]'))
    time.sleep(3)
    yzm_all_img = driver.find_element(By.XPATH, '//*[@id="slideBg"]')
    yzm_all_path = 'D:\\yzm_all.png'
    yzm_all_img.screenshot(yzm_all_path)

    # 2. Save a screenshot of the small puzzle-piece image.
    # The piece is one of two sibling divs. If div[8] is wide and flat
    # (wider than 100 and shorter than 20) it is presumably the slider track
    # rather than the piece, so div[7] is used instead.
    yzm_img1 = driver.find_element(By.XPATH, '//*[@id="tcOperation"]/div[8]')
    yzm_img2 = driver.find_element(By.XPATH, '//*[@id="tcOperation"]/div[7]')
    s1 = yzm_img1.size
    if s1['width'] > 100 and s1['height'] < 20:
        yzm_small_img = yzm_img2
    else:
        yzm_small_img = yzm_img1
    yzm_small_path = 'D:\\yzm_small.png'
    yzm_small_img.screenshot(yzm_small_path)
    time.sleep(5)

    # 3. Compute the slide distance. The files are read back through the same
    # path variables used for saving, so the two can never drift apart.
    slide = ddddocr.DdddOcr(det=False, ocr=False)
    with open(yzm_small_path, 'rb') as f:
        target_bytes = f.read()
    with open(yzm_all_path, 'rb') as f:
        background_bytes = f.read()
    res = slide.slide_match(target_bytes, background_bytes, simple_target=True)
    print(res)
    # Fall back to 0 (no movement) when no match is reported, including the
    # case where the result has no 'target' entry.
    target = res.get('target') if res else None
    target_pos = target[0] if target else 0

    # 4. Drag the slider the way a person would.
    small_slider = driver.find_element(By.XPATH, '//*[@id="tcOperation"]/div[6]')
    ActionChains(driver).click_and_hold(small_slider).perform()
    moved = 0
    while moved < target_pos:
        # Random step size, clamped so the final step lands exactly on target.
        step = min(random.randint(3, 8), target_pos - moved)
        ActionChains(driver).move_by_offset(xoffset=step, yoffset=0).perform()
        moved += step
        time.sleep(random.uniform(0.02, 0.1))  # uneven pacing between steps
    ActionChains(driver).release().perform()
    print(f"滑动到位置: {moved}")
finally:
    # Close the browser even if a locator or the captcha matching fails.
    driver.quit()
方法二:使用cv2模块实现
1.首先进行登录
python
# Open the Douban home page and submit the password-login form.
url = 'https://www.douban.com/'
driver = webdriver.Chrome()
driver.get(url)

# The login form is hosted in an iframe; enter it before locating any field.
login_frame = driver.find_element(By.XPATH, '//*[@id="anony-reg-new"]/div/div[1]/iframe')
driver.switch_to.frame(login_frame)
driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/ul[1]/li[2]').click()
time.sleep(3)

# Type the credentials into the username and password inputs, in that order.
for field_xpath, text in (
    ('//*[@id="username"]', '123456'),
    ('//*[@id="password"]', '123456'),
):
    driver.find_element(By.XPATH, field_xpath).send_keys(text)

# Click the submit link, then wait 5 seconds before the next step.
submit_link = driver.find_element(By.XPATH, '/html/body/div[1]/div[2]/div[1]/div[5]/a')
submit_link.click()
time.sleep(5)
2.然后需要进行验证码操作,获取背景图片和滑块缺口图片
python
# Screenshot the captcha element to disk, then pause for 5 seconds.
yzm_all_path = 'D:/yzm_all.png'
yzm_all_img = driver.find_element(by=By.XPATH, value='//div[@class="geetest_item_wrap"]')
yzm_all_img.screenshot(yzm_all_path)
time.sleep(5)
- 模拟人鼠标进行滑动
python
import random
import time
# Generate the per-step offsets of a human-like slide.
def generate_track(distance):
    """Return integer x-offsets for a drag that covers ``distance`` pixels.

    The motion accelerates (a = 2) over the first 4/5 of the distance and
    decelerates (a = -3) over the rest, sampled at a fixed time step of 0.2.

    Each step is the difference between consecutive *rounded positions*, and
    the final position is clamped to the target. The steps therefore always
    sum to exactly ``round(distance)``. Rounding each step on its own would
    let the error accumulate and the last step overshoot.

    Args:
        distance: Total horizontal distance to cover, in pixels.

    Returns:
        A list of non-negative ints; empty when ``round(distance)`` is not
        positive.
    """
    target = round(distance)
    if target <= 0:
        return []

    track = []
    current = 0.0  # exact simulated position
    emitted = 0    # sum of the integer steps emitted so far
    mid = distance * 4 / 5
    t = 0.2
    v = 0.0
    while current < distance:
        a = 2 if current < mid else -3
        v0 = v
        v = v0 + a * t
        current += v0 * t + a * t * t / 2
        # Never step past the target, even when the last sample overshoots.
        position = min(round(current), target)
        track.append(position - emitted)
        emitted = position
    return track
# Drag a slider with ActionChains along a generated track.
def simulate_dragging(browser, slider, distance):
    """Press ``slider``, move it along ``generate_track(distance)``, release.

    All actions are queued on one ActionChains object and executed by the
    single ``perform()`` call at the end.

    Args:
        browser: WebDriver instance the action chain is built on.
        slider: Element to click and hold.
        distance: Total horizontal distance passed to ``generate_track``.
    """
    chain = ActionChains(browser).click_and_hold(slider)
    for step in generate_track(distance):
        chain = chain.move_by_offset(xoffset=step, yoffset=0)
    chain.release().perform()
# Locate the slider handle and drag it 300 px. The WebDriver session in these
# notes is named `driver`; `browser` was never defined and raised NameError.
slider = driver.find_element(By.CSS_SELECTOR, '.slider-button')
simulate_dragging(driver, slider, 300)
豆瓣selenium滑动验证码
3.b站点选验证码
方法与豆瓣部分使用ddddocr的方法类似
1.登录b站
python
# Open Bilibili, open the login dialog, fill in the form, and submit it.
driver = webdriver.Chrome()
driver.get('https://www.bilibili.com/')

# Click the header entry that opens the login dialog, then wait 5 seconds.
login_entry = driver.find_element(By.XPATH, '//*[@id="i_cecream"]/div[2]/div[1]/div[1]/ul[2]/li[1]/li/div[1]/div/span')
login_entry.click()
time.sleep(5)

# Type into the two form inputs in order (presumably account, then password).
for input_xpath, text in (
    ('/html/body/div[5]/div/div[4]/div[2]/form/div[1]/input', '123456'),
    ('/html/body/div[5]/div/div[4]/div[2]/form/div[3]/input', '123456'),
):
    driver.find_element(By.XPATH, input_xpath).send_keys(text)

# Click the submit control, then wait 5 seconds before the next step.
submit_button = driver.find_element(By.XPATH, '/html/body/div[5]/div/div[4]/div[2]/div[2]/div[2]')
submit_button.click()
time.sleep(5)
2.保存验证码
python
# 1. Screenshot the whole captcha image element to disk, then pause 5 seconds.
yzm_all_path = 'D:/yzm_all.png'
yzm_all_img = driver.find_element(by=By.XPATH, value='//div[@class="geetest_item_wrap"]')
yzm_all_img.screenshot(yzm_all_path)
time.sleep(5)
3.使用ddddocr进行验证文字识别
python
# 2. Run ddddocr's detection mode on the saved captcha screenshot and print
# the bounding boxes it returns.
det = ddddocr.DdddOcr(det=True)
with open("D:/yzm_all.png", mode='rb') as image_file:
    image = image_file.read()
bboxes = det.detection(image)
print(bboxes)
4.使用selenium模拟鼠标点击
python
# 4. Size of the captcha element. It is needed to convert coordinates measured
# from the screenshot's top-left corner into offsets from the element centre.
element_size = yzm_all_img.size
element_width = element_size['width']
element_height = element_size['height']

# 5. Turn every detected box into a click position (its centre point).
for bbox in bboxes:
    x1, y1, x2, y2 = bbox
    # Centre of the box, measured from the top-left corner of the screenshot.
    center_x = (x1 + x2) / 2
    center_y = (y1 + y2) / 2
    # move_to_element_with_offset() takes an offset relative to the element,
    # not the page, so the element's page location must not be added. Current
    # Selenium 4 releases measure the offset from the element centre, hence
    # the half-width/half-height shift.
    # NOTE(review): assumes screenshot pixels equal CSS pixels; on a scaled
    # (HiDPI) display the box coordinates would need dividing by the ratio.
    click_x = round(center_x - element_width / 2)
    click_y = round(center_y - element_height / 2)

    # 6. Move the mouse to the target position and click.
    action = ActionChains(driver)
    action.move_to_element_with_offset(yzm_all_img, click_x, click_y).click().perform()
    print(f"Clicking at ({click_x}, {click_y})")
b站点选验证码