一、Nodejs+Puppeteer实现登录官网
1.环境说明
Nodejs------直接从官网下载最新版本,并安装
使用npm安装puppeteer:npm install puppeteer
如果下载较慢,可以指定镜像源:npm install xxx --registry https://registry.npm.taobao.org
Chromium会自动下载,前提是网络通畅
2.实践操作:Nodejs+Puppeteer介绍
Puppeteer登录官网首页:
1.打开浏览器并访问首页
2.键入数据并访问登录后的页面
3.截图保留记录,存储成本地图片
javascript
const puppeteer = require("puppeteer")
/**
 * Pause helper for async code.
 * @param {number} time - delay in milliseconds
 * @returns {Promise<undefined>} promise fulfilled once the timer fires
 */
const sleep = (time) =>
  new Promise((wake) => setTimeout(wake, time));
// Open the demo site, log in with the demo account and save a screenshot of
// the page shown after login. The fixed sleeps keep every step visible while
// the browser window is on screen.
(async () => {
  const browser = await puppeteer.launch({
    headless: false, // show the browser window; headless mode would hide it
    slowMo: 100, // delay inserted between browser operations, in milliseconds
    defaultViewport: { width: 1366, height: 768 }, // override the default page size
  });
  try {
    const page = await browser.newPage();
    await page.goto("http://shanzhi.spbeen.com/index");
    await sleep(3000);

    // Follow the login link on the index page.
    const login_link_button = await page.$('a.btn.btn-primary');
    await login_link_button.click();
    await sleep(2000);

    // Fill in the credentials.
    const username_input = await page.$('input#username');
    await username_input.type('demo1234');
    await sleep(2000);
    const password_input = await page.$('input#MemberPassword');
    await password_input.type('demo1234');
    await sleep(2000);

    // Submit the form and keep a screenshot as a record.
    const submit_button = await page.$("button.btn.btn-primary");
    await submit_button.click();
    await sleep(2000);
    await page.screenshot({ path: 'shanzhi_login_index.png' });
    await sleep(2000);
  } finally {
    // Close the browser even when a step above throws, so no Chromium
    // process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
3.总结:
Puppeteer有更多且更全的接口,可以快捷的操作网页
Puppeteer可以实现多种数据的存储,例如截图、pdf等
浏览器的标签页不要太多,容易卡电脑
二、nodejs+puppeteer实现滑动验证码全自动识别
1.滑动验证码破解方法
数据来源:滑动验证码的图片偏移
破解方法一:分析请求,用数据做正确的请求操作
破解方法二:浏览器实现滑动验证码
2.滑动验证码偏移计算方法
数据来源:滑动验证码的图片
方法一:相似度对比
方法二:像素的RGB值对比
方法三:调用经过数据训练过的机器学习模型
3.图片的预处理
数据来源:滑动验证码的图片
阶段一:缩放图片[将图片尺寸进行压缩]
阶段二:简化色彩[灰度处理]
阶段三:计算平均值或灰度平均值
4.实践操作:图形以及效果展示
javascript
// const puppeteer = require("puppeteer");//puppeteer启动的chromium会被知乎识别
const puppeteer = require('puppeteer-extra');//消除特征 npm install puppeteer-extra
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); //npm install puppeteer-extra-plugin-stealth
puppeteer.use(StealthPlugin()); //消除特征
const Rembrant = require('rembrandt');//rembrandt算法库,导入使用 npm install rembrandt
const fs = require('fs');// nodejs 操作本地文件的库 npm install fs
var ssim = require('ssim');//ssim算法库,导入使用 npm install ssim
// 睡眠函数,单位毫秒
/**
 * Returns a promise that is fulfilled after the given delay.
 * @param {number} time - how long to wait, in milliseconds
 * @returns {Promise<undefined>}
 */
const sleep = (time) =>
  new Promise((done) => setTimeout(done, time));
//程序的主体部分
(async() => {
// Launch a visible browser.
// NOTE(review): the two Chromium flags presumably exist so the in-page canvas
// code can read back the cross-origin captcha images — confirm before removing.
const browser = await puppeteer.launch({
  headless: false, // show the browser window; headless mode would hide it
  defaultViewport: { width: 1366, height: 768 }, // override the default page size
  args: [
    '--disable-web-security',
    '--disable-features=IsolateOrigins,site-per-process',
  ],
});
const page = await browser.newPage();
await page.goto("http://www.zhihu.com/signin");

// Switch the form from SMS login to account/password login.
// `click` has to be invoked; a bare `mima_button.click` only reads the
// property and never clicks the tab.
const mima_button = await page.$('form.SignFlow.Login-content > div:nth-child(1) div:nth-child(2)');
await mima_button.click();

// Type the account name and the password.
const username = await page.$('div.SignFlow-account input.Input');
await username.type('18296198879');
const password = await page.$('div.SignFlow-password input.Input');
await password.type('demodemodemo');

// Press the login button; this brings up the slider captcha.
const login_button = await page.$('button.Button.SignFlow-submitButton.Button--primary.Button--blue');
await sleep(1000);
await login_button.click();
await sleep(1000);
// 重复破解 滑块验证码 代码部分
for (let num = 1; num < 1000; num++){
await sleep(2000);
console.log('----------------\n当前循环次数:',num);
// 判断 验证码多次失败后,弹出的错误提示,需要点击之后才能继续验证
// 判断依据 "失败过多,点此重试",然后会恢复到等到滑块滑动的验证状态
let yidun_msg = await page.$eval("span.yidun_tips_text.yidun-fallback_tip",el => el.innerHTML)
if (yidun_msg = '失败过多,点此重试'){
const yidun_tips = await page.$(".yidun_tips");
await yidun_tips.click();
};
// 验证成功后,滑动验证码的框会隐藏
// 如果滑动验证码隐藏了,则自动停止,并输出"验证成功,正常退出"
let yidun_popup = await page.$('div.yidun_popup--light.yidun_popup');
const yidun_popup_style = await page.evaluate(//根据yidun_popup标签,通过window窗口,计算出标签的style样式
(x) => {return JSON.parse(JSON.stringify(window.getComputedStyle(x)))},
yidun_popup
);
console.log('yidun_popup style.display:',yidun_popup_style.display);//输出样式值
if(yidun_popup_style.display == 'none'){
console.log("验证成功,正常退出");
break;// 如果样式值为none,则表示验证成功了,使用break跳出for循环
};
// page.waitForSelector('.yidun_tips', {timeout:1000}).then((yidun_tips) => yidun_tips.click());
//calculateDistance函数中的console.log内容全部输出在网页的开发者工具console栏【记得拉长延迟,慢慢看】
/**
 * Estimate the horizontal offset of the captcha gap by comparing grayscale
 * pixels of the jigsaw piece against same-sized slices of the background.
 *
 * All image work runs inside the page through `page.evaluate`, so the helpers
 * must be declared inside the evaluated callback. Every `console.log` in there
 * prints to the browser's DevTools console, not to the Node terminal.
 *
 * @param {object} page - Puppeteer page currently showing the slider captcha
 * @returns {Promise<Array>} `[offset, smallImageDataUrl, slicesByOffset]`:
 *   the last x offset that reached the highest similarity, the cropped
 *   grayscale piece as a JPEG data URL, and an object mapping every tested
 *   x offset to the data URL of the background slice taken there.
 */
const calculateDistance = async (page) => {
  const distance = await page.evaluate(() => {
    // Convert RGBA pixel data to grayscale in place. With `binary` set, each
    // pixel becomes 0 or 255 depending on whether its luminance reaches
    // `value` (or the image average when `value` is falsy).
    // NOTE(review): the published listing breaks off after the first channel
    // write; writing all three channels and returning the bit string (`sn`)
    // or the pixel array is reconstructed from how the result is used below.
    const toGrayBinary = (pixels, binary, value, sn) => {
      const len = pixels.length;
      let avg = 0;
      for (let i = 0; i < len; i += 4) {
        avg += 0.299 * pixels[i] + 0.587 * pixels[i + 1] + 0.114 * pixels[i + 2];
      }
      avg /= len / 4;
      const threshold = value || avg;
      let bits = '';
      for (let i = 0; i < len; i += 4) {
        let gray = 0.299 * pixels[i] + 0.587 * pixels[i + 1] + 0.114 * pixels[i + 2];
        if (binary) {
          const isLight = gray >= threshold;
          gray = isLight ? 255 : 0;
          if (sn) {
            bits += isLight ? '1' : '0';
          }
        }
        pixels[i] = gray;
        pixels[i + 1] = gray;
        pixels[i + 2] = gray;
      }
      return sn ? bits : pixels;
    };

    // Paint an ImageData onto a scratch canvas and export it as a JPEG data
    // URL so it can be returned to Node and saved to disk.
    const imgCanvasToBase64 = (img, width, height) => {
      const scratch = document.createElement('canvas');
      scratch.width = width;
      scratch.height = height;
      scratch.getContext('2d').putImageData(img, 0, 0, 0, 0, width, height);
      return scratch.toDataURL('image/jpeg');
    };

    // ---- jigsaw piece -------------------------------------------------
    const smallbgimg = document.getElementsByClassName('yidun_jigsaw')[0];
    const smallW = smallbgimg.naturalWidth;
    const smallH = smallbgimg.naturalHeight;
    const smallcanvas = document.createElement('canvas');
    // Size the canvas to the image; otherwise it keeps the browser's default
    // canvas size and anything outside that area is lost.
    smallcanvas.width = smallW;
    smallcanvas.height = smallH;
    const smallcontext = smallcanvas.getContext('2d');
    console.log('smallbgimg:', smallbgimg, smallW, smallH);
    smallcontext.drawImage(smallbgimg, 0, 0, smallW, smallH);

    // Darken the piece by lowering R, G and B; alpha (every 4th value) is
    // left untouched.
    const pixels = smallcontext.getImageData(0, 0, smallW, smallH);
    const pixeldata = pixels.data;
    for (let i = 0, len = pixeldata.length; i < len; i += 4) {
      pixeldata[i] -= 95; // R
      pixeldata[i + 1] -= 55; // G
      pixeldata[i + 2] -= 45; // B
    }
    smallcontext.putImageData(pixels, 0, 0);

    // Bounding box of every pixel whose R, G and B are all non-zero. The
    // canvas is read back once instead of issuing one getImageData call per
    // pixel. Scanning starts at 1, skipping the first row and column.
    const darkened = smallcontext.getImageData(0, 0, smallW, smallH).data;
    let minwidth = smallW;
    let maxwidth = 0;
    let minheight = smallH;
    let maxheight = 0;
    for (let i = 1; i < smallW; i++) {
      for (let j = 1; j < smallH; j++) {
        const at = (j * smallW + i) * 4;
        if (darkened[at] > 0 && darkened[at + 1] > 0 && darkened[at + 2] > 0) {
          if (minwidth > i) { minwidth = i; }
          if (maxwidth < i) { maxwidth = i; }
          if (minheight > j) { minheight = j; }
          if (maxheight <= j) { maxheight = j; }
        }
      }
    }
    console.log("图片最大和最小宽高", minwidth, maxwidth, minheight, maxheight);
    const height = maxheight - minheight;
    const width = maxwidth - minwidth;
    if (width <= 0 || height <= 0) {
      // Nothing visible was found; cropping with these sizes would fail.
      throw new Error('jigsaw piece has no visible pixels; cannot measure it');
    }

    // Crop the piece and convert it to grayscale (in place), so the exported
    // image is the grayscale version.
    const smallimg = smallcontext.getImageData(minwidth, minheight, width, height);
    const smallimggb = toGrayBinary(smallimg.data);
    const small_img_canvas = imgCanvasToBase64(smallimg, width, height);
    console.log('smallimg canvas:', small_img_canvas);

    // ---- background ---------------------------------------------------
    const bgImg = document.getElementsByClassName('yidun_bg-img')[0];
    const bgW = bgImg.naturalWidth;
    const bgH = bgImg.naturalHeight;
    console.log('bgimg', bgW, bgH);
    const canvas = document.createElement('canvas');
    canvas.width = bgW;
    canvas.height = bgH;
    const context = canvas.getContext('2d');
    context.drawImage(bgImg, 0, 0, bgW, bgH);
    const contextBigimg = context.getImageData(1, 1, bgW, bgH);
    const bigimg = imgCanvasToBase64(contextBigimg, bgW, bgH);
    console.log("bigimg canvas:", bigimg);

    // ---- search -------------------------------------------------------
    // Slide a window of the piece's size across the background at a fixed
    // row and keep the x offsets with the highest share of equal gray values.
    let xAxis = [];
    let tmpmax = 0.0;
    const part_bigimg = {};
    // NOTE(review): the +2 row shift comes from the original listing; confirm
    // it matches how the piece is positioned over the background.
    const j = minheight + 2;
    const pixelCount = width * height;
    for (let i = minwidth + width + 2; i < bgW - width; i++) {
      const slice = context.getImageData(i, j, width, height);
      const imggb = toGrayBinary(slice.data);
      let matches = 0;
      for (let n = 0; n < pixelCount; n++) {
        // The data is RGBA, four entries per pixel; after the grayscale pass
        // the first channel of each pixel carries its gray value.
        if (smallimggb[n * 4] === imggb[n * 4]) {
          matches++;
        }
      }
      const similar = (matches / pixelCount) * 100;
      const bigimg_part = imgCanvasToBase64(slice, width, height);
      part_bigimg[i] = bigimg_part;
      if (similar > tmpmax) {
        tmpmax = similar;
        console.log('yes:', i, j, width, height, similar, bigimg_part);
        xAxis = [i];
      } else if (similar === tmpmax) {
        console.log('yes:', i, j, width, height, similar, bigimg_part);
        xAxis.push(i);
      } else {
        console.log('error---', i, j, width, height, similar, bigimg_part);
      }
    }
    return [xAxis[xAxis.length - 1], small_img_canvas, part_bigimg];
  });
  return distance;
};
// Run the in-page pixel comparison; it returns the estimated offset, the
// cropped piece and every tested background slice (the images as JPEG data URLs).
const adata = await calculateDistance(page);
const [distance, smallimg, part_bigimg] = adata;
console.log('像素RGB值对比算法结果值:',distance);

const DATA_URL_PREFIX = "data:image/jpeg;base64,";
const small_img_path = './assets/smallimg.jpg';
const part_big_dir = './assets/part_big/';
// Make sure the output directories exist before anything is written.
fs.mkdirSync(part_big_dir, { recursive: true });

// Save the piece. The write is synchronous because the Rembrandt comparison
// below reads the files by path and must not run before they are on disk;
// a failed write now throws instead of being ignored.
const smallimg_buffer = Buffer.from(smallimg.replace(DATA_URL_PREFIX, ""), 'base64');
fs.writeFileSync(small_img_path, smallimg_buffer);

let maxdiff = 0.0;
let offset_size = 0;
let ssim_maxdiff = 0.0;
let ssim_offset_size = 0;
for (const [partb, partDataUrl] of Object.entries(part_bigimg)) {
  const part_offset = Number.parseInt(partb, 10);

  // Save this background slice next to the piece.
  const part_big_img_path = part_big_dir + part_offset + '.jpg';
  const partbimg_buffer = Buffer.from(partDataUrl.replace(DATA_URL_PREFIX, ""), 'base64');
  fs.writeFileSync(part_big_img_path, partbimg_buffer);

  // SSIM comparison on the in-memory image bytes; keep the offset with the
  // highest score.
  const ssim_result = ssim(smallimg_buffer, partbimg_buffer);
  if (ssim_result > ssim_maxdiff) {
    ssim_maxdiff = ssim_result;
    ssim_offset_size = part_offset;
  }

  // Rembrandt comparison on the files just written. The module-level import
  // binds the class as `Rembrant`, so that is the name used here, and
  // `compare()` is called on the instance.
  const comparer = new Rembrant({
    imageA: small_img_path,
    imageB: part_big_img_path,
    thresholdType: Rembrant.THRESHOLD_PIXELS,
  });
  const result = await comparer.compare();
  const difference = result.percentageDifference * 100;
  // NOTE(review): this keeps the offset with the LARGEST difference, as the
  // original listing does; a best match would normally be the smallest —
  // confirm the intent. The value is only logged below.
  if (difference > maxdiff) {
    maxdiff = difference;
    offset_size = part_offset;
  }
}
console.log("{*}SSIM算法计算偏移结果值:", ssim_offset_size);
console.log("rembrandt算法计算偏移结果值:", offset_size);
await sleep(1000);
// 拿到了滑动验证码的偏差值,开始使用鼠标移动滑块,定位到偏差值的具体位置。
/**
 * Generate the x offsets of a slow, accelerating drag.
 *
 * Positions follow 1/2 * a * t^2 with a = 0.8, sampled every 0.2 time units
 * and rounded to two decimals. Sampling stops with the first position that
 * is no longer below `dis`, so the last value can overshoot the target.
 *
 * @param {number} dis - target distance
 * @yields {number} successive offsets from the start position
 */
const _moveTrace = function* (dis) {
  const tick = 0.2;
  const accel = 0.8;
  const points = [];
  for (let n = 1, pos = 0; pos < dis; n++) {
    const elapsed = tick * n;
    pos = parseFloat((1 / 2 * accel * elapsed * elapsed).toFixed(2));
    points.push(pos);
  }
  yield* points;
};
// Drag the slider handle by the offset chosen by the SSIM comparison.
const yidun_slider = await page.$(".yidun_slider");
// boundingBox() gives the handle's top-left corner and its size.
const bounding_box = await yidun_slider.boundingBox();
// Press the mouse button at the centre of the handle.
await page.mouse.move(
  bounding_box.x + bounding_box.width / 2,
  bounding_box.y + bounding_box.height / 2
);
await page.mouse.down();
// Move in many small steps, with a small vertical shift, so the drag is gradual.
// NOTE(review): offsets are added to the box's left edge although the press
// happened at its centre — confirm this is the intended origin.
for (const ret of _moveTrace(ssim_offset_size)) {
  await page.mouse.move(bounding_box.x + ret, bounding_box.y + 6);
}
// Settle exactly on the target, pause briefly, then release the handle.
await page.mouse.move(bounding_box.x + ssim_offset_size, bounding_box.y + 6);
await sleep(100); // `sleep` is a plain function call; `await.sleep(...)` does not parse
await page.mouse.up();
};
// The retry loop has ended: wait two seconds, then close the browser.
await sleep(2000);
await browser.close();
})();
总结:
使用canvas进行图片的灰度处理、剪辑处理。
图片的对比,要查看算法函数的调用方法,注意数据类型要对上。
修改代码前,要详细阅读每一行代码,修改起来会更顺畅
三、滑动验证码之像素RGB对比算法实现
算法介绍:1.拿到小图,减弱图片颜色;2.根据小图的宽高,截取同等宽高的对比图;3.循环取出图片的所有RGB值并比较;4.取出相似度最大的一个偏移值,做滑动操作
对比步骤:1.图片预处理2.逐个像素对比3.将相似度最大的偏移量进行滑动操作
图片预处理:1.颜色减弱2.宽高设定