JS脚本实现RPA模拟人工操作网页获取数据

一、首先我们可以根据查询条件去预置一个Excel,比如我们以公司名称为例。

二、然后我们用JS读取Excel内容,进行页面打开与条件记录

<!DOCTYPE html>
<html>
<!-- Centered upload form: pick an Excel file, then press START to begin. -->
<div style="text-align: center;margin-top: 300px;">
    <!-- File picker limited to .xls/.xlsx; START() looks it up by the id "input-excel". -->
    <input type="file" id="input-excel" accept=".xls,.xlsx"/>
    <!-- Clicking the button invokes the global START() function. -->
    <button onclick="START()"
            style="background-color: #4CAF50; color: white; padding: 15px 32px; text-align: center;
             text-decoration: none; display: inline-block; font-size: 16px; margin: 4px 2px;
             cursor: pointer;">
        START
    </button>
</div>
</html>

<script src="./xlsx/xlsx.js" charset="utf-8"></script>
<script>
    /**
     * Reads the selected Excel workbook, collects the `company` column of its
     * first sheet, and navigates the current tab to the enterprise list page
     * with the start index (`myindex=0`) and the JSON-encoded company list
     * (`mycompany`) as URL parameters.
     *
     * Alerts and stops when no file is selected or when the sheet yields no
     * company names.
     */
    function START() {
        const input = document.getElementById('input-excel');
        const file = input.files[0];
        if (!file) {
            alert('请选择一个Excel文件');
            return;
        }

        const reader = new FileReader();
        reader.onload = function (e) {
            const workbook = XLSX.read(e.target.result, {type: 'array'});
            const sheet = workbook.Sheets[workbook.SheetNames[0]];
            const jsonData = XLSX.utils.sheet_to_json(sheet);
            console.log(jsonData);

            // Rows without a usable `company` cell would otherwise end up as
            // `null` entries in the list that is handed to the target page.
            const companyList = jsonData
                .map(function (row) {
                    return row.company;
                })
                .filter(function (company) {
                    return company != null && String(company).trim() !== '';
                });
            if (companyList.length === 0) {
                alert('Excel中没有找到company列的数据');
                return;
            }

            // Build the query through searchParams so that characters such as
            // `&`, `#`, `+` or `%` inside a company name are percent-encoded
            // instead of cutting the parameter short.
            const target = new URL("https://sjfw.scjs.net.cn:8801/xxgx/Enterprise/eList.aspx");
            target.searchParams.set("myindex", "0");
            target.searchParams.set("mycompany", JSON.stringify(companyList));

            setTimeout(function () {
                window.open(target.toString(), "_self");
            }, 3000);
        };
        reader.readAsArrayBuffer(file);
    }
</script>

三、打开页面后,我们要利用Cookie来记录关键值,如公司名称、当前读取到的下标;其次,最重要的就是对页面元素的操作。

四、我们根据条件查询到我们所需要的信息,就可以利用JS自动保存这些信息到Cookie,如果页面会自动刷新也需要利用条件判断来控制。处理好全部后我们也可以用Cookie存储全局信息。

// Accumulators shared by part_1 / part_3. The target site reloads between
// steps, so once a step has run its results are restored from cookies here.
let situationNameList = [];
let situationTimeList = [];
let finalSituationList = [];
let finalSituationTimeList = [];
if (getCookie("part_1_click") === "1") {
    console.log("part_1_click 点击过");
    situationNameList = readListCookie("situationNameList");
    situationTimeList = readListCookie("situationTimeList");
    finalSituationList = readListCookie("finalSituationList");
    finalSituationTimeList = readListCookie("finalSituationTimeList");
}

let personList = []; // entries collected for the "三类人员" rows
if (getCookie("part_3_click") === "1") {
    personList = readListCookie("personList");
}


// Debug marker
console.log("我是wdfgdzx");
if (localStorage.getItem('hasRun3') !== 'true') {
    // Run-once section: executes only while the localStorage marker is unset.
    console.log('这段代码只会在页面刷新的情况下执行一次');
    clearCookie("clickTime")
    // Record in localStorage that the run-once section has executed.
    localStorage.setItem('hasRun3', 'true');
}

/**
 * Reads a cookie that is expected to hold a JSON array.
 *
 * A missing, empty, malformed or non-array cookie yields an empty array, so
 * the accumulators above always stay arrays (a plain JSON.parse would return
 * null for a missing cookie and throw for a truncated one).
 *
 * @param {string} name - Cookie name.
 * @returns {Array} The parsed array, or [] when nothing usable is stored.
 */
function readListCookie(name) {
    const raw = getCookie(name);
    if (raw == null || raw === "") {
        return [];
    }
    try {
        const parsed = JSON.parse(raw);
        return Array.isArray(parsed) ? parsed : [];
    } catch (err) {
        console.warn("cookie " + name + " 解析失败", err);
        return [];
    }
}

// 1. Read a parameter passed in through the current page URL
/**
 * Returns the value of one query-string parameter of the current page.
 *
 * @param {string} name - Parameter name.
 * @returns {string|null} The decoded value, or null when the parameter is absent.
 */
function getURLParameter(name) {
    const query = window.location.search;
    return new URLSearchParams(query).get(name);
}

// Page-load dispatcher.
// The original test `!getCookie(x) == "1"` negated first and compared second,
// so it only worked through boolean/string coercion. The checks below compare
// the cookie value itself.
if (getCookie("clickTime") !== "1") {
    // Search not submitted yet for the current company.
    startSearchForCurrentCompany();
} else {
    // Search already submitted: continue with the follow-up steps.
    setTimeout(continueAfterSearch, 2000);
}

/**
 * Parses a JSON-encoded company list.
 *
 * @param {string|null} raw - JSON text taken from a cookie or URL parameter.
 * @returns {Array|null} The list, or null when it is missing, malformed or not an array.
 */
function parseCompanyList(raw) {
    if (raw == null || raw === "") {
        return null;
    }
    try {
        const parsed = JSON.parse(raw);
        return Array.isArray(parsed) ? parsed : null;
    } catch (err) {
        console.error("公司列表解析失败", err);
        return null;
    }
}

/**
 * Fills the search box with the current company name, stores the list and
 * index in cookies, and clicks the search button after one second.
 *
 * List and index come from the cookies when `myindex` is already stored,
 * otherwise from the URL parameters.
 */
function startSearchForCurrentCompany() {
    // 2. Resolve the company list and the current index
    const useCookies = getCookie("myindex") != null;
    const rawIndex = useCookies ? getCookie("myindex") : getURLParameter("myindex");
    const rawList = useCookies ? getCookie("mycompany") : getURLParameter("mycompany");
    const index = parseInt(rawIndex, 10);
    const companyList = parseCompanyList(rawList);
    if (companyList === null || isNaN(index) || index < 0 || index >= companyList.length) {
        console.error("缺少有效的公司列表或下标,已停止");
        return;
    }

    // 3. Fill in the company name
    const companyInput = document.getElementById("mc");
    // 4. Locate the search button
    const searchButton = document.getElementById("MainContent_Button1");
    if (!companyInput || !searchButton) {
        console.error("页面上没有找到搜索输入框或搜索按钮,已停止");
        return;
    }
    companyInput.value = companyList[index];
    setCookie("mycompany", JSON.stringify(companyList), 1); // persist the list
    setCookie("myindex", JSON.stringify(index), 1); // always refresh the stored index
    setTimeout(function () {
        searchButton.click()
        setCookie("clickTime", "1", 1);
    }, 1000)

    // 5. Diagnostic pass: log every link and the one matching the company.
    //    The click itself happens in continueAfterSearch.
    setTimeout(function () {
        const links = document.getElementsByTagName('a');
        console.log(links.length)
        for (let i = 0; i < links.length; i++) {
            const link = links[i];
            console.log("我打印的" + link.innerText)
            if (link.innerText == companyList[index]) {
                console.log("我打印的" + link.innerText)
            }
        }
    }, 5000)
}

/**
 * Runs after the search was submitted: starts every part_N step that has not
 * recorded its cookie yet, then clicks each link whose text equals the
 * current company name.
 */
function continueAfterSearch() {
    const companyList = parseCompanyList(getCookie("mycompany"));
    const index = parseInt(getCookie("myindex"), 10); // current company index
    const links = document.getElementsByTagName('a');
    console.log(links.length)
    if (getCookie("part_1_click") !== "1") {
        part_1();
        console.log("执行了part_1方法")
    }
    if (getCookie("part_2_click") !== "1") {
        part_2();
        console.log("执行了part_2方法")
    }
    if (getCookie("part_3_click") !== "1") {
        part_3();
        console.log("执行了part_3方法")
    }

    if (companyList === null) {
        console.error("cookie中没有有效的公司列表,无法匹配公司链接");
        return;
    }
    for (let i = 0; i < links.length; i++) {
        const link = links[i];
        if (link.innerText == companyList[index]) {
            console.log("我打印的" + link.innerText)
            // clickTime is deliberately left set here.
            link.click()
        }
    }
}

// Set a cookie
/**
 * Writes a cookie on path "/".
 *
 * @param {string} name - Cookie name.
 * @param {*} value - Value to store; null/undefined are stored as an empty
 *     string, every other value is stored in its string form (so the index
 *     `0` is kept as "0" rather than being dropped).
 * @param {number|string} [days] - Lifetime in days; when falsy no `expires`
 *     attribute is written.
 */
function setCookie(name, value, days) {
    let expires = "";
    if (days) {
        const date = new Date();
        date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
        expires = "; expires=" + date.toUTCString();
    }
    // `value || ""` would also blank out legitimate falsy values such as 0.
    const stored = value == null ? "" : value;
    document.cookie = name + "=" + stored + expires + "; path=/";
}

// Read a cookie
/**
 * Looks up a cookie by name in `document.cookie`.
 *
 * @param {string} name - Cookie name.
 * @returns {string|null} The raw stored value, or null when no cookie with
 *     that name is present.
 */
function getCookie(name) {
    const prefix = `${name}=`;
    for (const rawPair of document.cookie.split(';')) {
        // Pairs are separated by "; ", so drop the leading spaces before matching.
        const pair = rawPair.replace(/^ +/, '');
        if (pair.startsWith(prefix)) {
            return pair.slice(prefix.length);
        }
    }
    return null;
}


/**
 * Step 1: after a 2 s delay, scrapes the table headers of the current page.
 *
 * For every <th> whose text contains "质", the text of the following sibling
 * cell is added to `situationNameList`; for every <th> whose text contains
 * "有效期", it is added to `situationTimeList`. Names and dates are then
 * paired by position into `finalSituationList`. The last collected date is
 * recorded separately as the "安许" entry and removed from
 * `situationTimeList`. All results and the `part_1_click` marker are stored
 * in cookies.
 *
 * NOTE(review): this assumes the last "有效期" row on the page belongs to the
 * 安许 licence and that names and dates line up by position. Confirm against
 * the target page.
 */
function part_1() {
    setTimeout(function () {
        const thList = document.getElementsByTagName('th');
        for (let i = 0; i < thList.length; i++) {
            const label = thList[i].innerText;
            const valueCell = thList[i].nextElementSibling;
            if (!valueCell) {
                // A header without a following cell has nothing to read;
                // dereferencing it would abort the whole step.
                continue;
            }
            if (label.includes("质")) {
                situationNameList.push(valueCell.innerText); // qualification text
            }
            if (label.includes("有效期")) {
                situationTimeList.push(valueCell.innerText); // expiry date
            }
        }

        for (let i = 0; i < situationNameList.length; i++) {
            finalSituationList.push(situationNameList[i] + "---有效期" + situationTimeList[i]);
        }
        console.log(finalSituationList) // qualifications with their expiry dates

        const licenceExpiry = situationTimeList[situationTimeList.length - 1];
        console.log(licenceExpiry) // 安许 expiry date
        finalSituationList.push("安许 有效期" + licenceExpiry);
        finalSituationTimeList.push(licenceExpiry);

        // Keep only the expiry dates of the other certificates.
        situationTimeList = situationTimeList.slice(0, -1);

        setCookie("part_1_click", "1", 1);
        setCookie("situationNameList", JSON.stringify(situationNameList), 1);
        setCookie("situationTimeList", JSON.stringify(situationTimeList), 1);
        setCookie("finalSituationList", JSON.stringify(finalSituationList), 1);
        setCookie("finalSituationTimeList", JSON.stringify(finalSituationTimeList), 1);
    }, 2000)
}

/**
 * Step 2: after a 3 s delay, clicks every link whose text contains "企业人员"
 * and records the `part_2_click` cookie for each click.
 */
function part_2() {
    const openStaffLinks = () => {
        for (const anchor of document.getElementsByTagName('a')) {
            if (!anchor.innerText.includes("企业人员")) {
                continue;
            }
            console.log(anchor.innerText);
            anchor.click();
            setCookie("part_2_click", "1", 1);
        }
    };
    setTimeout(openStaffLinks, 3000);
}

/**
 * Step 3: after a 5 s delay, scrapes the "三类人员" rows, appends the record of
 * the current company to the `ALL_GLOBAL_DATA_LIST` cookie and then either
 *   - advances `myindex`, clears the step markers and reopens the list page
 *     (more companies remain), or
 *   - flattens all stored records and hands them to part_4 for export
 *     (this was the last company).
 *
 * Each person entry is "<previous cell text>--<letters of the next cell>--
 * <text of the cell three to the right>".
 * NOTE(review): the inline comments of the original describe those cells as
 * person name, certificate level and certificate expiry. Confirm against the
 * page layout.
 */
function part_3() {
    // Trailing marker appended after every list element in the exported text.
    const ITEM_MARKER = "&&&&&&&&&&";

    setTimeout(function () {
        collectPersonRows();
        console.log(personList)
        setCookie("part_3_click", "1", 1);
        setCookie("personList", JSON.stringify(personList), 1)

        const companyList = JSON.parse(getCookie("mycompany"));
        const index = parseInt(getCookie("myindex"), 10); // current company index
        const record = {
            "name": companyList[index],
            "situation": finalSituationList,
            "end": situationTimeList,
            "install": finalSituationTimeList,
            "person": personList
        };

        // With step 3 done the record is complete: append it to the global list.
        const stored = getCookie("ALL_GLOBAL_DATA_LIST");
        const ALL_GLOBAL_DATA_LIST = stored == null ? [] : JSON.parse(stored);
        ALL_GLOBAL_DATA_LIST.push(record);
        setCookie("ALL_GLOBAL_DATA_LIST", JSON.stringify(ALL_GLOBAL_DATA_LIST), 1);
        console.log("全局数据动态记录" + getCookie("ALL_GLOBAL_DATA_LIST"));

        if (index < companyList.length - 1) {
            setCookie("myindex", (index + 1) + "", 1);
        } else {
            // Last company: export everything and stop.
            part_4(buildExportRows(JSON.parse(getCookie("ALL_GLOBAL_DATA_LIST"))));
            return;
        }

        clearAllMark();
        setTimeout(function () {
            window.open("https://sjfw.scjs.net.cn:8801/xxgx/Enterprise/eList.aspx", "_self")
        }, 1000)
    }, 5000)

    /** Appends one entry to personList for every "三类人员" table cell. */
    function collectPersonRows() {
        const tdList = document.getElementsByTagName('td');
        for (let i = 0; i < tdList.length; i++) {
            const cell = tdList[i];
            if (cell.innerText != "三类人员") {
                continue;
            }
            const nameCell = cell.previousElementSibling;
            const levelCell = cell.nextElementSibling;
            const skippedCell = levelCell && levelCell.nextElementSibling;
            const expiryCell = skippedCell && skippedCell.nextElementSibling;
            if (!nameCell || !levelCell || !expiryCell) {
                // An incomplete row would otherwise throw and abort the whole step.
                console.warn("三类人员行缺少相邻单元格,已跳过");
                continue;
            }
            console.log(expiryCell.innerText);
            personList.push(nameCell.innerText + "--" + extractLetters(levelCell.innerText) + "--" + expiryCell.innerText);
        }
    }

    /** Converts the stored records into flat rows for part_4. */
    function buildExportRows(list) {
        return list.map(function (item) {
            // Declared locally: the original assigned to an undeclared
            // `tempJson`, which creates a global (or throws in strict mode).
            const row = {
                "name": item.name,
                "exportSituation": joinWithMarker(item.situation.map(stripWhitespace)),
                "exportEnd": joinWithMarker(item.end),
                "exportInstall": joinWithMarker(item.install),
                "exportPerson": joinWithMarker(item.person.map(stripWhitespace))
            };
            return row;
        });
    }

    /** Concatenates the values, each followed by ITEM_MARKER. */
    function joinWithMarker(values) {
        return values.map(function (value) {
            return value + ITEM_MARKER;
        }).join("");
    }

    /** Removes every whitespace character, line breaks included. */
    function stripWhitespace(text) {
        return text.replace(/\s+/g, '');
    }

    /** Keeps only the ASCII letters of a string. */
    function extractLetters(str) {
        return str.replace(/[^a-zA-Z]/g, '');
    }
}

/**
 * Step 4: clears all scraper cookies, then downloads the collected rows as a
 * UTF-8 CSV file through a temporary <a download> link.
 *
 * @param {Array<Object>} my_export_data - One object per company; the values
 *     of each object are written in property order as one CSV row.
 */
function part_4(my_export_data) {
    finalClear(); // clear every cookie used by the scraper

    /**
     * Quotes a field when it contains a comma, a double quote or a line
     * break, doubling any embedded quotes, so such values stay in one cell.
     */
    function escapeCsvField(text) {
        if (!/[",\r\n]/.test(text)) {
            return text;
        }
        return '"' + text.replace(/"/g, '""') + '"';
    }

    // Column titles; every comma starts a new cell.
    let str = `公司名称,资质情况,资质到期时间,安许到期时间,三类人员\n`;
    for (let i = 0; i < my_export_data.length; i++) {
        const row = my_export_data[i];
        Object.keys(row).forEach(function (key) {
            // The trailing tab keeps spreadsheet programs from reformatting
            // the value (e.g. into scientific notation).
            str += `${escapeCsvField(row[key] + '\t')},`;
        });
        str += '\n';
    }

    // The BOM plus encodeURIComponent keeps Chinese text readable.
    const uri = 'data:text/csv;charset=utf-8,\ufeff' + encodeURIComponent(str);
    // Trigger the download through a temporary link element.
    const link = document.createElement("a");
    link.href = uri;
    link.download = "本次数据导出的明细.csv";
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
}

// Delete a cookie
/**
 * Removes a cookie on path "/" by overwriting it with an empty value and an
 * expiry date at the Unix epoch.
 *
 * @param {string} cookieName - Name of the cookie to remove.
 */
function clearCookie(cookieName) {
    const epoch = new Date(0).toUTCString();
    document.cookie = `${cookieName}=; expires=${epoch}; path=/`;
}

/**
 * Clears the per-company step markers so the next company starts from a
 * clean state. The accumulated data (`ALL_GLOBAL_DATA_LIST`, `mycompany`,
 * `myindex`) is left untouched.
 */
function clearAllMark() {
    // `hasRun3` is written with localStorage.setItem, so it has to be removed
    // from localStorage; clearing a cookie of that name never reset it.
    localStorage.removeItem("hasRun3");
    clearCookie("hasRun3"); // kept in case a cookie of that name exists
    clearCookie("part_1_click");
    clearCookie("part_2_click");
    clearCookie("part_3_click");
    clearCookie("clickTime");
    console.log("清除所有标记cookie成功")
}

/**
 * Clears every piece of state kept by the scraper: the step markers plus the
 * accumulated data cookies (`ALL_GLOBAL_DATA_LIST`, `mycompany`, `myindex`).
 */
function finalClear() {
    // `hasRun3` is written with localStorage.setItem, so it has to be removed
    // from localStorage; clearing a cookie of that name never reset it.
    localStorage.removeItem("hasRun3");
    clearCookie("hasRun3"); // kept in case a cookie of that name exists
    clearCookie("part_1_click");
    clearCookie("part_2_click");
    clearCookie("part_3_click");
    clearCookie("clickTime");
    console.log("最后清除cookie成功")
    clearCookie("ALL_GLOBAL_DATA_LIST");
    clearCookie("mycompany");
    clearCookie("myindex");
}

五、最后,利用JS拿到所有需要的数据后,我们就可以把它们保存到Excel或者CSV中了。这样我们就顺利地用JS实现了数字员工:模拟人的操作来获取网页数据,大大降低了人工操作的成本。下面这些数据都是通过全流程的自动化查询、自动化填充、自动化保存为Excel得到的。是不是非常赞!

相关推荐
微刻时光3 天前
RPA真的是人工智能吗?
人工智能·rpa
知数SEO21 天前
Automa入门教程详解(Automa工作流概述)
自动化·rpa·automa
荔枝学Python21 天前
一文总结AI智能体与传统RPA机器人的16个关键区别
人工智能·自然语言处理·机器人·大模型·agent·rpa·ai智能体
微刻时光24 天前
影刀RPA实战:常见实用功能指令
笔记·机器人·自动化·rpa·水印·影刀·影刀rpa
微刻时光1 个月前
影刀RPA实战番外:excel函数应用指南
笔记·自动化·excel·rpa·影刀·影刀rpa
微刻时光1 个月前
影刀RPA实战:网页爬虫之桌面壁纸图片
爬虫·python·scrapy·机器人·rpa·影刀·影刀rpa
法海爱捉虫1 个月前
手机淘宝自动下单退货自动化RPA脚本机器人
运维·自动化·rpa
金智维科技官方1 个月前
RPA好用吗?RPA机器人如何使用?
机器人·rpa
微刻时光1 个月前
影刀RPA实战:Excel数据透视表指令
低代码·数据分析·自动化·excel·rpa·影刀·影刀rpa
微刻时光1 个月前
影刀RPA实战:操作Mysql数据库
数据库·mysql·oracle·自动化·rpa·影刀·影刀rpa