获取贝壳中介列表,并自动导入 Excel

JavaScript 代码:
(async function() {
    console.log("脚本已启动:Tab格式输出(支持Excel直接粘贴),左上角控制暂停。");

    // Shared state used by the parser, the observer and the scroll loop.
    let isPaused = false;
    let count = 0;
    const seenUfids = new Set();

    // --- 1. Build the floating control panel ---
    // If the script is pasted into the console a second time, a panel from the
    // previous run is still in the page. Leaving it there would create duplicate
    // element ids, and document-wide id lookups would bind to the stale panel
    // while the new (visible) one stays unresponsive. Remove leftovers first.
    document.querySelectorAll('#crawler-ctrl').forEach(stale => stale.remove());

    const panel = document.createElement('div');
    panel.innerHTML = `
        <div id="crawler-ctrl" style="position:fixed;top:20px;left:20px;z-index:9999;padding:15px;background:#fff;border:2px solid #000;box-shadow:5px 5px 0px #888;font-family:sans-serif;">
            <b style="display:block;margin-bottom:10px;">Excel 助手型抓取</b>
            <div style="font-size:12px;color:#666;margin-bottom:8px;">抓取后复制控制台内容到 Excel 即可</div>
            <div>已抓取: <span id="crawler-count" style="font-weight:bold;color:red;">0</span> 位</div>
            <button id="btn-pause" style="margin-top:10px;cursor:pointer;padding:5px 12px;border-radius:4px;border:1px solid #333;">暂停滚动</button>
        </div>
    `;
    document.body.appendChild(panel);

    // Resolve the controls inside this panel so the page's own elements (or a
    // leftover panel) can never be picked up by accident.
    const btnPause = panel.querySelector('#btn-pause');
    const countDisplay = panel.querySelector('#crawler-count');

    // Toggle auto-scrolling and reflect the new state on the button.
    btnPause.onclick = () => {
        isPaused = !isPaused;
        btnPause.innerText = isPaused ? "恢复滚动" : "暂停滚动";
        btnPause.style.background = isPaused ? "#ffcccc" : "#f0f0f0";
    };

    // Print the tab-separated header row once (handy for the first paste into Excel).
    console.warn("姓名\t学校\t服务公司\t服务年限\t客户评分\t贝壳分\t个人简介\t标签");

    // --- 2. 解析函数 ---
    /**
     * Extract one agent card and print it to the console as a single
     * tab-separated row (8 columns, matching the header row).
     *
     * Cards whose `data-id` was already seen are skipped; cards without an id
     * are always printed.
     *
     * @param {Element} node - The card element itself or a node inside one.
     * @returns {void}
     */
    function parseAndLog(node) {
        if (!node || !node.querySelector) return;
        const agentItem = node.classList.contains('jingjiren-list-item') ? node : node.closest('.jingjiren-list-item');
        if (!agentItem) return;

        const id = agentItem.querySelector('.jingjiren-list__agent-item')?.getAttribute('data-id');
        if (id && seenUfids.has(id)) return;
        if (id) seenUfids.add(id);

        // A tab or newline inside a value would shift or split the row when it
        // is pasted into Excel, so every cell is collapsed onto one line.
        const toCell = (text) => String(text ?? "").replace(/\s+/g, " ").trim();
        const textOf = (selector) => toCell(agentItem.querySelector(selector)?.innerText) || "-";

        // Basic fields
        const name = textOf('.name');
        const school = textOf('.school');
        const company = textOf('.desc');

        // Self introduction, without the trailing "...更多" expander label.
        const rawIntro = agentItem.querySelector('.agent-intro-content')?.innerText ?? "";
        const intro = toCell(rawIntro.replace("...更多", "")) || "-";

        // Score fields. A counter whose title or value element is missing keeps
        // its default instead of being overwritten with `undefined`.
        let years = "0";
        let rating = "-";
        let score = "0";
        agentItem.querySelectorAll('.count-item').forEach(c => {
            const title = c.querySelector('.count-title')?.innerText;
            const val = c.querySelector('.small')?.innerText?.replace(/\s+/g, '');
            if (!title || !val) return;
            if (title.includes("服务年限")) years = val;
            if (title.includes("客户评分")) rating = val;
            if (title.includes("贝壳分")) score = val;
        });

        // Labels
        const tags = Array.from(agentItem.querySelectorAll('.agent-label')).map(t => toCell(t.innerText)).join(' | ');

        count++;
        countDisplay.innerText = count;

        // Tab-separated so that Excel splits the pasted line into columns.
        console.log(`${name}\t${school}\t${company}\t${years}\t${rating}\t${score}\t${intro}\t${tags}`);
    }

    // --- 3. 监听器 ---
    // Parse an inserted element: either it is an agent card itself, or it may
    // contain agent cards somewhere below it.
    const collectFrom = (element) => {
        if (element.classList.contains('jingjiren-list-item')) {
            parseAndLog(element);
            return;
        }
        element.querySelectorAll('.jingjiren-list-item').forEach(parseAndLog);
    };

    const observer = new MutationObserver((records) => {
        for (const record of records) {
            for (const added of record.addedNodes) {
                // nodeType 1 === element; text and comment nodes are skipped.
                if (added.nodeType !== 1) continue;
                collectFrom(added);
            }
        }
    });

    observer.observe(document.body, { childList: true, subtree: true });

    // Cards that were already on the page before the observer was attached.
    for (const existing of document.querySelectorAll('.jingjiren-list-item')) {
        parseAndLog(existing);
    }

    // --- 4. Auto scroll ---
    const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
    const isAtBottom = () => (window.innerHeight + window.scrollY) >= document.body.offsetHeight - 5;

    for (;;) {
        if (!isPaused) {
            window.scrollBy(0, 180);
            if (isAtBottom()) {
                // Wait for content that may still be loading, then check again.
                await sleep(2500);
                if (isAtBottom()) {
                    console.log("--- 采集结束 ---");
                    break;
                }
            }
        }
        await sleep(150);
    }
})();

适用页面示例:https://m.ke.com/nj/jingjiren/d320111/ao12sy0pg11

2.0 版本:直接导出 Excel(CSV)文件

JavaScript 代码:
(async function() {
    console.log("脚本已启动:支持一键导出 Excel (.csv),左上角控制。");

    // Shared state used by the export handler, the parser and the scroll loop.
    let isPaused = false;
    const grabbedData = []; // every parsed agent record, in collection order
    const seenUfids = new Set();

    // --- 1. Build the floating control panel ---
    // If the script is pasted into the console a second time, a panel from the
    // previous run is still in the page. Leaving it there would create duplicate
    // element ids, and document-wide id lookups would bind to the stale panel
    // while the new (visible) one stays unresponsive. Remove leftovers first.
    document.querySelectorAll('#crawler-ctrl').forEach(stale => stale.remove());

    const panel = document.createElement('div');
    panel.innerHTML = `
        <div id="crawler-ctrl" style="position:fixed;top:20px;left:20px;z-index:9999;padding:15px;background:#fff;border:2px solid #000;box-shadow:5px 5px 0px #888;font-family:sans-serif; min-width:150px;">
            <b style="display:block;margin-bottom:10px;border-bottom:1px solid #eee;">中介数据采集器</b>
            <div style="margin-bottom:10px;">已采集: <span id="crawler-count" style="font-weight:bold;color:red;font-size:18px;">0</span> 位</div>
            <button id="btn-pause" style="width:100%;margin-bottom:5px;cursor:pointer;padding:8px;background:#f0f0f0;border:1px solid #333;border-radius:4px;">暂停滚动</button>
            <button id="btn-export" style="width:100%;cursor:pointer;padding:8px;background:#28a745;color:white;border:none;border-radius:4px;font-weight:bold;">导出 Excel (CSV)</button>
        </div>
    `;
    document.body.appendChild(panel);

    // Resolve the controls inside this panel so the page's own elements (or a
    // leftover panel) can never be picked up by accident.
    const btnPause = panel.querySelector('#btn-pause');
    const btnExport = panel.querySelector('#btn-export');
    const countDisplay = panel.querySelector('#crawler-count');

    // Pause / resume: toggle auto-scrolling and reflect the state on the button.
    btnPause.onclick = () => {
        isPaused = !isPaused;
        btnPause.innerText = isPaused ? "恢复滚动" : "暂停滚动";
        btnPause.style.background = isPaused ? "#ffcccc" : "#f0f0f0";
    };

    // --- 导出 CSV 逻辑 ---
    // Export everything collected so far as a UTF-8 CSV download.
    btnExport.onclick = () => {
        if (grabbedData.length === 0) return alert("还没有抓取到数据!");

        // Header row and, in the same order, the record keys that feed each column.
        const headers = ["姓名", "学校", "服务公司", "服务年限", "客户评分", "贝壳分", "个人简介", "标签"];
        const columns = ["name", "school", "company", "years", "rating", "score", "intro", "tags"];

        // Quote every field and double any embedded quote. Previously only the
        // intro was escaped, so a `"` in a name, company or label corrupted the
        // row. Missing values become an empty cell rather than "undefined".
        const toCsvField = (value) => `"${String(value ?? "").replace(/"/g, '""')}"`;

        const csvContent = [
            "\ufeff" + headers.join(","), // BOM so Excel detects UTF-8 and shows Chinese text correctly
            ...grabbedData.map(item => columns.map(key => toCsvField(item[key])).join(","))
        ].join("\n");

        // Locale date strings may contain "/" (e.g. 2024/5/1), which is not a
        // valid file-name character; use "-" instead.
        const dateStamp = new Date().toLocaleDateString().replace(/[\\/]/g, "-");

        // Trigger the download through a temporary, hidden link.
        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
        const url = URL.createObjectURL(blob);
        const link = document.createElement("a");
        link.setAttribute("href", url);
        link.setAttribute("download", `中介数据汇总_${dateStamp}.csv`);
        link.style.visibility = 'hidden';
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);

        // Release the blob URL; deferred so the download has time to start.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    };

    // --- 2. 解析函数 ---
    /**
     * Extract one agent card and append it to `grabbedData`.
     *
     * Cards whose `data-id` was already seen are skipped; cards without an id
     * are always stored.
     *
     * @param {Element} node - The card element itself or a node inside one.
     * @returns {void}
     */
    function parseAndStore(node) {
        if (!node || !node.querySelector) return;
        const agentItem = node.classList.contains('jingjiren-list-item') ? node : node.closest('.jingjiren-list-item');
        if (!agentItem) return;

        const id = agentItem.querySelector('.jingjiren-list__agent-item')?.getAttribute('data-id');
        if (id && seenUfids.has(id)) return;
        if (id) seenUfids.add(id);

        // Collapse line breaks and repeated whitespace so each value stays on
        // one line in the exported file.
        const toCell = (text) => String(text ?? "").replace(/\s+/g, " ").trim();
        const textOf = (selector) => toCell(agentItem.querySelector(selector)?.innerText) || "-";

        const name = textOf('.name');
        const school = textOf('.school');
        const company = textOf('.desc');

        // Self introduction, without the trailing "...更多" expander label.
        const rawIntro = agentItem.querySelector('.agent-intro-content')?.innerText ?? "";
        const intro = toCell(rawIntro.replace("...更多", "")) || "-";

        // A counter whose title or value element is missing keeps its default
        // instead of being overwritten with `undefined`.
        let years = "0";
        let rating = "-";
        let score = "0";
        agentItem.querySelectorAll('.count-item').forEach(c => {
            const title = c.querySelector('.count-title')?.innerText;
            const val = c.querySelector('.small')?.innerText?.replace(/\s+/g, '');
            if (!title || !val) return;
            if (title.includes("服务年限")) years = val;
            if (title.includes("客户评分")) rating = val;
            if (title.includes("贝壳分")) score = val;
        });

        const tags = Array.from(agentItem.querySelectorAll('.agent-label')).map(t => toCell(t.innerText)).join(' | ');

        // Store the record and refresh the on-screen counter.
        grabbedData.push({ name, school, company, years, rating, score, intro, tags });

        countDisplay.innerText = grabbedData.length;
        console.log(`已采集: ${name}`);
    }

    // --- 3. 监听器 ---
    // Parse an inserted element: either it is an agent card itself, or it may
    // contain agent cards somewhere below it.
    const collectFrom = (element) => {
        if (element.classList.contains('jingjiren-list-item')) {
            parseAndStore(element);
            return;
        }
        element.querySelectorAll('.jingjiren-list-item').forEach(parseAndStore);
    };

    const observer = new MutationObserver((records) => {
        for (const record of records) {
            for (const added of record.addedNodes) {
                // nodeType 1 === element; text and comment nodes are skipped.
                if (added.nodeType !== 1) continue;
                collectFrom(added);
            }
        }
    });

    observer.observe(document.body, { childList: true, subtree: true });

    // Cards that were already on the page before the observer was attached.
    for (const existing of document.querySelectorAll('.jingjiren-list-item')) {
        parseAndStore(existing);
    }

    // --- 4. Auto scroll ---
    const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
    const isAtBottom = () => (window.innerHeight + window.scrollY) >= document.body.offsetHeight - 5;

    for (;;) {
        if (!isPaused) {
            window.scrollBy(0, 200);
            if (isAtBottom()) {
                // Wait for content that may still be loading, then check again.
                await sleep(2000);
                if (isAtBottom()) {
                    console.log("--- 采集结束 ---");
                    alert("全部采集完成,点击导出按钮即可下载!");
                    break;
                }
            }
        }
        await sleep(100);
    }
})();

3.0 版本:快速导出

JavaScript 代码:
(async function() {
    console.log("脚本已启动:增加品牌/区域/店名拆分,优化简介提取。");

    // Shared state used by the export handler, the parser and the scroll loop.
    let isPaused = false;
    const grabbedData = [];
    const seenUfids = new Set();

    // --- 1. UI panel ---
    // If the script is pasted into the console a second time, a panel from the
    // previous run is still in the page. Leaving it there would create duplicate
    // element ids, and document-wide id lookups would bind to the stale panel
    // while the new (visible) one stays unresponsive. Remove leftovers first.
    document.querySelectorAll('#crawler-ctrl').forEach(stale => stale.remove());

    const panel = document.createElement('div');
    panel.innerHTML = `
        <div id="crawler-ctrl" style="position:fixed;top:20px;left:20px;z-index:9999;padding:15px;background:#fff;border:2px solid #000;box-shadow:5px 5px 0px #888;font-family:sans-serif; min-width:180px;">
            <b style="display:block;margin-bottom:10px;border-bottom:1px solid #eee;">🏠 房产中介全能采集</b>
            <div style="margin-bottom:10px;">已采集: <span id="crawler-count" style="font-weight:bold;color:red;font-size:20px;">0</span></div>
            <div id="crawler-status" style="font-size:12px;color:blue;margin-bottom:5px;">正在采集...</div>
            <button id="btn-pause" style="width:100%;margin-bottom:5px;cursor:pointer;padding:8px;background:#f0f0f0;border:1px solid #333;border-radius:4px;">暂停/恢复</button>
            <button id="btn-export" style="width:100%;cursor:pointer;padding:8px;background:#28a745;color:white;border:none;border-radius:4px;font-weight:bold;">导出 Excel (CSV)</button>
        </div>
    `;
    document.body.appendChild(panel);

    // Resolve the controls inside this panel so the page's own elements (or a
    // leftover panel) can never be picked up by accident.
    const btnPause = panel.querySelector('#btn-pause');
    const btnExport = panel.querySelector('#btn-export');
    const countDisplay = panel.querySelector('#crawler-count');
    const statusDisplay = panel.querySelector('#crawler-status');

    // Toggle auto-scrolling and show the new state in the status line.
    btnPause.onclick = () => {
        isPaused = !isPaused;
        statusDisplay.innerText = isPaused ? "已暂停" : "正在采集...";
    };

    // Export everything collected so far as a UTF-8 CSV download.
    btnExport.onclick = () => {
        // Nothing collected yet: tell the user instead of saving a header-only file.
        if (grabbedData.length === 0) return alert("还没有抓取到数据!");

        // Header row and, in the same order, the record keys that feed each column.
        const headers = ["姓名", "学校", "品牌", "区域", "店名", "全称", "服务年限", "客户评分", "贝壳分", "个人简介", "标签"];
        const columns = ["name", "school", "brand", "area", "store", "fullName", "years", "rating", "score", "intro", "tags"];

        // Quote every field and double any embedded quote. Previously only the
        // intro was escaped, so a `"` in any other value corrupted the row.
        // Missing values become an empty cell rather than "undefined".
        const toCsvField = (value) => `"${String(value ?? "").replace(/"/g, '""')}"`;

        const csvContent = [
            "\ufeff" + headers.join(","), // BOM so Excel detects UTF-8
            ...grabbedData.map(item => columns.map(key => toCsvField(item[key])).join(","))
        ].join("\n");

        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
        const url = URL.createObjectURL(blob);
        const link = document.createElement("a");
        link.href = url;
        link.download = `中介数据明细_${new Date().getTime()}.csv`;

        // Attach the link while clicking it: a click on a detached anchor is not
        // guaranteed to start a download in every browser.
        link.style.display = 'none';
        document.body.appendChild(link);
        link.click();
        link.remove();

        // Release the blob URL; deferred so the download has time to start.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    };

    // --- 2. 解析逻辑 ---
    /**
     * Extract one agent card and append it to `grabbedData`.
     *
     * Cards without a `data-id`, or whose id was already seen, are skipped.
     *
     * @param {Element} node - The card element itself or a node inside one.
     * @returns {void}
     */
    function parseAndStore(node) {
        if (!node || !node.querySelector) return;
        const agentItem = node.classList.contains('jingjiren-list-item') ? node : node.closest('.jingjiren-list-item');
        if (!agentItem) return;

        const id = agentItem.querySelector('.jingjiren-list__agent-item')?.getAttribute('data-id');
        if (!id || seenUfids.has(id)) return;
        seenUfids.add(id);

        const name = agentItem.querySelector('.name')?.innerText.trim() || "-";
        const school = agentItem.querySelector('.school')?.innerText.trim() || "-";

        // --- Split "brand·area·store" ---
        // Each part is trimmed (the separator may be surrounded by spaces), and
        // anything after the second separator is kept as part of the store name
        // so a store whose own name contains "·" is not truncated.
        const fullDesc = agentItem.querySelector('.desc')?.innerText.trim() || "-";
        let brand = "-";
        let area = "-";
        let store = "-";
        if (fullDesc !== "-") {
            const [brandPart, areaPart, ...storeParts] = fullDesc.split('·').map(part => part.trim());
            brand = brandPart || "-";
            area = areaPart || "-";
            store = storeParts.join('·') || "-";
        }

        // --- Self introduction ---
        let intro = "-";
        const introElem = agentItem.querySelector('.agent-intro-content');
        if (introElem) {
            // Work on a clone so the page itself is untouched, and drop the
            // "more" expander element before reading the text.
            const tempDiv = introElem.cloneNode(true);
            tempDiv.querySelector('.more')?.remove();
            // Collapse line breaks so the value stays on one CSV line; an empty
            // result falls back to "-" like every other missing field.
            intro = String(tempDiv.innerText ?? "").replace(/\s+/g, ' ').trim() || "-";
        }

        // A counter whose title or value element is missing keeps its default
        // instead of being overwritten with `undefined`.
        let years = "0";
        let rating = "-";
        let score = "0";
        agentItem.querySelectorAll('.count-item').forEach(c => {
            const title = c.querySelector('.count-title')?.innerText;
            const val = c.querySelector('.small')?.innerText?.replace(/\s+/g, '');
            if (!title || !val) return;
            if (title.includes("服务年限")) years = val;
            if (title.includes("客户评分")) rating = val;
            if (title.includes("贝壳分")) score = val;
        });

        const tags = Array.from(agentItem.querySelectorAll('.agent-label')).map(t => t.innerText).join(' | ');

        grabbedData.push({ name, school, brand, area, store, fullName: fullDesc, years, rating, score, intro, tags });
        countDisplay.innerText = grabbedData.length;
    }

    // 监听 DOM 加载
    // Parse an inserted element: either it is an agent card itself, or it may
    // contain agent cards somewhere below it.
    const collectFrom = (element) => {
        if (element.classList.contains('jingjiren-list-item')) {
            parseAndStore(element);
        } else {
            element.querySelectorAll('.jingjiren-list-item').forEach(parseAndStore);
        }
    };

    const observer = new MutationObserver((records) => {
        for (const record of records) {
            for (const added of record.addedNodes) {
                // nodeType 1 === element; text and comment nodes are skipped.
                if (added.nodeType === 1) collectFrom(added);
            }
        }
    });
    observer.observe(document.body, { childList: true, subtree: true });

    // Cards that were already on the page before the observer was attached.
    for (const existing of document.querySelectorAll('.jingjiren-list-item')) {
        parseAndStore(existing);
    }

    // --- 3. Scroll loop with bottom-of-page retries ---
    // The run ends only after the bottom has been hit BOTTOM_HIT_LIMIT times in
    // a row without the document growing during the wait that follows each hit.
    const BOTTOM_HIT_LIMIT = 5;
    const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
    let bottomHits = 0;

    for (;;) {
        if (!isPaused) {
            window.scrollBy(0, 1000);
            const viewportBottom = window.innerHeight + window.scrollY;
            const heightBefore = document.documentElement.scrollHeight;
            const nearBottom = viewportBottom >= heightBefore - 150;

            if (!nearBottom) {
                bottomHits = 0;
            } else {
                bottomHits += 1;
                statusDisplay.innerText = `触底重试 (${bottomHits}/${BOTTOM_HIT_LIMIT})...`;
                await sleep(2000);

                // The document grew while waiting, so more content was loaded.
                const grew = document.documentElement.scrollHeight > heightBefore;
                if (grew) {
                    bottomHits = 0;
                }
                if (bottomHits >= BOTTOM_HIT_LIMIT) {
                    statusDisplay.innerText = "采集结束";
                    alert(`采集完成,共抓取 ${grabbedData.length} 条。`);
                    break;
                }
            }
        }
        await sleep(100);
    }
})();
相关推荐
特种加菲猫2 小时前
C++ 容器适配器揭秘:stack, queue 和 priority_queue 的模拟实现
开发语言·c++
布局呆星2 小时前
Vue3 笔记:过渡动画与自定义指令
javascript·css·vue.js·python·es6·html5
M ? A2 小时前
Vue Suspense 组件在 React 中,VuReact 会如何实现?
前端·javascript·vue.js·经验分享·react.js·面试·vureact
im_AMBER2 小时前
Leetcode 159 无重复字符的最长子串 | 长度最小的子数组
javascript·数据结构·学习·算法·leetcode
筱璦2 小时前
C#期货分仓、策略交易模拟演示系统(含资源下载)
开发语言·c#·策略模式·量化交易·期货交易
天才熊猫君2 小时前
通用 Loading 状态管理器
前端·javascript·vue.js
froginwe112 小时前
Highcharts 测量图:全面解析与优化实践
开发语言
雪芽蓝域zzs2 小时前
uni-app x 使用 UTS 语言使用 mixins
开发语言·javascript·uni-app
DaqunChen2 小时前
全栈开发的演变:从LAMP到MEAN再到现代JavaScript
开发语言·javascript·ecmascript