javascript
/**
 * Agent-list scraper, v1 (console / tab-separated output).
 *
 * Injects a small control panel, parses every `.jingjiren-list-item` element
 * that is already on the page or is added later, and prints one tab-separated
 * row per agent to the console. The page is scrolled automatically; when the
 * viewport is still at the bottom after a grace period the run ends and the
 * DOM observer is released.
 */
(async function() {
  console.log("脚本已启动:Tab格式输出(支持Excel直接粘贴),左上角控制暂停。");

  const ITEM_SELECTOR = '.jingjiren-list-item';
  const SCROLL_STEP_PX = 180;
  const BOTTOM_TOLERANCE_PX = 5;
  const BOTTOM_RECHECK_MS = 2500;
  const TICK_MS = 150;

  let isPaused = false;
  let count = 0;
  const seenUfids = new Set();

  /** Resolves after `ms` milliseconds. */
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  /**
   * Collapses every whitespace run (tabs and newlines included) into a single
   * space, so no field can split a tab-separated row when it is pasted.
   */
  const flatten = (text) => (text ?? '').replace(/\s+/g, ' ').trim();

  /** Flattened text of the first match of `selector` under `root`, or "-". */
  const textOf = (root, selector) => flatten(root.querySelector(selector)?.innerText) || "-";

  // --- 1. Control panel ---
  // A previous run leaves a panel with the same ids behind; drop it so the
  // page does not end up with duplicate ids and two overlapping panels.
  document.getElementById('crawler-ctrl')?.remove();

  const panel = document.createElement('div');
  panel.innerHTML = `
<div id="crawler-ctrl" style="position:fixed;top:20px;left:20px;z-index:9999;padding:15px;background:#fff;border:2px solid #000;box-shadow:5px 5px 0px #888;font-family:sans-serif;">
<b style="display:block;margin-bottom:10px;">Excel 助手型抓取</b>
<div style="font-size:12px;color:#666;margin-bottom:8px;">抓取后复制控制台内容到 Excel 即可</div>
<div>已抓取: <span id="crawler-count" style="font-weight:bold;color:red;">0</span> 位</div>
<button id="btn-pause" style="margin-top:10px;cursor:pointer;padding:5px 12px;border-radius:4px;border:1px solid #333;">暂停滚动</button>
</div>
`;
  document.body.appendChild(panel);

  // Look the controls up inside the new panel rather than document-wide, so
  // the handlers are always attached to the elements created by this run.
  const btnPause = panel.querySelector('#btn-pause');
  const countDisplay = panel.querySelector('#crawler-count');

  btnPause.onclick = () => {
    isPaused = !isPaused;
    btnPause.innerText = isPaused ? "恢复滚动" : "暂停滚动";
    btnPause.style.background = isPaused ? "#ffcccc" : "#f0f0f0";
  };

  // Header row, printed once so the first paste already has column titles.
  console.warn("姓名\t学校\t服务公司\t服务年限\t客户评分\t贝壳分\t个人简介\t标签");

  // --- 2. Parsing ---
  /**
   * Reads the three numeric stats of an agent card.
   * A stat keeps its default when its value element is missing or empty,
   * instead of leaking the string "undefined" into the output.
   */
  function readStats(agentItem) {
    const stats = { years: "0", rating: "-", score: "0" };
    agentItem.querySelectorAll('.count-item').forEach((item) => {
      const title = item.querySelector('.count-title')?.innerText ?? '';
      const value = item.querySelector('.small')?.innerText.replace(/\s+/g, '');
      if (!value) return;
      if (title.includes("服务年限")) stats.years = value;
      if (title.includes("客户评分")) stats.rating = value;
      if (title.includes("贝壳分")) stats.score = value;
    });
    return stats;
  }

  /**
   * Parses one agent card (or the card enclosing `node`) and logs it as a
   * tab-separated row. Cards whose `data-id` was already seen are skipped;
   * cards without a `data-id` are always logged.
   */
  function parseAndLog(node) {
    if (!node || node.nodeType !== 1) return;
    const agentItem = node.matches(ITEM_SELECTOR) ? node : node.closest(ITEM_SELECTOR);
    if (!agentItem) return;

    const id = agentItem.querySelector('.jingjiren-list__agent-item')?.getAttribute('data-id');
    if (id) {
      if (seenUfids.has(id)) return;
      seenUfids.add(id);
    }

    const name = textOf(agentItem, '.name');
    const school = textOf(agentItem, '.school');
    const company = textOf(agentItem, '.desc');

    // Strip the "...更多" expander text before flattening the introduction.
    const rawIntro = agentItem.querySelector('.agent-intro-content')?.innerText ?? '';
    const intro = flatten(rawIntro.replace("...更多", "")) || "-";

    const { years, rating, score } = readStats(agentItem);

    const tags = Array.from(agentItem.querySelectorAll('.agent-label'))
      .map((label) => flatten(label.innerText))
      .join(' | ');

    count++;
    countDisplay.innerText = count;

    // Tab-separated so the spreadsheet splits the row into columns on paste.
    console.log([name, school, company, years, rating, score, intro, tags].join('\t'));
  }

  // --- 3. Observer for cards added while scrolling ---
  const observer = new MutationObserver((mutations) => {
    for (const mutation of mutations) {
      for (const node of mutation.addedNodes) {
        if (node.nodeType !== 1) continue;
        if (node.matches(ITEM_SELECTOR)) {
          parseAndLog(node);
        } else {
          node.querySelectorAll(ITEM_SELECTOR).forEach((item) => parseAndLog(item));
        }
      }
    }
  });
  observer.observe(document.body, { childList: true, subtree: true });
  document.querySelectorAll(ITEM_SELECTOR).forEach((item) => parseAndLog(item));

  // --- 4. Auto-scroll ---
  // Measured against the document's scroll height: body.offsetHeight can be
  // smaller than the scrollable height, which would end the run early.
  const isAtBottom = () =>
    window.innerHeight + window.scrollY >=
    document.documentElement.scrollHeight - BOTTOM_TOLERANCE_PX;

  try {
    while (true) {
      if (!isPaused) {
        window.scrollBy(0, SCROLL_STEP_PX);
        if (isAtBottom()) {
          // Give lazy loading a chance to append more cards before giving up.
          await sleep(BOTTOM_RECHECK_MS);
          if (isAtBottom()) {
            console.log("--- 采集结束 ---");
            break;
          }
        }
      }
      await sleep(TICK_MS);
    }
  } finally {
    // Stop watching the DOM once the run is over.
    observer.disconnect();
  }
})();
https://m.ke.com/nj/jingjiren/d320111/ao12sy0pg11
2.0 版本:一键导出 CSV 文件,可直接用 Excel 打开
javascript
/**
 * Agent-list scraper, v2 (CSV download).
 *
 * Injects a control panel with pause and export buttons, collects one record
 * per `.jingjiren-list-item` element that is on the page or is added later,
 * and auto-scrolls until the viewport is still at the bottom after a grace
 * period. The export button downloads everything collected so far as a
 * UTF-8 CSV file with a BOM.
 */
(async function() {
  console.log("脚本已启动:支持一键导出 Excel (.csv),左上角控制。");

  const ITEM_SELECTOR = '.jingjiren-list-item';
  const SCROLL_STEP_PX = 200;
  const BOTTOM_TOLERANCE_PX = 5;
  const BOTTOM_RECHECK_MS = 2000;
  const TICK_MS = 100;
  const URL_REVOKE_DELAY_MS = 10000;

  // Column order of the export; HEADERS and FIELDS must stay aligned.
  const HEADERS = ["姓名", "学校", "服务公司", "服务年限", "客户评分", "贝壳分", "个人简介", "标签"];
  const FIELDS = ["name", "school", "company", "years", "rating", "score", "intro", "tags"];

  let isPaused = false;
  const grabbedData = []; // every parsed agent record, in discovery order
  const seenUfids = new Set();

  /** Resolves after `ms` milliseconds. */
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  /**
   * Quotes one CSV cell and doubles embedded double quotes, so quotes and
   * commas in any column cannot break the row.
   */
  const csvCell = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;

  // --- 1. Control panel ---
  // A previous run leaves a panel with the same ids behind; drop it so the
  // page does not end up with duplicate ids and two overlapping panels.
  document.getElementById('crawler-ctrl')?.remove();

  const panel = document.createElement('div');
  panel.innerHTML = `
<div id="crawler-ctrl" style="position:fixed;top:20px;left:20px;z-index:9999;padding:15px;background:#fff;border:2px solid #000;box-shadow:5px 5px 0px #888;font-family:sans-serif; min-width:150px;">
<b style="display:block;margin-bottom:10px;border-bottom:1px solid #eee;">中介数据采集器</b>
<div style="margin-bottom:10px;">已采集: <span id="crawler-count" style="font-weight:bold;color:red;font-size:18px;">0</span> 位</div>
<button id="btn-pause" style="width:100%;margin-bottom:5px;cursor:pointer;padding:8px;background:#f0f0f0;border:1px solid #333;border-radius:4px;">暂停滚动</button>
<button id="btn-export" style="width:100%;cursor:pointer;padding:8px;background:#28a745;color:white;border:none;border-radius:4px;font-weight:bold;">导出 Excel (CSV)</button>
</div>
`;
  document.body.appendChild(panel);

  // Look the controls up inside the new panel rather than document-wide, so
  // the handlers are always attached to the elements created by this run.
  const btnPause = panel.querySelector('#btn-pause');
  const btnExport = panel.querySelector('#btn-export');
  const countDisplay = panel.querySelector('#crawler-count');

  // Pause / resume toggle.
  btnPause.onclick = () => {
    isPaused = !isPaused;
    btnPause.innerText = isPaused ? "恢复滚动" : "暂停滚动";
    btnPause.style.background = isPaused ? "#ffcccc" : "#f0f0f0";
  };

  // --- CSV export ---
  btnExport.onclick = () => {
    if (grabbedData.length === 0) {
      alert("还没有抓取到数据!");
      return;
    }

    const rows = grabbedData.map((item) => FIELDS.map((key) => csvCell(item[key])).join(","));
    // Leading BOM so Excel opens the Chinese text without mojibake.
    const csvContent = ["\ufeff" + HEADERS.join(","), ...rows].join("\n");

    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);

    // Locale date strings may contain "/" and similar characters that are
    // not valid in file names; replace them.
    const dateStamp = new Date().toLocaleDateString().replace(/[\\/:*?"<>|]/g, '-');

    const link = document.createElement("a");
    link.setAttribute("href", url);
    link.setAttribute("download", `中介数据汇总_${dateStamp}.csv`);
    link.style.visibility = 'hidden';
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Release the blob after the browser has had time to start the download.
    setTimeout(() => URL.revokeObjectURL(url), URL_REVOKE_DELAY_MS);
  };

  // --- 2. Parsing ---
  /**
   * Reads the three numeric stats of an agent card.
   * A stat keeps its default when its value element is missing or empty,
   * instead of leaking the string "undefined" into the export.
   */
  function readStats(agentItem) {
    const stats = { years: "0", rating: "-", score: "0" };
    agentItem.querySelectorAll('.count-item').forEach((item) => {
      const title = item.querySelector('.count-title')?.innerText ?? '';
      const value = item.querySelector('.small')?.innerText.replace(/\s+/g, '');
      if (!value) return;
      if (title.includes("服务年限")) stats.years = value;
      if (title.includes("客户评分")) stats.rating = value;
      if (title.includes("贝壳分")) stats.score = value;
    });
    return stats;
  }

  /**
   * Parses one agent card (or the card enclosing `node`) and appends it to
   * `grabbedData`. Cards whose `data-id` was already seen are skipped; cards
   * without a `data-id` are always stored.
   */
  function parseAndStore(node) {
    if (!node || node.nodeType !== 1) return;
    const agentItem = node.matches(ITEM_SELECTOR) ? node : node.closest(ITEM_SELECTOR);
    if (!agentItem) return;

    const id = agentItem.querySelector('.jingjiren-list__agent-item')?.getAttribute('data-id');
    if (id) {
      if (seenUfids.has(id)) return;
      seenUfids.add(id);
    }

    const name = agentItem.querySelector('.name')?.innerText || "-";
    const school = agentItem.querySelector('.school')?.innerText || "-";
    const company = agentItem.querySelector('.desc')?.innerText || "-";

    // Strip the "...更多" expander text and collapse whitespace to one line.
    const rawIntro = agentItem.querySelector('.agent-intro-content')?.innerText ?? '';
    const intro = rawIntro.replace("...更多", "").replace(/\s+/g, " ").trim() || "-";

    const { years, rating, score } = readStats(agentItem);

    const tags = Array.from(agentItem.querySelectorAll('.agent-label'))
      .map((label) => label.innerText)
      .join(' | ');

    grabbedData.push({ name, school, company, years, rating, score, intro, tags });
    countDisplay.innerText = grabbedData.length;
    console.log(`已采集: ${name}`);
  }

  // --- 3. Observer for cards added while scrolling ---
  const observer = new MutationObserver((mutations) => {
    for (const mutation of mutations) {
      for (const node of mutation.addedNodes) {
        if (node.nodeType !== 1) continue;
        if (node.matches(ITEM_SELECTOR)) {
          parseAndStore(node);
        } else {
          node.querySelectorAll(ITEM_SELECTOR).forEach((item) => parseAndStore(item));
        }
      }
    }
  });
  observer.observe(document.body, { childList: true, subtree: true });
  document.querySelectorAll(ITEM_SELECTOR).forEach((item) => parseAndStore(item));

  // --- 4. Auto-scroll ---
  // Measured against the document's scroll height: body.offsetHeight can be
  // smaller than the scrollable height, which would end the run early.
  const isAtBottom = () =>
    window.innerHeight + window.scrollY >=
    document.documentElement.scrollHeight - BOTTOM_TOLERANCE_PX;

  try {
    while (true) {
      if (!isPaused) {
        window.scrollBy(0, SCROLL_STEP_PX);
        if (isAtBottom()) {
          // Give lazy loading a chance to append more cards before giving up.
          await sleep(BOTTOM_RECHECK_MS);
          if (isAtBottom()) {
            console.log("--- 采集结束 ---");
            alert("全部采集完成,点击导出按钮即可下载!");
            break;
          }
        }
      }
      await sleep(TICK_MS);
    }
  } finally {
    // Stop watching the DOM once the run is over; collected data stays
    // available to the export button.
    observer.disconnect();
  }
})();
3.0 版本:快速导出(新增品牌/区域/店名拆分,优化简介提取)
javascript
/**
 * Agent-list scraper, v3 (CSV download with brand / area / store split).
 *
 * Injects a control panel with pause and export buttons, collects one record
 * per `.jingjiren-list-item` element that carries a `data-id`, and scrolls
 * the page in large steps. The run ends after MAX_RETRIES consecutive
 * bottom-of-page checks during which the document did not grow.
 */
(async function() {
  console.log("脚本已启动:增加品牌/区域/店名拆分,优化简介提取。");

  const ITEM_SELECTOR = '.jingjiren-list-item';
  const SCROLL_STEP_PX = 1000;
  const BOTTOM_THRESHOLD_PX = 150;
  const RETRY_WAIT_MS = 2000;
  const TICK_MS = 100;
  const MAX_RETRIES = 5;
  const URL_REVOKE_DELAY_MS = 10000;

  // Column order of the export; HEADERS and FIELDS must stay aligned.
  const HEADERS = ["姓名", "学校", "品牌", "区域", "店名", "全称", "服务年限", "客户评分", "贝壳分", "个人简介", "标签"];
  const FIELDS = ["name", "school", "brand", "area", "store", "fullName", "years", "rating", "score", "intro", "tags"];

  let isPaused = false;
  const grabbedData = [];
  const seenUfids = new Set();

  /** Resolves after `ms` milliseconds. */
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  /**
   * Quotes one CSV cell and doubles embedded double quotes, so quotes and
   * commas in any column cannot break the row.
   */
  const csvCell = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;

  // --- 1. Control panel ---
  // A previous run leaves a panel with the same ids behind; drop it so the
  // page does not end up with duplicate ids and two overlapping panels.
  document.getElementById('crawler-ctrl')?.remove();

  const panel = document.createElement('div');
  panel.innerHTML = `
<div id="crawler-ctrl" style="position:fixed;top:20px;left:20px;z-index:9999;padding:15px;background:#fff;border:2px solid #000;box-shadow:5px 5px 0px #888;font-family:sans-serif; min-width:180px;">
<b style="display:block;margin-bottom:10px;border-bottom:1px solid #eee;">🏠 房产中介全能采集</b>
<div style="margin-bottom:10px;">已采集: <span id="crawler-count" style="font-weight:bold;color:red;font-size:20px;">0</span></div>
<div id="crawler-status" style="font-size:12px;color:blue;margin-bottom:5px;">正在采集...</div>
<button id="btn-pause" style="width:100%;margin-bottom:5px;cursor:pointer;padding:8px;background:#f0f0f0;border:1px solid #333;border-radius:4px;">暂停/恢复</button>
<button id="btn-export" style="width:100%;cursor:pointer;padding:8px;background:#28a745;color:white;border:none;border-radius:4px;font-weight:bold;">导出 Excel (CSV)</button>
</div>
`;
  document.body.appendChild(panel);

  // Look the controls up inside the new panel rather than document-wide, so
  // the handlers are always attached to the elements created by this run.
  const btnPause = panel.querySelector('#btn-pause');
  const btnExport = panel.querySelector('#btn-export');
  const countDisplay = panel.querySelector('#crawler-count');
  const statusDisplay = panel.querySelector('#crawler-status');

  btnPause.onclick = () => {
    isPaused = !isPaused;
    statusDisplay.innerText = isPaused ? "已暂停" : "正在采集...";
  };

  // --- CSV export ---
  btnExport.onclick = () => {
    // Refuse to download a header-only file.
    if (grabbedData.length === 0) {
      alert("还没有抓取到数据!");
      return;
    }

    const rows = grabbedData.map((item) => FIELDS.map((key) => csvCell(item[key])).join(","));
    // Leading BOM so Excel opens the Chinese text without mojibake.
    const csvContent = ["\ufeff" + HEADERS.join(","), ...rows].join("\n");

    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);

    const link = document.createElement("a");
    link.href = url;
    link.download = `中介数据明细_${new Date().getTime()}.csv`;
    link.style.visibility = 'hidden';
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Release the blob after the browser has had time to start the download.
    setTimeout(() => URL.revokeObjectURL(url), URL_REVOKE_DELAY_MS);
  };

  // --- 2. Parsing ---
  /**
   * Reads the three numeric stats of an agent card.
   * A stat keeps its default when its value element is missing or empty,
   * instead of leaking the string "undefined" into the export.
   */
  function readStats(agentItem) {
    const stats = { years: "0", rating: "-", score: "0" };
    agentItem.querySelectorAll('.count-item').forEach((item) => {
      const title = item.querySelector('.count-title')?.innerText ?? '';
      const value = item.querySelector('.small')?.innerText.replace(/\s+/g, '');
      if (!value) return;
      if (title.includes("服务年限")) stats.years = value;
      if (title.includes("客户评分")) stats.rating = value;
      if (title.includes("贝壳分")) stats.score = value;
    });
    return stats;
  }

  /**
   * Extracts the introduction text of a card as a single line, without the
   * `.more` expander element. Returns "-" when there is no text.
   */
  function readIntro(agentItem) {
    const introElem = agentItem.querySelector('.agent-intro-content');
    if (!introElem) return "-";
    // Work on a clone so the page itself is left untouched.
    const clone = introElem.cloneNode(true);
    clone.querySelector('.more')?.remove();
    // Collapse line breaks so a record stays on one CSV line.
    return clone.innerText.replace(/\s+/g, ' ').trim() || "-";
  }

  /**
   * Parses one agent card (or the card enclosing `node`) and appends it to
   * `grabbedData`. Cards without a `data-id`, or whose `data-id` was already
   * seen, are skipped.
   */
  function parseAndStore(node) {
    if (!node || node.nodeType !== 1) return;
    const agentItem = node.matches(ITEM_SELECTOR) ? node : node.closest(ITEM_SELECTOR);
    if (!agentItem) return;

    const id = agentItem.querySelector('.jingjiren-list__agent-item')?.getAttribute('data-id');
    if (!id || seenUfids.has(id)) return;
    seenUfids.add(id);

    const name = agentItem.querySelector('.name')?.innerText.trim() || "-";
    const school = agentItem.querySelector('.school')?.innerText.trim() || "-";

    // The description is split on "·" into brand, area and store. Anything
    // after the second separator is kept as part of the store name.
    const fullDesc = agentItem.querySelector('.desc')?.innerText.trim() || "-";
    const parts = fullDesc === "-" ? [] : fullDesc.split('·').map((part) => part.trim());
    const brand = parts[0] || "-";
    const area = parts[1] || "-";
    const store = parts.slice(2).join('·') || "-";

    const intro = readIntro(agentItem);
    const { years, rating, score } = readStats(agentItem);

    const tags = Array.from(agentItem.querySelectorAll('.agent-label'))
      .map((label) => label.innerText)
      .join(' | ');

    grabbedData.push({ name, school, brand, area, store, fullName: fullDesc, years, rating, score, intro, tags });
    countDisplay.innerText = grabbedData.length;
  }

  // Observer for cards added while scrolling.
  const observer = new MutationObserver((mutations) => {
    for (const mutation of mutations) {
      for (const node of mutation.addedNodes) {
        if (node.nodeType !== 1) continue;
        if (node.matches(ITEM_SELECTOR)) {
          parseAndStore(node);
        } else {
          node.querySelectorAll(ITEM_SELECTOR).forEach((item) => parseAndStore(item));
        }
      }
    }
  });
  observer.observe(document.body, { childList: true, subtree: true });
  document.querySelectorAll(ITEM_SELECTOR).forEach((item) => parseAndStore(item));

  // --- 3. Scroll loop with bottom-of-page retries ---
  let retryCount = 0;
  try {
    while (true) {
      if (!isPaused) {
        window.scrollBy(0, SCROLL_STEP_PX);
        const currentScroll = window.innerHeight + window.scrollY;
        const totalHeight = document.documentElement.scrollHeight;

        if (currentScroll >= totalHeight - BOTTOM_THRESHOLD_PX) {
          retryCount++;
          statusDisplay.innerText = `触底重试 (${retryCount}/${MAX_RETRIES})...`;
          await sleep(RETRY_WAIT_MS);

          // The document grew while waiting, so more content was loaded:
          // start counting retries from zero again.
          if (document.documentElement.scrollHeight > totalHeight) {
            retryCount = 0;
          }
          if (retryCount >= MAX_RETRIES) {
            statusDisplay.innerText = "采集结束";
            alert(`采集完成,共抓取 ${grabbedData.length} 条。`);
            break;
          }
        } else {
          retryCount = 0;
        }
      }
      await sleep(TICK_MS);
    }
  } finally {
    // Stop watching the DOM once the run is over; collected data stays
    // available to the export button.
    observer.disconnect();
  }
})();