🛒 商品评论 API 采集 + 客户画像分析方案
一、API 接口一览:一键获取商品评论
| 平台 | API/数据源 | 接口方式 | 难度 | 免费额度 |
|---|---|---|---|---|
| 淘宝/天猫 | 淘宝开放平台 taobao.item.review.get | REST API | ⭐⭐⭐ | 5000次/天 |
| 京东 | 京东宙斯系统 jd.union.open.goods.review.list.get | REST API | ⭐⭐⭐ | 500次/天 |
| 拼多多 | PDD Open Platform pdd.ddk.goods.review.list.get | REST API | ⭐⭐⭐ | 500次/天 |
| 抖音电商 | 抖音开放平台 /api/apps/trade/order/list | REST API | ⭐⭐⭐⭐ | 有限 |
| 小红书 | 第三方爬虫/API(如:红研社、千瓜) | 爬虫/SaaS | ⭐⭐ | 付费 |
| 1688 | 1688开放平台 alibaba.item.review.get | REST API | ⭐⭐ | 3000次/天 |
| 通用方案 | 八卦数据、快普、nigho 等第三方聚合API | 一键接口 | ⭐ | 付费 |
二、快速上手:Python 示例代码
1️⃣ 淘宝评论 API(官方)
python
`import requests
import hashlib
import time
# Placeholder credentials read by get_taobao_reviews; replace them with real
# values before running.
APP_KEY = "your_app_key"
APP_SECRET = "your_app_secret"
SESSION = "your_session"
def get_taobao_reviews(item_id, page=1, page_size=20):
    """Request one page of reviews for an item and return the parsed JSON.

    Args:
        item_id: Item identifier, sent as the ``item_id`` parameter.
        page: Page number, sent as ``page_no``.
        page_size: Reviews per page, sent as ``page_size``.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    url = "https://gw.api.taobao.com/router/rest"
    params = {
        "method": "taobao.item.review.get",
        "app_key": APP_KEY,
        "session": SESSION,
        # NOTE(review): formatted from the machine's local time — confirm the
        # time zone the gateway expects.
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "format": "json",
        "v": "2.0",
        "sign_method": "md5",
        "item_id": item_id,
        "page_no": page,
        "page_size": page_size,
        "rate_type": "getTime",  # sort by time
        "fields": "tid,nick,created,content,rate,sku_id,user_level,auction_sku",
    }
    # MD5 request signature: secret + every "keyvalue" pair in key order +
    # secret, as upper-case hex.
    # NOTE(review): verify this against the platform's signing specification.
    payload = "".join(f"{key}{params[key]}" for key in sorted(params))
    digest = hashlib.md5(f"{APP_SECRET}{payload}{APP_SECRET}".encode("utf-8"))
    params["sign"] = digest.hexdigest().upper()

    # requests has no default timeout; without one a stalled connection would
    # block forever.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    return response.json()
# Example call.
# NOTE(review): get_taobao_reviews returns the parsed JSON response; if that is
# a dict, len() below counts its top-level keys, not the reviews — TODO
# confirm the response schema and take len() of the review list instead.
reviews = get_taobao_reviews("652345678901", page=1, page_size=100)
print(f"获取到 {len(reviews)} 条评论")
`
2️⃣ 京东评论 API(官方)
python
`import requests
import hashlib
import time
# Placeholder credentials read by get_jd_reviews; replace them with real
# values before running.
APP_KEY = "your_app_key"
APP_SECRET = "your_app_secret"
ACCESS_TOKEN = "your_token"
def get_jd_reviews(sku_id, page=0, page_size=10):
    """Request one page of reviews for a JD SKU and return the parsed JSON.

    Args:
        sku_id: SKU identifier; converted to ``str`` and sent as ``skuId``.
        page: Page index, sent as ``pageIndex``.
        page_size: Reviews per page, sent as ``pageSize``.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    url = "https://api.jd.com/routerjson"
    params = {
        "method": "jd.union.open.goods.review.list.get",
        "app_key": APP_KEY,
        "access_token": ACCESS_TOKEN,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "format": "json",
        "v": "1.0",
        "sign_method": "md5",
        "skuId": str(sku_id),
        "pageIndex": page,
        "pageSize": page_size,
        "sortType": 5,  # 5 = time, newest first
    }
    # TODO: signature generation is still omitted here; "sign_method" is set
    # but no "sign" parameter is added — presumably the request is rejected
    # until one is computed per the platform's rules.

    # requests has no default timeout; without one a stalled connection would
    # block forever.
    resp = requests.get(url, params=params, timeout=10)
    resp.raise_for_status()
    return resp.json()
# Example call: request pageIndex 0 with 50 reviews per page.
reviews = get_jd_reviews(100012345678, page=0, page_size=50)
`
3️⃣ 第三方聚合 API(最简单,推荐)
python
`import requests
# Example: using the "八卦数据" aggregation API.
# Placeholder key sent as the Bearer token by get_reviews_simple.
API_KEY = "your_api_key"
def get_reviews_simple(platform, item_id, page=1):
    """Fetch one page of reviews through the aggregation API.

    Args:
        platform: Platform name, used as the path segment of the request URL.
        item_id: Item identifier, sent as ``item_id``.
        page: Page number, sent as ``page``.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    url = f"https://api.baguapi.com/{platform}/reviews"
    headers = {"Authorization": f"Bearer {API_KEY}"}
    params = {
        "item_id": item_id,
        "page": page,
        "page_size": 100,
        "sort": "time_desc",
        "include": "content,nick,rate,images,sku,time",
    }
    # requests has no default timeout; without one a stalled connection would
    # block forever.
    resp = requests.get(url, headers=headers, params=params, timeout=10)
    # Fail loudly on an HTTP error so an error payload is not handed to callers
    # that index into the result.
    resp.raise_for_status()
    return resp.json()
# Fetch Taobao reviews with a single call, then print the totals and a
# one-line preview of the first three reviews.
data = get_reviews_simple("taobao", "652345678901")
print(f"共 {data['total']} 条评论,本页 {len(data['list'])} 条")
for r in data['list'][:3]:
    print(f"{r['nick']} | {r['rate']}⭐ | {r['content'][:30]}...")
`
三、评论数据结构(标准化)
json
`{
"item_id": "652345678901",
"reviews": [
{
"user_id": "u_123456",
"nick": "数码发烧友小王",
"avatar": "https://...",
"rate": 5,
"content": "屏幕素质很好,色准 ΔE<2,做设计完全够用,就是风扇声音有点大...",
"sku": "i9-14900K + RTX4070",
"images": ["img1.jpg", "img2.jpg"],
"created_at": "2024-03-15 14:23:00",
"user_level": "VIP3",
"purchase_count": 12,
"tags": ["屏幕好", "风扇吵", "性价比高"]
}
]
}
`
四、🧠 客户画像分析引擎
分析维度全景图
`┌─────────────────────────────────────────────────┐
│ 客户画像分析引擎 │
├──────────┬──────────┬──────────┬────────────────┤
│ 基础属性 │ 行为特征 │ 情感倾向 │ 消费能力 │
│ ·年龄估计 │ ·购买频次 │ ·正面/负面 │ ·会员等级 │
│ ·性别推断 │ ·活跃时段 │ ·情感得分 │ ·客单价区间 │
│ ·地域分布 │ ·浏览深度 │ ·关键词云 │ ·价格敏感度 │
│ ·设备型号 │ ·复购率 │ ·NLP情感 │ ·品牌偏好 │
└──────────┴──────────┴──────────┴────────────────┘
`
核心分析代码
python
`import jieba
import jieba.analyse
from collections import Counter
import re
from datetime import datetime
import pandas as pd
class ReviewAnalyzer:
    """Build a keyword-heuristic customer profile from a list of review dicts.

    Every method works on ``self.reviews``. Each review is read with
    ``dict.get`` for the keys ``content``, ``rate``, ``nick`` and ``region``,
    so all of them are optional. An empty review list is supported and yields
    zeroed statistics instead of raising.
    """

    def __init__(self, reviews):
        """Keep the raw review list and a ``pandas.DataFrame`` built from it."""
        self.reviews = reviews
        self.df = pd.DataFrame(reviews)

    # ========== 1. Demographic profile ==========
    def demographic_profile(self):
        """Infer age, gender and region hints by keyword matching.

        Returns:
            dict with ``age_distribution`` (midpoint age -> count),
            ``gender_hint`` (male/female/unknown counts) and ``region_hint``
            (``Counter`` over the reviews' ``region`` values).
        """
        profile = {
            "age_distribution": {},
            "gender_hint": {"male": 0, "female": 0, "unknown": 0},
            "region_hint": Counter(),
        }
        age_keywords = {
            "学生": (18, 25), "大学": (18, 23), "上班": (25, 40),
            "中年": (40, 55), "退休": (55, 70), "孩子": (25, 45),
            "送老婆": (25, 40), "给爸妈": (30, 50), "开学": (18, 22),
        }
        gender_keywords = {
            "male": ["兄弟", "老哥", "直男", "游戏", "帧数", "散热", "性能"],
            "female": ["姐妹", "颜值", "轻薄", "粉色", "好看", "便携", "续航"],
        }
        ages = profile["age_distribution"]
        for r in self.reviews:
            # A missing or None content is treated as empty text.
            content = r.get('content') or ''

            # Age: every matching keyword votes for the midpoint of its range.
            age_found = False
            for keyword, (low, high) in age_keywords.items():
                if keyword in content:
                    mid = (low + high) // 2
                    ages[mid] = ages.get(mid, 0) + 1
                    age_found = True
            if not age_found:
                # Reviews without any age keyword are counted under 30.
                ages[30] = ages.get(30, 0) + 1

            # Gender: the side with more keyword hits wins; ties are unknown.
            male_score = sum(1 for k in gender_keywords["male"] if k in content)
            female_score = sum(1 for k in gender_keywords["female"] if k in content)
            if male_score > female_score:
                profile["gender_hint"]["male"] += 1
            elif female_score > male_score:
                profile["gender_hint"]["female"] += 1
            else:
                profile["gender_hint"]["unknown"] += 1

            # Region: taken from the review's 'region' field when present.
            region = r.get('region', '未知')
            if region != '未知':
                profile["region_hint"][region] += 1
        return profile

    # ========== 2. Purchasing-power analysis ==========
    def purchase_power(self):
        """Estimate spending behaviour from keyword hits in the review text.

        Returns:
            dict with ``price_sensitivity``, ``brand_loyalty`` and
            ``premium_preference`` (each a float clamped to 0-1),
            ``total_reviews`` and a ``level`` label.
        """
        n = len(self.reviews)
        power = {
            "price_sensitivity": 0,  # price sensitivity, 0-1
            "brand_loyalty": 0,
            "premium_preference": 0,
            "total_reviews": n,
        }
        price_keywords = ["贵", "便宜", "性价比", "划算", "值", "溢价", "智商税", "剁手"]
        brand_keywords = ["信仰", "御三家", "永远的神", "yyds", "闭眼入", "无脑入"]
        premium_keywords = ["旗舰", "顶配", "Pro Max", "Ultra", "一步到位", "不差钱"]
        for r in self.reviews:
            content = r.get('content') or ''
            power["price_sensitivity"] += sum(1 for k in price_keywords if k in content)
            power["brand_loyalty"] += sum(1 for k in brand_keywords if k in content)
            power["premium_preference"] += sum(1 for k in premium_keywords if k in content)

        # Normalise by an assumed per-review hit ceiling (3 for price, 2 for
        # the others). A review can exceed that ceiling, so clamp to 1.0; with
        # no reviews the scores are 0.0 instead of dividing by zero.
        for key, ceiling in (("price_sensitivity", 3),
                             ("brand_loyalty", 2),
                             ("premium_preference", 2)):
            power[key] = min(1.0, power[key] / (n * ceiling)) if n else 0.0

        # Overall label, checked in priority order.
        if power["premium_preference"] > 0.3:
            power["level"] = "高消费(旗舰偏好)"
        elif power["price_sensitivity"] > 0.4:
            power["level"] = "价格敏感型"
        elif power["brand_loyalty"] > 0.3:
            power["level"] = "品牌忠诚型"
        else:
            power["level"] = "理性平衡型"
        return power

    # ========== 3. Sentiment analysis ==========
    def sentiment_analysis(self):
        """Score each review with a word-list sentiment and collect keywords.

        The final score blends the text score (40%) with the star rating
        mapped from 1-5 onto -1..1 (60%). Keywords come from ``jieba.cut``.

        Returns:
            dict with per-review ``sentiments``, ``positive_rate``,
            ``negative_rate``, ``avg_score`` and ``top_keywords`` (up to 30
            tokens of length >= 2, most frequent first). All rates are 0.0
            when there are no reviews.
        """
        positive_words = ["好", "棒", "牛", "强", "满意", "推荐", "喜欢", "不错", "流畅", "清晰"]
        negative_words = ["差", "烂", "卡", "吵", "热", "重", "失望", "退货", "翻车", "缩水"]
        sentiments = []
        keyword_freq = Counter()
        for r in self.reviews:
            content = r.get('content') or ''
            rate = r.get('rate', 3)
            if rate is None:
                rate = 3  # treat an explicit None like a missing rating

            # Text sentiment: (pos - neg) / hits, guarded against zero hits.
            text_pos = sum(1 for w in positive_words if w in content)
            text_neg = sum(1 for w in negative_words if w in content)
            text_score = (text_pos - text_neg) / max(text_pos + text_neg, 1)

            # Blended score: text 40% + rating 60%.
            final_score = text_score * 0.4 + (rate - 3) / 2 * 0.6
            if final_score > 0.2:
                label = "正面"
            elif final_score < -0.2:
                label = "负面"
            else:
                label = "中性"
            sentiments.append({
                "user": r.get('nick', ''),
                "rate": rate,
                "text_score": round(text_score, 2),
                "final_score": round(final_score, 2),
                "label": label,
            })

            # Keyword extraction.
            keyword_freq.update(jieba.cut(content))

        # Drop stop words and single-character tokens.
        stopwords = {"的", "了", "是", "在", "我", "有", "和", "就", "不", "人", "都", "一", "一个", "上", "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好", "自己", "这"}
        filtered = {k: v for k, v in keyword_freq.items() if k not in stopwords and len(k) >= 2}
        top_keywords = dict(sorted(filtered.items(), key=lambda x: x[1], reverse=True)[:30])

        total = len(sentiments)

        def share(label):
            # Fraction of reviews carrying the label; 0.0 when there are none.
            if not total:
                return 0.0
            return sum(1 for s in sentiments if s["label"] == label) / total

        return {
            "sentiments": sentiments,
            "positive_rate": share("正面"),
            "negative_rate": share("负面"),
            "avg_score": sum(s["final_score"] for s in sentiments) / total if total else 0.0,
            "top_keywords": top_keywords,
        }

    # ========== 4. Customer profile report ==========
    def generate_profile(self):
        """Combine the three analyses into one report dict.

        Returns:
            dict keyed by emoji-prefixed section titles. Ratios are rendered
            as percent strings. With no reviews the main age group is ``None``
            and the gender tendency is ``"unknown"``.
        """
        demo = self.demographic_profile()
        power = self.purchase_power()
        senti = self.sentiment_analysis()

        ages = demo["age_distribution"]
        genders = demo["gender_hint"]
        # max() raises on an empty mapping, so guard the no-review case.
        main_age = max(ages, key=ages.get) if ages else None
        main_gender = max(genders, key=genders.get) if self.reviews else "unknown"

        report = {
            "📊 商品评论总数": len(self.reviews),
            "👤 人群画像": {
                "主要年龄段": main_age,
                "性别倾向": main_gender,
                "地域TOP5": dict(demo["region_hint"].most_common(5)),
            },
            "💰 消费能力": {
                "消费类型": power["level"],
                "价格敏感度": f"{power['price_sensitivity']:.0%}",
                "品牌忠诚度": f"{power['brand_loyalty']:.0%}",
                "旗舰偏好度": f"{power['premium_preference']:.0%}",
            },
            "😊 情感分析": {
                "正面评价率": f"{senti['positive_rate']:.1%}",
                "负面评价率": f"{senti['negative_rate']:.1%}",
                "综合情感得分": round(senti["avg_score"], 2),
                "高频关键词": senti["top_keywords"],
            },
            "🔥 核心关注点(词云TOP10)": list(senti["top_keywords"].keys())[:10],
        }
        return report
# ========== Usage example ==========
# Five hand-written sample reviews; each carries the nick/rate/content/region
# keys that ReviewAnalyzer reads.
reviews_data = [
{"nick": "极客老张", "rate": 5, "content": "i9-14900K 性能炸裂,多核跑分40000+,散热用360水冷稳稳的,就是功耗有点高", "region": "广东"},
{"nick": "设计狮小李", "rate": 4, "content": "屏幕色准很好,适合做设计,但风扇声音大,价格偏贵,性价比一般", "region": "北京"},
{"nick": "学生党小王", "rate": 3, "content": "预算有限买的,性能还行但散热压不住,玩游戏降频,不太推荐学生买", "region": "四川"},
{"nick": "程序员老赵", "rate": 5, "content": "编译速度飞起,32线程不是盖的,值得入手,信仰加成", "region": "浙江"},
{"nick": "宝妈阿姨", "rate": 2, "content": "给儿子买的,太重了搬不动,而且太贵了,退货了", "region": "上海"},
]
# Build the report and pretty-print it; ensure_ascii=False keeps the Chinese
# text and emoji readable in the output.
analyzer = ReviewAnalyzer(reviews_data)
profile = analyzer.generate_profile()
import json
print(json.dumps(profile, ensure_ascii=False, indent=2))
`
输出示例
json
`{
"商品评论总数": 5,
"人群画像": {
"主要年龄段": 30,
"性别倾向": "male",
"地域TOP5": {"广东": 1, "北京": 1, "四川": 1, "浙江": 1, "上海": 1}
},
"消费能力": {
"消费类型": "品牌忠诚型",
"价格敏感度": "33%",
"品牌忠诚度": "40%",
"旗舰偏好度": "20%"
},
"情感分析": {
"正面评价率": "60.0%",
"负面评价率": "20.0%",
"综合情感得分": 0.52,
"高频关键词": {"性能": 3, "散热": 2, "价格": 2, "屏幕": 1, "风扇": 1, ...}
},
"核心关注点": ["性能", "散热", "价格", "屏幕", "风扇", "跑分", "水冷", "降频", "色准", "编译"]
}
`
五、📈 可视化客户画像看板
python
`import matplotlib.pyplot as plt
from wordcloud import WordCloud
def visualize_profile(profile, font_path='simhei.ttf',
                      output_path='customer_profile.png'):
    """Draw a 2x2 dashboard for a profile report, save it, and show it.

    Args:
        profile: Report dict as produced by ``ReviewAnalyzer.generate_profile``.
            Section titles may carry an emoji prefix (e.g. "😊 情感分析") or
            not, and ratios may be floats or percent strings such as "60.0%".
        font_path: Font file passed to ``WordCloud``.
        output_path: Where the figure is written.

    Raises:
        KeyError: If a required section is missing from ``profile``.
    """

    def section(name):
        # generate_profile() prefixes section titles with an emoji, so accept
        # both the bare title and any key ending with it.
        if name in profile:
            return profile[name]
        for key, value in profile.items():
            if isinstance(key, str) and key.endswith(name):
                return value
        raise KeyError(name)

    def ratio(value):
        # Accept 0.6 as well as the formatted "60%" / "60.0%" strings.
        if isinstance(value, str):
            text = value.strip()
            if text.endswith('%'):
                return float(text[:-1]) / 100
            return float(text)
        return float(value)

    sentiment = section("情感分析")
    spending = section("消费能力")
    people = section("人群画像")

    # NOTE(review): the Chinese titles below need a CJK-capable matplotlib
    # font to render — confirm the environment's font configuration.
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('商品评论客户画像分析看板', fontsize=18, fontweight='bold')

    # 1. Sentiment distribution pie chart.
    positive = ratio(sentiment["正面评价率"])
    negative = ratio(sentiment["负面评价率"])
    # Rounded percent strings can sum to slightly more than 1.
    neutral = max(0.0, 1 - positive - negative)
    shares = [positive, negative, neutral]
    if sum(shares) > 0:
        axes[0, 0].pie(
            shares,
            labels=["正面", "负面", "中性"],
            colors=["#2ecc71", "#e74c3c", "#95a5a6"],
            autopct='%1.1f%%',
        )
    else:
        # Nothing to plot when every share is zero.
        axes[0, 0].axis('off')
    axes[0, 0].set_title('评价情感分布')

    # 2. Spending metrics bar chart (the textual "消费类型" label is skipped).
    metrics = [name for name in spending if name != "消费类型"]
    values = [ratio(spending[name]) for name in metrics]
    axes[0, 1].barh(metrics, values, color='#3498db')
    axes[0, 1].set_title('消费能力指标')

    # 3. Word cloud built straight from the keyword frequencies; repeating
    # the keyword text with `k * v` would fuse the copies into one token.
    frequencies = sentiment["高频关键词"]
    if frequencies:
        wc = WordCloud(font_path=font_path, width=800, height=400,
                       background_color='white')
        wc.generate_from_frequencies(frequencies)
        axes[1, 0].imshow(wc, interpolation='bilinear')
    axes[1, 0].axis('off')
    axes[1, 0].set_title('评论关键词云')

    # 4. Audience summary taken from the report, not hard-coded text.
    age_data = people.get("主要年龄段")
    age_text = f"{age_data}岁" if age_data is not None else "未知"
    gender_text = {"male": "男性为主", "female": "女性为主"}.get(
        people.get("性别倾向"), "未知")
    region_text = "/".join(people.get("地域TOP5") or {}) or "未知"
    axes[1, 1].text(
        0.5, 0.5,
        f"核心人群: {age_text}\n性别: {gender_text}\n地域: {region_text}",
        ha='center', va='center', fontsize=14,
    )
    axes[1, 1].set_title('人群画像摘要')
    axes[1, 1].axis('off')

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.show()
`
六、🚀 一站式 SaaS 方案(不想自己开发?)
| 产品 | 功能 | 价格 | 链接 |
|---|---|---|---|
| 八爪鱼采集器 | 可视化爬虫,一键采集淘宝/京东/拼多多评论 | 免费~¥299/月 | octoparse.cn |
| 后羿采集器 | 智能网页数据采集,支持电商评论 | 免费~¥199/月 | houyicaiji.com |
| 百度NLP + 评论分析 | 情感分析/关键词/画像,API调用 | 按量付费 | ai.baidu.com |
| 阿里云NLP | 商品评论分析、情感倾向、观点抽取 | ¥0.1/次 | aliyun.com |
| 腾讯云NLP | 文本分类、情感分析、关键词提取 | ¥0.05/次 | cloud.tencent.com |
| DataYes(数说故事) | 电商评论全链路分析SaaS | ¥5,000+/年 | datayes.com |
| 极客销售宝 | 评论采集+客户画像+CRM一体 | ¥1,999/年 | jikexsh.com |
七、⚡ 快速上手路线图
`第1步:注册淘宝/京东开放平台 → 获取 AppKey/Secret
↓ (30分钟)
第2步:用 Postman 调用 API 测试 → 确认能拿到数据
↓ (1小时)
第3步:Python 脚本批量拉取评论(支持翻页)
↓ (2小时)
第4步:接入 jieba + 情感词典 → 跑出关键词/情感
↓ (1小时)
第5步:生成客户画像报告 + 可视化看板
↓ (1小时)
✅ 总耗时:约 5-6 小时完成全流程
`
💡 提示:如果你告诉我具体是哪个平台(淘宝/京东/拼多多/抖音)以及什么商品类目(笔记本电脑/手机/家电),我可以直接给你可运行的完整代码和真实 API 调用示例,复制粘贴就能用!