🛒 京东商品详情抓取实战
📋 目录
- 准备工作
- 方案一:使用京东联盟API
- 方案二:直接爬取
- 完整代码(批量抓取 + 数据存储)
- 数据解析
🚀 准备工作
1️⃣ 申请京东联盟API权限
bash
`# 注册地址:https://union.jd.com/
# 步骤:
# 1. 注册账号 → 2. 创建应用 → 3. 获取 AppKey 和 AppSecret
`
2️⃣ 安装依赖
bash
`pip install requests beautifulsoup4
`
⭐ 方案一:使用京东联盟API(推荐✅)
完整代码实现
python
`import requests
import hashlib
import time
import json
from urllib.parse import quote
class JDApiClient:
    """Small client for the JD Union open API with token caching and MD5 signing.

    NOTE(review): the endpoint URLs, API method names and parameter layout are
    kept exactly as the original snippet had them; confirm them against the
    official JD Union API documentation before relying on this client.
    """

    TOKEN_URL = "https://api.jd.com/token"
    ROUTER_URL = "https://api.jd.com/routerjson"

    def __init__(self, app_key, app_secret, timeout=10):
        """Store the credentials and start with an empty token cache.

        :param app_key: application key issued for the app
        :param app_secret: application secret used for signing
        :param timeout: seconds to wait for each HTTP request; without a
            timeout, ``requests`` may block forever on a stalled connection
        """
        self.app_key = app_key
        self.app_secret = app_secret
        self.timeout = timeout
        self.access_token = None
        self.token_expire = 0

    def _get_timestamp(self):
        """Return the current Unix time in milliseconds as a string."""
        return str(int(time.time() * 1000))

    def _get_sign(self, params):
        """Return the upper-case MD5 signature for ``params``.

        The signed string is the secret, followed by every ``key`` + ``value``
        pair in key order, followed by the secret again.
        """
        pairs = "".join(f"{key}{value}" for key, value in sorted(params.items()))
        sign_str = f"{self.app_secret}{pairs}{self.app_secret}"
        return hashlib.md5(sign_str.encode()).hexdigest().upper()

    def get_access_token(self):
        """Return a cached access token, requesting a new one when needed.

        :raises RuntimeError: if the token response has no ``access_token``
        """
        if self.access_token and time.time() < self.token_expire:
            return self.access_token

        params = {
            "grant_type": "client_credentials",
            "app_key": self.app_key,
            "app_secret": self.app_secret,
            "timestamp": self._get_timestamp(),
            "sign_method": "md5",
        }
        params["sign"] = self._get_sign(params)
        response = requests.get(self.TOKEN_URL, params=params, timeout=self.timeout)
        data = response.json()

        if "access_token" not in data:
            raise RuntimeError(f"获取Token失败: {data}")

        self.access_token = data["access_token"]
        # Treat the token as expired five minutes early so that it is never
        # used right at its expiry; coerce because the value may be a string.
        self.token_expire = time.time() + float(data.get("expires_in", 0)) - 300
        return self.access_token

    def _call_router(self, method, business_params):
        """Send one signed GET request to the router endpoint and return its JSON."""
        params = {
            "method": method,
            "app_key": self.app_key,
            "access_token": self.get_access_token(),
            "timestamp": self._get_timestamp(),
            "format": "json",
            "v": "2.0",
            "sign_method": "md5",
        }
        params.update(business_params)
        # The signature must be computed last, over every other parameter.
        params["sign"] = self._get_sign(params)
        response = requests.get(self.ROUTER_URL, params=params, timeout=self.timeout)
        return response.json()

    def get_goods_detail(self, sku_id):
        """Fetch product details for a SKU.

        :param sku_id: JD product SKU id
        :return: the decoded JSON response
        """
        return self._call_router("jingdong.union.open.goods.query", {"skuIds": sku_id})

    def get_goods_promotion(self, sku_id):
        """Fetch promotion information (price, coupons, ...) for a SKU.

        :param sku_id: JD product SKU id
        :return: the decoded JSON response
        """
        return self._call_router("jd.union.open.goods.promotion.get", {"skuId": sku_id})
# Usage example
if __name__ == "__main__":
    # Replace these placeholders with your own AppKey and AppSecret
    APP_KEY = "your_app_key_here"
    APP_SECRET = "your_app_secret_here"
    client = JDApiClient(APP_KEY, APP_SECRET)

    # The SKU id is the number in the product URL:
    # item.jd.com/100038004356.html -> 100038004356
    sku_id = "100038004356"

    # Each step prints a banner, then the pretty-printed JSON response.
    steps = (
        ("🔍 正在获取商品详情...", client.get_goods_detail),
        ("\n💰 正在获取促销信息...", client.get_goods_promotion),
    )
    for banner, fetch in steps:
        print(banner)
        payload = fetch(sku_id)
        print(json.dumps(payload, ensure_ascii=False, indent=2))
`
🔧 方案二:直接爬取(无需API Key)
python
`import requests
from bs4 import BeautifulSoup
import json
import re
class JDCrawler:
    """Fetch JD product data from public pages, without API credentials."""

    _STATE_MARKER = 'window.__INITIAL_STATE__'
    # Matches the assignment up to (but not including) the opening brace of
    # the JSON object, tolerating whitespace around "=".
    _STATE_ASSIGN_RE = re.compile(r'window\.__INITIAL_STATE__\s*=\s*(?=\{)')

    def __init__(self, timeout=10):
        """Prepare the request headers.

        :param timeout: seconds to wait for each HTTP request; without a
            timeout, ``requests`` may block forever on a stalled connection
        """
        self.timeout = timeout
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Referer': 'https://www.jd.com/'
        }

    @staticmethod
    def _extract_initial_state(script_text):
        """Return the object assigned to ``window.__INITIAL_STATE__``, or None.

        A real JSON decoder is used instead of a non-greedy regex so that a
        ``};`` sequence inside a string value cannot truncate the payload.
        """
        match = JDCrawler._STATE_ASSIGN_RE.search(script_text)
        if match is None:
            return None
        try:
            state, _end = json.JSONDecoder().raw_decode(script_text, match.end())
        except json.JSONDecodeError:
            return None
        return state

    def get_product_json(self, item_id):
        """Extract the embedded JSON state from a product page.

        :param item_id: product id, as it appears in the product URL
        :return: the decoded state, or None when no script holds a usable one
        """
        url = f"https://item.jd.com/{item_id}.html"
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')

        for script in soup.find_all('script', type='text/javascript'):
            text = script.string
            if not text or self._STATE_MARKER not in text:
                continue
            state = self._extract_initial_state(text)
            if state is not None:
                return state
        return None

    def get_product_api(self, item_id):
        """Query the p.3.cn price endpoint for the item and return its JSON.

        NOTE(review): this is an undocumented endpoint; the original snippet
        described it as more stable than page parsing, which is not verified
        here.
        """
        url = f"https://p.3.cn/prices/mgets?skuIds=J_{item_id}"
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        return response.json()
# Usage example
crawler = JDCrawler()

# Product id to look up
item_id = "100038004356"

# Method 1: parse the JSON state embedded in the product page
print("📦 方法1: 页面解析")
data = crawler.get_product_json(item_id)
if data:
    product_info = data['pcData']['productInfo']
    print(f"商品名: {product_info['name']}")
    print(f"价格: ¥{product_info['price']}")

# Method 2: call the price endpoint and dump its response
print("\n💰 方法2: 价格API")
price_data = crawler.get_product_api(item_id)
price_report = json.dumps(price_data, ensure_ascii=False, indent=2)
print(price_report)
`
🎯 完整实战:批量抓取 + 数据存储
python
`import requests
import json
import csv
from datetime import datetime
import time
class JDBatchCrawler:
    """Fetch several JD products in sequence and save them to a CSV file."""

    def __init__(self):
        """Prepare the request headers used for every request."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    @staticmethod
    def _extract_name_and_shop(html):
        """Return ``(name, shop)`` parsed from product-page HTML.

        Either element is ``'N/A'`` when its pattern does not match.

        NOTE(review): both patterns are kept from the original snippet and are
        not verified against the current page markup; the shop pattern can
        also match the product title's ``class="name"`` when both are on the
        same line.
        """
        # Local import: the import header of this snippet does not include
        # ``re``, which previously made every lookup fail with a NameError.
        import re

        name_match = re.search(r'<h1 class="name">(.*?)</h1>', html)
        shop_match = re.search(r'class="name".*?>(.*?)</a>', html)
        name = name_match.group(1) if name_match else 'N/A'
        shop = shop_match.group(1) if shop_match else 'N/A'
        return name, shop

    def get_product_info(self, item_id):
        """Fetch the price and basic details of one product.

        :param item_id: product id, as it appears in the product URL
        :return: a dict with item_id, name, shop, price and crawl_time, or
            None when anything goes wrong (the error is printed)
        """
        try:
            # Price endpoint
            price_url = f"https://p.3.cn/prices/mgets?skuIds=J_{item_id}"
            price_resp = requests.get(price_url, headers=self.headers, timeout=10)
            price_data = price_resp.json()

            # Product page
            detail_url = f"https://item.jd.com/{item_id}.html"
            detail_resp = requests.get(detail_url, headers=self.headers, timeout=10)
            name, shop = self._extract_name_and_shop(detail_resp.text)

            return {
                'item_id': item_id,
                'name': name,
                'shop': shop,
                'price': price_data[0].get('p', 'N/A') if price_data else 'N/A',
                'crawl_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
        except Exception as e:
            # Deliberate best-effort boundary: one failing product must not
            # abort the whole batch.
            print(f"❌ 抓取失败 {item_id}: {e}")
            return None

    def batch_crawl(self, item_ids, output_file='products.csv', delay=1):
        """Fetch every product in ``item_ids`` and write the results to CSV.

        :param item_ids: iterable of product ids
        :param output_file: CSV path; written only when a product succeeded
        :param delay: seconds to pause between two consecutive requests
        :return: list of product dicts that were fetched successfully
        """
        item_ids = list(item_ids)  # accept any iterable; the total is needed below
        total = len(item_ids)
        results = []

        for i, item_id in enumerate(item_ids, 1):
            print(f"📊 [{i}/{total}] 正在抓取: {item_id}")
            product = self.get_product_info(item_id)
            if product:
                results.append(product)
            # Throttle between requests; nothing follows the last one.
            if delay and i < total:
                time.sleep(delay)

        if results:
            # utf-8-sig adds a BOM to the file.
            with open(output_file, 'w', newline='', encoding='utf-8-sig') as f:
                writer = csv.DictWriter(f, fieldnames=list(results[0]))
                writer.writeheader()
                writer.writerows(results)
            print(f"\n✅ 成功抓取 {len(results)} 个商品,已保存到 {output_file}")

        return results
# Run example
if __name__ == "__main__":
    # Sample SKU ids, keyed by the product label each one stands for
    sample_skus = {
        "iPhone 15": "100038004356",
        "MacBook Pro": "100012043978",
        "AirPods Pro": "100026789012",
    }
    item_ids = list(sample_skus.values())

    crawler = JDBatchCrawler()
    crawler.batch_crawl(item_ids)
`
📊 数据解析示例
python
`import json
# Simulated API response
goods_info = {
    "skuId": "100038004356",
    "spuId": "100038004356",
    "name": "Apple iPhone 15 (A3092) 128GB 蓝色",
    "shopId": "1000001",
    "categoryId": "9987",
    "brandId": "1320",
    "price": "5999.00",
    "jdPrice": "5999.00",
    "promotionPrice": "5499.00",
    "couponPrice": "5299.00",
    "commissionRate": "1.5",
    "commission": "89.99",
}
api_response = {
    "code": "0",
    "msg": "成功",
    "data": {"goodsInfo": goods_info},
}

# Pull the product record out of the response and print a summary
data = api_response["data"]["goodsInfo"]
summary_lines = [
    f"🏷️ 商品名称: {data['name']}",
    f"💰 京东价: ¥{data['jdPrice']}",
    f"🔥 促销价: ¥{data['promotionPrice']}",
    f"🎫 优惠券价: ¥{data['couponPrice']}",
    f"📈 佣金比例: {data['commissionRate']}%",
    f"💵 预估佣金: ¥{data['commission']}",
]
print(*summary_lines, sep="\n")
`
⚠️ 注意事项
| 事项 | 说明 |
|---|---|
| 🔐 频率限制 | API每秒最多10次请求,爬虫建议间隔1-2秒 |
| 🛡️ 反爬机制 | 添加随机User-Agent,使用代理IP |
| 📜 合规使用 | 仅用于学习研究,遵守robots.txt |
| 🔑 Token有效期 | Access Token通常2小时过期,需刷新 |
🎁 快速开始模板
python
`# 使用前:先将上文的 JDApiClient 类保存为 jd_api.py,再替换 APP_KEY 和 APP_SECRET 后运行
from jd_api import JDApiClient
# Build a client from the placeholder credentials and print one product's details
client = JDApiClient(app_key="your_app_key", app_secret="your_app_secret")
data = client.get_goods_detail(sku_id="100038004356")
print(data)
`
需要我详细讲解某个部分吗?比如:
- 🔐 Token刷新机制
- 📊 数据存储到数据库
- 🤖 自动化定时抓取