基于Python 实现亚马逊销售数据可视化

python 复制代码
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from tabulate import tabulate

# Display settings: use seaborn's "whitegrid" style and "muted" palette,
# with color_codes enabled, for every plot drawn after this point.
sns.set(style='whitegrid', palette='muted', color_codes=True)

# Predictive-modeling utilities (data splitting, classifiers, evaluation metrics)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance

# Fix the random seeds (Python's `random` and NumPy) so runs are reproducible
import random
random.seed(42)
np.random.seed(42)

# Statistics helpers and warning control
import scipy.stats as stats
import warnings
# NOTE(review): 'ignore' silences every warning category, including
# deprecation notices from the plotting/data libraries — confirm this is intended.
warnings.filterwarnings('ignore')
python 复制代码
# Load the order data into a DataFrame.
# NOTE(review): "/Amazon.csv" is an absolute path at the filesystem root —
# confirm it matches where the dataset actually lives in your environment.
df = pd.read_csv("/Amazon.csv")
# Preview the first rows of the dataset (head() returns 5 rows by default).
df.head()

| | OrderID | OrderDate | CustomerID | CustomerName | ProductID | ProductName | Category | Brand | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount | PaymentMethod | OrderStatus | City | State | Country | SellerID |
|---|------------|------------|------------|---------------|-----------|---------------------|-----------------|------------|----------|-----------|----------|-------|--------------|-------------|------------------|-------------|-------------|-------|---------------|-----------|
| 0 | ORD0000001 | 2023-01-31 | CUST001504 | Vihaan Sharma | P00014 | Drone Mini | Books | BrightLux | 3 | 106.59 | 0.00 | 0.00 | 0.09 | 319.86 | Debit Card | Delivered | Washington | DC | India | SELL01967 |
| 1 | ORD0000002 | 2023-12-30 | CUST000178 | Pooja Kumar | P00040 | Microphone | Home & Kitchen | UrbanStyle | 1 | 251.37 | 0.05 | 19.10 | 1.74 | 259.64 | Amazon Pay | Delivered | Fort Worth | TX | United States | SELL01298 |
| 2 | ORD0000003 | 2022-05-10 | CUST047516 | Sneha Singh | P00044 | Power Bank 20000mAh | Clothing | UrbanStyle | 3 | 35.03 | 0.10 | 7.57 | 5.91 | 108.06 | Debit Card | Delivered | Austin | TX | United States | SELL00908 |
| 3 | ORD0000004 | 2023-07-18 | CUST030059 | Vihaan Reddy | P00041 | Webcam Full HD | Home & Kitchen | Zenith | 5 | 33.58 | 0.15 | 11.42 | 5.53 | 159.66 | Cash on Delivery | Delivered | Charlotte | NC | India | SELL01164 |
| 4 | ORD0000005 | 2023-02-04 | CUST048677 | Aditya Kapoor | P00029 | T-Shirt | Clothing | KiddoFun | 2 | 515.64 | 0.25 | 38.67 | 9.23 | 821.36 | Credit Card | Cancelled | San Antonio | TX | Canada | SELL01411 |

python 复制代码
# Preview the last rows of the dataset (tail() returns 5 rows by default).
df.tail()

| | OrderID | OrderDate | CustomerID | CustomerName | ProductID | ProductName | Category | Brand | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount | PaymentMethod | OrderStatus | City | State | Country | SellerID |
|-------|------------|------------|------------|---------------|-----------|-------------------|--------------------|-----------|----------|-----------|----------|--------|--------------|-------------|------------------|-------------|--------------|-------|---------------|-----------|
| 99995 | ORD0099996 | 2023-03-07 | CUST001356 | Karan Joshi | P00047 | Memory Card 128GB | Electronics | Apex | 2 | 492.34 | 0.00 | 78.77 | 2.75 | 1066.20 | UPI | Delivered | Jacksonville | FL | India | SELL00041 |
| 99996 | ORD0099997 | 2021-11-24 | CUST031254 | Sunita Kapoor | P00046 | Car Charger | Sports & Outdoors | Apex | 5 | 449.30 | 0.00 | 179.72 | 6.07 | 2432.29 | Credit Card | Delivered | San Jose | CA | United States | SELL01449 |
| 99997 | ORD0099998 | 2023-04-29 | CUST012579 | Aman Gupta | P00030 | Dress Shirt | Sports & Outdoors | BrightLux | 4 | 232.40 | 0.00 | 74.37 | 12.43 | 1016.40 | Cash on Delivery | Delivered | Indianapolis | IN | United States | SELL00028 |
| 99998 | ORD0099999 | 2021-11-01 | CUST026243 | Simran Gupta | P00046 | Car Charger | Sports & Outdoors | HomeEase | 1 | 294.05 | 0.00 | 23.52 | 13.09 | 330.66 | Debit Card | Delivered | Charlotte | NC | United States | SELL00324 |
| 99999 | ORD0100000 | 2021-12-04 | CUST029492 | Sunita Reddy | P00019 | LED Desk Lamp | Home & Kitchen | CoreTech | 5 | 166.70 | 0.05 | 63.35 | 3.34 | 858.52 | Debit Card | Delivered | New York | NY | United States | SELL00761 |

python 复制代码
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

| | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount |
|-------|---------------|---------------|---------------|---------------|---------------|---------------|
| count | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 |
| mean | 3.001400 | 302.905748 | 0.074226 | 68.468902 | 7.406660 | 918.256479 |
| std | 1.413548 | 171.840797 | 0.082583 | 74.131180 | 4.324057 | 724.508332 |
| min | 1.000000 | 5.000000 | 0.000000 | 0.000000 | 0.000000 | 4.270000 |
| 25% | 2.000000 | 154.190000 | 0.000000 | 15.920000 | 3.680000 | 340.890000 |
| 50% | 3.000000 | 303.070000 | 0.050000 | 45.250000 | 7.300000 | 714.315000 |
| 75% | 4.000000 | 451.500000 | 0.100000 | 96.060000 | 11.150000 | 1349.765000 |
| max | 5.000000 | 599.990000 | 0.300000 | 538.460000 | 15.000000 | 3534.980000 |

python 复制代码
# Pairwise correlations between the numeric columns, drawn as an annotated heatmap.
correlation_matrix = df.corr(numeric_only=True)
plt.figure(figsize=(10, 6))
sns.heatmap(
    correlation_matrix,
    cmap="coolwarm",
    annot=True,  # write the coefficient inside each cell
    fmt=".2f",   # two decimal places
)
plt.title("Correlation Heatmap")
plt.show()
python 复制代码
# Visualise where values are missing: one cell per (row, column) of the
# boolean null mask, without a colour bar.
null_mask = df.isnull()
_, missing_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(null_mask, cmap="viridis", cbar=False, ax=missing_ax)
missing_ax.set_title("Missing Values Heatmap")
plt.show()
python 复制代码
# Names of all numeric columns in the DataFrame
numeric_cols = df.select_dtypes(include=['number']).columns

# Plot the distribution of each numeric column: a histogram with a KDE curve
# overlaid, shown one figure at a time.
for column_name in numeric_cols:
    sns.histplot(data=df, x=column_name, kde=True)
    plt.show()
python 复制代码
# Plot value counts for each categorical (object-dtype) column.
#
# Identifier-like columns (e.g. an order ID with one distinct value per row)
# would produce one bar per distinct value: the chart is unreadable and can
# take a very long time to draw. Columns with more than MAX_CATEGORIES
# distinct values are therefore skipped and reported instead of plotted.
MAX_CATEGORIES = 50
for col in df:
    if df[col].dtype != 'O':
        continue  # numeric columns are covered by the histograms

    n_categories = df[col].nunique()
    if n_categories > MAX_CATEGORIES:
        print(f"Skipping countplot for '{col}': {n_categories} distinct values")
        continue

    sns.countplot(x=col, data=df)
    plt.show()
相关推荐
ZhengEnCi2 小时前
09bad-斯坦福CS336作业一-构建优化器
人工智能
ZhengEnCi2 小时前
09bac-斯坦福CS336作业一-实现训练损失计算
人工智能
冬奇Lab3 小时前
Skill 系列(01):Skill 评测体系——如何量化一个 AI Skill 的质量
人工智能
IT_陈寒6 小时前
Redis内存爆了,原来我漏掉了这个致命配置
前端·人工智能·后端
用户3521802454757 小时前
🎆从 Prompt 到 Skill:让 Spring AI Agent 学会"装新技能"
人工智能·spring boot·ai编程
米小虾8 小时前
手把手教你搭建第一个生产级AI Agent:从选型到实战的完整指南
人工智能·agent
任沫8 小时前
Agent之Function Call
javascript·人工智能·go
米小虾8 小时前
2026年AI Agent全面爆发:从开源生态到企业级应用的进化之路
人工智能·agent
用户6919026813398 小时前
Vibe Coding 开发项目的基本范式
人工智能·设计模式·代码规范
To_OC8 小时前
别再跟 AI 死磕 prompt 了,我写了个 Loop 让它自己改到满意为止
人工智能·aigc·agent