基于Python 实现亚马逊销售数据可视化

python 复制代码
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from tabulate import tabulate

# Display settings: whitegrid style with the muted palette for all plots.
# set_theme() is the preferred seaborn interface; sns.set() is only an alias
# for it that the seaborn documentation says may be removed in the future.
sns.set_theme(style='whitegrid', palette='muted', color_codes=True)

# 预测建模
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance

# Fix the random seeds so that results are reproducible
import random
random.seed(42)
np.random.seed(42)

# Statistics utilities, and suppression of warning messages
import scipy.stats as stats
import warnings
warnings.filterwarnings('ignore')
python 复制代码
# Load the Amazon sales CSV into a DataFrame and preview its first rows.
df = pd.read_csv("/Amazon.csv")
df.head()

|---|------------|------------|------------|---------------|-----------|---------------------|-----------------|------------|----------|-----------|----------|-------|--------------|-------------|------------------|-------------|-------------|-------|---------------|-----------|
| | OrderID | OrderDate | CustomerID | CustomerName | ProductID | ProductName | Category | Brand | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount | PaymentMethod | OrderStatus | City | State | Country | SellerID |
| 0 | ORD0000001 | 2023-01-31 | CUST001504 | Vihaan Sharma | P00014 | Drone Mini | Books | BrightLux | 3 | 106.59 | 0.00 | 0.00 | 0.09 | 319.86 | Debit Card | Delivered | Washington | DC | India | SELL01967 |
| 1 | ORD0000002 | 2023-12-30 | CUST000178 | Pooja Kumar | P00040 | Microphone | Home & Kitchen | UrbanStyle | 1 | 251.37 | 0.05 | 19.10 | 1.74 | 259.64 | Amazon Pay | Delivered | Fort Worth | TX | United States | SELL01298 |
| 2 | ORD0000003 | 2022-05-10 | CUST047516 | Sneha Singh | P00044 | Power Bank 20000mAh | Clothing | UrbanStyle | 3 | 35.03 | 0.10 | 7.57 | 5.91 | 108.06 | Debit Card | Delivered | Austin | TX | United States | SELL00908 |
| 3 | ORD0000004 | 2023-07-18 | CUST030059 | Vihaan Reddy | P00041 | Webcam Full HD | Home & Kitchen | Zenith | 5 | 33.58 | 0.15 | 11.42 | 5.53 | 159.66 | Cash on Delivery | Delivered | Charlotte | NC | India | SELL01164 |
| 4 | ORD0000005 | 2023-02-04 | CUST048677 | Aditya Kapoor | P00029 | T-Shirt | Clothing | KiddoFun | 2 | 515.64 | 0.25 | 38.67 | 9.23 | 821.36 | Credit Card | Cancelled | San Antonio | TX | Canada | SELL01411 |

python 复制代码
# Preview the last rows of the DataFrame.
df.tail()

|-------|------------|------------|------------|---------------|-----------|-------------------|--------------------|-----------|----------|-----------|----------|--------|--------------|-------------|------------------|-------------|--------------|-------|---------------|-----------|
| | OrderID | OrderDate | CustomerID | CustomerName | ProductID | ProductName | Category | Brand | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount | PaymentMethod | OrderStatus | City | State | Country | SellerID |
| 99995 | ORD0099996 | 2023-03-07 | CUST001356 | Karan Joshi | P00047 | Memory Card 128GB | Electronics | Apex | 2 | 492.34 | 0.00 | 78.77 | 2.75 | 1066.20 | UPI | Delivered | Jacksonville | FL | India | SELL00041 |
| 99996 | ORD0099997 | 2021-11-24 | CUST031254 | Sunita Kapoor | P00046 | Car Charger | Sports & Outdoors | Apex | 5 | 449.30 | 0.00 | 179.72 | 6.07 | 2432.29 | Credit Card | Delivered | San Jose | CA | United States | SELL01449 |
| 99997 | ORD0099998 | 2023-04-29 | CUST012579 | Aman Gupta | P00030 | Dress Shirt | Sports & Outdoors | BrightLux | 4 | 232.40 | 0.00 | 74.37 | 12.43 | 1016.40 | Cash on Delivery | Delivered | Indianapolis | IN | United States | SELL00028 |
| 99998 | ORD0099999 | 2021-11-01 | CUST026243 | Simran Gupta | P00046 | Car Charger | Sports & Outdoors | HomeEase | 1 | 294.05 | 0.00 | 23.52 | 13.09 | 330.66 | Debit Card | Delivered | Charlotte | NC | United States | SELL00324 |
| 99999 | ORD0100000 | 2021-12-04 | CUST029492 | Sunita Reddy | P00019 | LED Desk Lamp | Home & Kitchen | CoreTech | 5 | 166.70 | 0.05 | 63.35 | 3.34 | 858.52 | Debit Card | Delivered | New York | NY | United States | SELL00761 |

python 复制代码
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

|-------|---------------|---------------|---------------|---------------|---------------|---------------|
| | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount |
| count | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 |
| mean | 3.001400 | 302.905748 | 0.074226 | 68.468902 | 7.406660 | 918.256479 |
| std | 1.413548 | 171.840797 | 0.082583 | 74.131180 | 4.324057 | 724.508332 |
| min | 1.000000 | 5.000000 | 0.000000 | 0.000000 | 0.000000 | 4.270000 |
| 25% | 2.000000 | 154.190000 | 0.000000 | 15.920000 | 3.680000 | 340.890000 |
| 50% | 3.000000 | 303.070000 | 0.050000 | 45.250000 | 7.300000 | 714.315000 |
| 75% | 4.000000 | 451.500000 | 0.100000 | 96.060000 | 11.150000 | 1349.765000 |
| max | 5.000000 | 599.990000 | 0.300000 | 538.460000 | 15.000000 | 3534.980000 |

python 复制代码
# Annotated heatmap of the pairwise correlations between the numeric columns.
corr_matrix = df.corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()
python 复制代码
# Heatmap of the null mask: every cell is coloured by whether its value is missing.
missing_mask = df.isnull()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(missing_mask, cmap="viridis", cbar=False, ax=ax)
ax.set_title("Missing Values Heatmap")
plt.show()
python 复制代码
# Names of all numeric columns in the DataFrame.
numeric_cols = df.select_dtypes(include=['number']).columns

# Plot the distribution (histogram with a KDE curve) of each numeric column,
# showing one figure per column.
for column_name in numeric_cols:
    sns.histplot(data=df, x=column_name, kde=True)
    plt.show()
python 复制代码
# Count plots for the categorical (object-dtype) columns, one figure each.
#
# Identifier-like columns (e.g. OrderID, CustomerID, SellerID) appear to hold
# thousands of distinct values; a count plot of such a column draws one bar per
# value, which is extremely slow and unreadable. Columns above the cap are
# therefore skipped and reported instead of plotted.
max_categories = 30
for col in df.select_dtypes(include=['object']).columns:
    n_unique = df[col].nunique()
    if n_unique > max_categories:
        print(f"Skipping {col}: {n_unique} distinct values (cap is {max_categories})")
        continue
    sns.countplot(x=col, data=df)
    # Category labels can be long (e.g. product or city names); rotate them
    # so they do not overlap on the x axis.
    plt.xticks(rotation=45, ha='right')
    plt.title(f"Count of {col}")
    plt.show()
相关推荐
东坡肘子2 小时前
Swift、SwiftUI 与 SwiftData:走向成熟的 2025 -- 肘子的 Swift 周报 #116
人工智能·swiftui·swift
智慧化智能化数字化方案2 小时前
解读 2025 高质量数据集 建设指南【附全文阅读】
大数据·人工智能·高质量数据集 建设指南
buttonupAI9 小时前
今日Reddit各AI板块高价值讨论精选(2025-12-20)
人工智能
2501_9048764810 小时前
2003-2021年上市公司人工智能的采纳程度测算数据(含原始数据+计算结果)
人工智能
曹文杰151903011210 小时前
2025 年大模型背景下应用统计本科 计算机方向 培养方案
python·线性代数·机器学习·学习方法
竣雄10 小时前
计算机视觉:原理、技术与未来展望
人工智能·计算机视觉
救救孩子把10 小时前
44-机器学习与大模型开发数学教程-4-6 大数定律与中心极限定理
人工智能·机器学习
Rabbit_QL11 小时前
【LLM评价指标】从概率到直觉:理解语言模型的困惑度
人工智能·语言模型·自然语言处理
呆萌很11 小时前
HSV颜色空间过滤
人工智能