基于Python 实现亚马逊销售数据可视化

python 复制代码
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from tabulate import tabulate

# 显示设置
sns.set(style='whitegrid', palette='muted', color_codes=True)

# 预测建模
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance

# 随机安装/随机固定
import random
random.seed(42)
np.random.seed(42)

# 随机性的确定性、统计学知识及警告信息
import scipy.stats as stats
import warnings
warnings.filterwarnings('ignore')
python 复制代码
df = pd.read_csv("/Amazon.csv")
df.head()

|---|------------|------------|------------|---------------|-----------|---------------------|-----------------|------------|----------|-----------|----------|-------|--------------|-------------|------------------|-------------|-------------|-------|---------------|-----------|
| | OrderID | OrderDate | CustomerID | CustomerName | ProductID | ProductName | Category | Brand | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount | PaymentMethod | OrderStatus | City | State | Country | SellerID |
| 0 | ORD0000001 | 2023-01-31 | CUST001504 | Vihaan Sharma | P00014 | Drone Mini | Books | BrightLux | 3 | 106.59 | 0.00 | 0.00 | 0.09 | 319.86 | Debit Card | Delivered | Washington | DC | India | SELL01967 |
| 1 | ORD0000002 | 2023-12-30 | CUST000178 | Pooja Kumar | P00040 | Microphone | Home & Kitchen | UrbanStyle | 1 | 251.37 | 0.05 | 19.10 | 1.74 | 259.64 | Amazon Pay | Delivered | Fort Worth | TX | United States | SELL01298 |
| 2 | ORD0000003 | 2022-05-10 | CUST047516 | Sneha Singh | P00044 | Power Bank 20000mAh | Clothing | UrbanStyle | 3 | 35.03 | 0.10 | 7.57 | 5.91 | 108.06 | Debit Card | Delivered | Austin | TX | United States | SELL00908 |
| 3 | ORD0000004 | 2023-07-18 | CUST030059 | Vihaan Reddy | P00041 | Webcam Full HD | Home & Kitchen | Zenith | 5 | 33.58 | 0.15 | 11.42 | 5.53 | 159.66 | Cash on Delivery | Delivered | Charlotte | NC | India | SELL01164 |
| 4 | ORD0000005 | 2023-02-04 | CUST048677 | Aditya Kapoor | P00029 | T-Shirt | Clothing | KiddoFun | 2 | 515.64 | 0.25 | 38.67 | 9.23 | 821.36 | Credit Card | Cancelled | San Antonio | TX | Canada | SELL01411 |

python 复制代码
df.tail()

|-------|------------|------------|------------|---------------|-----------|-------------------|--------------------|-----------|----------|-----------|----------|--------|--------------|-------------|------------------|-------------|--------------|-------|---------------|-----------|
| | OrderID | OrderDate | CustomerID | CustomerName | ProductID | ProductName | Category | Brand | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount | PaymentMethod | OrderStatus | City | State | Country | SellerID |
| 99995 | ORD0099996 | 2023-03-07 | CUST001356 | Karan Joshi | P00047 | Memory Card 128GB | Electronics | Apex | 2 | 492.34 | 0.00 | 78.77 | 2.75 | 1066.20 | UPI | Delivered | Jacksonville | FL | India | SELL00041 |
| 99996 | ORD0099997 | 2021-11-24 | CUST031254 | Sunita Kapoor | P00046 | Car Charger | Sports & Outdoors | Apex | 5 | 449.30 | 0.00 | 179.72 | 6.07 | 2432.29 | Credit Card | Delivered | San Jose | CA | United States | SELL01449 |
| 99997 | ORD0099998 | 2023-04-29 | CUST012579 | Aman Gupta | P00030 | Dress Shirt | Sports & Outdoors | BrightLux | 4 | 232.40 | 0.00 | 74.37 | 12.43 | 1016.40 | Cash on Delivery | Delivered | Indianapolis | IN | United States | SELL00028 |
| 99998 | ORD0099999 | 2021-11-01 | CUST026243 | Simran Gupta | P00046 | Car Charger | Sports & Outdoors | HomeEase | 1 | 294.05 | 0.00 | 23.52 | 13.09 | 330.66 | Debit Card | Delivered | Charlotte | NC | United States | SELL00324 |
| 99999 | ORD0100000 | 2021-12-04 | CUST029492 | Sunita Reddy | P00019 | LED Desk Lamp | Home & Kitchen | CoreTech | 5 | 166.70 | 0.05 | 63.35 | 3.34 | 858.52 | Debit Card | Delivered | New York | NY | United States | SELL00761 |

python 复制代码
df.describe()

|-------|---------------|---------------|---------------|---------------|---------------|---------------|
| | Quantity | UnitPrice | Discount | Tax | ShippingCost | TotalAmount |
| count | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 | 100000.000000 |
| mean | 3.001400 | 302.905748 | 0.074226 | 68.468902 | 7.406660 | 918.256479 |
| std | 1.413548 | 171.840797 | 0.082583 | 74.131180 | 4.324057 | 724.508332 |
| min | 1.000000 | 5.000000 | 0.000000 | 0.000000 | 0.000000 | 4.270000 |
| 25% | 2.000000 | 154.190000 | 0.000000 | 15.920000 | 3.680000 | 340.890000 |
| 50% | 3.000000 | 303.070000 | 0.050000 | 45.250000 | 7.300000 | 714.315000 |
| 75% | 4.000000 | 451.500000 | 0.100000 | 96.060000 | 11.150000 | 1349.765000 |
| max | 5.000000 | 599.990000 | 0.300000 | 538.460000 | 15.000000 | 3534.980000 |

python 复制代码
plt.figure(figsize=(10,6))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Heatmap")
plt.show()
python 复制代码
plt.figure(figsize=(10,6))
sns.heatmap(df.isnull(), cbar=False, cmap="viridis")
plt.title("Missing Values Heatmap")
plt.show()
python 复制代码
numeric_cols = df.select_dtypes(include=['number']).columns

# رسم التوزيع لكل عمود رقمي
for col in numeric_cols:
    sns.histplot(x=col, data=df, kde=True)
    plt.show()
python 复制代码
for col in df:
    if df[col].dtype == 'O':
        sns.countplot(x=col,data=df)
        plt.show()
相关推荐
J2虾虾2 分钟前
Spring AI Alibaba - 人工介入(Human-in-the-Loop)
java·人工智能·spring
兆。5 分钟前
LangChain框架深度解析:与FastGPT的优势对比
人工智能·langchain
tzc_fly8 分钟前
ELF:连续扩散语言模型
人工智能·语言模型·自然语言处理
昨日之日200624 分钟前
MiniCPM5-1B - 随身AI智能助手 擅长工具使用和复杂推理,长上下文处理能力强 一键整合包下载
人工智能
J2虾虾29 分钟前
Spring AI Alibaba - Skills 技能
人工智能·python·spring
冰西瓜60043 分钟前
深度学习的数学原理(四十)—— Transformer 推理全过程
人工智能·深度学习·transformer
Bingorl1 小时前
机器学习之集成学习
人工智能·机器学习·集成学习
weixin_468466851 小时前
SURF 图像特征提取算法新手实战指南
图像处理·人工智能·算法·机器视觉·surf·sift
盛夏光年爱学习1 小时前
Agentic RAG 深度解析:让 Agent 自己决定要不要检索、检索几次,这才是 RAG 的正确打开方式
人工智能
weiwin1231 小时前
MAF入门(3 下):多轮对话进阶——清除历史、注入 System、截断策略
人工智能·agent