利用面积图探索历史温度的变化趋势

利用面积图探索历史温度的变化趋势

python 复制代码
import datetime as dt
from calendar import isleap

import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
import seaborn as sns
from matplotlib import ticker
from plotly.express.colors import sample_colorscale
from statsmodels.nonparametric.smoothers_lowess import lowess

数据探索

以下数据如果有需要的同学可关注公众号HsuHeinrich,回复【数据可视化】自动获取~

python 复制代码
# Load the raw temperature history from the published CSV
url = "https://raw.githubusercontent.com/HsuHeinrich/data-collection/master/graph/temperature_history.csv"
df_raw = pd.read_csv(url)

# Parse the "date" column into datetimes so the .dt accessor can be used later
df_raw = df_raw.assign(date=pd.to_datetime(df_raw["date"]))

# Preview the first rows
df_raw.head()
python 复制代码
# Data preprocessing: build one row per day of a 365-day year holding the
# smoothed reference-period statistics and the selected year's values.

# Work on a copy so the raw frame stays untouched
df_f = df_raw.copy()

# Year to highlight and the (inclusive) reference period it is compared against
year = 2022
reference_period = (1961, 1990)

# Day-of-year for every observation
df_f["dayofyear"] = df_f["date"].dt.dayofyear

# Drop Feb 29 so that every year contributes the same 365 days
df_f = df_f[~((df_f["date"].dt.month == 2) & (df_f["date"].dt.day == 29))].copy()

# In leap years, shift the days after February back by one so that
# Mar 1 is day 60 in every year
df_f["dayofyear"] = df_f["dayofyear"].where(
    ~((df_f["date"].dt.month > 2) & (df_f["date"].dt.is_leap_year)),
    df_f["dayofyear"] - 1,
)

# Reset the index after the rows were dropped
df_f.reset_index(drop=True, inplace=True)

# Keep only the observations that fall inside the reference period
data = df_f[df_f["date"].dt.year.between(*reference_period)].copy()

# Per day-of-year: 5th percentile, mean and 95th percentile over the reference period
data = (
    data.groupby("dayofyear")["value"]
    .agg(
        [
            ("p05", lambda x: np.nanpercentile(x, 5)),
            ("mean", "mean"),
            ("p95", lambda x: np.nanpercentile(x, 95)),
        ]
    )
    .reset_index()
)


# Smooth the three statistics with LOWESS; the rows are already ordered by
# dayofyear (groupby output), hence is_sorted=True
for col in ["p05", "mean", "p95"]:
    smoothed_values = lowess(
        data[col],
        data["dayofyear"],
        is_sorted=True,
        frac=1 / 12,
    )

    # Column 1 of the result is used as the smoothed series
    data[col] = smoothed_values[:, 1]


# Attach the selected year's values, matched on dayofyear rather than on row
# position: a missing day or unsorted input then yields NaN for that day
# instead of silently shifting every later value onto the wrong day.
year_values = (
    df_f.loc[df_f["date"].dt.year == year]
    .drop_duplicates(subset="dayofyear")  # Series.map needs a unique lookup index
    .set_index("dayofyear")["value"]
)
data[f"{year}"] = data["dayofyear"].map(year_values)

# Deviation of the selected year from the smoothed reference mean
data[f"{year}_diff"] = data[f"{year}"] - data["mean"]


# Rebuild a calendar date for every row. If the selected year is a leap year,
# days from 60 on are moved forward by one again so the dates skip Feb 29.
dayofyear = data["dayofyear"]

if isleap(year):
    dayofyear = data["dayofyear"].where(
        data["dayofyear"] < 60, other=data["dayofyear"] + 1
    )

data["date"] = dayofyear.apply(
    lambda x: dt.datetime(year, 1, 1) + dt.timedelta(days=x - 1)
)

data.head()

绘制基础图形

绘制基本框架

python 复制代码
# Seaborn base style
sns.set_style("white")

# Resolution, font family and label sizes
plt.rcParams["figure.dpi"] = 100
mpl.rcParams.update(
    {
        "font.family": "sans-serif",
        "font.sans-serif": "Lato",
        "axes.labelsize": 11,
        "xtick.labelsize": 11,
        "ytick.labelsize": 11,
    }
)

# Single-axes figure
fig, axes = plt.subplots(figsize=(10, 7))

# Title and subtitle, both right-aligned at the figure edge
title_text = f"Mean temperature in Addis Ababa, Ethiopia {year}"
subtitle_text = (
    "Compared to historical daily mean temperatures "
    f"({reference_period[0]}-{reference_period[1]})"
)
plt.suptitle(title_text, fontsize=24, fontweight="bold", x=1, ha="right")
plt.title(
    subtitle_text, fontsize=14, fontweight="normal", x=1, ha="right", pad=20
)

# Use the full figure width and leave room at the top for the titles
fig.subplots_adjust(left=0, right=1, top=0.87)

# Hide all four spines
for side in ("top", "bottom", "right", "left"):
    axes.spines[side].set_visible(False)

# Light horizontal grid lines
axes.grid(axis="y", color="0.9", linestyle="-", linewidth=1)

绘制百分位线及区域填充

python 复制代码
# Historical mean for every day of the year
axes.plot(data.index, data["mean"], color="black", zorder=10)

# Percentile suffixes bounding the shaded band
percentiles = ["05", "95"]
lower_col, upper_col = (f"p{suffix}" for suffix in percentiles)

# Shade the area between the lower and upper percentile
axes.fill_between(data.index, data[lower_col], data[upper_col], color="#f8f8f8")

# One dashed line per percentile, labeled at its right end
for percentile in percentiles:
    column = f"p{percentile}"

    axes.plot(
        data.index,
        data[column],
        label=f"{percentile}th Percentile",
        color="black",
        linestyle="dashed",
        linewidth=0.5,
        zorder=9,
    )

    axes.text(
        data.index[-1],
        data[column].iloc[-1],
        f"P{percentile}",
        horizontalalignment="left",
        verticalalignment="center",
        color="black",
    )

fig

绘制指定年份与平均温度的差异值

python 复制代码
# 自定义色阶函数
def get_colorscale_mpl(diff: np.ndarray) -> np.ndarray:
    """
    Map signed deviations to RGBA colors.

    Positive entries are colored with the "YlOrRd" colormap; every other
    entry (negative, zero or NaN) is colored with "YlGnBu". Within the
    positive group and within the negative group the absolute deviation is
    min-max scaled to 0-1 before the colormap lookup; zero and NaN entries
    keep the scaled value 0.

    Args:
        diff: 1-D array-like of deviations (a pandas Series works as well).

    Returns:
        Array of shape (len(diff), 4) holding one RGBA row per input value.
    """

    diff = np.asarray(diff, dtype=float)

    # Sign masks; NaN compares False in both
    mask_above = diff > 0
    mask_below = diff < 0

    # Absolute deviations and their scaled counterpart (0 by default)
    magnitude = np.abs(diff)
    diff_norm = np.zeros_like(magnitude)

    # Min-max scale each sign group separately
    for mask in (mask_above, mask_below):
        if not mask.any():
            continue

        lowest = magnitude[mask].min()
        span = magnitude[mask].max() - lowest

        # A group with a single value (or all-equal values) has no range:
        # dividing would give 0/0 = NaN, which the colormap renders as its
        # "bad" color. Leave such a group at 0 instead.
        if span > 0:
            diff_norm[mask] = (magnitude[mask] - lowest) / span

    # One RGBA row per value
    colors = np.zeros((len(diff_norm), 4))

    # Look colors up only for groups that actually have members
    if mask_above.any():
        colors[mask_above] = plt.get_cmap("YlOrRd")(diff_norm[mask_above])

    mask_rest = ~mask_above
    if mask_rest.any():
        colors[mask_rest] = plt.get_cmap("YlGnBu")(diff_norm[mask_rest])

    return colors
python 复制代码
# Colors for the selected year's deviation from the reference mean
colors = get_colorscale_mpl(data[f"{year}_diff"])

# Values between p05 and p95 are drawn with opacity 0.6, all others with 1
opacity = np.where(
    (data[f"{year}"] >= data["p05"]) & (data[f"{year}"] <= data["p95"]), 0.6, 1
)

# Extract the arrays once: they are the same for every segment, so there is
# no need to rebuild them (and re-filter the DataFrame) inside the inner loop
mean = data["mean"].to_numpy()
year_values = data[f"{year}"].to_numpy(dtype=float)
is_above = year_values > mean

# Draw the areas above and below the mean in two separate passes
for method in ["above", "below"]:
    # Days belonging to this pass; "below" is the complement of "above"
    mask = ~is_above if method == "below" else is_above

    # Selected-year values for this pass, NaN everywhere else
    values = np.where(mask, year_values, np.nan)

    for i in range(len(data.index) - 1):
        # Copy the two points so the clamp below cannot leak into the next segment
        segment = values[i : i + 2].copy()

        # If only one end of the segment lies above the mean, pull the second
        # point onto the mean line
        if (segment[0] > mean[i]) ^ (segment[1] > mean[i + 1]):
            segment[1] = mean[i + 1]

        # Fill between the selected year and the mean for this one-day segment
        axes.fill_between(
            x=data.index[i : i + 2],
            y1=segment,
            y2=mean[i : i + 2],
            color=colors[i],
            alpha=opacity[i],
            edgecolor="none",
            zorder=8,
        )

fig

添加几个月的交替背景色

python 复制代码
# The x axis is the 0-based row index of `data` (row 0 = Jan 1), so month
# boundaries are computed as 0-based positions in a 365-day year. Hard-coded
# 1-based day-of-year starts would begin every band one day late.
month_lengths = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_starts = np.concatenate(([0], np.cumsum(month_lengths)))

# Span of every even month: from its first day up to the first day of the
# following month (or the end of the year for December)
months_with_days = [
    (int(month_starts[month - 1]), int(month_starts[month]))
    for month in range(2, 13, 2)
]

# Shade these months to get alternating background bands
for span_start, span_end in months_with_days:
    axes.axvspan(
        span_start,
        span_end,
        facecolor="#f8f8f8",
        edgecolor=None,
        alpha=0.6,
        zorder=0,
    )

fig

调整轴刻度标签

python 复制代码
# Major ticks at the start of each month stay unlabeled; minor ticks on the
# 16th carry the month name. The 16th only approximates the middle, because
# months differ in length.
axes.xaxis.set_major_locator(mdates.MonthLocator())
axes.xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=16))
axes.xaxis.set_major_formatter(ticker.NullFormatter())
axes.xaxis.set_minor_formatter(mdates.DateFormatter("%b"))

# Hide the minor tick marks themselves and center their labels
for minor_tick in axes.xaxis.get_minor_ticks():
    for tick_line in (minor_tick.tick1line, minor_tick.tick2line):
        tick_line.set_markersize(0)
    minor_tick.label1.set_horizontalalignment("center")

# Hide the first and the last minor x label
minor_labels = axes.get_xticklabels(minor=True)
if minor_labels:
    minor_labels[0].set_visible(False)
    minor_labels[-1].set_visible(False)

# Append °C to the y tick labels
ticks = axes.get_yticks()
labels = [f"{int(tick_value)}°C" for tick_value in ticks]
axes.set_yticks(ticks, labels)

fig

额外的信息

python 复制代码
# Settings shared by both annotations: a plain black connector line and
# centered black text drawn above the plot elements
arrow_style = {
    "arrowstyle": "-",
    "facecolor": "black",
    "edgecolor": "black",
    "shrinkB": 0,
}
label_style = {
    "horizontalalignment": "center",
    "verticalalignment": "center",
    "color": "black",
    "zorder": 10,
}

# Annotation for the mean line, connector pointing at the line
axes.annotate(
    f"Mean Temperature\n{reference_period[0]}-{reference_period[1]}",
    xy=(74, data["mean"].iloc[74]),
    xytext=(105, 12),
    arrowprops=dict(arrow_style),
    **label_style,
)

# Annotation for the percentile band, connector pointing into the band
axes.annotate(
    "90% of reference period\nvalues fall within the gray area",
    xy=(196, 16),
    xytext=(258, 19),
    arrowprops=dict(arrow_style),
    **label_style,
)

# Credits in the bottom-right corner of the figure
credit_text = "  ".join(
    [
        "Data: open-meteo.com, OSM",
        "License: CC by-sa-nc 4.0",
        "Graph: Jan Kühn, https://yotka.org",
    ]
)
fig.text(1, 0, credit_text, ha="right", va="bottom", fontsize=8, alpha=0.5)

fig

用plotly复现生成可交互图

python 复制代码
def get_colorscale(series: pd.Series) -> np.ndarray:
    """
    Map signed deviations to Plotly color strings.

    Positive entries are sampled from the "YlOrRd" colorscale and negative
    entries from "YlGnBu", after min-max scaling the absolute deviation to
    0-1 within each sign group. Zero and NaN entries stay white.

    Args:
        series: Deviations of the selected year from the reference mean.

    Returns:
        Object array with one color string per input value.
    """

    diff = np.asarray(series, dtype=float)

    # Sign masks; NaN compares False in both
    mask_above = diff > 0
    mask_below = diff < 0

    # Absolute deviations and their scaled counterpart (0 by default)
    magnitude = np.abs(diff)
    diff_norm = np.zeros_like(magnitude)

    # Min-max scale each sign group separately
    for mask in (mask_above, mask_below):
        if not mask.any():
            continue

        lowest = magnitude[mask].min()
        span = magnitude[mask].max() - lowest

        # A group with a single value (or all-equal values) has no range:
        # dividing would give 0/0 = NaN, which is not a valid sample point.
        # Leave such a group at 0 instead.
        if span > 0:
            diff_norm[mask] = (magnitude[mask] - lowest) / span

    # Start from white for every entry
    colors = np.full(diff.shape, "rgb(255, 255, 255)", dtype=object)

    # Sample the colorscales only for groups that actually have members
    for mask, scale_name in ((mask_above, "YlOrRd"), (mask_below, "YlGnBu")):
        if mask.any():
            colors[mask] = sample_colorscale(scale_name, diff_norm[mask])

    return colors
python 复制代码
# Title (bold) with the reference period as a smaller second line
title_html = (
    f"<b>Mean temperature in Addis Ababa, Ethiopia {year}</b><br />"
    "<sup>Compared to historical daily mean temperatures "
    f"({reference_period[0]}-{reference_period[1]})</sup>"
)

# Base layout of the figure
base_layout = go.Layout(
    template="plotly_white",
    title={"text": title_html},
    width=1000,
    height=600,
    font={"family": "Lato", "size": 14, "color": "#1f1f1f"},
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    hovermode="x",
)

# New figure carrying only the base layout
fig = go.Figure(layout=base_layout)

fig.update_layout(
    # Larger title font, anchored at the top right
    title=dict(
        font=dict(family="Lato", size=32, color="#1f1f1f"),
        x=0.98,
        y=0.93,
        xanchor="right",
        yanchor="top",
    ),
    margin=dict(b=70, l=60, r=20, t=100, pad=10),
    xaxis=dict(
        dtick="M1",  # one tick per month
        hoverformat="%e %B",  # day and month name on hover
        # Range extends a few days beyond the year on both sides
        range=[f"{year-1}-12-20", f"{year+1}-01-10"],
        showgrid=False,
        tickformat="%b",  # abbreviated month name
        ticklabelmode="period",  # center the label within its month
    ),
    yaxis=dict(
        showgrid=True,
        ticksuffix="°C",
    ),
)


"""
添加百分位区域的轨迹
"""

# Fully transparent line color so that only the fill is visible
invisible_line = "rgba(0,0,0,0)"

# Upper bound (p95) of the band; the next trace fills up to this one
band_upper = go.Scatter(
    x=data["date"],
    y=data["p95"],
    name="Percentile area upper bound (p95)",
    line_color=invisible_line,
    showlegend=False,
    hoverinfo="skip",
)

# Lower bound (p05), filled up to the previous trace
band_lower = go.Scatter(
    x=data["date"],
    y=data["p05"],
    name="Area between p05 and p95",
    fill="tonexty",
    fillcolor="#f8f8f8",
    line_color=invisible_line,
    showlegend=False,
    hoverinfo="skip",
)

fig.add_traces([band_upper, band_lower])


"""
添加仅用于显示正确 hover 信息的隐藏轨迹
"""

# Color for every day of the selected year
colors = get_colorscale(data[f"{year}_diff"])

# Opacity 0.6 for values between p05 and p95, 1 for everything else
current_year = data[f"{year}"]
within_band = (current_year >= data["p05"]) & (current_year <= data["p95"])
opacity = np.where(within_band, 0.6, 1)

# Invisible markers whose only purpose is to carry the hover information
hover_trace = go.Scatter(
    x=data["date"],
    y=current_year,
    showlegend=False,
    mode="markers",
    name="Hoverinfo current date",
    hovertemplate=("%{y:.1f} °C<extra></extra>"),
    marker=dict(
        color=colors,  # color shown on hover
        opacity=0,  # hide the markers themselves
    ),
)
fig.add_trace(hover_trace)


"""
对每一天,添加在平均值和当年值之间填充的区域
"""

# Columns used for every polygon
dates = data["date"]
means = data["mean"]
year_series = data[f"{year}"]

for i in range(len(data) - 1):
    nxt = i + 1

    # Corner points of the polygon between the mean and the selected
    # year's value, spanning this day and the next
    date_today, date_tomorrow = dates.iloc[i], dates.iloc[nxt]
    mean_today, mean_tomorrow = means.iloc[i], means.iloc[nxt]
    value_today, value_tomorrow = year_series.iloc[i], year_series.iloc[nxt]

    # If exactly one of the two days lies above the mean, end the polygon
    # on the mean line
    crosses_mean = (value_today > mean_today) != (value_tomorrow > mean_tomorrow)
    if crosses_mean:
        value_tomorrow = mean_tomorrow

    polygon = go.Scatter(
        name=f"Daily value {date_today.strftime('%d.%m.%Y')}",
        x=[date_today, date_today, date_tomorrow, date_tomorrow],
        y=[mean_today, value_today, value_tomorrow, mean_tomorrow],
        line_width=0,
        fill="toself",
        fillcolor=colors[i],
        showlegend=False,
        mode="lines",
        opacity=opacity[i],
        # Keep this trace out of the hover information
        hoverinfo="skip",
    )
    fig.add_trace(polygon)


"""
添加显示平均值、p05 和 p95 线的轨迹
"""

# Reference period as shown in the hover labels
period_label = f"{reference_period[0]}-{reference_period[1]}"

# Line styles: thin dotted for the percentiles, thick solid for the mean
dotted_line = {"color": "#000", "width": 1, "dash": "dot"}
solid_line = {"color": "#000", "width": 2.5}

# (data column, trace name, line style, hover label), in drawing order
line_specs = [
    ("p95", "P95", dotted_line, "95th percentile"),
    ("mean", "Mean", solid_line, "Mean"),
    ("p05", "P05", dotted_line, "5th percentile"),
]

fig.add_traces(
    [
        go.Scatter(
            x=data["date"],
            y=data[column],
            name=trace_name,
            line=dict(line_style),
            showlegend=False,
            hovertemplate=(
                "%{y:.1f} °C" + f"<extra><b>{hover_label} {period_label}</b></extra>"
            ),
        )
        for column, trace_name, line_style, hover_label in line_specs
    ]
)


"""
每个月添加交替的背景色
"""

# Start and end of every month. The end is the first day of the following
# month (00:00), so the rectangle covers the month's last day as well;
# ending at 00:00 of the last day itself would leave that day unshaded.
months_with_days = {
    month: (
        dt.datetime(year, month, 1),
        # December rolls over to January 1 of the next year
        dt.datetime(year + month // 12, month % 12 + 1, 1),
    )
    for month in range(1, 13)
}

# One background rectangle per month, spanning the full plot height
for month, days in months_with_days.items():
    # Even months get a slightly darker background than odd months
    bg_color = "#fcfcfc" if (month % 2) == 0 else "#fff"

    fig.add_shape(
        type="rect",
        yref="paper",
        x0=days[0],
        x1=days[1],
        y0=0,
        y1=1,
        fillcolor=bg_color,
        layer="below",
        line_width=0,
    )


"""
为平均线添加注释
"""

# Arrow head: on the mean line at March 15
arrow_x = dt.datetime(year, 3, 15)
arrow_y = data.loc[data["date"] == arrow_x, "mean"].to_numpy()[0]

# Text: two degrees below the p05 line at April 15
text_x = dt.datetime(year, 4, 15)
text_y = data.loc[data["date"] == text_x, "p05"].to_numpy()[0] - 2

mean_label = f"Mean Temperature<br />{reference_period[0]}-{reference_period[1]}"

fig.add_annotation(
    text=mean_label,
    name="Reference period mean",
    # Arrow head position in data coordinates
    x=arrow_x,
    y=arrow_y,
    xref="x",
    yref="y",
    # Text position in data coordinates
    ax=text_x,
    ay=text_y,
    axref="x",
    ayref="y",
    xanchor="center",
    yanchor="middle",
    showarrow=True,
    arrowwidth=2,
    arrowcolor="#000",
)


"""
为百分位区域添加注释
"""

# Arrow head: half a degree below the p95 line at August 1, i.e. inside the band
arrow_x = dt.datetime(year, 8, 1)
arrow_y = data[data["date"] == arrow_x]["p95"].values[0] - 0.5

# Text: two degrees above the p95 line at September 15
text_x = dt.datetime(year, 9, 15)
text_y = data[data["date"] == text_x]["p95"].values[0] + 2

fig.add_annotation(
    x=arrow_x,
    y=arrow_y,
    xref="x",
    yref="y",
    ax=text_x,
    ay=text_y,
    axref="x",
    ayref="y",
    text="90% of reference period<br />values fall within the gray area",
    showarrow=True,
    xanchor="center",
    yanchor="middle",
    arrowwidth=2,
    arrowcolor="#000",
    # Own name: the mean-line annotation already uses "Reference period mean",
    # and a duplicated name makes the two indistinguishable by name
    name="Reference period percentile area",
)


"""
为百分位线添加注释
"""

# Label each percentile line just right of its last data point
last_date = data["date"].iloc[-1]

for percentile in ["p05", "p95"]:
    line_end_value = data[percentile].iloc[-1]
    fig.add_annotation(
        x=last_date,
        y=line_end_value,
        text=percentile.upper(),
        showarrow=False,
        xanchor="left",
        yanchor="middle",
    )


"""
为数据来源添加注释
"""

# Credits line, placed below the plot area in paper coordinates
credit_html = "".join(
    [
        "<b>Data:</b> open-meteo.com, OSM, ",
        "<b>License:</b> CC by-sa-nc 4.0  ",
        "<b>Graph:</b> Jan Kühn, https://yotka.org",
    ]
)

fig.add_annotation(
    text=credit_html,
    name="数据来源",
    xref="paper",
    yref="paper",
    x=1,
    y=-0.14,
    xanchor="right",
    showarrow=False,
    opacity=0.5,
    font_size=12,
)

参考:Area over flexible baseline chart

共勉~

相关推荐
winfredzhang1 小时前
Python实战:手把手教你写一个带界面的“照片按日期归档与清理”工具
python·复制·日期·回收站·媒体文件备份
程序员三藏4 小时前
Jmeter自动化测试
自动化测试·软件测试·python·测试工具·jmeter·测试用例·接口测试
吴佳浩6 小时前
Langchain 浅出
python·langchain·llm
smj2302_796826526 小时前
解决leetcode第3753题范围内总波动值II
python·算法·leetcode
mortimer6 小时前
破局视频翻译【最后一公里】––从语音克隆到口型对齐的完整工程思路
python·github·aigc
门框研究员9 小时前
解锁Python的强大能力:深入理解描述符
python
子不语18010 小时前
Python——函数
开发语言·python
daidaidaiyu10 小时前
一文入门 LangChain 开发
python·ai
JJ1M811 小时前
用 Python 快速搭建一个支持 HTTPS、CORS 和断点续传的文件服务器
服务器·python·https