2023高教社杯全国大学生数学建模竞赛E题 黄河水沙监测数据分析 代码解析
由于一些不可抗力,下面仅展示第一问的部分 Python 代码,其余代码见文末。
首先导入包:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pmdarima as pm
from sklearn.linear_model import LinearRegression
import seaborn as sns
然后依次读取 2017–2021 各年份的工作表并拼接到 table 中;每次拼接前需要先移除 table 的最后一条数据,因为该条数据重复了:
# Build one DataFrame from the workbook: the first read uses pandas' default
# sheet, then the sheets named "2017" .. "2021" are appended in order.
EXCEL_PATH = r"./data/附件1.xlsx"

# Open the workbook a single time; passing the path to pd.read_excel for every
# sheet would re-open and re-parse the whole file on each call.
with pd.ExcelFile(EXCEL_PATH) as workbook:
    table = pd.read_excel(workbook)
    for year in range(2017, 2017 + 5):
        # Remove the last record accumulated so far (it is a duplicate) before
        # appending the next sheet. Positional slicing is used so the removal
        # does not depend on the current index labels.
        table = table.iloc[:-1]
        temp = pd.read_excel(workbook, sheet_name=str(year))
        # ignore_index=True renumbers the rows 0..n-1, which is what the
        # separate reset_index(drop=True) call used to do.
        table = pd.concat([table, temp], ignore_index=True)

# Bare expression: shows the combined table when run as a notebook cell.
table
如下为部分数据预处理代码:
# Data preprocessing: assemble one "<col0>-<col1>-<col2> <col3>" string per row
# of `table` (columns are read by position and used as year, month, day and
# time of day), then parse the strings into datetimes.


def _zero_pad(number):
    """Return ``number`` as text, prefixed with "0" when it is below 10."""
    prefix = "0" if number < 10 else ""
    return prefix + str(number)


time_list = [
    str(int(year)) + "-" + _zero_pad(int(month)) + "-" + _zero_pad(int(day)) + " " + str(hour)
    for year, month, day, hour in zip(
        table.iloc[:, 0], table.iloc[:, 1], table.iloc[:, 2], table.iloc[:, 3]
    )
]

# Single-column frame named "时刻" holding the parsed timestamps.
temp = pd.to_datetime(pd.Series(time_list, name="时刻")).to_frame()
第一问的可视化图部分代码:
# Create a 3-row figure and draw one line chart per series against data.index.
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 10))

# One entry per panel, top to bottom:
# (column in `data`, line label, line colour, y-axis label, panel title)
panels = [
    ('水位(m)', 'Water Level', 'blue',
     'Water Level (m)', 'Water Level Over Time'),
    ('流量(m3/s)', 'Flow Rate', 'green',
     'Flow Rate (m^3/s)', 'Flow Rate Over Time'),
    # The trailing space inside this column name is kept exactly as written.
    ('含沙量(kg/m3) ', 'Sediment Content', 'red',
     'Sediment Content', 'Sediment Content Over Time'),
]

for ax, (column, label, color, ylabel, title) in zip(axes, panels):
    ax.plot(data.index, data[column], label=label, color=color)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

# Only the bottom panel gets an x-axis label.
axes[-1].set_xlabel('Time')
得到:
有关思路、相关代码、讲解视频、参考文献等相关内容可以点击下方群名片哦!