# Encoder-decoder (seq2seq) RNN training script.
# NOTE: removed stray web-paste artifacts (a bare "python" language tag and a
# "复制代码" ("copy code") button caption) that would raise NameError at import.
from __future__ import print_function  # keep print() compatible across Python 2 and 3
# Standard-library imports
import os.path  # file/directory path handling
from os import path  # direct alias so path.* calls hit os.path functions
import sys  # command-line arguments
import math  # math helpers
import numpy as np  # numerical arrays
import pandas as pd  # CSV loading
# Deep-learning imports
import tensorflow as tf  # TensorFlow backend
from keras import backend as K  # backend ops used by the custom loss
from keras.models import Model  # functional-API model class
from keras.layers import LSTM, GRU, TimeDistributed, Input, Dense, RepeatVector  # network layers
from keras.callbacks import CSVLogger, EarlyStopping, TerminateOnNaN  # training callbacks
from keras import regularizers  # L2 weight regularisation
from keras.optimizers import Adam  # optimiser
# Utility imports
from functools import partial, update_wrapper  # partial application that keeps function metadata
def wrapped_partial(func, *args, **kwargs):
    """Partially apply *func* while preserving its metadata.

    A plain functools.partial object has no __name__/__doc__ of its own;
    update_wrapper copies them from *func* so the partial can stand in for
    it (e.g. as a named Keras loss function).
    """
    bound = partial(func, *args, **kwargs)
    # update_wrapper mutates `bound` in place and returns it.
    return update_wrapper(bound, func)
def weighted_mse(y_true, y_pred, weights):
    """Weighted mean-squared-error loss.

    Computes K.mean(weights * (y_true - y_pred)**2) over the last axis,
    so each element's squared error is scaled by its entry in *weights*.
    """
    squared_error = K.square(y_true - y_pred)
    return K.mean(weights * squared_error, axis=-1)
# --- GPU selection ----------------------------------------------------------
# Read the GPU index from the command line (13th argument from the end) and,
# when the index is < 3, pin this process to that single CUDA device.
# NOTE(review): gpu >= 3 presumably means "leave device visibility alone" —
# confirm against the launcher script.
import os
gpu = int(sys.argv[-13])
if gpu < 3:
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]= "{}".format(gpu)
# Log which devices TensorFlow can actually see after the masking above.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
# --- Command-line hyper-parameters ------------------------------------------
# Arguments are read positionally from the END of sys.argv (the GPU index at
# argv[-13] is consumed above).  Values left as strings here are converted
# with int() at their point of use.
imp = sys.argv[-1]                 # run/imputation identifier, used in data & result file names
T = sys.argv[-2]                   # total sequence length (becomes seq_len in test_model)
t0 = sys.argv[-3]                  # encoder window is t0 - 1 steps
dataname = sys.argv[-4]            # dataset name (data/ and results/ paths)
nb_batches = sys.argv[-5]          # batch size for fit/predict
nb_epochs = sys.argv[-6]           # number of training epochs
lr = float(sys.argv[-7])           # Adam learning rate
penalty = float(sys.argv[-8])      # L2 regularisation strength on the output Dense layer
dr = float(sys.argv[-9])           # dropout / recurrent-dropout rate of the first LSTM
patience = sys.argv[-10]           # early-stopping patience (epochs)
n_hidden = int(sys.argv[-11])      # hidden units per recurrent layer
hidden_activation = sys.argv[-12]  # activation for the recurrent layers
# --- Output directory --------------------------------------------------------
# All training logs and prediction CSVs for this dataset are written here.
# Create it with EAFP instead of exists()+makedirs(): the original
# check-then-create pair had a TOCTOU race when several jobs share a dataset.
# (try/except rather than exist_ok=True keeps Python 2 compatibility.)
results_directory = 'results/encoder-decoder/{}'.format(dataname)
try:
    os.makedirs(results_directory)
except OSError:
    # Re-raise anything other than "already exists" (e.g. permission denied).
    if not os.path.isdir(results_directory):
        raise
def create_model(n_pre, n_post, nb_features, output_dim, lr, penalty, dr, n_hidden, hidden_activation):
    """
    creates, compiles and returns a RNN model

    Architecture: two stacked LSTM encoder layers -> RepeatVector bridge ->
    GRU decoder -> time-distributed linear Dense head.  A second input of the
    same shape carries per-element weights that the loss function consumes.

    @param nb_features: the number of features in the model
    """
    # Two inputs: the observed sequence and a matching weight tensor.
    main_input = Input(shape=(n_pre, nb_features), name="Inputs")
    loss_weights = Input(shape=(n_pre, nb_features), name="Weights")

    # Encoder: the first LSTM (with dropout) returns the whole sequence; the
    # second compresses it down to its final hidden state.
    encoded_seq = LSTM(n_hidden, dropout=dr, recurrent_dropout=dr, activation=hidden_activation, return_sequences=True, name='LSTM_1')(main_input)  # Encoder
    encoding = LSTM(n_hidden, activation=hidden_activation, return_sequences=False, name='LSTM_2')(encoded_seq)  # Encoder

    # Bridge and decoder: repeat the encoding once per output step, decode
    # with a GRU, and map each step to output_dim values with a linear head.
    bridged = RepeatVector(n_post, name='Repeat')(encoding)  # repeats the last LSTM output
    decoded = GRU(n_hidden, activation=hidden_activation, return_sequences=True, name='Decoder')(bridged)  # Decoder
    predictions = TimeDistributed(Dense(output_dim, activation='linear', kernel_regularizer=regularizers.l2(penalty), name='Dense'), name='Outputs')(decoded)

    model = Model([main_input, loss_weights], predictions)

    # Bind the weight tensor into the loss while keeping weighted_mse's
    # metadata (needed so Keras can identify the custom loss by name).
    weighted_loss = wrapped_partial(weighted_mse, weights=loss_weights)
    model.compile(optimizer=Adam(lr=lr), loss=weighted_loss)

    print(model.summary())
    return model
def train_model(model, dataX, dataY, weights, nb_epoches, nb_batches):
    """Fit *model* on ([dataX, weights] -> dataY) and return the History.

    @param model: compiled Keras model with two inputs (data, weights)
    @param dataX/dataY: input windows and targets
    @param weights: per-element loss weights, same shape as dataX
    @param nb_epoches: number of epochs; nb_batches: batch size
    @return: the keras.callbacks.History from model.fit
    Relies on module globals patience/dataname/imp/... for callback config.
    """
    # Prepare model checkpoints and callbacks:
    # stop on stalled val_loss and restore the best weights seen so far.
    stopping = EarlyStopping(monitor='val_loss', patience=int(patience), min_delta=0, verbose=1, mode='min', restore_best_weights=True)
    # Per-epoch log; filename encodes the hyper-parameter configuration.
    csv_logger = CSVLogger('results/encoder-decoder/{}/training_log_{}_{}_{}_{}_{}_{}_{}_{}.csv'.format(dataname,dataname,imp,hidden_activation,n_hidden,patience,dr,penalty,nb_batches), separator=',', append=False)
    # Abort training if the loss goes NaN.
    terminate = TerminateOnNaN()
    # Model fit: last 20% of the windows are held out for validation.
    history = model.fit(x=[dataX,weights],
                        y=dataY,
                        batch_size=nb_batches,
                        verbose=1,
                        epochs=nb_epoches,
                        callbacks=[stopping,csv_logger,terminate],
                        validation_split=0.2)
    # Bug fix: history was previously computed but never returned, so callers
    # could not inspect the loss curves.  Returning it is backward compatible.
    return history
def _sliding_windows(arr, n_windows, n_pre):
    """Stack *n_windows* overlapping input windows of length *n_pre* from *arr*."""
    return np.array([arr[i:i + n_pre] for i in range(n_windows)])


def test_model():
    """Build, train, and evaluate the encoder-decoder network.

    Loads the control series (x, weights wx) and treated series (y, weights
    wy) from data/, trains on the control windows, then writes model
    predictions for both the training (control) and test (treated) windows
    to results/encoder-decoder/<dataname>/.
    Uses the module-level CLI globals (dataname, imp, T, t0, ...).
    """
    n_post = 1             # predict a single step ahead
    n_pre = int(t0) - 1    # encoder input window length
    seq_len = int(T)
    n_windows = seq_len - n_pre - n_post  # number of sliding windows

    # ---- training (control) data -------------------------------------------
    wx = np.array(pd.read_csv("data/{}-wx-{}.csv".format(dataname,imp)))
    print('raw wx shape', wx.shape)
    wXC = _sliding_windows(wx, n_windows, n_pre)
    print('wXC shape:', wXC.shape)

    x = np.array(pd.read_csv("data/{}-x-{}.csv".format(dataname,imp)))
    print('raw x shape', x.shape)
    dataXC = _sliding_windows(x, n_windows, n_pre)
    # Target: the n_post steps immediately after each input window.
    dataYC = np.array([x[i + n_pre:i + n_pre + n_post] for i in range(n_windows)])
    print('dataXC shape:', dataXC.shape)
    print('dataYC shape:', dataYC.shape)

    nb_features = dataXC.shape[2]
    output_dim = dataYC.shape[2]

    # create and fit the encoder-decoder network
    print('creating model...')
    model = create_model(n_pre, n_post, nb_features, output_dim, lr, penalty, dr, n_hidden, hidden_activation)
    train_model(model, dataXC, dataYC, wXC, int(nb_epochs), int(nb_batches))

    # ---- predictions on the full training set ------------------------------
    print('Generate predictions on full training set')
    preds_train = model.predict([dataXC,wXC], batch_size=int(nb_batches), verbose=1)
    print('predictions shape =', preds_train.shape)
    preds_train = np.squeeze(preds_train)
    print('predictions shape (squeezed)=', preds_train.shape)
    # NOTE(review): the format string below has 8 placeholders but 9 arguments —
    # nb_batches is silently dropped from the filename.  Kept as-is so file
    # names stay stable for downstream consumers; confirm whether intended.
    print('Saving to results/encoder-decoder/{}/encoder-decoder-{}-train-{}-{}-{}-{}-{}-{}.csv'.format(dataname,dataname,imp,hidden_activation,n_hidden,patience,dr,penalty,nb_batches))
    np.savetxt("results/encoder-decoder/{}/encoder-decoder-{}-train-{}-{}-{}-{}-{}-{}.csv".format(dataname,dataname,imp,hidden_activation,n_hidden,patience,dr,penalty,nb_batches), preds_train, delimiter=",")

    # ---- predictions on the test (treated) set -----------------------------
    print('Generate predictions on test set')
    wy = np.array(pd.read_csv("data/{}-wy-{}.csv".format(dataname,imp)))
    print('raw wy shape', wy.shape)
    wXT = _sliding_windows(wy, n_windows, n_pre)  # weights for outputs
    print('wXT shape:', wXT.shape)

    y = np.array(pd.read_csv("data/{}-y-{}.csv".format(dataname,imp)))
    print('raw y shape', y.shape)
    dataXT = _sliding_windows(y, n_windows, n_pre)  # treated is input
    print('dataXT shape:', dataXT.shape)

    preds_test = model.predict([dataXT, wXT], batch_size=int(nb_batches), verbose=1)
    print('predictions shape =', preds_test.shape)
    preds_test = np.squeeze(preds_test)
    print('predictions shape (squeezed)=', preds_test.shape)
    print('Saving to results/encoder-decoder/{}/encoder-decoder-{}-test-{}-{}-{}-{}-{}-{}.csv'.format(dataname,dataname,imp,hidden_activation,n_hidden,patience,dr,penalty,nb_batches))
    np.savetxt("results/encoder-decoder/{}/encoder-decoder-{}-test-{}-{}-{}-{}-{}-{}.csv".format(dataname,dataname,imp,hidden_activation,n_hidden,patience,dr,penalty,nb_batches), preds_test, delimiter=",")
def main():
    """Script entry point: run the full train-and-predict pipeline."""
    test_model()
    # Historical convention of this script: signal success with 1.
    return 1


if __name__ == "__main__":
    main()