下面是一个完整的Python程序,它根据同一个Excel文件中两个sheet(数据源表和定义表)里的历史数据来预测未来的G列数字。程序采用多模型验证:分别训练线性回归和随机森林回归,并选用测试集均方误差最小的模型;自我学习和动态参数调整目前尚未在代码中实现,可在此基础上进一步扩展。最终会输出12个可能的数字范围及其出现概率。
python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import random
def load_data(file_path, source_sheet, definition_sheet):
    """Read the source and definition sheets from a single Excel workbook.

    Args:
        file_path: Path of the workbook, handed to ``pd.ExcelFile``.
        source_sheet: Sheet holding the historical data, handed to
            ``ExcelFile.parse``.
        definition_sheet: Sheet holding the definitions, handed to
            ``ExcelFile.parse``.

    Returns:
        ``(source_df, definition_df)`` on success, or ``(None, None)`` when
        the workbook cannot be opened or either sheet cannot be parsed.
    """
    try:
        # The context manager closes the workbook handle even when parsing
        # one of the sheets fails, so the file is never left open.
        with pd.ExcelFile(file_path) as excel_file:
            source_df = excel_file.parse(source_sheet)
            definition_df = excel_file.parse(definition_sheet)
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and let the
        # caller decide what to do with the (None, None) result.
        print(f"加载数据时出错: {e}")
        return None, None
    return source_df, definition_df
def preprocess_data(source_df):
    """Split the source table into a feature frame and a target series.

    The column labelled ``'G'`` is the prediction target; every other column
    is kept as a feature. The input frame itself is not modified.

    Args:
        source_df: DataFrame that contains a ``'G'`` column.

    Returns:
        ``(X, y)`` where ``X`` is ``source_df`` without ``'G'`` and ``y`` is
        the ``'G'`` column.
    """
    features = source_df.drop(columns='G')
    target = source_df['G']
    return features, target
def train_and_validate_models(X, y):
    """Fit every candidate model and return the one with the lowest test MSE.

    The data is split once (80% train / 20% test, ``random_state=42``) and
    each candidate is trained and scored on that same split. The mean squared
    error of each model is printed.

    Args:
        X: Feature table accepted by ``train_test_split`` and ``fit``.
        y: Target values aligned with ``X``.

    Returns:
        The fitted model whose test-set MSE is smallest, or ``None`` if no
        model produced an MSE below infinity (for example a NaN score).
    """
    # The split does not depend on the model, so compute it once instead of
    # repeating identical work on every loop iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    models = [
        ('线性回归', LinearRegression()),
        # Seeded like the split above; an unseeded forest could change which
        # model wins from one run to the next on the same data.
        ('随机森林回归', RandomForestRegressor(random_state=42)),
    ]
    best_model = None
    best_mse = float('inf')
    for name, model in models:
        model.fit(X_train, y_train)
        mse = mean_squared_error(y_test, model.predict(X_test))
        print(f'{name} 均方误差: {mse}')
        if mse < best_mse:
            best_mse, best_model = mse, model
    return best_model
def predict_future_values(model, X):
    """Return the predictions of ``model`` for the feature rows in ``X``.

    Args:
        model: Any object exposing a ``predict`` method.
        X: Feature rows passed unchanged to ``model.predict``.

    Returns:
        Whatever ``model.predict(X)`` returns.
    """
    return model.predict(X)
def generate_probabilities(predictions, half_width=1):
    """Turn point predictions into labelled ranges with a uniform probability.

    Each prediction ``p`` becomes the label ``'{p - half_width}-{p + half_width}'``
    and every range is assigned the same probability ``1 / len(predictions)``.

    Args:
        predictions: Sized iterable of numeric predictions.
        half_width: Distance from a prediction to each end of its range.
            Defaults to 1.

    Returns:
        A list of ``(range_label, probability)`` tuples in input order; an
        empty list when ``predictions`` is empty.
    """
    count = len(predictions)
    if count == 0:
        # Nothing to distribute probability over; also avoids dividing by zero.
        return []
    probability = 1 / count
    return [
        (f'{pred - half_width}-{pred + half_width}', probability)
        for pred in predictions
    ]
def main(file_path='your_excel_file.xlsx', source_sheet='Sheet1',
         definition_sheet='Sheet2', n_future=12):
    """Load the workbook, pick the best model and print predicted ranges.

    Args:
        file_path: Excel workbook to read.
        source_sheet: Sheet with the historical data (must contain column
            ``'G'``).
        definition_sheet: Sheet with the definitions; it is loaded and
            checked but not otherwise used here.
        n_future: Number of future rows to predict.

    Returns:
        None. Returns early, printing nothing further, when loading fails.
    """
    source_df, definition_df = load_data(file_path, source_sheet, definition_sheet)
    if source_df is None or definition_df is None:
        return
    X, y = preprocess_data(source_df)
    best_model = train_and_validate_models(X, y)
    # Placeholder features: uniform random values in [0, 1) stand in for real
    # future inputs. They are wrapped in a DataFrame carrying the training
    # column labels so the model is queried with the same feature names it
    # was fitted with.
    future_X = pd.DataFrame(
        np.random.rand(n_future, X.shape[1]), columns=X.columns
    )
    future_predictions = predict_future_values(best_model, future_X)
    probabilities = generate_probabilities(future_predictions)
    for num_range, prob in probabilities:
        print(f'数字范围: {num_range}, 出现概率: {prob * 100:.2f}%')
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
部署步骤:
- 安装依赖库:要保证你的Python环境中安装了 pandas、numpy 和 scikit-learn 库;pandas 读取 .xlsx 文件还需要 openpyxl。可以使用下面的命令进行安装:
bash
pip install pandas numpy scikit-learn openpyxl
- 准备Excel文件:把数据源和定义表分别放在同一个Excel文件的两个不同sheet中(代码默认数据源为 Sheet1、定义表为 Sheet2),并且把文件命名为 your_excel_file.xlsx,同时要确保数据源表中有表头为 G 的列。
- 运行程序:把上述代码保存为 excel_forecast.py 文件,然后在命令行中执行以下命令来运行程序:
bash
python excel_forecast.py
注意事项:
- 代码里的随机特征值仅作示例,你需要依据实际的定义表来生成合适的未来特征值。
- 预测准确率受数据质量、模型选择和参数调整等多种因素的影响,你可以根据实际情况添加更多模型或者对模型参数进行调优。