"""Fit an OLS regression of biomass on a vegetation index and report metrics.

The script reads a table from an Excel file, splits it 70/30 into training
and validation subsets, fits ``Biomass ~ const + RVI`` with statsmodels,
prints the fitted equation, and reports R-squared, RMSE and nRMSE (RMSE
divided by the mean of the observed values) for both subsets.
"""
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

pd.set_option('display.max_rows', None)  # print every row, never truncate

# Read the data from an Excel file. The sheet is expected to contain the
# vegetation-index columns plus a 'Biomass' column; adjust the path and the
# column names below to match your own data.
# NOTE(review): the path literal in the original was truncated (unterminated
# string). The value below is the path quoted in the original comment --
# confirm it is the intended file.
data = pd.read_excel(r'D:\桌面\修改生物量后建模-原始植被指数\2021.xls')

# Independent variable(s) and dependent variable.
X = data[['RVI']]  # list more column names here for a multivariate model
y = data['Biomass']

# Hold out 30 % of the rows for validation; fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Add the intercept column. has_constant='add' forces the column to be added
# even when a subset's predictor happens to have no variance; the default
# ('skip') would silently leave it out and the train/validation design
# matrices would no longer have the same columns.
X_train = sm.add_constant(X_train, has_constant='add')
X_test = sm.add_constant(X_test, has_constant='add')

# Build and fit the ordinary-least-squares model on the training subset.
model = sm.OLS(y_train, X_train)
results = model.fit()
print(results.summary())

# Model parameters: the first entry is the intercept ('const'), the rest are
# the slopes. The parameters are labelled by column name, so use explicit
# positional access (.iloc) instead of integer keys.
coefficients = results.params
intercept = coefficients.iloc[0]

# Render the fitted equation as "y = b0 + b1 * X1 + b2 * X2 ...".
terms = [f"{intercept:.4f}"]
terms += [
    f"{coef:.4f} * X{i}"
    for i, coef in enumerate(coefficients.iloc[1:], start=1)
]
equation = "y = " + " + ".join(terms)
print("Fitted Equation:", equation)

# Predict separately on the training and the validation subsets.
y_train_pred = results.predict(X_train)
y_test_pred = results.predict(X_test)

# Training-subset metrics: R-squared, RMSE and nRMSE (RMSE / mean observed).
r2_train = r2_score(y_train, y_train_pred)
rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
nrmse_train = rmse_train / np.mean(y_train)

# Validation-subset metrics, computed the same way.
r2_test = r2_score(y_test, y_test_pred)
rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))
nrmse_test = rmse_test / np.mean(y_test)

# Report observed values, predictions and metrics.
print("y训练集真实值", y_train)
print("y_pred训练集预测值:", y_train_pred)
print("y验证集真实值", y_test)
print("y_pred验证集预测值:", y_test_pred)
print("训练集R-squared:", r2_train)
print("训练集RMSE:", rmse_train)
print("训练集nRMSE:", nrmse_train)
print("验证集R-squared:", r2_test)
print("验证集RMSE:", rmse_test)
print("验证集nRMSE:", nrmse_test)
多元线性回归代码
本文介绍了如何使用Python中的pandas、statsmodels和sklearn库读取Excel数据、划分训练集和验证集,建立多元线性回归模型,并在训练集和验证集上进行预测,计算R方、RMSE和nRMSE以评估模型性能。
摘要由CSDN通过智能技术生成