其数据输入分别为csv,excel格式。
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import random
from pylab import xticks, yticks
from sklearn.metrics import mean_absolute_error
# # 从 CSV 文件加载特征数据
# feature_data = pd.read_csv('your_feature_data.csv')
# X_train = torch.tensor(feature_data.values, dtype=torch.float32)
#
# # 从 Excel 表格加载目标变量数据
# target_data = pd.read_excel('your_target_data.xlsx')
# y_train = torch.tensor(target_data['target_column'].values, dtype=torch.float32)
def plot_curve(X):
    """Plot every row of ``X`` as one curve and display the figure.

    Args:
        X: 2-D array exposing ``.shape`` and ``.T``. Each row is drawn
            against the column positions ``0 .. X.shape[1] - 1``.

    The y-axis is fixed to the range [0, 1] before plotting.
    """
    wavelength = list(range(X.shape[1]))
    plt.figure(figsize=(8, 5))
    ax = plt.gca()
    # set_ylim only takes (bottom, top) positionally. The former third
    # argument (0.1) was bound to `emit`, not to a tick step, and is
    # rejected by Matplotlib releases where `emit` is keyword-only.
    ax.set_ylim(0, 1)
    with plt.style.context('ggplot'):
        plt.plot(wavelength, X.T)
        plt.xlabel('Wavelength')
        plt.ylabel('Ref Value')
        plt.show()
def read_y_csv_data(y_xlsx_path, property, row_start=1, row_stop=281):
    """Read a window of one column from an Excel workbook.

    Despite the ``csv`` in its name, the file is read with
    ``pandas.read_excel``.

    Args:
        y_xlsx_path: Path (or any ``io`` value accepted by
            ``pandas.read_excel``) of the workbook.
        property: Label of the column to extract. The name shadows the
            builtin but is kept so keyword callers keep working.
        row_start: Position of the first value to keep (inclusive).
            Defaults to 1, the previously hard-coded start.
            NOTE(review): this skips the first data row - confirm that is
            intended.
        row_stop: Position one past the last value to keep. Defaults to
            281, the previously hard-coded stop. Normal slice semantics
            apply, so a stop beyond the end is clamped.

    Returns:
        numpy.ndarray holding the selected values.
    """
    data = pd.read_excel(io=y_xlsx_path)
    selected = data.loc[:, property].values[row_start:row_stop]
    # Going through list() keeps the dtype inference of the former
    # element-by-element copy while dropping the manual loop.
    return np.array(list(selected))
"""获取数据"""
def get_internal_data(x_csv_path, y_xlsx_path, test_size=0.2, property=None,
                      random_state=None):
    """Load features and one target column, split them, and standardize.

    Args:
        x_csv_path: CSV file with the feature matrix; the first line is
            treated as the header (``header=0``).
        y_xlsx_path: Excel workbook handed to ``read_y_csv_data``.
        test_size: Forwarded to ``train_test_split``.
        property: Column label forwarded to ``read_y_csv_data``.
        random_state: Forwarded to ``train_test_split``. The default
            ``None`` keeps the previous unseeded shuffle; pass an int for a
            reproducible split.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y)``. The feature arrays
        are standardized with a ``StandardScaler`` fitted on the training
        split only; the targets are returned unscaled.

    Side effects:
        Prints the array shapes and shows the curves via ``plot_curve``.
    """
    X = pd.read_csv(x_csv_path, header=0).values
    print(f"数据规模:{X.shape}")
    # Every row and column of the CSV is used as-is. The earlier
    # all-rows index followed by reshape(-1, 176) selected nothing and only
    # worked when the file had exactly 176 feature columns.
    y = read_y_csv_data(y_xlsx_path=y_xlsx_path, property=property)
    print(f"选取后样本: {X.shape}")
    print(y.shape)
    plot_curve(X)
    train_x, test_x, train_y, test_y = train_test_split(
        X, y, test_size=test_size, random_state=random_state, shuffle=True)
    # Fit the scaler on the training split only so that no statistics of
    # the test split leak into training.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)
    return train_x, train_y, test_x, test_y
# Feature CSV used for this run.
# NOTE(review): the original comments described this path as the "branch"
# image data and the commented-out path below as the "needle-leaf" image
# data, but the file names read the other way round (zhenye / zhitiao) -
# confirm which dataset is intended.
csv_path = './select_zhenye_271.csv'
# csv_path = './zzs_data_csv_all/select_zhitiao.csv'
y_path = 'zzs_select_271.xlsx'
# Column labels of the target workbook, in the order indexed by `shengli`.
propertys = ["水势/bar", "水势/MPa", "鲜重/g", "干重/g",
             "叶片含水量(相对含水量):(鲜重-干重)/鲜重",
             "蒸腾速率E", "光合速率A", "胞间CO2浓度Ci", "气孔导度gsw",
             "瞬时水分利用效率", "內禀水分利用效率"]
# Index legend for `propertys`:
#   0  water potential (bar)        1  water potential (MPa)
#   2  fresh weight (g)             3  dry weight (g)
#   4  leaf water content, (fresh weight - dry weight) / fresh weight
#   5  transpiration rate E         6  photosynthetic rate A
#   7  intercellular CO2 concentration Ci
#   8  stomatal conductance gsw
#   9  instantaneous water-use efficiency
#   10 intrinsic water-use efficiency

# Index of the target property to model.
shengli = 4
train_x, train_y, test_x, test_y = get_internal_data(csv_path,
                                                     y_path,
                                                     test_size=0.2,
                                                     property=propertys[shengli])
print(train_x.shape)
print(test_x.shape)
# Convert the NumPy arrays to float32 PyTorch tensors.
X_train = torch.from_numpy(train_x).float()
y_train = torch.from_numpy(train_y).float()
X_test = torch.from_numpy(test_x).float()
y_test = torch.from_numpy(test_y).float()
print('tensor_X_trian:', X_train.shape)
# This line reports the target tensor; it used to reuse the feature label.
print('tensor_y_train:', y_train.shape)
# LS-SVM model definition
class LSSVM(nn.Module):
    """Single linear layer mapping ``input_dim`` features to one output.

    The loss function (mean squared error) is stored on the module as
    ``criterion`` so callers can use ``model.criterion(pred, target)``.
    """

    def __init__(self, input_dim):
        """Create the linear layer and the MSE loss.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()
        # Linear model with a single output unit.
        self.linear = nn.Linear(in_features=input_dim, out_features=1)
        # Mean squared error loss.
        self.criterion = nn.MSELoss()

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        prediction = self.linear(x)
        return prediction
# Build the model and its optimizer (plain stochastic gradient descent).
input_dim = X_train.shape[1]
model = LSSVM(input_dim)
optimizer = optim.SGD(model.parameters(), lr=0.005)

# Loss value recorded after every epoch, for the plot below.
train_losses = []

# The model's linear layer has one output unit, so its predictions have a
# trailing dimension of size 1. y_train is presumably 1-D (one value per
# sample); without this reshape MSELoss would broadcast (N, 1) against (N,)
# into an (N, N) comparison and optimize the wrong objective.
targets = y_train.reshape(-1, 1)

# Full-batch training.
num_epochs = 300
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = model.criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    train_losses.append(loss.item())

# Visualize the training loss.
plt.plot(range(num_epochs), train_losses)
plt.xlabel('Epochs')
plt.ylabel('Training Loss')
plt.title('Training Loss Over Epochs')
plt.show()
# Evaluation. X_train / X_test are already tensors at this point, so no
# further loading or conversion is needed.
# Inference does not need gradients; no_grad avoids building the autograd
# graph and lets the outputs be converted to NumPy directly.
with torch.no_grad():
    train_predictions = model(X_train).numpy()
    test_predictions = model(X_test).numpy()

# R2 score and mean absolute error on the training data.
train_r2 = r2_score(y_train.numpy(), train_predictions)
train_mae = mean_absolute_error(y_train.numpy(), train_predictions)

# R2 score and mean absolute error on the test data.
test_r2 = r2_score(y_test.numpy(), test_predictions)
test_mae = mean_absolute_error(y_test.numpy(), test_predictions)

# Report the training and test metrics.
print(f"Training R2 Score: {train_r2:.4f}")
print(f"Testing R2 Score: {test_r2:.4f}")
print(f"Training MAE: {train_mae:.4f}")
print(f"Testing MAE: {test_mae:.4f}")
# Scatter plot of predicted vs. true values, with the y = x reference line.
plt.scatter(train_predictions, y_train.numpy(), label='Training Data', color='blue', alpha=0.5)
plt.scatter(test_predictions, y_test.numpy(), label='Testing Data', color='red', alpha=0.5)
plt.xlabel('Predicted Values')
plt.ylabel('True Values')
plt.title(f"LS-SVM {shengli} trian $R^{{2}}$: {round(train_r2, 4)} test $R^{{2}}$: "
          f"{round(test_r2, 4)} test MAE:{np.round(test_mae, 4)}")
# Bounds of the reference line. They are computed from NumPy arrays only
# and converted to plain floats, so no 0-d torch tensors are mixed into the
# values handed to min()/max() and Matplotlib. They cover every plotted
# point (training and test) so the line spans the whole scatter.
plotted_values = [train_predictions, y_train.numpy(),
                  test_predictions, y_test.numpy()]
min_val = float(min(np.min(values) for values in plotted_values))
max_val = float(max(np.max(values) for values in plotted_values))
plt.plot([min_val, max_val], [min_val, max_val], linestyle='--', color='green', linewidth=2, label='Alignment Line (Perfect Fit)')
plt.legend()
plt.show()