import os
import numpy as np
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Folder containing the labeled training spectra.
labeled_data_folder = r'C:\Users\86188\Desktop\Python\数学建模\新建文件夹\LabeledData'

# X: absorbance spectra (one row per file); y: [U(IV), U(VI), HNO3] concentrations.
X = []
y = []

# Read every labeled .txt file and extract its spectrum and concentration labels.
for file_name in os.listdir(labeled_data_folder):
    if not file_name.endswith('.txt'):
        continue
    file_path = os.path.join(labeled_data_folder, file_name)
    with open(file_path, 'r') as file:
        lines = file.readlines()
    # Spectral rows start immediately after the '[ABS_DATA]' section marker.
    data_start = lines.index('[ABS_DATA]\n') + 1
    data = [line.strip().split(';') for line in lines[data_start:]]
    data = np.array(data[1:], dtype=float)  # skip the column-header row
    wave = data[:, 0]            # wavelengths (not used as features; kept for reference)
    abs_intensity = data[:, 1]   # absorbance intensities -> model features
    # Concentration labels are parsed from fixed header lines of the form 'NAME=value'.
    # NOTE(review): assumes file lines 3-5 hold U(IV), U(VI), HNO3 in that order —
    # confirm against the actual data-file format.
    concentration_U_IV = float(lines[2].split('=')[1])
    concentration_U_VI = float(lines[3].split('=')[1])
    concentration_HNO3 = float(lines[4].split('=')[1])
    X.append(abs_intensity)
    y.append([concentration_U_IV, concentration_U_VI, concentration_HNO3])

# Convert the accumulated lists to NumPy arrays for scikit-learn.
X = np.array(X)
y = np.array(y)
# Dimensionality reduction: compress the high-dimensional spectra with PCA.
pca = PCA(n_components=200)  # number of principal components to retain
X_pca = pca.fit_transform(X)

# Hold out 20% of the samples for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=0.2, random_state=42)

# Multi-output BP (feed-forward) neural-network regressor.
model = MLPRegressor(
    hidden_layer_sizes=(100, 50, 70),
    activation='relu',
    solver='adam',
    random_state=42,
)
model.fit(X_train, y_train)

# Evaluate the fitted model on the held-out test split.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("均方误差 (MSE):", mse)
# Folder containing the unlabeled spectra to be predicted.
unlabeled_data_folder = r'C:\Users\86188\Desktop\Python\数学建模\新建文件夹\UnlabeledData'

# Feature rows (absorbance spectra) for the unlabeled samples.
X_unlabeled = []

# Read each unlabeled .txt file and extract its absorbance spectrum,
# using the same '[ABS_DATA]' parsing scheme as the labeled data.
for file_name in os.listdir(unlabeled_data_folder):
    if not file_name.endswith('.txt'):
        continue
    file_path = os.path.join(unlabeled_data_folder, file_name)
    with open(file_path, 'r') as file:
        lines = file.readlines()
    data_start = lines.index('[ABS_DATA]\n') + 1
    data = [line.strip().split(';') for line in lines[data_start:]]
    data = np.array(data[1:], dtype=float)  # skip the column-header row
    abs_intensity = data[:, 1]
    X_unlabeled.append(abs_intensity)

X_unlabeled = np.array(X_unlabeled)

# Project the unlabeled spectra with the PCA model fitted on the labeled data
# (transform only — do NOT refit, or the feature spaces would not match).
X_unlabeled_pca = pca.transform(X_unlabeled)
# Predict the three concentrations for every unlabeled sample.
y_pred_unlabeled = model.predict(X_unlabeled_pca)

# Print one prediction per .txt file.
# BUG FIX: the original enumerated *all* directory entries and indexed
# y_pred_unlabeled[i] inside the .txt filter, so the index drifted out of
# sync with the prediction rows whenever a non-.txt file was present.
# Pairing the filtered filenames with the predictions via zip() keeps the
# correspondence exact regardless of other files in the folder.
txt_files = [name for name in os.listdir(unlabeled_data_folder)
             if name.endswith('.txt')]
for file_name, prediction in zip(txt_files, y_pred_unlabeled):
    print(file_name)
    print("预测浓度 (U(IV), U(VI), HNO3):", prediction)
    print()
# NOTE(review): an accidental verbatim duplicate of the entire script
# (imports, labeled-data loading, PCA, model training/evaluation, and the
# unlabeled-data prediction loop) previously followed here. It re-ran the
# whole pipeline a second time with identical inputs and merely repeated
# the same output, so it has been removed.