血糖数据，新数据拟合到旧数据上

雷古小狮子

已于 2023-10-19 20:49:03 修改

阅读量45

点赞数

文章标签： python 机器学习

于 2023-10-19 17:30:39 首次发布

本文链接：https://blog.csdn.net/qq_45876576/article/details/133931057

版权

本文介绍了如何使用Python进行线性回归和多项式回归模型的训练，包括数据预处理、模型构建（如LinearRegression和PolynomialFeatures）、评估指标（R-squared）以及模型的保存。最后展示了如何加载模型、对新数据进行预测并计算误差。

摘要由CSDN通过智能技术生成

对数据进行线性回归，多项式模型训练

线性回归训练

# -*- coding: utf-8 -*-
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
import joblib
# Read the two CSV files; the "4.7AD" column of the first is used as the
# input and the same-named column of the second as the target.
data_x = pd.read_csv(
    'H:/Myjupyter/zhw/血糖/修改数据/S01now4.7.csv',
    encoding='ISO-8859-1',
)
data_y = pd.read_csv(
    'H:/Myjupyter/zhw/血糖/修改数据/S014.7.csv',
    encoding='ISO-8859-1',
)

from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

features = data_x["4.7AD"]
targets = data_y["4.7AD"]
score_train1, score_test1 = [], []

# Fixed random_state keeps the 75/25 split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    targets,
    train_size=0.75,
    random_state=4000,
)

# scikit-learn estimators take 2-D inputs, so every Series becomes a single
# column of shape (n_samples, 1).
x_train1, y_train1, x_test1, y_test1 = (
    part.values.reshape(-1, 1) for part in (x_train, y_train, x_test, y_test)
)

# Fit ordinary least squares on the training part and predict the test part.
regressor = LinearRegression().fit(x_train1, y_train1)
reg_y_prediction = regressor.predict(x_test1)

# Training points as a scatter, fitted line drawn over the test inputs.
plt.scatter(x_train1, y_train1)
(plt1,) = plt.plot(x_test1, reg_y_prediction, label='regressor')
plt.xlabel('X')
plt.ylabel('y')
# legend(handles=...) needs the line objects returned by plt.plot.
plt.legend(handles=[plt1])
plt.show()

# Report R-squared on the training data and on the held-out test data.
print(
    'The R-squared value of linear regressor performing on the training data is',
    regressor.score(x_train1, y_train1),
)
print(
    'The R-squared value of linear regressor performing on the test data is',
    regressor.score(x_test1, y_test1),
)

多项式模型训练

degree=2

# -*- coding: utf-8 -*-
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
import joblib
# Read the two CSV files; the "4.7AD" column of the first is used as the
# input and the same-named column of the second as the target.
data_x=pd.read_csv('H:/Myjupyter/zhw/血糖/修改数据/S01now4.7.csv',encoding='ISO-8859-1')
data_y=pd.read_csv('H:/Myjupyter/zhw/血糖/修改数据/S014.7.csv',encoding='ISO-8859-1')

from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

features = data_x["4.7AD"]
targets = data_y["4.7AD"]
score_train1 = []
score_test1 = []
# Fixed random_state keeps the 75/25 split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    features, targets, train_size=0.75, random_state=4000
)

# scikit-learn estimators take 2-D inputs, so every Series becomes a single
# column of shape (n_samples, 1).
x_train1 = x_train.values.reshape(-1, 1)
y_train1 = y_train.values.reshape(-1, 1)
x_test1 = x_test.values.reshape(-1, 1)
y_test1 = y_test.values.reshape(-1, 1)

# Fit the plain linear baseline inside this cell. The comparison line in the
# plot needs it, and relying on names left over from another cell raises
# NameError when this cell is run on its own.
regressor = LinearRegression()
regressor.fit(x_train1, y_train1)
reg_y_prediction = regressor.predict(x_test1)

# Expand the single input into degree-2 polynomial features and fit a linear
# model on the expanded features.
poly2 = PolynomialFeatures(degree=2)
x_train_poly2 = poly2.fit_transform(x_train1)

regressor_poly2 = LinearRegression()
regressor_poly2.fit(x_train_poly2, y_train1)
# The test inputs go through the transformer fitted on the training inputs.
x_poly2 = poly2.transform(x_test1)
y_poly2 = regressor_poly2.predict(x_poly2)

# plt.plot joins points in the order given; the test inputs come out of the
# split in arbitrary order, so sort by x to draw a curve instead of a zig-zag.
plot_order = np.argsort(x_test1.ravel())

plt.scatter(x_train1, y_train1)
plt1, = plt.plot(x_test1[plot_order], reg_y_prediction[plot_order], label='regressor')
plt2, = plt.plot(x_test1[plot_order], y_poly2[plot_order], label='Degree=2')

plt.xlabel('X')
plt.ylabel('y')
# legend(handles=...) needs the line objects returned by plt.plot.
plt.legend(handles=[plt1, plt2])
plt.show()

print(x_train_poly2.shape)
print(y_train1.shape)

# Report R-squared of the degree-2 model on the training data and on the
# held-out test data. The test score must use the polynomial model with the
# polynomial-expanded test features, not the linear baseline.
print('The R-squared value of Polynomial regressor(Degress=2) performing on the training data is',regressor_poly2.score(x_train_poly2,y_train1))
print('The R-squared value of Polynomial regressor(Degree=2) performing on the test data is',regressor_poly2.score(x_poly2,y_test1))

degree = 4

# -*- coding: utf-8 -*-
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
import joblib
# Read the two CSV files; the "4.7AD" column of the first is used as the
# input and the same-named column of the second as the target.
data_x=pd.read_csv('H:/Myjupyter/zhw/血糖/修改数据/S01now4.7.csv',encoding='ISO-8859-1')
data_y=pd.read_csv('H:/Myjupyter/zhw/血糖/修改数据/S014.7.csv',encoding='ISO-8859-1')

from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

features = data_x["4.7AD"]
targets = data_y["4.7AD"]
score_train1 = []
score_test1 = []
# Fixed random_state keeps the 75/25 split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    features, targets, train_size=0.75, random_state=4000
)

# Expand the single input into degree-4 polynomial features.
poly4 = PolynomialFeatures(degree=4)

# scikit-learn estimators take 2-D inputs, so every Series becomes a single
# column of shape (n_samples, 1).
x_train1 = x_train.values.reshape(-1, 1)
y_train1 = y_train.values.reshape(-1, 1)
x_test1 = x_test.values.reshape(-1, 1)
y_test1 = y_test.values.reshape(-1, 1)

x_train_poly4 = poly4.fit_transform(x_train1)

regressor_poly4 = LinearRegression()
regressor_poly4.fit(x_train_poly4, y_train1)
# The test inputs go through the transformer fitted on the training inputs.
x_poly4 = poly4.transform(x_test1)
y_poly4 = regressor_poly4.predict(x_poly4)

# plt.plot joins points in the order given; the test inputs come out of the
# split in arbitrary order, so sort by x to draw a curve instead of a zig-zag.
plot_order = np.argsort(x_test1.ravel())

plt.scatter(x_train1, y_train1)
plt4, = plt.plot(x_test1[plot_order], y_poly4[plot_order], label='Degree=4')

plt.xlabel('X')
plt.ylabel('y')
# legend(handles=...) needs the line objects returned by plt.plot.
plt.legend(handles=[plt4])
plt.show()

# Report R-squared of the degree-4 model on the training data and on the
# held-out test data. The test score must use the polynomial model with the
# polynomial-expanded test features; the linear `regressor` is not defined in
# this cell and would score a different model.
print('The R-squared value of Polynomial regressor(Degress=4) performing on the training data is',regressor_poly4.score(x_train_poly4,y_train1))
print('The R-squared value of Polynomial regressor(Degree=4) performing on the test data is',regressor_poly4.score(x_poly4,y_test1))

预测

生成模型

# -*- coding: utf-8 -*-
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
import joblib
# Read the two CSV files; the "4.7AD" column of the first is used as the
# input and the same-named column of the second as the target.
data_x = pd.read_csv(
    'H:/Myjupyter/zhw/血糖/修改数据/S01now4.7.csv',
    encoding='ISO-8859-1',
)
data_y = pd.read_csv(
    'H:/Myjupyter/zhw/血糖/修改数据/S014.7.csv',
    encoding='ISO-8859-1',
)

from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,GradientBoostingRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

features = data_x["4.7AD"]
targets = data_y["4.7AD"]
score_train1, score_test1 = [], []

# Fixed random_state keeps the 75/25 split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    targets,
    train_size=0.75,
    random_state=4000,
)

# Degree-2 polynomial expansion of the single input column.
poly2 = PolynomialFeatures(degree=2)

# scikit-learn estimators take 2-D inputs, so every Series becomes a single
# column of shape (n_samples, 1).
x_train1, y_train1, x_test1, y_test1 = (
    part.values.reshape(-1, 1) for part in (x_train, y_train, x_test, y_test)
)

x_train_poly2 = poly2.fit_transform(x_train1)

# Linear model on the expanded features; fit() returns the estimator itself.
regressor_poly2 = LinearRegression().fit(x_train_poly2, y_train1)

# Persist both pieces: predicting later needs the fitted regressor and the
# fitted feature transformer.
joblib.dump(regressor_poly2, 'model.pkl')
joblib.dump(poly2, 'poly_feature.pkl')

用模型预测结果

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression

# Load the exported regressor and the fitted polynomial feature transformer.
# joblib files are pickles, so only load files from a trusted source.
model = joblib.load('model.pkl')
poly_features = joblib.load('poly_feature.pkl')

# NOTE(review): the input file is "...7.1" and the reference file is "...7.2";
# confirm these two files are really meant to be paired row by row.
data_x=pd.read_csv('H:/Myjupyter/zhw/血糖/修改数据/S01now7.1.csv',encoding='ISO-8859-1')
data_y=pd.read_csv('H:/Myjupyter/zhw/血糖/修改数据/S017.2.csv',encoding='ISO-8859-1')

# The transformer takes a 2-D input: one column, one row per sample.
test_x = data_x["4.7AD"].values.reshape(-1, 1)
# Reference values as a flat 1-D array.
test_y = np.asarray(data_y["4.7AD"]).ravel()

length = len(test_y)
print(length)  # number of reference values

# Reuse the loaded transformer so the feature expansion matches the one the
# model was fitted on.
X_test_poly = poly_features.transform(test_x)

# predict() returns shape (n, 1) when the model was fitted on a column target.
# Flatten it to (n,) so that the arithmetic against the 1-D test_y below is
# element-wise; (n, 1) - (n,) would silently broadcast to an (n, n) matrix.
predictions = model.predict(X_test_poly).ravel()

print('Mean absolute error:', mean_absolute_error(test_y, predictions))

# Signed difference between prediction and reference, and that difference
# relative to the reference value (inf/nan wherever a reference value is 0).
absolute = predictions - test_y
mean_error = absolute / test_y
print(predictions.shape)
print(test_y.shape)
print(absolute.shape)
print(mean_error.shape)

# Write predicted and reference values side by side. The output keeps the
# original two columns; `absolute` and `mean_error` are now 1-D as well and
# could be stacked as extra columns if wanted.
result = np.column_stack((predictions, test_y))
df = pd.DataFrame(result, columns=['预测值', '真实值'])
df.to_csv('H:/Myjupyter/zhw/血糖/结果.csv', index=False, encoding="gbk")