import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
# Path of the input CSV.
# NOTE(review): the original script left this empty -- set it to the real
# data file before running.
CSV_PATH = ''

# Names of the three input feature columns read from the CSV.
FEATURES = ['sigma', 'temp', 'voltage']

# Highest polynomial degree used when expanding the three features.
POLY_DEGREE = 5

# The target values are multiplied by this constant before fitting because
# the raw values are very small.
TARGET_SCALE = 1e10

# Piecewise 3-D fitting: each feature axis is split into bins and every
# (sigma, temp, voltage) bin combination gets its own fit, which can improve
# the fitting accuracy.  Each list holds the bin edges of one axis.
sigma_bins = [-3, -1, 1, 3]
temp_bins = [-50, 0, 65, 130]
voltage_bins = [0, 0.7, 1, 1.5]


def clean_cell_frame(data, cell):
    """Return the feature columns plus ``cell`` with dirty rows removed.

    Rows containing a missing value in any selected column, and rows whose
    ``cell`` value is the literal string ``'failed'``, are dropped.
    """
    df = data[FEATURES + [cell]].dropna(axis=0)
    return df[df[cell] != 'failed']


def iter_segments(df):
    """Yield ``(segment_index, sub_frame)`` for every 3-D bin combination.

    Segments are numbered from 1 with sigma as the outermost axis, then temp,
    then voltage as the fastest-varying axis.  Empty segments are yielded too,
    so a given index always refers to the same bin combination.  Rows falling
    outside the bin edges of any axis belong to no segment.
    """
    # Bin every axis once; labels=False gives the integer bin position
    # (NaN for values outside the edges, which never compares equal).
    sigma_code = pd.cut(df['sigma'], sigma_bins, labels=False,
                        include_lowest=True)
    temp_code = pd.cut(df['temp'], temp_bins, labels=False,
                       include_lowest=True)
    voltage_code = pd.cut(df['voltage'], voltage_bins, labels=False,
                          include_lowest=True)

    segment_index = 0
    for s in range(len(sigma_bins) - 1):
        in_sigma = sigma_code == s
        for t in range(len(temp_bins) - 1):
            in_sigma_temp = in_sigma & (temp_code == t)
            for v in range(len(voltage_bins) - 1):
                segment_index += 1
                yield segment_index, df[in_sigma_temp & (voltage_code == v)]


def max_percent_error(actual, predicted):
    """Return the largest absolute relative error, in percent.

    ``actual`` is expected to be a pandas Series; ``predicted`` may be any
    array-like of the same length.  A zero in ``actual`` produces an infinite
    or NaN ratio for that row.
    """
    return (((actual - predicted) / actual) * 100).abs().max()


def fit_segment(segment, cell, poly):
    """Fit a polynomial regression of ``cell`` on the features of one segment.

    The three features are expanded with ``poly`` and fitted with ordinary
    least squares.  The fit is printed and its quality is measured on the
    same rows it was fitted on.

    Returns a dict with the worst percent error (``max_error``), the mean
    squared error (``mse``), and the fitted ``coef``, ``intercept`` and
    ``feature_names`` in case the caller wants to record the model.
    """
    x = segment[FEATURES].to_numpy()
    y = segment[cell].astype(float) * TARGET_SCALE

    # Build the polynomial terms once and read their names from the same
    # fitted transformer instead of fitting it a second time.
    xx = poly.fit_transform(x)
    feature_names = poly.get_feature_names_out(FEATURES)

    regr = LinearRegression()
    regr.fit(xx, y)
    predict = regr.predict(xx)

    print("====================")
    print("degree= ", poly.degree)
    print("coefficients= ", regr.coef_)
    print("intercept= ", regr.intercept_)
    print("R^2= ", regr.score(xx, y))
    print("====================")

    return {
        'max_error': max_percent_error(y, predict),
        'mse': mean_squared_error(y, predict),
        'coef': regr.coef_,
        'intercept': regr.intercept_,
        'feature_names': feature_names,
    }


def main(csv_path=CSV_PATH):
    """Fit every cell column segment by segment and report the worst segment."""
    data = pd.read_csv(csv_path)
    # Every column from the fourth one onward is treated as a cell to fit.
    # NOTE(review): presumably the first three columns are the features --
    # confirm against the CSV layout.
    cells = data.columns.tolist()[3:]
    poly = PolynomialFeatures(degree=POLY_DEGREE, include_bias=False)

    for cell in cells:
        df = clean_cell_frame(data, cell)

        max_error_per_cell = 0
        max_error_segment_per_cell = ""
        max_mse_per_cell = 0
        max_mse_segment_per_cell = ""

        for segment_index, segment in iter_segments(df):
            if segment.empty:
                continue
            result = fit_segment(segment, cell, poly)

            # Remember the worst segment (largest percent error) and its
            # index to make debugging easier.
            if result['max_error'] > max_error_per_cell:
                max_error_per_cell = result['max_error']
                max_error_segment_per_cell = segment_index
            if result['mse'] > max_mse_per_cell:
                max_mse_per_cell = result['mse']
                max_mse_segment_per_cell = segment_index

        print("### max error:", max_error_per_cell,
              "at segment:", max_error_segment_per_cell)
        print("### max MSE:", max_mse_per_cell,
              "at segment", max_mse_segment_per_cell)


if __name__ == "__main__":
    main()
# 实现三个特征的多项式分体(一维分段,二维分面,三维分体)拟合,最高阶为5,精度极高
# 参考:
# https://blog.csdn.net/zhaohongfei_358/article/details/133901980
# https://blog.csdn.net/l645317186/article/details/124963010
# https://www.cnblogs.com/qiu-hua/p/14965659.html