今年华为杯研究生数学建模的C题第二问用到了最小二乘法拟合,这里配合代码简要地讲一下。
数据预处理
主要是筛选材料1中励磁波形为正弦波的数据,先计算出最大磁通密度,然后整合数据:
import pandas as pd
df1 = pd.read_excel('./data/附件一(训练集).xlsx', sheet_name='材料1')
sine_wave = df1[df1['励磁波形'] == '正弦波' ]
print(sine_wave.shape)
collom_name = [i for i in range(1,1024)]
collom_name.append('0(磁通密度B,T)')
Bm = sine_wave[collom_name].max(axis=1)
print(Bm.shape)
print(Bm.head())
data = pd.DataFrame({
'f': sine_wave['频率,Hz'],
'Bm': Bm,
'T': sine_wave['温度,oC'],
'P': sine_wave['磁芯损耗,w/m3'],
})
X = data[['f','Bm','T']].to_numpy()
print(X.shape)
Y = data['P'].to_numpy()
print(Y.shape)
print(data['Bm'].describe())
拟合
首先是拟合原斯坦麦茨方程(SE)得到k,α,β。然后对原SE取对数变形得到线性的表示形式,固定参数α,β,得到不同温度下的lnk,由此确定lnk与T的关系。
from scipy.optimize import curve_fit
import numpy as np
params, _ = curve_fit(steinmetz, X, Y)
print(params)
# lnP = lnk + αlnf + βlnBm
def ln_steinmetz(X, ln_k):
return ln_k + params[1] * X[:,0] + params[2] * X[:,1]
T_unique = data['T'].unique()
print(T_unique)
ln_k_list = []
for T in T_unique:
data_T = data[data['T'] == T]
lnX_T = np.log(data_T[['f','Bm']]).to_numpy()
lnY_T = np.log(data_T['P']).to_numpy()
lnk_T, _ = curve_fit(ln_steinmetz, lnX_T, lnY_T, p0=[params[0]])
ln_k_list.append({'T: ':T, 'lnk: ': lnk_T[0]})
print(f"{T}℃下的ln(k_i): {lnk_T[0]}\n")
绘制散点图,可视化不同温度下的lnk,发现lnk随温度的升高,下降趋势变缓,由此考虑二次函数。并且发现用二次函数拟合的效果还不错。
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax = fig.add_subplot()
ax.scatter(T_unique, [i['lnk: '] for i in ln_k_list], marker='o', label='ln(k_i)', color='#43a2ca')
ax.set_xlabel('T/℃')
ax.set_ylabel('lnk')
# 拟合lnk与T的关系,假设lnk与T之间是二次函数关系
def lnk_fun(T, a, b, c):
return a*T**2 + b*T + c
params_ab, _ = curve_fit(lnk_fun, np.array(T_unique), [i['lnk: '] for i in ln_k_list])
print(params_ab) # [ 1.25158480e-04 -2.59127337e-02 1.33086478e+00]
T_fit = np.linspace(T_unique.min(), T_unique.max(), 100)
lnk_fit = lnk_fun(T_fit, *params_ab)
ax.plot(T_fit, lnk_fit, label='Fitting curve', color='#f768a1')
ax.legend()
fig.savefig('./T_lnk.svg', format='svg', dpi=500)
确定修正SE
因为指数形式的方程不好拟合,所以先拟合对数形式的修正SE:
# lnP = (aT^2+bT+c) + αlnf + βlnBm
def ln_steinmetz1(X, a, b, c, alpha, beta):
return (a*X[:,2]**2 + b*X[:,2] + c) + alpha * np.log(X[:,0]) + beta * np.log(X[:,1])
params_final, _ = curve_fit(ln_steinmetz1, X, np.log(Y), p0=[*params_ab, *params[1:]])
print(params_final) # 1.22161954e-04 -2.58499448e-02 -1.00054774e+00 1.64224635e+00 2.52362546e+00
然后等号两边同时取以e为底的指数就可以得到修正的SE方程。
比较原SE与修正SE
我们使用均方误差(MSE)、平均相对误差(MRE)、平均绝对误差(MAE)、残差(Residual)来评判原SE与修正SE的优劣。
from sklearn.metrics import mean_squared_error
import numpy as np
# 用最优参数预测测试集
y_pred = steinmetz(X, *params)
# 计算误差
mse_train = mean_squared_error(Y, y_pred)
print(f'均方误差: {mse_train}')
relative_error = np.abs((y_pred - Y) / Y)
print(f'mean relative error: {relative_error.mean()}')
absolute_error = np.abs(y_pred - Y).mean()
print(f'mean absolute error: {absolute_error}')
def modified_steinmetz(X, a, b, c, alpha, beta):
return np.exp(a*X[:,2]**2 + b*X[:,2] + c) * X[:,0]**alpha * X[:,1]**beta
# 用最优参数预测测试集
y_pred_mod = modified_steinmetz(X, *params_final)
# 计算误差
mse_train = mean_squared_error(Y, y_pred_mod)
print(f'均方误差: {mse_train}')
relative_error = np.abs((y_pred_mod - Y) / Y)
print(f'mean relative error: {relative_error.mean()}')
absolute_error = np.abs(y_pred_mod - Y).mean()
print(f'mean absolute error: {absolute_error}')
# 残差图
residuals = Y - y_pred
residuals_mod = Y - y_pred_mod
fig, (ax, bx) = plt.subplots(1,2, figsize=(13,6),sharey=True)
ax.scatter(range(271), residuals[:271], color='#fef0d9', label='25°C')
ax.scatter(range(271, 527), residuals[271:527], color='#fdcc8a', label='50°C')
ax.scatter(range(527, 792), residuals[527:792], color='#fc8d59', label='70°C')
ax.scatter(range(792, len(residuals)), residuals[792:], color='#d7301f', label='90°C')
ax.legend(loc='lower right')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Residuals')
ax.grid(True)
ax.set_title('Original SE function')
bx.scatter(range(271), residuals_mod[:271], color='#fef0d9', label='25°C')
bx.scatter(range(271, 527), residuals_mod[271:527], color='#fdcc8a', label='50°C')
bx.scatter(range(527, 792), residuals_mod[527:792], color='#fc8d59', label='70°C')
bx.scatter(range(792, len(residuals)), residuals_mod[792:],color='#d7301f', label='90°C')
bx.legend(loc='lower right')
bx.set_xlabel('Sample Index')
bx.grid(True)
bx.set_title('Modified SE function')
fig.suptitle('Residuals distribution across different temperatures')
fig.savefig('./residuals_2.svg', format='svg', dpi=500)
可以看到,原SE的残差在温度为25℃、70℃和90℃时的分布偏离0较多,而修正SE偏离较少。
有问题欢迎在评论区讨论!