目录
本文章代码参考下面的博客,这个博客写的更加详细,大家可以去学习一下。光谱数据处理:4.七种预处理方法及其python实现_光谱预处理一阶导数-CSDN博客
创建这个文章的目的是为了保存一下运行过的模型代码,大家有需要可以拿去参考。
预处理:
归一化代码
# 归一化
min_vals = np.min(x, axis=0)
max_vals = np.max(x, axis=0)
x = (x - min_vals) / (max_vals - min_vals)
多元散射校正代码
# 多元散射校正
def do_msc(input_data):
ref_spectrum = np.mean(input_data, axis=0)
corrected_data = np.zeros_like(input_data)
for i in range(input_data.shape[0]):
fit = np.polyfit(ref_spectrum, input_data[i, :], 1)
corrected_data[i, :] = (input_data[i, :] - fit[1]) / fit[0]
return corrected_data, ref_spectrum
input_data = x[:, :].astype(float)
corrected_spectra, mean_spectrum = do_msc(input_data)
基线校正代码
for i in range(len(df)):
y_data=df.iloc[i,1:].tolist()
y_data = np.array(y_data, dtype=float)
x_data=dict['Wavenumbers']
# 基线校正(使用简单的最小二乘多项式拟合)
# 寻找峰值
peaks, _ = find_peaks(y_data, prominence=0.001)
# 峰值的宽度
widths = peak_widths(y_data, peaks, rel_height=0.1)
# 为基线拟合使用峰值外的数据
mask = np.ones_like(y_data, dtype=bool)
for peak, width in zip(peaks, widths[0]):
mask[int(peak - width):int(peak + width)] = False
# 多项式拟合基线
baseline_values = np.polyfit(np.array(x_data)[mask], np.array(y_data)[mask], 2)
baseline_poly = np.poly1d(baseline_values)
# 计算基线并进行校正
baseline = baseline_poly(x_data)
y_corrected = y_data - baseline
# 绘制原始数据和基线校正后结果
fig, ax = plt.subplots(figsize=(6, 8))
# 原始数据
ax.plot(x_data, y_data, label='Original Data')
ax.plot(x_data, baseline, label='Baseline', linestyle='--')
ax.set_title('Original Spectral Data with Baseline')
ax.legend()
# 基线校正后的数据
ax.plot(x_data, y_corrected, color='orange', label='Corrected Data')
ax.set_title('Baseline Corrected Data')
ax.legend()
# 设置X轴标签
ax.set_xlabel('Wavelength (nm)')
ax.set_ylabel('Intensity')
# 保存图像
save_path1 = r'D:\数据集\Open-Nirs-Datasets-main\定性分析\草莓叶-983条(草莓和非草莓的二分类)\图片\基线校正\图{}.png'.format(
i)
plt.savefig(save_path1)
小波变换去噪代码
## 小波变换去噪
# 可视化
for i in range(len(df)):
y_data = df.iloc[i, 1:].tolist()
y_data = np.array(y_data, dtype=float)
x_data = dict['Wavenumbers']
def moving_average(data, window_size):
return np.convolve(data, np.ones(window_size) / window_size, mode='same')
# Savitzky-Golay滤波器
def savgol_filtering(data, window_size, order):
return savgol_filter(data, window_size, order)
# 小波变换去噪
def wavelet_denoising(data, wavelet, level):
coeff = pywt.wavedec(data, wavelet, mode='per', level=level)
sigma = np.median(np.abs(coeff[-1])) / 0.6745
uthresh = sigma * np.sqrt(2 * np.log(len(data)))
coeff[1:] = (pywt.threshold(i, value=uthresh, mode='soft') for i in coeff[1:])
return pywt.waverec(coeff, wavelet, mode='per')
# 应用噪声降低技术
ma_data = moving_average(y_data, 5) # 窗口大小为5的移动平均
sg_data = savgol_filtering(y_data, 15, 3) # 窗口大小15,多项式阶数3的Savitzky-Golay滤波
wt_data = wavelet_denoising(y_data, 'db1',1) # Daubechies小波,分解层数为1
wt_data=wt_data[:235]
# 绘制原始数据和各种降噪结果
fig, axs = plt.subplots(4, 1, figsize=(10, 12), sharex=True)
# 原始数据
axs[0].plot(x_data, y_data, label='Original Data')
axs[0].set_title('Original Spectral Data with Noise')
axs[0].legend()
# 移动平均结果
axs[1].plot(x_data, ma_data, color='green', label='Moving Average')
axs[1].set_title('Moving Average')
axs[1].legend()
# Savitzky-Golay滤波结果
axs[2].plot(x_data, sg_data, color='red', label='Savitzky-Golay Filter')
axs[2].set_title('Savitzky-Golay Filter')
axs[2].legend()
# 小波变换去噪结果
axs[3].plot(x_data, wt_data, color='purple', label='Wavelet Denoising')
axs[3].set_title('Wavelet Denoising')
axs[3].legend()
# 设置共同的X轴标签
for ax in axs:
ax.set_xlabel('Wavelength (nm)')
ax.set_ylabel('Intensity')
plt.tight_layout()
save_path3=r'D:\数据集\Open-Nirs-Datasets-main\定性分析\草莓叶-983条(草莓和非草莓的二分类)\图片\小波变换去噪\图片{}'.format(i)
plt.savefig(save_path3)
标准正态变量变换代码
# 标准正态变量变换函数
for i in range(len(df)):
y_data = df.iloc[i, 1:].tolist()
y_data = np.array(y_data, dtype=float)
x_data = dict['Wavenumbers']
def standardize_data(data):
mean_val = np.mean(data)
std_dev = np.std(data)
standardized_data = (data - mean_val) / std_dev
return standardized_data
# 应用标准正态变量变换
standardized_y_data = standardize_data(y_data)
# 绘制原始数据和标准正态变量变换结果
fig, axs = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
# 原始数据
axs[0].plot(x_data, y_data, label='Original Data')
axs[0].set_title('Original Spectral Data')
axs[0].set_ylabel('Intensity')
axs[0].legend()
# 标准正态变量变换结果
axs[1].plot(x_data, standardized_y_data, color='red', label='Standardized Data')
axs[1].set_title('Standardized Spectral Data')
axs[1].set_ylabel('Z-Score')
axs[1].legend()
# 设置共同的X轴标签
for ax in axs:
ax.set_xlabel('Wavelength (nm)')
plt.tight_layout()
save_path5=r'D:\数据集\Open-Nirs-Datasets-main\定性分析\草莓叶-983条(草莓和非草莓的二分类)\图片\z函数标准化\png{}'.format(i)
plt.savefig(save_path5)
一阶光谱导数代码
## 一阶导数光谱
# 计算导数光谱
def calculate_derivative(x, spectra):
# 使用numpy的差分函数计算一阶导数
dx = np.diff(x)
# 注意这里,我们用dx[0]来近似所有波长间的差值,这是一个简化假设,通常光谱波长间隔是均匀的
derivative_spectra = np.diff(spectra) / dx[0]
# x坐标为原始x坐标的中点
x_mid_points = (x[:-1] + x[1:]) / 2
return x_mid_points, derivative_spectra
x=dict['Wavenumbers']
x = np.array(x)
spectra=df.iloc[:,1:].to_numpy().astype(float)
# 计算导数光谱
x_deriv, derivative_spectra = calculate_derivative(x, spectra)
# 绘制图形
fig, axes = plt.subplots(2, 1, figsize=(8, 10), sharex=True)
# 绘制原始光谱
for i in range(983):
axes[0].plot(x, spectra[i, :], label=f'Sample {i + 1}')
axes[0].set_title('Original Spectra')
axes[0].set_ylabel('Intensity')
#
# 绘制一阶导数光谱
for i in range(236):
axes[1].plot(x_deriv, derivative_spectra[i, :], label=f'Sample {i + 1}')
axes[1].set_title('First Derivative of Spectra')
axes[1].set_xlabel('Wavelength (nm)')
axes[1].set_ylabel('First Derivative Intensity')
plt.tight_layout()
plt.savefig(r'D:\数据集\Open-Nirs-Datasets-main\定性分析\草莓叶-983条(草莓和非草莓的二分类)\图片\一阶导数光谱\png')