from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
# Over-sampling
def over_sampling(train_img, train_label):
    """Balance the classes by randomly over-sampling the image set.

    ``RandomOverSampler.fit_resample`` works on 2-D
    ``(n_samples, n_features)`` input, so each image is flattened to a
    vector, resampled, and then restored to its original per-image shape.

    Args:
        train_img: Array of images with the sample axis first, e.g.
            ``(n_samples, nx, ny, nz)``. Any number of trailing axes is
            accepted (a grayscale ``(n_samples, nx, ny)`` batch works too).
        train_label: Class labels, one per image.

    Returns:
        Tuple ``(x_resampled, y_resampled)``: the resampled images reshaped
        back to ``(n_resampled, *train_img.shape[1:])`` and their labels.
    """
    # ``random_seed`` is expected to be defined at module level (not visible
    # in this section); it seeds the sampler's random state.
    sampler = RandomOverSampler(random_state=random_seed)

    # Keep the per-image shape so it can be restored after resampling,
    # regardless of how many trailing axes the images have.
    n_samples = train_img.shape[0]
    image_shape = train_img.shape[1:]
    flat_img = train_img.reshape(n_samples, -1)

    x_resampled, y_resampled = sampler.fit_resample(flat_img, train_label)

    # The number of rows changes after resampling, so read it from the result.
    x_resampled = x_resampled.reshape(x_resampled.shape[0], *image_shape)
    return x_resampled, y_resampled
解释:对样本数量不足的少数类样本进行简单的随机选择并重复添加,直到各类别的样本数量平衡。
用法与sklearn类似:先实例化采样器类,得到一个采样器对象,然后调用该对象的fit_resample方法,返回值即为上采样后的样本与标签;下采样的用法相同。
由于fit_resample要求输入为二维数组(样本数 × 特征数),这里先把一批图像数据展平,重采样之后再恢复成原来的图像形状;下采样过程相同。
# Under-sampling
def under_sampling(train_img, train_label):
    """Balance the classes by randomly under-sampling the image set.

    ``RandomUnderSampler.fit_resample`` works on 2-D
    ``(n_samples, n_features)`` input, so each image is flattened to a
    vector, resampled, and then restored to its original per-image shape.

    Args:
        train_img: Array of images with the sample axis first, e.g.
            ``(n_samples, nx, ny, nz)``. Any number of trailing axes is
            accepted (a grayscale ``(n_samples, nx, ny)`` batch works too).
        train_label: Class labels, one per image.

    Returns:
        Tuple ``(x_resampled, y_resampled)``: the resampled images reshaped
        back to ``(n_resampled, *train_img.shape[1:])`` and their labels.
    """
    # ``random_seed`` is expected to be defined at module level (not visible
    # in this section); it seeds the sampler's random state.
    # NOTE(review): ``replacement=True`` makes the sampler draw the retained
    # samples with replacement, so a kept image may appear more than once.
    # Confirm this is intended rather than plain removal of samples.
    sampler = RandomUnderSampler(random_state=random_seed, replacement=True)

    # Keep the per-image shape so it can be restored after resampling,
    # regardless of how many trailing axes the images have.
    n_samples = train_img.shape[0]
    image_shape = train_img.shape[1:]
    flat_img = train_img.reshape(n_samples, -1)

    x_resampled, y_resampled = sampler.fit_resample(flat_img, train_label)

    # The number of rows changes after resampling, so read it from the result.
    x_resampled = x_resampled.reshape(x_resampled.shape[0], *image_shape)
    return x_resampled, y_resampled
解释:对多数类样本进行简单的随机选择并删除(只随机保留其中一部分),使各类别的样本数量平衡。
数据增强(这里指重采样)仅用于训练数据集,不需要也不应该改变交叉验证集和测试集。