# Resample the dataset on one column so that every category ends up with the same number of rows.
import pandas as pd
from sklearn.utils import resample
# Size of the smallest class; value_counts() skips missing values by default.
min_n = df['Pclass'].value_counts().min()
# Drop missing values before listing the classes: unique() would otherwise
# include NaN, `df['Pclass'] == NaN` matches no rows, and resample() raises
# when asked to draw min_n rows without replacement from an empty frame.
categories = df['Pclass'].dropna().unique()
# Downsample every class to min_n rows so that all classes are equally sized.
balanced_data = []
for category in categories:
    category_data = df[df['Pclass'] == category]
    # replace=False draws distinct rows; the fixed seed keeps the draw reproducible.
    balanced_category_data = resample(category_data, replace=False, n_samples=min_n, random_state=42)
    balanced_data.append(balanced_category_data)
# Merge the per-class samples. pd.concat() raises on an empty list, so fall
# back to an empty frame with df's columns when there were no classes at all.
balanced_df = pd.concat(balanced_data) if balanced_data else df.iloc[0:0]