# 数据生成
import pandas as pd
import numpy as np
# Toy imbalanced dataset: 50 rows of uniform-random features in columns
# 'a', 'b', 'c', plus a label column 'y' with 6 zeros followed by 44 ones.
data = pd.DataFrame(
    data=np.random.random(size=(50, 3)),
    columns=list('abc'),
)
# Minority class (0.0) comes first, majority class (1.0) after it.
data['y'] = np.repeat([0.0, 1.0], [6, 44])
# 过采样/上采样
# Naive random over-sampling: after resampling, the class ratio is meant to
# be 1:1.
from imblearn.over_sampling import RandomOverSampler
# random_state=0 pins the sampler's seed so its random picks are repeatable.
ros = RandomOverSampler(random_state=0)
x, y