from imblearn. over_sampling import SMOTE
import pandas as pd
C:\ProgramData\Anaconda3\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
return f(*args, **kwds)
C:\ProgramData\Anaconda3\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
return f(*args, **kwds)
# Load the prepared dataset, then keep only the first 20 rows and the columns
# at positions 1 through 9 as a small working sample.
df = pd.read_csv('base_done.csv')
data = df.iloc[:20, 1:10]
# Bare expression: shows the sample when run in an interactive session.
data
sex age provider level verified using_time regist_type card_a_cnt card_b_cnt 0 0 24853 0 1 0 24713 1 24719 24712 1 1 25011 0 1 0 24743 7 24712 24712 2 1 24877 0 2 0 24744 7 24719 24725 3 0 24925 0 2 0 24715 1 24712 24712 4 1 24877 2 1 0 24706 3 24712 24712 5 0 24944 0 2 0 24720 1 24719 24712 6 0 24840 0 2 0 24727 1 24712 24712 7 0 24944 0 2 0 24709 1 24712 24712 8 0 24908 0 2 0 24730 1 24712 24712 9 1 24956 0 2 0 24741 7 24719 24719 10 0 24920 0 2 0 24741 7 24719 24719 11 0 24871 0 2 0 24722 2 24712 24712 12 1 24889 0 2 0 24725 1 24719 24719 13 1 24865 2 2 0 24710 3 24712 24712 14 0 24944 0 2 0 24727 1 24712 24712 15 1 24931 0 2 0 24733 7 24712 24719 16 0 24963 0 1 0 24721 2 24712 24712 17 0 24877 0 2 0 24727 1 24712 24712 18 0 24901 0 2 0 24733 7 24725 24719 19 0 24859 2 1 0 24713 3 24712 24712
# The 'provider' column is the class label; every other column is a feature.
y = data['provider']
# Features as a plain NumPy array with the label column removed.
X = data.drop('provider', axis=1).values
# Class distribution of the label before any resampling.
y.value_counts()
0 17
2 3
Name: provider, dtype: int64
# Oversample class 2 up to 15 rows while leaving class 0 at its 17 rows.
# k_neighbors=2: SMOTE needs k_neighbors + 1 samples in any class it
# oversamples, and class 2 has only 3 rows in this 20-row sample.
# random_state is fixed so the synthetic rows are identical between runs;
# without it every execution produces a different resampled set.
# NOTE(review): SMOTE interpolates every feature as if it were continuous.
# Columns such as sex / regist_type look categorical -- confirm, and consider
# imblearn's SMOTENC if they are.
sm = SMOTE(sampling_strategy={0: 17, 2: 15}, k_neighbors=2, random_state=0)
X_res, y_res = sm.fit_resample(X, y)
# Class distribution after resampling.
y_res.value_counts()
0 17
2 15
Name: provider, dtype: int64
# Bare expression: shows the resampled feature matrix returned by
# fit_resample when run in an interactive session.
X_res
sex age level verified using_time regist_type card_a_cnt card_b_cnt 0 0 24853 1 0 24713 1 24719 24712 1 1 25011 1 0 24743 7 24712 24712 2 1 24877 2 0 24744 7 24719 24725 3 0 24925 2 0 24715 1 24712 24712 4 1 24877 1 0 24706 3 24712 24712 5 0 24944 2 0 24720 1 24719 24712 6 0 24840 2 0 24727 1 24712 24712 7 0 24944 2 0 24709 1 24712 24712 8 0 24908 2 0 24730 1 24712 24712 9 1 24956 2 0 24741 7 24719 24719 10 0 24920 2 0 24741 7 24719 24719 11 0 24871 2 0 24722 2 24712 24712 12 1 24889 2 0 24725 1 24719 24719 13 1 24865 2 0 24710 3 24712 24712 14 0 24944 2 0 24727 1 24712 24712 15 1 24931 2 0 24733 7 24712 24719 16 0 24963 1 0 24721 2 24712 24712 17 0 24877 2 0 24727 1 24712 24712 18 0 24901 2 0 24733 7 24725 24719 19 0 24859 1 0 24713 3 24712 24712 20 0 24860 1 0 24712 3 24712 24712 21 0 24871 1 0 24708 3 24712 24712 22 0 24872 1 0 24707 3 24712 24712 23 1 24865 1 0 24709 3 24712 24712 24 0 24863 1 0 24710 3 24712 24712 25 0 24859 1 0 24712 3 24712 24712 26 1 24870 1 0 24708 3 24712 24712 27 1 24875 1 0 24706 3 24712 24712 28 1 24872 1 0 24707 3 24712 24712 29 0 24873 1 0 24707 3 24712 24712 30 0 24862 1 0 24711 3 24712 24712 31 0 24859 1 0 24712 3 24712 24712 32 0 24873 1 0 24707 3 24712 24712 33 1 24866 1 0 24709 3 24712 24712