smoteenn算法_请问可以使用python多进程实现smote算法吗?

我之前的脚本:

import numpy as np

import csv

#import svm

import pandas as pd

import matplotlib.pyplot as plt

# Load the raw table, then derive plain NumPy views of it: column 0 is cast
# to int64 and every remaining column is cast to double.
# NOTE(review): column 0 is presumably the class label -- confirm against the CSV.
dataset = pd.read_csv(r'xxx.csv')
array = np.array(dataset)
y = array[:, 0].astype(np.int64)
x = array[:, 1:].astype(np.double)

from imblearn.combine import SMOTEENN
from imblearn.over_sampling import SMOTE

try:
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

def data_prepration(x, label_col="y_label_name"):
    """Split a DataFrame into train/test feature and label frames.

    Args:
        x: DataFrame holding the feature columns together with the label
            column.
        label_col: Name of the label column; every other column is treated
            as a feature. Defaults to the original hard-coded name.

    Returns:
        Tuple ``(x_features_train, x_features_test, x_labels_train,
        x_labels_test)`` produced by ``train_test_split`` with
        ``test_size=0.3`` and ``random_state=0``.
    """
    # DataFrame.ix was removed in pandas 1.0; .loc accepts the same boolean
    # column masks.
    x_features = x.loc[:, x.columns != label_col]
    x_labels = x.loc[:, x.columns == label_col]

    (x_features_train, x_features_test,
     x_labels_train, x_labels_test) = train_test_split(
        x_features, x_labels, test_size=0.3, random_state=0
    )

    print("length of training data")
    print(len(x_features_train))
    print("length of test data")
    print(len(x_features_test))

    return (x_features_train, x_features_test, x_labels_train, x_labels_test)

# Split the raw table into train/test partitions.
data_train_x, data_test_x, data_train_y, data_test_y = data_prepration(dataset)

# Oversample the training partition only. The sampler is bound to a name
# that does not shadow the standard-library ``os`` module.
oversampler = SMOTE(random_state=0)

# fit_resample is the current imbalanced-learn API; the older fit_sample
# alias was deprecated and later removed.
os_data_x, os_data_y = oversampler.fit_resample(
    data_train_x.values, data_train_y.values.ravel()
)

# Wrap the resampled arrays back into DataFrames, reusing the feature names.
columns = data_train_x.columns
os_data_x = pd.DataFrame(data=os_data_x, columns=columns)
print (len(os_data_x))
os_data_y = pd.DataFrame(data=os_data_y, columns=["credit_status"])

# Inspect the class balance of the oversampled data.
# NOTE(review): the original count lines treated ==1 as "normal" and ==0 as
# "fraud", while the proportion lines used the opposite mapping. Both are
# aligned here to 0 = normal, 1 = fraud -- confirm against the real label
# encoding of "credit_status".
oversampled_total = len(os_data_x)
normal_count = len(os_data_y[os_data_y["credit_status"] == 0])
fraud_count = len(os_data_y[os_data_y["credit_status"] == 1])

print("length of oversampled data is ", oversampled_total)
print("Number of normal transcation", normal_count)
print("Number of fraud transcation", fraud_count)
print("Proportion of Normal data in oversampled data is ", normal_count / oversampled_total)
print("Proportion of fraud data in oversampled data is ", fraud_count / oversampled_total)

# Re-attach the labels to the features (column-wise) for both partitions.
newtraindata = pd.concat([os_data_x, os_data_y], axis=1)
newtestdata = pd.concat([data_test_x, data_test_y], axis=1)

# Persist both partitions as CSV. The test file name was previously split
# across lines inside the string literal, which is a syntax error.
newtraindata.to_csv(r'train.csv', sep=',')
newtestdata.to_csv(r'test.csv', sep=',')

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值