# The code is as follows:
import numpy as np
import pandas as pd
import jieba.analyse as jieba
# Load the full training table. low_memory=False tells pandas not to parse the
# file in internal chunks, which avoids mixed-dtype inference within a column.
data = pd.read_csv(
    filepath_or_buffer=r'/opt/dianxing/train_all.csv',
    low_memory=False,
)
def dataProcess(data: pd.DataFrame) -> pd.DataFrame:
    """Fill missing ``is_mix_service`` values with the column's most frequent value.

    The frame is modified in place and the same object is returned, so callers
    may use either the argument or the return value.

    Args:
        data: Frame containing an ``is_mix_service`` column.

    Returns:
        The same ``data`` object, with gaps in ``is_mix_service`` filled.
    """
    column = data['is_mix_service']
    modes = column.mode()
    # mode() ignores NaN, so it is empty when every value is missing; in that
    # case there is nothing to impute with and the column is left untouched.
    if not modes.empty:
        # When several values tie, mode() returns them sorted; taking the
        # first one keeps the `mode()[0]` semantics.
        data['is_mix_service'] = column.fillna(modes.iloc[0])
    return data
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DT

# Impute missing values on the loaded frame before building the model inputs.
Train_data = dataProcess(data)

# Every candidate column, kept for reference; only the subset in `feature`
# below is used for training. Column names are spelled exactly as given
# (including 'local_trafffic_month') — verify against the CSV header.
all_features = [
    'service_type', 'is_mix_service', 'online_time',
    '1_total_fee', '2_total_fee', '3_total_fee', '4_total_fee',
    'month_traffic', 'many_over_bill', 'contract_type', 'contract_time',
    'is_promise_low_consume', 'net_service', 'pay_times', 'pay_num',
    'last_month_traffic', 'local_trafffic_month', 'local_caller_time',
    'service1_caller_time', 'gender', 'age', 'complaint_level',
    'former_complaint_num', 'former_complaint_fee', 'current_service',
]

# Columns actually fed to the classifiers.
# NOTE(review): 'service_type' is both a feature here and the target `y`
# below, so the label leaks into the inputs — confirm the intended target
# (possibly 'current_service') and drop it from `feature`.
# NOTE(review): 'user_id' is an identifier; confirm it is numeric and
# meaningful as a feature before fitting.
feature = [
    'service_type', 'is_mix_service', 'month_traffic', 'pay_times',
    'pay_num', 'former_complaint_num', 'current_service', 'user_id',
]

# Read from the processed frame so the imputed values are what gets trained on.
x = Train_data[feature]
y = Train_data['service_type']

# Decision tree with default hyper-parameters.
clf = DT()
clf.fit(x, y)

# `clf` is then rebound to a support-vector classifier fitted on the same data.
clf = SVC()
clf.fit(x, y)