2升4分类模型

最新推荐文章于 2023-08-04 12:55:53 发布

破晓时刻

最新推荐文章于 2023-08-04 12:55:53 发布

阅读量270

点赞数

分类专栏：分析与建模

本文链接：https://blog.csdn.net/qq_33217634/article/details/88086351

版权

分析与建模专栏收录该内容

2 篇文章 0 订阅

订阅专栏

2升4数据建模 离散化特征


# In[1]:

"""
导入数据
"""
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
# Switch into the project folder first: the Excel input below and every
# CSV / pickle file written later in this script use bare relative names.
PROJECT_DIR = 'K:\\项目\\2升4\\'
os.chdir(PROJECT_DIR)

# Raw input table; a copy of it is renamed and cleaned further down.
SOURCE_WORKBOOK = '5月拍照持2G终端用户.xlsx'
data = pd.read_excel(SOURCE_WORKBOOK)



# In[4]:

# Work on a copy so the frame loaded from Excel stays untouched, then replace
# its headers with the short names used by the rest of the script.
# The assignment only succeeds if the sheet has exactly these 12 columns.
renamed_columns = [
    'numbers',
    'Network Time',
    'Disbursements',
    'start',
    'cost name',
    'names',
    'ID',
    'age',
    'adress',
    'nodeb',
    '3/4G net',
    '2 to 4',
]
df = data.copy()
df.columns = renamed_columns


# In[5]:

# Keep only rows with a usable age: drop the placeholder string '未知'
# ("unknown") and the value 0.
age_is_known = df['age'] != '未知'
age_is_nonzero = df['age'] != 0
df = df[age_is_known & age_is_nonzero]


# In[6]:

# Categorical feature columns to be one-hot encoded.
# The target column '2 to 4' is deliberately NOT included: encoding it into
# the 'cs id' feature block would leak the label into the model inputs.
X = df[['cost name', '3/4G net']].values


# In[7]:

# NOTE(review): LabelEncoder / OneHotEncoder are not imported in the visible
# part of this script; presumably they come from sklearn.preprocessing
# (notebook cells In[2]/In[3] are missing) -- confirm.

# Map each categorical column to integer codes. fit_transform refits the
# encoder on every call, so one encoder object can be reused per column.
ZC_label = LabelEncoder()
for col in range(X.shape[1]):
    X[:, col] = ZC_label.fit_transform(X[:, col])

# Encode every column. The former `categorical_features='all'` argument was
# the default behaviour and has been removed from newer scikit-learn releases,
# so it is omitted here.
one_hot = OneHotEncoder()
data = one_hot.fit_transform(X).toarray()

# All indicator columns share the label 'cs id' so that a later
# `frame['cs id']` / `frame[[..., 'cs id']]` selects the whole one-hot block.
# `shape[1]` (instead of `len(data[0])`) also works when there are zero rows.
data = pd.DataFrame(data, columns=['cs id'] * data.shape[1])


# In[8]:

# Non-encoded columns carried forward: identifier, numeric features, target.
passthrough_columns = ['numbers', 'Network Time', 'Disbursements', 'age', '2 to 4']
df1 = df.loc[:, passthrough_columns]


# In[9]:

# Join the one-hot block with the pass-through columns side by side.
# `data` was built from a bare array and therefore has a fresh 0..n-1 index,
# while `df1` still carries the row labels that survived the age filtering.
# concat(axis=1) aligns on index labels, so without re-labelling `data` the
# rows would be paired incorrectly and NaN-filled rows would appear.
# Both frames derive from `df` row-for-row, so positional re-labelling is safe.
df2 = pd.concat([data.set_index(df1.index), df1], axis=1)


# In[10]:

# Columns handed to the split. 'numbers' is kept here so the test rows can be
# exported with it; it is dropped again before training. 'cs id' selects every
# one-hot column, because they all share that label.
features = ['numbers', 'Network Time', 'Disbursements', 'age', 'cs id']


# In[11]:

# Target labels as a NumPy array. The attribute is `.values`; `.value` does
# not exist on a Series and raised AttributeError.
target = df2['2 to 4'].values

# Hold out 30% of the rows for evaluation.
# NOTE(review): train_test_split is not imported in the visible part of this
# script; presumably from sklearn.model_selection -- confirm.
x_train, x_test, y_train, y_test = train_test_split(
    df2[features], target, test_size=0.3
)


# In[12]:

# Snapshot of the held-out rows while they still include the 'numbers' column.
x_test.to_csv("x_test1.csv")


# In[13]:

# Columns actually fed to the model ('numbers' is left out from here on).
model_columns = ['Network Time', 'Disbursements', 'age', 'cs id']
x_train = x_train[model_columns]


# In[14]:

# Same column subset for the held-out rows, then a second snapshot of them.
x_test = x_test[model_columns]
x_test.to_csv("x_test2.csv")


# In[15]:
# Standardise the features.
# NOTE(review): StandardScaler is not imported in the visible part of this
# script; presumably from sklearn.preprocessing -- confirm.
scaler = StandardScaler()

# Learn mean / variance from the training rows only ...
x_train = scaler.fit_transform(x_train)

# ... and apply those same statistics to the test rows. Calling fit_transform
# here would refit the scaler on the test set, so train and test would be
# scaled differently and test-set information would influence preprocessing.
x_test = scaler.transform(x_test)


# In[16]:

# Build the model: a multi-layer perceptron with hidden layers of 100 and 50
# units, allowed up to 500 training iterations.
mlp_settings = {'hidden_layer_sizes': (100, 50), 'max_iter': 500}
mlp = MLPClassifier(**mlp_settings)


# In[17]:

# Train on the scaled training rows.
mlp.fit(x_train, y_train)


# In[18]:

# Predict labels for the held-out rows.
predictions = mlp.predict(x_test)


# In[19]:

# Print the classification report first, then the confusion matrix.
for summarize in (classification_report, confusion_matrix):
    print(summarize(y_test, predictions))


# In[20]:

# Store the matrix under its own name. Assigning the result back to
# `confusion_matrix` would shadow the function, so any later call to it
# (e.g. re-running the report cell) would fail with "ndarray is not callable".
cm_values = confusion_matrix(y_test, predictions)


# In[28]:


# Draw the matrix as an annotated heatmap with integer cell labels.
df_cm = pd.DataFrame(cm_values)
sns.heatmap(df_cm, annot=True, fmt='d', cmap='Blues', square=True)
plt.show()


# In[59]:

# Wrap the predicted labels in a single-column frame ...
my_prediction = pd.DataFrame(data=predictions)


# In[61]:

# ... and write it out (row index included) to the working directory.
prediction_file = "my_prediction.csv"
my_prediction.to_csv(prediction_file)


# In[62]:

# Save the trained model to disk.
import pickle

with open('mlp.pickle', 'wb') as model_out:
    pickle.dump(mlp, model_out)


# In[262]:

# Load the model back from disk.
# NOTE: unpickling executes arbitrary code from the file; only load pickle
# files that this script (or another trusted source) produced.
with open('mlp.pickle', 'rb') as model_in:
    mlp = pickle.load(model_in)

# Run the restored model on the held-out rows (the result is not stored).
mlp.predict(x_test)