分别实现了svm,逻辑回归,ridge回归,gbdt和神经网络,代码如下:
import urllib
import matplotlib
import os
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import proj3d
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from sklearn.externals.joblib import Memory
from sklearn.datasets import load_svmlight_file
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.ensemble import GradientBoostingClassifier
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from sklearn.tree import DecisionTreeClassifier
import keras
from keras.models import Sequential
from keras.layers.core import Dense,Dropout,Activation
def download(outpath):
    """Download the fourclass_scale dataset into *outpath* if not already cached.

    Args:
        outpath: directory in which the file ``fourclass_scale`` is stored.
    """
    filename = os.path.join(outpath, "fourclass_scale")
    if not os.path.exists(filename):
        # Python 3: urlretrieve lives in urllib.request (urllib.urlretrieve
        # was removed after Python 2).
        import urllib.request
        urllib.request.urlretrieve(
            "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/fourclass_scale",
            filename)
def data_building():
    """Load the fourclass dataset and prepare plotting inputs.

    Returns:
        (x_axis, y_axis): 100x100 meshgrid spanning the feature ranges.
        (px1, px2): positive-class coordinates, shifted by (-0.5, +0.5).
        (nx1, nx2): negative-class coordinates, shifted by (+0.8, -0.8).
        (train_d, train_l): dense feature matrix and raw labels.
    """
    features, labels = load_svmlight_file('fourclass_scale')
    train_d = features.toarray()
    train_l = labels
    x1 = train_d[:, 0]
    x2 = train_d[:, 1]
    px1, px2 = [], []
    nx1, nx2 = [], []
    # enumerate replaces the original hand-maintained idx counter; the
    # per-class label lists (pl/nl) were never used and are dropped.
    for idx, label in enumerate(train_l):
        if label == 1:
            # The small shifts visually separate the classes in the 3D plot.
            px1.append(x1[idx] - 0.5)
            px2.append(x2[idx] + 0.5)
        else:
            nx1.append(x1[idx] + 0.8)
            nx2.append(x2[idx] - 0.8)
    x_axis, y_axis = np.meshgrid(np.linspace(x1.min(), x1.max(), 100),
                                 np.linspace(x2.min(), x2.max(), 100))
    return x_axis, y_axis, px1, px2, nx1, nx2, train_d, train_l
def paint(name, x_axis, y_axis, px1, px2, nx1, nx2, z):
    """Plot the predicted surface *z* plus the shifted data points; save <name>.png.

    Args:
        name: output file stem (figure is written to ``<name>.png``).
        x_axis, y_axis: meshgrid arrays from data_building().
        px1, px2 / nx1, nx2: positive / negative point coordinates.
        z: flat predictions over the meshgrid, reshaped to x_axis.shape.
    """
    fig = plt.figure()
    # Create the 3D axes exactly once. The original built Axes3D(fig) and
    # then immediately discarded it by rebinding ax = plt.subplot(...).
    ax = fig.add_subplot(projection='3d')
    # Points are drawn at z=0 (no third coordinate given).
    ax.scatter(px1, px2, c='r')
    ax.scatter(nx1, nx2, c='g')
    zz = z.reshape(x_axis.shape)  # hoisted: reshaped three times originally
    ax.plot_surface(x_axis, y_axis, zz, rstride=8, cstride=8, alpha=0.3)
    ax.contourf(x_axis, y_axis, zz, zdir='z', offset=-100, cmap=cm.coolwarm)
    ax.contourf(x_axis, y_axis, zz, levels=[0, max(z)], cmap=cm.hot)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    fig.savefig(name + ".png", format='png')
def svc(x_axis, y_axis, x, y):
    """Fit an SVM classifier on (x, y) and predict over the meshgrid."""
    classifier = svm.SVC()
    classifier.fit(x, y)
    grid_points = np.c_[x_axis.ravel(), y_axis.ravel()]
    return classifier.predict(grid_points)
def lr(x_axis, y_axis, x, y):
    """Fit a logistic-regression classifier on (x, y) and predict over the meshgrid."""
    classifier = LogisticRegression()
    classifier.fit(x, y)
    grid_points = np.c_[x_axis.ravel(), y_axis.ravel()]
    return classifier.predict(grid_points)
def ridge(x_axis, y_axis, x, y):
    """Fit a ridge regressor on (x, y) and predict (continuous values) over the meshgrid."""
    regressor = Ridge()
    regressor.fit(x, y)
    grid_points = np.c_[x_axis.ravel(), y_axis.ravel()]
    return regressor.predict(grid_points)
def dt(x_axis, y_axis, x, y):
    """Fit a classifier on (x, y) and predict over the meshgrid.

    NOTE(review): despite the name ``dt``, this uses GradientBoostingClassifier
    (the main block saves its plot as "gbdt"); the name is kept for callers.
    """
    booster = GradientBoostingClassifier()
    booster.fit(x, y)
    grid_points = np.c_[x_axis.ravel(), y_axis.ravel()]
    return booster.predict(grid_points)
def nn(x_axis, y_axis, x, y):
    """Train a small MLP on (x, y) and predict over the meshgrid.

    Architecture: 2 -> 20 (relu) -> 20 (relu) -> 1 (tanh), MSE loss.
    The tanh output presumably matches +1/-1 labels — verify against the
    dataset's label encoding.
    """
    model = Sequential()
    model.add(Dense(20, input_dim=2))
    model.add(Activation('relu'))
    model.add(Dense(20))
    model.add(Activation('relu'))
    model.add(Dense(1, activation='tanh'))
    model.compile(loss='mse',
                  optimizer='adam',
                  metrics=['accuracy'])
    # Keras 2 renamed the Keras-1 kwarg nb_epoch to epochs; nb_epoch raises
    # a TypeError on modern Keras.
    model.fit(x, y, batch_size=20, epochs=50, validation_split=0.2)
    return model.predict(np.c_[x_axis.ravel(), y_axis.ravel()], batch_size=20)
if __name__ == '__main__':
    download("/root")
    x_axis, y_axis, px1, px2, nx1, nx2, train_d, train_l = data_building()
    # Run every model and save its decision surface, in the same order
    # as before: svc, lr, ridge, gbdt, nn.
    for plot_name, model_fn in (("svc", svc),
                                ("lr", lr),
                                ("ridge", ridge),
                                ("gbdt", dt),
                                ("nn", nn)):
        z = model_fn(x_axis, y_axis, train_d, train_l)
        paint(plot_name, x_axis, y_axis, px1, px2, nx1, nx2, z)
各种分类器的效果图如下: