1、单变量线性回归
代价:
J = 1.0/(2*m)*np.sum(np.square(h-y))
梯度:
deltatheta = (1.0/m)*np.dot(X.T,h-y)
精度(决定系数 R²,score = 1 - u/v):
u = np.sum(np.square(h-y))
v = np.sum(np.square(y-np.mean(y)))
画图:
min_x,max_x = np.min(X[:,1]),np.max(X[:,1])
min_x_y,max_x_y = theta[0]+theta[1]*min_x,theta[0]+theta[1]*max_x
plt.plot([min_x,max_x],[min_x_y,max_x_y])
2、多变量线性回归
缩放:
X -= np.mean(X,axis=0)
X /= np.std(X,axis=0,ddof=1)
画图:
plt.scatter(test_y,test_y,label='真实值')
plt.scatter(test_y,test_h,label='预测值')
3、逻辑回归(正则化)
逻辑函数图:
a = np.arange(-10,11)
b = g(a)
模型:
z = np.dot(x,theta)
h = g(z)
代价:
J = -(1/m)*np.sum(y*np.log(h)+(1-y)*np.log(1-h))+R
梯度:
h = model(x,theta)
theta_r = theta.copy()
theta_r[0] = 0
R = lamda/(2*m)*np.sum(np.square(theta_r))
J_history.append(costFunc(h,y,R))
deltaTheta = 1.0/m*(np.dot(x.T,h-y)+lamda*theta_r)
画图:
plt.scatter(x[ y[:,0]==0 ,1],x[ y[:,0]==0 ,2],label='负样本')
plt.scatter(x[ y[:,0]==1 ,1],x[ y[:,0]==1 ,2],label='正样本')
min_x1,max_x1 = np.min(x[:,1]),np.max(x[:,1])
min_x1_x2,max_x1_x2 = -(theta[0]+theta[1]*min_x1)/theta[2],-(theta[0]+theta[1]*max_x1)/theta[2]
plt.plot([min_x1,max_x1],[min_x1_x2,max_x1_x2],label='分界线')
调库:(不用加初始化)
from sklearn.linear_model import LogisticRegression
模型:
LR = LogisticRegression(max_iter=1500)
4、神经网络
多分类:
特征缩放:
min_X,max_X = np.min(X),np.max(X)
X = (X-min_X)/(max_X-min_X)
独热编码:
from sklearn.preprocessing import OneHotEncoder
oh = OneHotEncoder()
train_y = oh.fit_transform(train_y).toarray()
test_y = oh.transform(test_y).toarray() #测试集只做transform,沿用训练集拟合出的编码
代价:
J = -1.0/m*np.sum(y*np.log(h)+(1-y)*np.log(1-h))
准确率:
if np.argmax(h[i]) == np.argmax(y[i]):
混淆分类:
train_y = np.argmax(train_y,axis=1)
train_y_pre = np.argmax(train_h,axis=1)
调库:(不用加初始化)
network_mlp
from sklearn.neural_network import MLPClassifier
参数:(正则化0.1)
mlp = MLPClassifier(hidden_layer_sizes=(400,100),alpha=0.1,max_iter=300)
5、支持向量机
调库:
from sklearn.svm import SVC
模型:
model = SVC(C=1,kernel='linear')
二分类:
minx1,maxx1 = min(X[:,0]),max(X[:,0])
minx2,maxx2 = min(X[:,1]),max(X[:,1])
#把画布分成200*200的网格
x1,x2 = np.mgrid[minx1:maxx1:200j,minx2:maxx2:200j]
#计算每个网格点到超平面的带符号距离(决策函数值)
x1x2 = np.c_[x1.ravel(),x2.ravel()] #(40000, 2)
z = model.decision_function(x1x2) #(40000,)
z = z.reshape(x1.shape)
#画训练集的样本图
plt.scatter(X[:,0],X[:,1],c=y,cmap=plt.cm.Paired,edgecolors='k',zorder=5)
#画测试集样本图
plt.scatter(test_X[:,0],test_X[:,1],s=100,edgecolors='k',facecolor='none',zorder=10)
#画等值面
plt.contourf(x1,x2,z>=0,cmap=plt.cm.Paired)
#画等值线
plt.contour(x1,x2,z,levels=[-1,0,1],linestyles=['--','-','--'],colors=['r','g','b'])
三分类:
# (3)调用SVM库函数,使用特征X2和X4两个特征数据实现分类。(共22分)
train_X_new,test_X_new = train_X[:,[1,3]],test_X[:,[1,3]]
# -创建模型及设置参数
svc = SVC(C=1,kernel='linear')
svc.fit(train_X_new,train_y)
# -以X2为横坐标、X4为纵坐标,画出测试集的分类图(等高线) (8分)
# 注意:模型是用X2、X4两个特征训练的,散点和网格坐标必须取自同样的两个特征
plt.scatter(test_X_new[test_y==0,0],test_X_new[test_y==0,1],label='0')
plt.scatter(test_X_new[test_y==1,0],test_X_new[test_y==1,1],label='1')
plt.scatter(test_X_new[test_y==2,0],test_X_new[test_y==2,1],label='2')
X2_min,X2_max = np.min(test_X_new[:,0]),np.max(test_X_new[:,0])
X4_min,X4_max = np.min(test_X_new[:,1]),np.max(test_X_new[:,1])
X2,X4 = np.mgrid[X2_min:X2_max:200j,X4_min:X4_max:200j]
X2X4 = np.c_[X2.ravel(),X4.ravel()]
z = svc.predict(X2X4)
z = z.reshape(X2.shape)
plt.contourf(X2,X4,z,cmap=plt.cm.hot,zorder=-1)
6、聚类(KMeans)
调库
from sklearn.cluster import KMeans
肘部法则
k = np.arange(1,11)
inertia = []
for i in k:
    model = KMeans(n_clusters=i)
    model.fit(X)
    inertia.append(model.inertia_) #所有样本与其最近中心距离的平方和
    plt.annotate(str(i), (i,model.inertia_)) #第1个参数:加的注释的内容,第2个参数:加注释的位置的坐标
plt.plot(k, inertia)
plt.show()
画图
k = 4
model1 = KMeans(n_clusters=k)
model1.fit(X)
Ci = model1.predict(X) #预测数据集X中每个样本所属的聚类中心索引
muk = model1.cluster_centers_ #聚类中心坐标: shape(k, n_features)
#样本散点分布图
plt.scatter(X[:,0],X[:,1],c=Ci,cmap=plt.cm.Paired)
#画聚类中心
plt.scatter(muk[:,0],muk[:,1],marker='^')
for i in range(k):
    plt.annotate('中心'+str(i+1),(muk[i,0],muk[i,1]),size=20)
plt.show()
7、降维(PCA)
调库
from sklearn.decomposition import PCA
模型:
pca = PCA(n_components=2) #代表降维后维度为2
X_new = pca.fit_transform(X)
输出
print('特征向量',pca.components_)
print('各主成分的方差(特征值)',pca.explained_variance_)
print('各主成分方差所占比例',pca.explained_variance_ratio_)
#画图
plt.scatter(X_new[y==0,0],X_new[y==0,1],c='r',label='1')
plt.scatter(X_new[y==1,0],X_new[y==1,1],c='b',label='2')
8、决策树
回归树:
调库:
from sklearn.tree import DecisionTreeRegressor
模型:
r_tree5 = DecisionTreeRegressor(max_depth=5)
r_tree5.fit(X,y)
画图:
plt.scatter(X,y,label='真实值')
plt.plot(X,h5,c='r',label='预测值')
分类树:
调库:
from sklearn.tree import DecisionTreeClassifier
取两个特征:
train_X_new,test_X_new = train_X[:,[1,3]],test_X[:,[1,3]]
dtc2 = DecisionTreeClassifier(max_depth=2)
画图:
# -以X2为横坐标、X4为纵坐标,画出测试集的分类图 (8分)
plt.scatter(test_X_new[test_y==0,0],test_X_new[test_y==0,1],label='0')
plt.scatter(test_X_new[test_y==1,0],test_X_new[test_y==1,1],label='1')
plt.scatter(test_X_new[test_y==2,0],test_X_new[test_y==2,1],label='2')