李航《统计学习方法》第二章实践

最新推荐文章于 2023-03-09 22:01:42 发布

upupqlj

最新推荐文章于 2023-03-09 22:01:42 发布

阅读量224

点赞数

本文链接：https://blog.csdn.net/To_conquer_or_to_die/article/details/88751177

版权

自己实现感知机（随机梯度下降）

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from sklearn.datasets import  load_iris
import pandas as pd
# Series.value_counts() tallies how often each distinct value occurs; for a
# single column it can be called directly as df.column_name.value_counts().
# Load the data.
iris = load_iris()  # iris.data is a bare array; names come from iris.feature_names
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target
# Replace the column names with shorter ones.
df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
df.label.value_counts()  # number of rows per label (the return value is not used)
# Scatter rows 0-49 (legend '0') and rows 50-99 (legend '1') by sepal size.
plt.scatter(df['sepal length'].iloc[:50], df['sepal width'].iloc[:50], label='0')
plt.scatter(df['sepal length'].iloc[50:100], df['sepal width'].iloc[50:100], label='1')
plt.legend()
plt.show()
# Keep the first 100 rows and three columns: sepal length, sepal width, label.
data = df.iloc[:100, [0, 1, -1]].values
X, y = data[:, :-1], data[:, -1]
# Recode the labels as +1 / -1.
y = np.where(y == 1, 1, -1)
# Perceptron
# Assumes linearly separable data and a binary task with labels +1 / -1.
# The decision boundary is the hyperplane w . x + b = 0, which is a straight
# line when there are two features.
class Model:
    """Perceptron trained with per-sample (stochastic) gradient descent."""

    def __init__(self, n_features=2):
        """Create an untrained model.

        Args:
            n_features: initial length of the weight vector. The default of
                two matches the sepal-length / sepal-width example; fit()
                re-creates the vector when the training data has a different
                number of columns, so the model does not rely on any
                module-level array.
        """
        self.w = np.ones(n_features, dtype=np.float32)  # weights, start at 1
        self.b = 0  # bias
        self.l_rate = 0.1  # learning rate
        self.converged = False  # True once an epoch has no misclassification

    def sign(self, x, w, b):
        """Return the raw linear response ``np.dot(x, w) + b``.

        Despite the name, no sign function is applied; callers compare the
        returned value with zero themselves.
        """
        return np.dot(x, w) + b

    # Stochastic gradient descent
    def fit(self, X_train, y_train, max_iter=None):
        """Update ``w`` and ``b`` until one full pass has no misclassification.

        Args:
            X_train: 2-D array-like, one sample per row.
            y_train: labels (+1 / -1), same length as ``X_train``.
            max_iter: optional upper bound on the number of passes over the
                data. ``None`` (the default) keeps looping until every
                sample is classified correctly, which never terminates on
                data that is not linearly separable.

        Returns:
            The string ``'Perceptron Model'``. Whether training actually
            converged is recorded in ``self.converged``.

        Raises:
            ValueError: if the lengths differ or ``X_train`` is not 2-D.
        """
        X_train = np.asarray(X_train, dtype=np.float64)
        y_train = np.asarray(y_train)
        if len(X_train) != len(y_train):
            raise ValueError('X_train and y_train must have the same length')
        if len(X_train) > 0:
            if X_train.ndim != 2:
                raise ValueError('X_train must be a 2-D array of samples')
            n_features = X_train.shape[1]
            if self.w.shape != (n_features,):
                # Size the parameters from the data actually being fitted.
                self.w = np.ones(n_features, dtype=np.float32)
                self.b = 0

        self.converged = False
        epoch = 0
        while max_iter is None or epoch < max_iter:
            wrong_count = 0
            for x, y in zip(X_train, y_train):
                # A response of exactly 0 also counts as misclassified.
                if y * self.sign(x, self.w, self.b) <= 0:
                    self.w = self.w + self.l_rate * (y * x)
                    self.b = self.b + self.l_rate * y
                    wrong_count += 1
            epoch += 1
            if wrong_count == 0:
                self.converged = True
                break
        return 'Perceptron Model'

    def score(self, X_test=None, y_test=None):
        """Return the fraction of samples with ``y * (w . x + b) > 0``.

        This is the same correctness criterion fit() uses. Called without
        arguments it returns ``None``.

        Raises:
            ValueError: if the lengths differ or no samples are given.
        """
        if X_test is None or y_test is None:
            return None
        X_test = np.asarray(X_test, dtype=np.float64)
        y_test = np.asarray(y_test)
        if len(X_test) != len(y_test):
            raise ValueError('X_test and y_test must have the same length')
        if len(X_test) == 0:
            raise ValueError('cannot score an empty data set')
        margins = y_test * self.sign(X_test, self.w, self.b)
        return float(np.mean(margins > 0))
# Train the hand-written perceptron on the two sepal features.
perceptron = Model()
perceptron.fit(X, y)

x_points = np.linspace(4, 7, 10)
print(perceptron.w)
# Decision boundary: w[0]*x0 + w[1]*x1 + b = 0, solved for x1.
y_ = -(perceptron.w[0] * x_points + perceptron.b) / perceptron.w[1]
plt.plot(x_points, y_)

# Marker-only format string: "bo" would also set the colour to blue, which
# duplicates (or contradicts) the explicit color= keyword and makes
# matplotlib emit a "redundantly defined" warning.
plt.plot(data[:50, 0], data[:50, 1], "o", color='blue', label='0')
plt.plot(data[50:100, 0], data[50:100, 1], "o", color="orange", label='1')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
plt.show()

sklearn 调用感知机

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from sklearn.datasets import  load_iris
import pandas as pd
from sklearn.linear_model import Perceptron
# Series.value_counts() tallies how often each distinct value occurs; for a
# single column it can be called directly as df.column_name.value_counts().
# Load the data.
iris = load_iris()  # iris.data is a bare array; names come from iris.feature_names
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target
# Replace the column names with shorter ones.
df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
df.label.value_counts()  # number of rows per label (the return value is not used)
# Scatter rows 0-49 (legend '0') and rows 50-99 (legend '1') by sepal size.
plt.scatter(df['sepal length'].iloc[:50], df['sepal width'].iloc[:50], label='0')
plt.scatter(df['sepal length'].iloc[50:100], df['sepal width'].iloc[50:100], label='1')
plt.legend()
plt.show()
# Keep the first 100 rows and three columns: sepal length, sepal width, label.
data = df.iloc[:100, [0, 1, -1]].values
X, y = data[:, :-1], data[:, -1]
# Recode the labels as +1 / -1.
y = np.where(y == 1, 1, -1)

# fit_intercept defaults to True. With fit_intercept=False no intercept is
# estimated (scikit-learn then assumes the data is already centered), so
# clf.intercept_ stays 0 and the fitted line passes through the origin.
# max_iter replaces the removed n_iter argument; tol=None turns off the
# tolerance-based early stop so that all 1000 passes are run.
clf = Perceptron(fit_intercept=False, max_iter=1000, tol=None, shuffle=False)
clf.fit(X, y)
print(clf.coef_)  # clf.coef_[0] is the weight vector w
print(clf.intercept_)  # the intercept, i.e. b
x_points = np.arange(4, 8)
# Decision boundary: w[0]*x0 + w[1]*x1 + b = 0, solved for x1.
y_ = -(clf.coef_[0][0] * x_points + clf.intercept_) / clf.coef_[0][1]
plt.plot(x_points, y_)

# Marker-only format string: "bo" would also set the colour to blue, which
# duplicates (or contradicts) the explicit color= keyword and makes
# matplotlib emit a "redundantly defined" warning.
plt.plot(data[:50, 0], data[:50, 1], "o", color='blue', label='0')
plt.plot(data[50:100, 0], data[50:100, 1], "o", color="orange", label='1')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
plt.show()