感知机是一种较为简单的二分类模型,旨在学习一个能够将输入数据划分为 +1/-1 两类的线性分离超平面,因此感知机属于线性模型。
查看数据集
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
# Load the iris data set into a DataFrame and attach the class label column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target

# Replace the original feature names with shorter ones (no unit suffix).
df.columns = [
    'sepal length',
    'sepal width',
    'petal length',
    'petal width',
    'label',
]

# Show how many samples carry each label value.
print(df['label'].value_counts())

# Rows 0-49 and rows 50-99 are drawn as two separate groups, using sepal
# length on the x axis and sepal width on the y axis.
for first, last, tag in ((0, 50, 'one'), (50, 100, 'two')):
    rows = df[first:last]
    plt.scatter(rows['sepal length'], rows['sepal width'], label=tag)

plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
plt.show()
原始数据集的四个特征名为:sepal length (cm)、sepal width (cm)、petal length (cm)、petal width (cm)。
从上图可以发现:在四个特征维度中,仅使用两个维度(sepal length 和 sepal width)就可以使前 100 个样本的两类数据线性可分。
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
# Rebuild the iris DataFrame: the feature columns plus a trailing 'label' column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(label=iris.target)

# Keep the first 100 rows and three columns: the first two features and the
# label (last column).
data = np.array(df.iloc[:100, [0, 1, -1]])
print(data)

# Split into the feature matrix (all but the last column) and the label vector.
X = data[:, :-1]
y = data[:, -1]
print(type(X))

# Re-encode the labels for the perceptron: 1 stays +1, anything else becomes -1.
y = np.where(y == 1, 1, -1)
# The data is linearly separable and contains two classes.
# The model is a linear function of the inputs: f(x) = w . x + b
class Model:
    """Perceptron binary classifier trained by stochastic gradient descent.

    The decision function is ``f(x) = w . x + b``. A sample with label ``y``
    (expected to be +1 or -1) counts as misclassified when ``y * f(x) <= 0``.

    Attributes:
        w: Weight vector. ``None`` until the first call to ``fit``, which
            creates a vector of ones with one entry per feature column.
        b: Bias term; starts at 0.
        l_rate: Step size applied to every update.
    """

    def __init__(self, l_rate=0.1):
        """Create an untrained perceptron.

        Args:
            l_rate: Learning rate used for each weight/bias update.
        """
        # The weight vector is sized from the training data inside ``fit``
        # instead of from a module-level array, so the class works with any
        # number of features and does not depend on global state.
        self.w = None
        self.b = 0
        self.l_rate = l_rate

    def sign(self, x, w, b):
        """Return the raw linear activation ``x . w + b``.

        Despite its name this does not reduce the value to +1/-1; callers
        compare ``y * sign(...)`` against zero instead.
        """
        return np.dot(x, w) + b

    def fit(self, X_train, y_train, max_iter=None):
        """Train with the perceptron update rule until no sample is wrong.

        Each pass visits the samples in order; every misclassified sample
        immediately updates ``w`` and ``b``. Calling ``fit`` again continues
        from the current weights.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: 1-D array-like of labels, one per row of ``X_train``.
            max_iter: Optional upper bound on the number of passes over the
                data. ``None`` (the default) loops until a pass makes no
                update, which never terminates if the data is not linearly
                separable.

        Returns:
            The string ``'Perceptron Model!'``.

        Raises:
            ValueError: If ``X_train`` is not 2-D or the number of labels
                does not match the number of rows.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.ndim != 2:
            raise ValueError('X_train must be a 2-D array of shape (n_samples, n_features)')
        if len(X_train) != len(y_train):
            raise ValueError('X_train and y_train must contain the same number of samples')

        if self.w is None:
            self.w = np.ones(X_train.shape[1], dtype=np.float32)
            # Debug output of the initial weights.
            print('self.w', self.w)

        passes = 0
        while max_iter is None or passes < max_iter:
            wrong_count = 0
            for x_i, y_i in zip(X_train, y_train):
                if y_i * self.sign(x_i, self.w, self.b) <= 0:
                    # Move the hyperplane towards the misclassified sample.
                    self.w = self.w + self.l_rate * (y_i * x_i)
                    self.b = self.b + self.l_rate * y_i
                    wrong_count += 1
            if wrong_count == 0:
                # A full pass without any update: training has converged.
                break
            passes += 1
        return 'Perceptron Model!'

    def score(self, X_test=None, y_test=None):
        """Return the fraction of samples with ``y * f(x) > 0``.

        Args:
            X_test: 2-D array-like of samples, or ``None``.
            y_test: 1-D array-like of +1/-1 labels, or ``None``.

        Returns:
            The accuracy as a float, or ``None`` when no data is supplied.

        Raises:
            ValueError: If data is supplied before the model has been fitted.
        """
        if X_test is None or y_test is None:
            return None
        if self.w is None:
            raise ValueError('the model must be fitted before it can be scored')
        margins = np.asarray(y_test) * self.sign(np.asarray(X_test), self.w, self.b)
        # Samples lying exactly on the hyperplane count as wrong, matching
        # the ``<= 0`` misclassification test used during training.
        return float(np.mean(margins > 0))
# Train the perceptron on the two selected features.
perceptron = Model()
perceptron.fit(X, y)

# The decision boundary satisfies w[0]*x + w[1]*y + b = 0, which gives
# y = -(w[0]*x + b) / w[1]. Evaluate it at 10 evenly spaced x values in [4, 7].
x_points = np.linspace(4, 7, 10)
print('x_points=',x_points)
y_ = -(perceptron.w[0] * x_points + perceptron.b) / perceptron.w[1]
plt.plot(x_points, y_)

# Use the colour-free 'o' marker format: combining 'bo' with an explicit
# ``color=`` keyword defines the colour twice (and contradicts it for the
# orange series), which makes matplotlib emit a warning. The keyword already
# took precedence, so the rendered colours are unchanged.
plt.plot(data[:50, 0], data[:50, 1], 'o', color='blue', label='one')
plt.plot(data[50:100, 0], data[50:100, 1], 'o', color='orange', label='two')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
plt.show()