import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Perceptron Learning Algorithm (PLA) experiment.
#
# Loads the data set, prepends a constant bias column "x0", then repeats the
# PLA from a zero weight vector N_EXPERIMENTS times. Each update corrects one
# misclassified row picked uniformly at random. The per-run counter `times`
# (starts at 1, +1 per update) is collected in `timels`, plotted and averaged.
names = ["x1", "x2", "x3", "x4", "y"]
df = pd.read_csv("~/Documents/PLA.csv", names=names)
df.insert(0, "x0", 1)  # constant feature so that w["x0"] plays the role of the bias

N_EXPERIMENTS = 100
# Safety cap on `times`; keep in sync with the literal in the message printed below.
MAX_TIMES = 50000

# Loop-invariant pieces, computed once instead of on every weight update.
feature_cols = df.columns[:-1]  # every column except the trailing label "y"
pos = df[df["y"] == +1]
neg = df[df["y"] == -1]
pos_x = pos.iloc[:, :-1]
neg_x = neg.iloc[:, :-1]


def _misclassified(pos, pos_x, neg, neg_x, w):
    """Return the rows that weight vector `w` gets wrong.

    A positive row is wrong when its score is <= 0, a negative row when its
    score is > 0. Positives come first in the result, then negatives, so a
    positional index into the result selects the same row as a concatenation
    of the two filtered frames would.
    """
    mis_p = pos[np.dot(pos_x, w) <= 0]
    mis_n = neg[np.dot(neg_x, w) > 0]
    return pd.concat([mis_p, mis_n], axis=0)


timels = []
for i in range(N_EXPERIMENTS):
    w = pd.Series(0, index=feature_cols)  # start every run from the zero vector
    times = 1
    mis = _misclassified(pos, pos_x, neg, neg_x, w)
    length = mis.shape[0]
    while length > 0:
        # Pick one misclassified row at random and move w towards classifying it correctly.
        mis_point = mis.iloc[np.random.randint(length), :]
        w = w + mis_point["y"] * mis_point.iloc[:-1]
        mis = _misclassified(pos, pos_x, neg, neg_x, w)
        length = mis.shape[0]
        times += 1
        if times > MAX_TIMES:
            # Give up on this run; the capped counter is still recorded below.
            print("over 50000 times loops")
            break
    timels.append(times)

ts = pd.Series(timels)
ts.plot()
print(ts.mean())
运行结果:100 次实验的平均次数(`ts.mean()` 的输出)为 42.16 次。
另外发现一个有意思的现象:同样的点积运算,用 `np.dot` 可以正常执行,而用 `Series.dot` 会报错:
# Side observation: the same product behaves differently in NumPy and pandas.
# Here w has the default integer index (0..4), unlike the labelled weight vector.
w = pd.Series([0, 0, 0, 0, 0])
np.dot(df[df["y"]==+1].iloc[:,0:df.shape[1]-1], w) # works
# NOTE(review): presumably fails because Series.dot aligns w's index with the
# frame's row index, and w (length 5) is on the left of an (n, 5) frame —
# confirm against the pandas Series.dot documentation.
w.dot(df[df["y"]==+1].iloc[:,0:df.shape[1]-1]) # raises an error