import numpy as np
import random
def read_data(path):
    """Load a tab-separated data file into a design matrix and a label list.

    Every non-blank line is expected to look like ``"f1 f2 ... fd<TAB>label"``:
    whitespace-separated float features, one tab, then an integer label.
    A constant ``1`` is prepended to each feature row so that the first
    weight of a linear model acts as the bias term.

    Args:
        path: Path of the data file to read.

    Returns:
        A tuple ``(x_matrix, y_matrix)``. ``x_matrix`` is a NumPy array with
        one row per sample (bias column first); ``y_matrix`` is a plain list
        of integer labels in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has no tab-separated label field.
        ValueError: If a feature or the label is not a valid number.
    """
    x_rows = []
    y_matrix = []
    # The context manager closes the handle even when a malformed line
    # raises part-way through the file.
    with open(path) as data_file:
        for line in data_file:
            record = line.strip()
            if not record:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = record.split('\t')
            row = [1]
            row.extend(float(token) for token in fields[0].split())
            y_matrix.append(int(fields[1]))
            x_rows.append(row)
    x_matrix = np.array(x_rows)
    return x_matrix, y_matrix
def sign(x, w):
    """Classify one sample with a linear model, treating a score of 0 as -1.

    Args:
        x: Feature vector of the sample (bias term included).
        w: Weight vector. May be a flat vector of the same length as ``x``
            or a single-column ``(d, 1)`` array / ``np.matrix``.

    Returns:
        ``-1`` if the inner product of ``x`` and ``w`` is ``<= 0``,
        otherwise ``1``.
    """
    # np.dot yields a 0-d scalar for flat weights, a length-1 array for a
    # column vector and a 1x1 matrix for np.matrix weights; flattening
    # first lets a single index handle all three.
    score = np.asarray(np.dot(x, w)).ravel()[0]
    return -1 if score <= 0 else 1
def naive_PLA(x_matrix, y_matrix):
    """Run the perceptron learning algorithm in file order and count updates.

    Starting from all-zero weights, the samples are visited cyclically in
    their given order. Every misclassified sample (a score of 0 counts as
    a prediction of -1) triggers the update ``w += y * x``. The run stops
    once as many consecutive samples as the dataset holds have been
    classified correctly.

    Args:
        x_matrix: Sequence of feature rows (bias term included).
        y_matrix: Sequence of labels (+1 / -1), aligned with ``x_matrix``.

    Returns:
        The number of weight updates performed; ``0`` for an empty dataset.

    Note:
        There is no iteration cap: on data that is not linearly separable
        this function never returns.
    """
    samples = np.asarray(x_matrix, dtype=float)
    n_samples = len(samples)
    if n_samples == 0:
        return 0  # nothing can be misclassified

    w = np.zeros(len(samples[0]))
    updates = 0
    streak = 0  # consecutive correctly classified samples
    while True:
        for i in range(n_samples):
            predicted = -1 if np.dot(samples[i], w) <= 0 else 1
            if predicted == y_matrix[i]:
                streak += 1
                if streak == n_samples:
                    # Every sample has passed since the last correction.
                    return updates
            else:
                w += samples[i] * y_matrix[i]
                updates += 1
                streak = 0
def random_PLA(x_matrix, y_matrix):
    """Run the perceptron learning algorithm in a random fixed visiting order.

    A random permutation of the sample indices is drawn once and then
    cycled through repeatedly. Starting from all-zero weights, every
    misclassified sample (a score of 0 counts as a prediction of -1)
    triggers the update ``w += y * x``. The run stops once as many
    consecutive samples as the dataset holds have been classified correctly.

    Args:
        x_matrix: Sequence of feature rows (bias term included).
        y_matrix: Sequence of labels (+1 / -1), aligned with ``x_matrix``.

    Returns:
        The number of weight updates performed; ``0`` for an empty dataset.

    Note:
        There is no iteration cap: on data that is not linearly separable
        this function never returns.
    """
    samples = np.asarray(x_matrix, dtype=float)
    n_samples = len(samples)
    if n_samples == 0:
        return 0  # nothing can be misclassified

    # One permutation per run; the same order is reused on every cycle.
    visit_order = random.sample(range(n_samples), n_samples)
    w = np.zeros(len(samples[0]))
    updates = 0
    streak = 0  # consecutive correctly classified samples
    while True:
        for i in visit_order:
            predicted = -1 if np.dot(samples[i], w) <= 0 else 1
            if predicted == y_matrix[i]:
                streak += 1
                if streak == n_samples:
                    # Every sample has passed since the last correction.
                    return updates
            else:
                w += samples[i] * y_matrix[i]
                updates += 1
                streak = 0
def weighted_random_PLA(x_matrix, y_matrix, eta):
    """Run randomly ordered PLA with a learning rate and count the updates.

    A random permutation of the sample indices is drawn once and then
    cycled through repeatedly. Starting from all-zero weights, every
    misclassified sample (a score of 0 counts as a prediction of -1)
    triggers the update ``w += eta * y * x``. The run stops once as many
    consecutive samples as the dataset holds have been classified correctly.

    Args:
        x_matrix: Sequence of feature rows (bias term included).
        y_matrix: Sequence of labels (+1 / -1), aligned with ``x_matrix``.
        eta: Step size applied to every correction.

    Returns:
        The number of weight updates performed; ``0`` for an empty dataset.

    Note:
        There is no iteration cap: the function never returns on data that
        is not linearly separable, nor when ``eta`` is 0 and at least one
        sample has label +1 (the weights then never leave zero).
    """
    samples = np.asarray(x_matrix, dtype=float)
    n_samples = len(samples)
    if n_samples == 0:
        return 0  # nothing can be misclassified

    # One permutation per run; the same order is reused on every cycle.
    visit_order = random.sample(range(n_samples), n_samples)
    w = np.zeros(len(samples[0]))
    updates = 0
    streak = 0  # consecutive correctly classified samples
    while True:
        for i in visit_order:
            predicted = -1 if np.dot(samples[i], w) <= 0 else 1
            if predicted == y_matrix[i]:
                streak += 1
                if streak == n_samples:
                    # Every sample has passed since the last correction.
                    return updates
            else:
                w += samples[i] * y_matrix[i] * eta
                updates += 1
                streak = 0
if __name__ == '__main__':
    # Script entry point: load the training set and report the average
    # number of PLA updates for the selected homework question.
    x_matrix, y_matrix = read_data('ntumlone_hw1_hw1_15_train.dat')

    # Question 15: updates needed when visiting samples in file order.
    # count = naive_PLA(x_matrix, y_matrix)
    # print(count)

    # Question 16: average updates over 2000 random visiting orders.
    # total_updates = 0
    # for _ in range(2000):
    #     total_updates += random_PLA(x_matrix, y_matrix)
    # print()
    # print(total_updates / 2000)

    # Question 17: as Question 16, but with learning rate 0.5.
    # The accumulator is deliberately not called ``sum`` so the builtin
    # stays usable at module scope.
    repeats = 2000
    total_updates = 0
    for _ in range(repeats):
        total_updates += weighted_random_PLA(x_matrix, y_matrix, 0.5)
    print(total_updates / repeats)
import numpy as np
import random
import copy
def read_data(path):
    """Load a tab-separated data file into a design matrix and a label list.

    Every non-blank line is expected to look like ``"f1 f2 ... fd<TAB>label"``:
    whitespace-separated float features, one tab, then an integer label.
    A constant ``1`` is prepended to each feature row so that the first
    weight of a linear model acts as the bias term.

    Args:
        path: Path of the data file to read.

    Returns:
        A tuple ``(x_matrix, y_matrix)``. ``x_matrix`` is a NumPy array with
        one row per sample (bias column first); ``y_matrix`` is a plain list
        of integer labels in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has no tab-separated label field.
        ValueError: If a feature or the label is not a valid number.
    """
    x_rows = []
    y_matrix = []
    # The context manager closes the handle even when a malformed line
    # raises part-way through the file.
    with open(path) as data_file:
        for line in data_file:
            record = line.strip()
            if not record:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = record.split('\t')
            row = [1]
            row.extend(float(token) for token in fields[0].split())
            y_matrix.append(int(fields[1]))
            x_rows.append(row)
    x_matrix = np.array(x_rows)
    return x_matrix, y_matrix
def sign(x, w):
    """Classify one sample with a linear model, treating a score of 0 as -1.

    Args:
        x: Feature vector of the sample (bias term included).
        w: Weight vector. May be a flat vector of the same length as ``x``
            or a single-column ``(d, 1)`` array / ``np.matrix``.

    Returns:
        ``-1`` if the inner product of ``x`` and ``w`` is ``<= 0``,
        otherwise ``1``.
    """
    # np.dot yields a 0-d scalar for flat weights, a length-1 array for a
    # column vector and a 1x1 matrix for np.matrix weights; flattening
    # first lets a single index handle all three.
    score = np.asarray(np.dot(x, w)).ravel()[0]
    return -1 if score <= 0 else 1
def test(w,x_matrix,y_matrix,sum):
    """Count the misclassified samples among the first ``sum`` rows.

    Args:
        w: Weight vector handed to ``sign`` for every sample.
        x_matrix: Indexable collection of feature rows.
        y_matrix: Indexable collection of labels, aligned with ``x_matrix``.
        sum: Number of leading samples to evaluate.

    Returns:
        How many of those samples get a prediction from ``sign`` that
        differs from their label.
    """
    # ``sum`` is a parameter here and hides the builtin, so the mistakes
    # are tallied via the length of a list instead.
    wrong = [
        index
        for index in range(sum)
        if sign(x_matrix[index], w) != y_matrix[index]
    ]
    return len(wrong)
def random_pocket(x_matrix, y_matrix, updates):
    """Run the pocket algorithm for a bounded number of PLA corrections.

    A random permutation of the sample indices is drawn once and cycled
    through. Starting from all-zero weights, every misclassified sample
    (a score of 0 counts as a prediction of -1) triggers ``w = w + y * x``.
    After each correction the new weights are scored on the whole training
    set, and the weights with the fewest mistakes so far are kept "in the
    pocket".

    The run ends after ``updates`` corrections, or earlier once a full pass
    over the data needs no correction (the weights already fit every
    sample, so no further update could ever happen).

    Args:
        x_matrix: Sequence of feature rows (bias term included).
        y_matrix: Sequence of labels (+1 / -1), aligned with ``x_matrix``.
        updates: Maximum number of weight corrections to perform.

    Returns:
        The best weights seen, as a ``(d, 1)`` NumPy array. All zeros when
        no correction was performed.

    Raises:
        IndexError: If ``x_matrix`` is empty.
    """
    samples = np.asarray(x_matrix, dtype=float)
    n_samples = len(samples)
    n_features = len(samples[0])
    labels = np.asarray(y_matrix)[:n_samples]
    visit_order = random.sample(range(n_samples), n_samples)

    w = np.zeros((n_features, 1))
    bestW = w
    # One more than the worst possible score, so the first corrected weight
    # vector always replaces the all-zero start regardless of dataset size.
    best_mistakes = n_samples + 1
    done = 0
    while done < updates:
        corrected = False
        for i in visit_order:
            predicted = -1 if np.dot(samples[i], w)[0] <= 0 else 1
            if predicted == labels[i]:
                continue
            # Rebind instead of updating in place so that bestW keeps
            # pointing at the weights it was assigned.
            w = w + samples[i].reshape(-1, 1) * labels[i]
            done += 1
            corrected = True
            # Score the new weights on every training sample at once.
            predictions = np.where(np.dot(samples, w).ravel() <= 0, -1, 1)
            mistakes = np.count_nonzero(predictions != labels)
            if mistakes < best_mistakes:
                best_mistakes = mistakes
                bestW = w
            if done >= updates:
                break
        if not corrected:
            # An error-free pass: without this exit the loop would spin
            # forever waiting for corrections that cannot occur.
            break
    return bestW
def random_PLA(x_matrix, y_matrix, updates=50):
    """Run randomly ordered PLA for a bounded number of corrections.

    A random permutation of the sample indices is drawn once and cycled
    through. Starting from all-zero weights, every misclassified sample
    (a score of 0 counts as a prediction of -1) triggers ``w += y * x``.

    The run ends after ``updates`` corrections, or earlier once a full pass
    over the data needs no correction (the weights already fit every
    sample, so no further update could ever happen).

    Args:
        x_matrix: Sequence of feature rows (bias term included).
        y_matrix: Sequence of labels (+1 / -1), aligned with ``x_matrix``.
        updates: Maximum number of weight corrections; defaults to 50.

    Returns:
        The weights after the last correction, as a ``(d, 1)`` NumPy array.

    Raises:
        IndexError: If ``x_matrix`` is empty.
    """
    samples = np.asarray(x_matrix, dtype=float)
    n_samples = len(samples)
    w = np.zeros((len(samples[0]), 1))
    visit_order = random.sample(range(n_samples), n_samples)

    done = 0
    while done < updates:
        corrected = False
        for i in visit_order:
            predicted = -1 if np.dot(samples[i], w)[0] <= 0 else 1
            if predicted == y_matrix[i]:
                continue
            w += samples[i].reshape(-1, 1) * y_matrix[i]
            done += 1
            corrected = True
            if done >= updates:
                break
        if not corrected:
            # An error-free pass: without this exit the loop would spin
            # forever waiting for corrections that cannot occur.
            break
    return w
if __name__ == '__main__':
    # Script entry point: train on 'train.txt' and report the average
    # error rate on 'test.txt' for the selected homework question.
    x_matrix, y_matrix = read_data('train.txt')
    x_test, y_test = read_data('test.txt')
    # Deliberately not called ``sum`` so the builtin stays usable at
    # module scope.
    n_test = len(x_test)
    repeats = 2000
    error = 0

    # Question 18: pocket algorithm with 50 updates.
    # for i in range(repeats):
    #     print(i)
    #     w = random_pocket(x_matrix, y_matrix, 50)
    #     count = test(w, x_test, y_test, n_test)
    #     error += count
    # print(float(error) / n_test / repeats)

    # Question 19: plain PLA weights after 50 updates.
    # for i in range(repeats):
    #     print(i)
    #     w = random_PLA(x_matrix, y_matrix)
    #     count = test(w, x_test, y_test, n_test)
    #     error += count
    # print(float(error) / n_test / repeats)

    # Question 20: pocket algorithm with 100 updates.
    for i in range(repeats):
        print(i)  # progress indicator
        w = random_pocket(x_matrix, y_matrix, 100)
        count = test(w, x_test, y_test, n_test)
        error += count
    print(float(error) / n_test / repeats)