from scipy.io import arff
import numpy as np
import pandas as pd
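# Logistic regression on the Iris dataset: the three classes are collapsed to a
# binary label (class 0 vs. the rest) and the model is trained with mini-batch
# gradient descent.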
iris = arff.loadarff('../dataset/iris.arff')
df = pd.DataFrame(iris[0])
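# each row holds 4 numeric features (columns 0..3) plus a 'class' label (column 4)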
length = df.shape[0]
classes = sorted(set(df['class']))  # sort so the label mapping is deterministic
classes_length = len(classes)
classes_dict = dict()
for i in range(classes_length):
    classes_dict[classes[i]] = i
for i in range(length):
    df.iloc[i, 4] = classes_dict[df.iloc[i, 4]]
    if df.iloc[i, 4] != 0:  # collapse to a binary classification problem: class 0 vs. the rest
        df.iloc[i, 4] = 1
# df = df.sample(frac=1) # shuffle data randomly
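# the first 100 rows are used for training, the remaining rows for testing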
train_data = df.iloc[0:100]
test_data = df.iloc[100:]
train_length = train_data.shape[0]
test_length = test_data.shape[0]
def my_iter(data, batch):
    # yield mini-batches of (features, labels); the last batch may be smaller
    length = data.shape[0]
    start = 0
    while length - batch > 0:
        X = data.iloc[start:start + batch, 0:4]
        y = data.iloc[start:start + batch, 4]
        yield X, y
        start += batch
        length -= batch
    X = data.iloc[start:, 0:4]
    y = data.iloc[start:, 4]
    yield X, y
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def initialize_with_zeros(dim):
    w = np.zeros([dim, 1])  # dim inputs, one output neuron
    b = 0
    return w, b
# w: (4, 1)
# X: (batch, 4)
def forward(w, b, X, Y):
    batch = X.shape[0]
    Z = np.dot(X, w) + b
    Z = Z.reshape(batch)
    A = sigmoid(Z)
    # binary cross-entropy cost averaged over the batch
    cost = -1 / batch * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    return cost, A
# w: (4, 1)
# X: (batch, 4)
# A: (batch, 1)
def backward(w, b, X, Y, A, cost):
    batch = X.shape[0]
    X = np.array(X)
    Y = np.array(Y, dtype=float).reshape(batch, 1)
    A = A.reshape(batch, 1)
    # gradients of the cross-entropy cost: dw = X^T (A - Y) / batch, db = mean(A - Y)
    dw = np.array(1 / batch * np.sum(np.multiply(X, (A - Y)), axis=0))
    db = np.array(1 / batch * np.sum(A - Y))
    dw.resize([4, 1])
    cost = np.squeeze(cost)
    grads = {'dw': dw, 'db': db}
    return grads, cost
def optimize(w, b, train_data, learning_rate, batch=20):
    for i, (X, Y) in enumerate(my_iter(train_data, batch)):
        cost, A = forward(w, b, X, Y)
        grads, cost = backward(w, b, X, Y, A, cost)
        dw = grads['dw']
        db = grads['db']
        # update the parameters by gradient descent
        # w: (4, 1), dw: (4, 1)
        w = w - learning_rate * dw
        b = b - learning_rate * db
        print('iter %d loss %f' % (i, cost))
    params = {'w': w, 'b': b}
    return params
def predict(w, b, X):
    batch = X.shape[0]
    y_predict = sigmoid(np.dot(X, w) + b)  # shape (batch, 1)
    # threshold the predicted probabilities at 0.5
    for i in range(batch):
        if y_predict[i, 0] > 0.5:
            y_predict[i, 0] = 1
        else:
            y_predict[i, 0] = 0
    return y_predict
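# train the model, then evaluate accuracy on the training and test splits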
w, b = initialize_with_zeros(4)
params = optimize(w, b, train_data, learning_rate=0.1, batch=20)
w, b = params['w'], params['b']  # use the trained parameters, not the zero-initialized ones
y_predict_train = predict(w, b, train_data.iloc[:, 0:4])
y_predict_test = predict(w, b, test_data.iloc[:, 0:4])
y_predict_train.resize([train_length])
y_predict_test.resize([test_length])
print('acc train %.2f' % (np.mean(y_predict_train == train_data.iloc[:, 4])))
print('acc test %.2f' % (np.mean(y_predict_test == test_data.iloc[:, 4])))