OvR
Building on binary logistic regression, the One-vs-Rest (OvR) method is used for multi-class classification: the final label is the class whose binary classifier reports the highest probability. In the binary experiments, the classifier for versicolor has poor accuracy and recall, the classifier for virginica is not ideal either, and only setosa is classified reliably. After combining the decisions with OvR, classification accuracy improves greatly, reaching roughly 95%.
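As a rough illustration of the OvR decision rule described above (separate from the full training script below), here is a minimal sketch. It assumes `thetas` holds one fitted parameter vector per binary classifier and `classes` lists the corresponding label names in the same order; both names are placeholders, not identifiers from the script.

import numpy as np
import scipy.special

def OvrPredict(thetas, classes, xRow):
    # Assumed inputs: thetas and classes come from a per-class training loop.
    # Each binary classifier scores P(class_k | x) = sigmoid(theta_k . x);
    # the final OvR prediction is the class with the highest probability.
    probs = [scipy.special.expit(np.dot(theta, xRow)) for theta in thetas]
    return classes[int(np.argmax(probs))]

With three classifiers (one per Iris species), this picks setosa, versicolor, or virginica for each test row.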
# Author: Daniel Geng
# In this code, let's change the method to gradient descent. We start from a single feature.
import csv
import numpy as np
import matplotlib.pyplot as pl
import random
import scipy.special
# Import Iris raw data
def ImportData(filePath):
    X = []  # 2D list [[1, x11, x12, ...], [], ... []]; each item holds the feature columns of one row, prefixed with a bias term of 1
    Y = []  # 1D list of class labels
    f = open(filePath)
    r = csv.reader(f, delimiter=',')
    r.__next__()  # Skip header row
    for row in r:
        rowX = [1.0]
        for i in row[1:5]:
            rowX.append(float(i))
        X.append(rowX)
        Y.append(row[5])
    return (X, Y)
# Random the order of the raw data
def RandomShuffle(X, Y, flag):
    # Shuffle the order of the raw data
    if flag == True:
        for i, j in zip(X, Y):
            i.append(j)
        random.shuffle(X)
        Y = []  # clear Y, then rebuild it from the shuffled X
        for i in X:
            Y.append(i[-1])
            i.pop(-1)
    else:
        pass
    return (X, Y)
# Format to Numpy
def FormatNumpy(X):
    Xformat = np.array(X[:])  # change to numpy format
    return Xformat
# Refill classification: change the string label to 1 or 0
def RefillClass(Y, char_positive):
    tempY = []
    for i, e in enumerate(Y):
        if Y[i] == char_positive: