用面向对象的方法进行数据的处理
昨天学习了面向对象的基础,尝试用这一思想来预处理机器学习的数据。基本思路是:向类中传入数据文件名称和训练集所占的比例 ratio,输出训练集、测试集、训练标签、测试标签。
from numpy import *
from sklearn import svm #库不能写在函数中。
class FunSVm():
    """Load a comma-separated data file and split it into train/test parts.

    Every non-blank line of the file must contain at least ``n_features``
    numeric columns; the last column of the line is parsed as an integer
    label.
    """

    def __init__(self, name, ratio, n_features=8):
        """Remember the data file and how to split it.

        Args:
            name: Path of the comma-separated data file.
            ratio: Fraction of the rows, counted from the top of the file,
                that ``classify`` places in the training part.
            n_features: Number of leading columns copied into the feature
                matrix (defaults to 8).
        """
        self.name = name
        self.ratio = ratio
        self.n_features = n_features

    def file2mat(self):
        """Parse the data file into a feature matrix and a label list.

        Returns:
            A tuple ``(dataMat, label, linenum)`` where ``dataMat`` is a
            float array with one row per record and ``n_features`` columns,
            ``label`` is a list of ints taken from the last column, and
            ``linenum`` is the number of records read.

        Raises:
            ValueError: If a record has non-numeric features, too few
                columns, or a label that is not an integer.
        """
        with open(self.name) as fr:
            # Blank lines (for example a trailing empty line) hold no record
            # and would fail numeric conversion, so they are dropped before
            # the matrix is sized.
            records = [row.strip().split(",") for row in fr if row.strip()]
        linenum = len(records)
        dataMat = zeros((linenum, self.n_features))
        label = []
        for index, fields in enumerate(records):
            # numpy converts the string fields to float on assignment.
            dataMat[index, :] = fields[:self.n_features]
            label.append(int(fields[-1]))
        return dataMat, label, linenum

    def classify(self):
        """Split the parsed data into training and test parts.

        The first ``int(ratio * number_of_records)`` records, in file order
        (no shuffling), form the training part; the remainder is the test
        part.

        Returns:
            A tuple ``(x_train, x_test, y_train, y_test)``: the feature
            arrays followed by the label lists.
        """
        x, y, length = self.file2mat()
        trainNum = int(self.ratio * length)
        return x[:trainNum], x[trainNum:], y[:trainNum], y[trainNum:]
# Configure the loader for the Pima Indians diabetes file with a 0.8 train ratio.
data = FunSVm(name="pima-indians-diabetes.txt", ratio=0.8)
array([[ 6. , 148. , 72. , ..., 33.6 , 0.627, 50. ], #输出numpy矩阵
[ 1. , 85. , 66. , ..., 26.6 , 0.351, 31. ],
[ 8. , 183. , 64. , ..., 23.3 , 0.672, 32. ],
...,
[ 3. , 174. , 58. , ..., 32.9 , 0.593, 36. ],
[ 7. , 168. , 88. , ..., 38.2 , 0.787, 40. ],
[ 6. , 105. , 80. , ..., 32.5 , 0.878, 26. ]])