# 1. Read the contents of a specified file
def file2matric(filename):
    """Load tab-separated samples from a text file into a matrix and labels.

    Every non-blank line is treated as one sample: feature values separated
    by tab characters, followed by an integer class label in the last field.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(dataSetMat, classLablesReturn)``. ``dataSetMat`` is a 2-D
        array created with ``zeros`` that holds one row of float features per
        sample; ``classLablesReturn`` is a list with the integer label of each
        sample, in file order. A file without any sample yields a ``(0, 0)``
        array and an empty list.

    Raises:
        ValueError: If a feature cannot be converted to ``float``, a label
            cannot be converted to ``int``, or a line does not have the same
            number of fields as the first sample.
    """
    with open(filename) as file_object:
        # Drop trailing whitespace (including the newline) before splitting,
        # and skip blank lines so that e.g. a trailing empty line at the end
        # of the file neither inflates the matrix nor breaks label parsing.
        rows = [
            line.rstrip().split('\t')
            for line in file_object
            if line.strip()
        ]

    if not rows:
        return zeros((0, 0)), []

    # The column count comes from the FIRST sample (the original code looked
    # at the second line and failed on single-line files). The last field is
    # the label, hence the "- 1".
    dataSetMat = zeros((len(rows), len(rows[0]) - 1))
    classLablesReturn = []
    for index, listFromLine in enumerate(rows):
        dataSetMat[index, :] = [float(value) for value in listFromLine[:-1]]
        # Labels are expected to be integers.
        classLablesReturn.append(int(listFromLine[-1]))
    return dataSetMat, classLablesReturn
# 2. Read the contents of every file in a specified folder
from os import listdir
def imgtxts2Matric(filePath):
    """Load every file in a directory into one matrix, one row per file.

    Each directory entry is passed to ``imgtxt2Vector`` (defined elsewhere in
    this module) and row 0 of the result is copied into the output matrix.
    The label of a sample is the first character of its file name converted
    to ``int``.

    Args:
        filePath: Directory whose entries are read.

    Returns:
        A tuple ``(returnMat, returnLables)``. ``returnMat`` has one row per
        directory entry, in ``listdir`` order, and as many columns as the
        vector of the first entry; ``returnLables`` holds the matching
        integer labels. An empty directory yields a ``(0, 0)`` array and an
        empty list.

    Raises:
        ValueError: If a file name does not start with a digit.
    """
    # Function-scope import: the module only imports ``listdir`` from ``os``.
    from os.path import join as joinPath

    imgFileList = listdir(filePath)  # entry names (strings)
    m = len(imgFileList)  # number of samples
    if m == 0:
        # Nothing to read; the original code failed on imgFileList[0] here.
        return zeros((0, 0)), []

    returnMat = None
    returnLables = []
    for i, filename in enumerate(imgFileList):
        # NOTE(review): imgtxt2Vector is presumably returning a 1 x n array;
        # only ``.shape[1]`` and row 0 are used here - confirm against its
        # definition.
        imgVector = imgtxt2Vector(joinPath(filePath, filename))
        if returnMat is None:
            # Allocate once the width is known, so the first file is not
            # parsed a second time just to learn its column count.
            returnMat = zeros((m, imgVector.shape[1]))
        returnMat[i, :] = imgVector[0, :]
        # Only the first character is used, so labels are single digits.
        returnLables.append(int(filename[0]))
    return returnMat, returnLables