文件的简单读取
import numpy as np
class DataUtil:
    """Helpers for loading small comma-separated text datasets into NumPy arrays."""

    @staticmethod
    def get_dataset(name, path, train_num=None, tar_index=None, shuffle=True):
        """Read a comma-separated dataset and separate features from the target.

        Args:
            name: Dataset name. The file content is only parsed when the name
                contains "balloon"; otherwise empty arrays are returned.
            path: Path of a UTF-8 text file holding one sample per line.
            train_num: When given, the first ``train_num`` samples (after the
                optional shuffle) form the training split and the rest the
                test split.
            tar_index: Column index of the target value; the last column is
                used when it is None.
            shuffle: When true, the sample order is shuffled in place with
                ``np.random.shuffle`` before the target column is removed.

        Returns:
            ``(x, y)`` when ``train_num`` is None, otherwise
            ``((x_train, y_train), (x_test, y_test))``. All parts are NumPy
            arrays.
        """
        x = []
        with open(path, "r", encoding="utf-8") as file:
            if "balloon" in name:
                for sample in file:
                    sample = sample.strip()
                    # A blank (e.g. trailing) line would become a [""] row and
                    # make the feature matrix ragged, so it is skipped.
                    if not sample:
                        continue
                    x.append(sample.split(","))
        if shuffle:
            # Shuffle whole rows before popping the target so that every
            # feature row stays paired with its own label.
            np.random.shuffle(x)
        tar_index = -1 if tar_index is None else tar_index
        # pop() removes the target column from each row, leaving only features.
        y = np.array([xx.pop(tar_index) for xx in x])
        x = np.array(x)
        if train_num is None:
            return x, y
        return (x[:train_num], y[:train_num]), (x[train_num:], y[train_num:])
测试:
原始数据(_Data/balloon1.0.txt 的内容):
黄色,小,成人,用手打,不爆炸
黄色,小,成人,用脚踩,爆炸
黄色,小,小孩,用手打,不爆炸
黄色,小,小孩,用脚踩,不爆炸
黄色,大,成人,用手打,爆炸
黄色,大,成人,用脚踩,爆炸
黄色,大,小孩,用手打,不爆炸
黄色,大,小孩,用脚踩,爆炸
紫色,小,成人,用手打,不爆炸
紫色,小,小孩,用手打,不爆炸
紫色,大,成人,用脚踩,爆炸
紫色,大,小孩,用脚踩,爆炸
测试结果
# Load the complete balloon dataset (no train/test split requested), then
# print the feature matrix followed by the label vector.
_x, _y = DataUtil.get_dataset(name="balloon1.0", path="_Data/balloon1.0.txt")
print(_x, _y, sep="\n")
runfile('D:/share/test/Util.py', wdir='D:/share/test')
[['黄色' '大' '成人' '用脚踩']
['黄色' '小' '小孩' '用手打']
['黄色' '小' '成人' '用手打']
['紫色' '小' '成人' '用手打']
['紫色' '小' '小孩' '用手打']
['紫色' '大' '小孩' '用脚踩']
['紫色' '大' '成人' '用脚踩']
['黄色' '小' '成人' '用脚踩']
['黄色' '小' '小孩' '用脚踩']
['黄色' '大' '成人' '用手打']
['黄色' '大' '小孩' '用手打']
['黄色' '大' '小孩' '用脚踩']]
['爆炸' '不爆炸' '不爆炸' '不爆炸' '不爆炸' '爆炸' '爆炸' '爆炸' '不爆炸' '爆炸' '不爆炸' '爆炸']
另外一种读取方式(每行以制表符分隔:两个数值特征加一个数值标签):
def loadDataSet(fileName):
    """Load a tab-separated numeric dataset.

    Each non-blank line must hold at least three tab-separated fields: the
    first two are parsed as float features and the third as a float label.

    :param fileName: path of the text file to read
    :return: ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[feature0, feature1]`` float pairs and ``labelMat`` is the list of
        float labels, in file order
    :raises ValueError: if a field cannot be converted to float
    :raises IndexError: if a non-blank line has fewer than three fields
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the file is closed even when a line
    # fails to parse.
    with open(fileName) as fr:
        for line in fr:
            line = line.strip()
            # Skip blank lines (typically a trailing newline at end of file)
            # instead of crashing on float("").
            if not line:
                continue
            lineArr = line.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat