'''
Author: TuZhou
Version: 1.0
Date: 2021-09-13 10:04:38
LastEditTime: 2021-09-13 10:36:17
LastEditors: TuZhou
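Description: Randomly subsample the test/train/val list files (./test.txt, ./train.txt, ./val.txt) and write the kept lines to ./cleaned_dataset/.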
FilePath: \python_test\traffic_dataset_clean.py
'''
import os
import random

import numpy as np
def clean_test():
    """Randomly keep 25% of the lines of ./test.txt.

    Reads every line of ``./test.txt``, draws ``int(len(lines) * 0.25)``
    distinct lines without replacement, and writes them to
    ``./cleaned_dataset/test.txt`` (overwriting any existing file). The
    input and output sizes are printed to stdout.

    Raises:
        FileNotFoundError: if ``./test.txt`` does not exist.
    """
    with open('./test.txt') as src:
        testLines = src.readlines()
    print("testlines length is ", len(testLines))

    keep_count = int(len(testLines) * 0.25)
    if keep_count:
        # Sample indices rather than the strings themselves: passing the
        # list of lines to np.random.choice would first build a fixed-width
        # NumPy unicode array, padding every line to the longest one.
        picked = np.random.choice(len(testLines), keep_count, replace=False)
    else:
        # Nothing to draw (empty or very small input); skip the sampler so
        # an empty population cannot raise.
        picked = []
    cleaned_test = [testLines[i] for i in picked]
    print("cleaned_test length is ", len(cleaned_test))

    # Create the output directory on first use instead of failing on open().
    os.makedirs('./cleaned_dataset', exist_ok=True)
    # The context manager closes the file even if a write fails.
    with open('./cleaned_dataset/test.txt', 'w') as dst:
        dst.writelines(cleaned_test)
def clean_train():
    """Randomly keep 50% of the lines of ./train.txt.

    Reads every line of ``./train.txt``, draws ``int(len(lines) * 0.5)``
    distinct lines without replacement, and writes them to
    ``./cleaned_dataset/train.txt`` (overwriting any existing file). The
    input and output sizes are printed to stdout.

    Raises:
        FileNotFoundError: if ``./train.txt`` does not exist.
    """
    with open('./train.txt') as src:
        trainLines = src.readlines()
    print("trainLines length is ", len(trainLines))

    keep_count = int(len(trainLines) * 0.5)
    if keep_count:
        # Sample indices rather than the strings themselves: passing the
        # list of lines to np.random.choice would first build a fixed-width
        # NumPy unicode array, padding every line to the longest one.
        picked = np.random.choice(len(trainLines), keep_count, replace=False)
    else:
        # Nothing to draw (empty or very small input); skip the sampler so
        # an empty population cannot raise.
        picked = []
    cleaned_train = [trainLines[i] for i in picked]
    print("cleaned_train length is ", len(cleaned_train))

    # Create the output directory on first use instead of failing on open().
    os.makedirs('./cleaned_dataset', exist_ok=True)
    # The context manager closes the file even if a write fails.
    with open('./cleaned_dataset/train.txt', 'w') as dst:
        dst.writelines(cleaned_train)
def clean_val():
    """Randomly keep 10% of the lines of ./val.txt.

    Reads every line of ``./val.txt``, draws ``int(len(lines) * 0.1)``
    distinct lines without replacement, and writes them to
    ``./cleaned_dataset/val.txt`` (overwriting any existing file). The
    input and output sizes are printed to stdout.

    Raises:
        FileNotFoundError: if ``./val.txt`` does not exist.
    """
    with open('./val.txt') as src:
        valLines = src.readlines()
    print("valLines length is ", len(valLines))

    keep_count = int(len(valLines) * 0.1)
    if keep_count:
        # Sample indices rather than the strings themselves: passing the
        # list of lines to np.random.choice would first build a fixed-width
        # NumPy unicode array, padding every line to the longest one.
        picked = np.random.choice(len(valLines), keep_count, replace=False)
    else:
        # Fewer than 10 lines rounds down to zero samples; skip the sampler
        # so an empty population cannot raise.
        picked = []
    cleaned_val = [valLines[i] for i in picked]
    print("cleaned_val length is ", len(cleaned_val))

    # Create the output directory on first use instead of failing on open().
    os.makedirs('./cleaned_dataset', exist_ok=True)
    # The context manager closes the file even if a write fails.
    with open('./cleaned_dataset/val.txt', 'w') as dst:
        dst.writelines(cleaned_val)
if __name__ == '__main__':
    # Subsample each split in turn: test, then train, then val.
    for subsample_split in (clean_test, clean_train, clean_val):
        subsample_split()
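# Dataset cleaning: randomly sample a subset of each split.
# (Scraped page footer: "latest recommended article published 2024-07-31 14:30:10".)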