# Dataset cleaning: randomly sample a subset of each split file.

'''
Author: TuZhou
Version: 1.0
Date: 2021-09-13 10:04:38
LastEditTime: 2021-09-13 10:36:17
LastEditors: TuZhou
Description: Randomly down-samples the test/train/val line lists (25% / 50% / 10%) into ./cleaned_dataset.
FilePath: \\python_test\\traffic_dataset_clean.py
'''
import random
import numpy as np

def clean_test():
    """Write a random 25% sample of ``./test.txt`` to ``./cleaned_dataset/test.txt``.

    Lines are drawn without replacement, so no input line is written more
    often than it occurs in the source file. The number of lines kept is
    ``int(len(lines) * 0.25)``, i.e. rounded down. The input and output line
    counts are printed.

    The output directory is created when it does not exist, and any existing
    output file is overwritten.
    """
    import os  # local import: only needed to prepare the output directory

    with open('./test.txt') as f:
        testLines = f.readlines()

    print("testlines length is ", len(testLines))
    cleaned_test = np.random.choice(testLines, int(len(testLines) * 0.25), replace = False)
    print("cleaned_test length is ", len(cleaned_test))

    # Without this, open() below fails on a fresh checkout where the
    # output directory has not been created by hand.
    os.makedirs('./cleaned_dataset', exist_ok=True)

    # The context manager closes the file even if a write fails part-way.
    with open('./cleaned_dataset/test.txt', 'w') as out:
        out.writelines(cleaned_test)

def clean_train():
    """Write a random 50% sample of ``./train.txt`` to ``./cleaned_dataset/train.txt``.

    Lines are drawn without replacement, so no input line is written more
    often than it occurs in the source file. The number of lines kept is
    ``int(len(lines) * 0.5)``, i.e. rounded down. The input and output line
    counts are printed.

    The output directory is created when it does not exist, and any existing
    output file is overwritten.
    """
    import os  # local import: only needed to prepare the output directory

    with open('./train.txt') as f:
        trainLines = f.readlines()

    print("trainLines length is ", len(trainLines))
    cleaned_train = np.random.choice(trainLines, int(len(trainLines) * 0.5), replace = False)
    print("cleaned_train length is ", len(cleaned_train))

    # Without this, open() below fails on a fresh checkout where the
    # output directory has not been created by hand.
    os.makedirs('./cleaned_dataset', exist_ok=True)

    # The context manager closes the file even if a write fails part-way.
    with open('./cleaned_dataset/train.txt', 'w') as out:
        out.writelines(cleaned_train)
    
def clean_val():
    """Write a random 10% sample of ``./val.txt`` to ``./cleaned_dataset/val.txt``.

    Lines are drawn without replacement, so no input line is written more
    often than it occurs in the source file. The number of lines kept is
    ``int(len(lines) * 0.1)``, i.e. rounded down. The input and output line
    counts are printed.

    The output directory is created when it does not exist, and any existing
    output file is overwritten.
    """
    import os  # local import: only needed to prepare the output directory

    with open('./val.txt') as f:
        valLines = f.readlines()

    print("valLines length is ", len(valLines))
    cleaned_val = np.random.choice(valLines, int(len(valLines) * 0.1), replace = False)
    print("cleaned_val length is ", len(cleaned_val))

    # Without this, open() below fails on a fresh checkout where the
    # output directory has not been created by hand.
    os.makedirs('./cleaned_dataset', exist_ok=True)

    # The context manager closes the file even if a write fails part-way.
    with open('./cleaned_dataset/val.txt', 'w') as out:
        out.writelines(cleaned_val)
    
if __name__ == '__main__':
    # Script entry point: down-sample each split in turn (test, train, val).
    for clean_split in (clean_test, clean_train, clean_val):
        clean_split()
  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值