使用Python整理数据集，规范化数据

最新推荐文章于 2023-09-27 10:08:09 发布

幼儿园传奇大侠

最新推荐文章于 2023-09-27 10:08:09 发布

阅读量3.1k

点赞数

分类专栏： Python 深度学习 PyTorch 文章标签：数据集整理

本文链接：https://blog.csdn.net/u013000248/article/details/91361684

版权

深度学习同时被 3 个专栏收录

13 篇文章 0 订阅

订阅专栏

PyTorch

7 篇文章 0 订阅

订阅专栏

Python

5 篇文章 0 订阅

订阅专栏

写在前面的话：经过大量的采图实验，数据散落各处，设备也调整过，标定参数之类的也都不一致。以前整理数据都是用 C/C++ 傻乎乎地调用 system("cp str str")，虽然知道 shell 更简单，但毕竟懒；现在发现 Python 真是一把利器。

1.0按照指定的文件路径组织文件

已知大部分数据的目录结构如下：

用下面的方式整理实验数据，写完这个也可以当是PY学习的小例子：

import os
import shutil

dirroot = "O:\\380A"


def _list_children(parents):
    """Return the full paths of all entries directly inside each directory in *parents*.

    Entries of *parents* that are not existing directories are skipped, so a
    stray file or a missing sub-folder no longer aborts the whole scan with
    NotADirectoryError / FileNotFoundError. Order follows *parents*, then
    ``os.listdir``.
    """
    children = []
    for parent in parents:
        if not os.path.isdir(parent):
            continue
        children.extend(os.path.join(parent, name) for name in os.listdir(parent))
    return children


# Sample folders sit three levels below the root: <dirroot>/<run>/1/<group>/<sample>.
# First level: the fixed "1" sub-folder of every directory found in the root.
dir1 = [
    os.path.join(dirroot, name, "1")
    for name in os.listdir(dirroot)
    if os.path.isdir(os.path.join(dirroot, name))
]
# Second level: everything inside each "<run>/1" folder.
dirtmp = _list_children(dir1)
# Third level: candidate sample folders. Plain files may still be present here;
# they are deliberately kept so that the positions in ``dirs`` do not shift.
dirs = _list_children(dirtmp)

tardir = "O:\\chengdudongdataset\\380Adata"
calibdir = "O:\\chengdudongdataset\\calib\\chengdudongIU20181122bd"

# One copy plan per camera pair. Each plan is
#   (marker file whose presence selects the plan,
#    [(image name in the sample folder, name in the dataset folder), ...],
#    [(calibration file in ``calibdir``, name in the dataset folder), ...]).
# Plans are tried in order, so the "1-r.jpg" pair wins if both markers exist.
# NOTE(review): images of the first camera (1-*/3-*) are stored as img2* while
# its calibration (para1/para3) is stored as cam1.xml -- confirm this is intended.
_COPY_PLANS = (
    (
        "1-r.jpg",
        (
            ("1-r.jpg", "img2r.jpg"),
            ("1-t.jpg", "img2t.jpg"),
            ("1-g.jpg", "img2g.jpg"),
            ("2-r.jpg", "img1r.jpg"),
            ("2-t.jpg", "img1t.jpg"),
            ("2-g.jpg", "img1g.jpg"),
            ("2d.jpg", "imgcom.jpg"),
        ),
        (
            ("para_stero_1_2.xml", "para_stereo.xml"),
            ("para1.xml", "cam1.xml"),
            ("para2.xml", "cam2.xml"),
        ),
    ),
    (
        "3-r.jpg",
        (
            ("3-r.jpg", "img2r.jpg"),
            ("3-t.jpg", "img2t.jpg"),
            ("3-g.jpg", "img2g.jpg"),
            ("4-r.jpg", "img1r.jpg"),
            ("4-t.jpg", "img1t.jpg"),
            ("4-g.jpg", "img1g.jpg"),
            ("2d.jpg", "imgcom.jpg"),
        ),
        (
            ("para_stero_3_4.xml", "para_stereo.xml"),
            ("para3.xml", "cam1.xml"),
            ("para4.xml", "cam2.xml"),
        ),
    ),
)

# "<id>\t<source folder>\n" records: successfully processed samples / failed ones.
cooresponding = []
error_s = []
for index, d in enumerate(dirs):
    if not os.path.isdir(d):
        continue
    sample_id = "%05d" % index
    dir_d = os.path.join(tardir, sample_id)
    coor = sample_id + '\t' + d + '\n'
    # Pick the first plan whose marker image exists in this sample folder.
    plan = next(
        (p for p in _COPY_PLANS if os.path.exists(os.path.join(d, p[0]))),
        None,
    )
    try:
        # exist_ok lets the script be re-run over an existing dataset folder;
        # creating the folder inside the ``try`` records failures in error_s
        # instead of aborting the whole run.
        os.makedirs(dir_d, exist_ok=True)
        if plan is not None:
            _, images, calibrations = plan
            for src_name, dst_name in images:
                shutil.copyfile(os.path.join(d, src_name), os.path.join(dir_d, dst_name))
            for src_name, dst_name in calibrations:
                shutil.copyfile(os.path.join(calibdir, src_name), os.path.join(dir_d, dst_name))
        # A folder matching neither plan is still recorded as processed, with an
        # empty dataset folder (same as the original ``else: pass`` branch).
    except OSError:
        # Typically FileNotFoundError for an incomplete sample (e.g. a missing
        # "1-t.jpg"). Only I/O errors are caught, so Ctrl+C still stops the run.
        error_s.append(coor)
    else:
        cooresponding.append(coor)
# Write the "<id>\t<source folder>" index of processed samples.
# The trailing newline of the last record is dropped so the file does not end
# with an empty line. The guard matters when no sample was processed
# successfully: indexing [-1] on the empty list used to raise IndexError.
if cooresponding:
    cooresponding[-1] = cooresponding[-1][:-1]

with open(os.path.join(tardir, "cooresponding.txt"), 'w') as file_obj:
    file_obj.writelines(cooresponding)

# The error list is only written when at least one sample failed.
if error_s:
    error_s[-1] = error_s[-1][:-1]
    with open(os.path.join(tardir, "error_s.txt"), 'w') as file_error_obj:
        file_error_obj.writelines(error_s)

整理完就是：
整理后

2.0 制作简单GUI工具筛选数据。使用tkinter

先设计一个大概的应用界面：
GUI设计
这是最终完成的样子：
最终设计结果
Code如下：通过这个例子就可以基本摸清楚tkinter的套路了，和HTML，Android XML这些常见GUI很类似
纠正：做标记时建议用 1、2、4、8 这样的 2 的幂，方便做位运算（按位或组合、按位与检测）。我这里用了 1/2/3，配合 8 也能用（得到的取值 1、2、3、9、10、11 互不相同），但因为 3 == 1|2，无法单独按位检测，所以不建议这样做。

# -*- coding: utf-8 -*-
"""
Created on Wed Jul 17 16:57:48 2019

@author: frank
"""
import os
import tkinter as tk
from PIL import Image, ImageTk
# Main window. It must exist before any tk.StringVar is created below.
window = tk.Tk()
window.geometry('1000x730')
window.title('西南交大光电工程研究所数据分类工具_ClassifyTo3 - - - Quasimo')

# Mark values stored in optMarks (as strings), one per entry of dirs.
# '0' means "not labelled yet". A quality mark is OR-ed with SPECIAL_PART when
# the wheel-tread button was pressed first, giving 1, 2, 3, 9, 10 or 11.
# Note that 3 == TRAIN_CLASS | NOT_WELL_CLASS, so the quality marks are not
# independent bit flags.
TRAIN_CLASS = 1
NOT_WELL_CLASS = 2
TOO_MUCH_NOISE = 3
SPECIAL_PART = 8
# Mark of the current image.
optAndMark = 0

# Sample folders to label, one path per line of dirList_windows.txt.
dirRoot = r'D:\chengdudongdataset\stereodata'
with open(os.path.join(dirRoot, "dirList_windows.txt"), 'r') as file_obj:
    dirs = [line.rstrip() for line in file_obj]

trainDirList = []
notVeryWell = []
tooMuchNoise = []
specialPart = []

# Labelling state shared by the button callbacks.
optMarks = ['0'] * len(dirs)   # one mark string per sample folder
isWheelFlag = False            # pending wheel-tread flag for the current sample
indexOfDirs = 0                # index of the sample currently shown

# Text variable of the header label that shows the current sample path.
pathBingLookVar = tk.StringVar()
pathBingLookVar.set('this is path')

# Resume from a previous session: optMarks.txt holds one mark per line.
if os.path.exists(os.path.join(dirRoot, "optMarks.txt")):
    with open(os.path.join(dirRoot, "optMarks.txt"), 'r') as file_obj:
        optMarks = [line.rstrip() for line in file_obj]
    # A saved list shorter than dirs (e.g. the folder list grew since the last
    # save) would make the label callbacks index past its end; treat the
    # missing entries as unlabelled.
    if len(optMarks) < len(dirs):
        optMarks.extend(['0'] * (len(dirs) - len(optMarks)))

# Continue at the first unlabelled sample. The search is limited to valid
# indices of dirs, and a finished (or empty) list ends the program cleanly
# instead of failing with an uncaught ValueError from list.index.
try:
    indexOfDirs = optMarks.index('0', 0, len(dirs))
except ValueError:
    print('Nothing left to label: every entry already has a mark.')
    raise SystemExit(0)

def openImgAndShow():
    """Display 'dispRainbowBlack.png' of the current sample and refresh the header.

    Reads the module-level ``dirs`` / ``indexOfDirs``, updates the image label,
    the path label and the "index / total" progress label, and resets the
    picture toggle so that the next "LookLook" click switches to the camera
    image. Previously the toggle kept its old state, so after toggling and then
    labelling, the first click on the next sample changed nothing.
    """
    global dispImg, switchFlag
    sample_dir = dirs[indexOfDirs]
    bkgimg = Image.open(os.path.join(sample_dir, 'dispRainbowBlack.png'))
    # Stored in a module-level name so the image object outlives this call.
    dispImg = ImageTk.PhotoImage(bkgimg)
    img_show.configure(image=dispImg)
    pathBingLookVar.set(sample_dir)
    progressbarVar.set(str(indexOfDirs) + ' / ' + str(len(dirs)))
    # 'dispRainbowBlack.png' is on screen again, which is the True state of the toggle.
    switchFlag = True


def saveBtFun():
    """Save the current progress: write every mark of ``optMarks`` to
    ``optMarks.txt`` under ``dirRoot``, one mark per line."""
    marks_path = os.path.join(dirRoot, "optMarks.txt")
    with open(marks_path, 'w') as file_obj:
        file_obj.write("".join(recor + "\n" for recor in optMarks))
    
def revocationBtFun():
    """Undo one step: go back to the previous sample and clear its mark.

    Any pending wheel-tread flag is dropped as well. At the first sample there
    is nothing to undo, so the call returns without moving; previously the
    index became -1, which cleared the mark of the LAST sample and displayed it.
    """
    global indexOfDirs, isWheelFlag
    isWheelFlag = False
    if indexOfDirs <= 0:
        return
    indexOfDirs -= 1
    optMarks[indexOfDirs] = str(0)
    openImgAndShow()
    
def wheelSurfBtFun():
    """Flag the current sample as a wheel tread.

    Only sets the pending flag to SPECIAL_PART; the flag is OR-ed into the mark
    when one of the quality buttons is pressed next.
    """
    global isWheelFlag
    isWheelFlag = SPECIAL_PART
    
def tooMuchNoiseBtFun():
    """Mark the current sample as "too much noise" and move to the next one.

    The stored mark is TOO_MUCH_NOISE OR-ed with the pending wheel-tread flag,
    which is then cleared. After the last sample the progress is saved and the
    program exits.
    """
    global indexOfDirs, isWheelFlag
    optMarks[indexOfDirs] = str(TOO_MUCH_NOISE | isWheelFlag)
    isWheelFlag = False
    indexOfDirs += 1
    if indexOfDirs >= len(dirs):
        saveBtFun()
        # Raise SystemExit directly: the ``exit()`` helper is injected by the
        # site module and is missing under ``python -S`` or in frozen builds.
        raise SystemExit
    openImgAndShow()
    
def notVeryWellBtFun():
    """Mark the current sample as "not very good" and move to the next one.

    The stored mark is NOT_WELL_CLASS OR-ed with the pending wheel-tread flag,
    which is then cleared. After the last sample the progress is saved and the
    program exits.
    """
    global indexOfDirs, isWheelFlag
    optMarks[indexOfDirs] = str(NOT_WELL_CLASS | isWheelFlag)
    isWheelFlag = False
    indexOfDirs += 1
    if indexOfDirs >= len(dirs):
        saveBtFun()
        # Raise SystemExit directly: the ``exit()`` helper is injected by the
        # site module and is missing under ``python -S`` or in frozen builds.
        raise SystemExit
    openImgAndShow()
# Picture toggle state: True means the next switchPic() call shows
# 'img1tCV8UC3.png', False means it shows 'dispRainbowBlack.png'.
switchFlag = True


def switchPic():
    """Take a look at the actual picture: alternate the displayed file of the
    current sample between 'img1tCV8UC3.png' and 'dispRainbowBlack.png'."""
    global switchFlag, dispImg
    filename = 'img1tCV8UC3.png' if switchFlag else 'dispRainbowBlack.png'
    bkgimg = Image.open(os.path.join(dirs[indexOfDirs], filename))
    dispImg = ImageTk.PhotoImage(bkgimg)
    img_show.configure(image=dispImg)
    switchFlag = not switchFlag
def trainDataBtFun():
    """Mark the current sample as usable data and move to the next one.

    The stored mark is TRAIN_CLASS OR-ed with the pending wheel-tread flag,
    which is then cleared. After the last sample the progress is saved and the
    program exits.
    """
    global indexOfDirs, isWheelFlag
    optMarks[indexOfDirs] = str(TRAIN_CLASS | isWheelFlag)
    isWheelFlag = False
    indexOfDirs += 1
    if indexOfDirs >= len(dirs):
        saveBtFun()
        # Raise SystemExit directly: the ``exit()`` helper is injected by the
        # site module and is missing under ``python -S`` or in frozen builds.
        raise SystemExit
    openImgAndShow()
    
# Four rows stacked top to bottom inside the main window:
# frame1 = path + progress header, frame2 = image, frame3/frame4 = buttons.
frame = window
frame1 = tk.Frame(master=frame, bg='#000fff000', borderwidth=10)
frame2 = tk.Frame(master=frame, borderwidth=10)
frame3 = tk.Frame(master=frame, bg='red', borderwidth=10)
frame4 = tk.Frame(master=frame, borderwidth=10)

# Header row. Widgets are created first and packed in a separate statement:
# ``pack()`` returns None, so ``name = tk.Label(...).pack(...)`` used to bind
# None instead of the widget.
pathBingLook = tk.Label(master=frame1, font=('Arial', 12), width=35,
                        textvariable=pathBingLookVar)
pathBingLook.pack(side='left', fill='x', expand='yes')

# "index / total" progress text, updated by openImgAndShow().
progressbarVar = tk.StringVar()
progressbarVar.set('step / Num')
progressbar = tk.Label(master=frame1, bg='red', font=('Arial', 12), width=15,
                       textvariable=progressbarVar)
progressbar.pack(side='left')
frame1.pack(side='top', fill='both', expand='NO')

# Image row: start with 'dispRainbowBlack.png' of the sample to resume from.
# dispImg stays a module-level name; the callbacks rebind it on every switch.
first_picture = os.path.join(dirs[indexOfDirs], 'dispRainbowBlack.png')
bkgimg = Image.open(first_picture)
dispImg = ImageTk.PhotoImage(bkgimg)
img_show = tk.Label(master=frame2, image=dispImg, width=960, height=540)
img_show.pack()
frame2.pack(side='top', fill='both')

def _add_button(parent, text, command):
    """Create a button in *parent*, pack it left-to-right and return the widget.

    Returning the widget (instead of the None that ``pack()`` returns) keeps
    the module-level button names usable, e.g. for later ``configure`` calls.
    """
    button = tk.Button(parent, text=text, font=('宋体', 17), command=command)
    button.pack(side='left', anchor='center', expand='YES')
    return button


# Utility row: save progress, toggle picture, undo, wheel-tread flag.
saveBt = _add_button(frame3, '保存进度', saveBtFun)
lookLook = _add_button(frame3, 'LookLook', switchPic)
revocationBt = _add_button(frame3, '撤销一步', revocationBtFun)
wheelSurfBt = _add_button(frame3, '车轮踏面', wheelSurfBtFun)
frame3.pack(side='top', fill='both', expand='NO')

# Labelling row: each button stores a mark and advances to the next sample.
tooMuchNoiseBt = _add_button(frame4, '不能用', tooMuchNoiseBtFun)
notVeryWellBt = _add_button(frame4, '还可以', notVeryWellBtFun)
trainDataBt = _add_button(frame4, '可以用', trainDataBtFun)
frame4.pack(side='top', fill='both', expand='NO')

window.mainloop()

幼儿园传奇大侠

关注

0
点赞
踩
10

收藏

觉得还不错? 一键收藏
0
评论
使用Python整理数据集，规范化数据

写在前面的话：经过大量的采图实验，数据散落各处，设备也调整过，标定参数之类的也都不一致，以前整理数据都是用的C/C++傻乎乎地system(“cp str str”)虽然知道shell更简单，但是毕竟懒，现在发现PY真是一把利器。已知部分（多数数据）数据目录结构如下：用下面的方式整理实验数据，写完这个也可以当是PY学习的小例子：import osimport shutildirroo...
复制链接

扫一扫