Python 数据集处理小工具合集

制作自己的数据集
class MyDataset(Dataset):
    """Dataset that reads images and per-image label files from one directory.

    Every entry of ``image_Path`` whose extension is exactly ``jpg`` is
    recorded as an image; every other entry is recorded as a label file.
    Sample ``i`` pairs ``imgs[i]`` with ``labels[i]``.
    NOTE(review): this pairing presumably relies on each image having a
    same-named label file so that both sorted lists line up -- confirm.

    Args:
        image_Path: Directory (relative to the current working directory, or
            absolute) holding the images and their label files.
        label_Path: Path to a JSON file; its parsed content is stored in
            ``self.classes``.
        img_transform: Truthy value enables the built-in resize / crop /
            tensor / normalize pipeline. The value itself is never called.
        target_transform: Truthy value makes ``__getitem__`` return the label
            as ``int`` instead of the raw text token.
    """

    def __init__(self,image_Path,label_Path,img_transform=None,target_transform=None):
        print(os.getcwd())
        self.image_Path = os.path.join(os.getcwd(),image_Path)
        self.label_Path = os.path.join(os.getcwd(),label_Path)
        with open(self.label_Path,'r', encoding='UTF-8') as f:
            self.classes = json.load(f)
        self.imgs = []  # image file paths
        self.labels = []  # label file paths
        # A single sorted listing feeds both lists, so index i of one list is
        # expected to correspond to index i of the other.
        for entry in sorted(os.listdir(self.image_Path)):
            full_path = os.path.join(self.image_Path, entry)
            if entry.split('.')[-1] == 'jpg':
                self.imgs.append(full_path)
            else:
                self.labels.append(full_path)
        self.img_transform = img_transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of images found in the directory."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at *index*.

        The label is the second space-separated token on the first line of
        the sample's label file.
        """
        image = Image.open(self.imgs[index])

        # Close the label file deterministically; leaving it to the garbage
        # collector leaks one descriptor per sample fetched.
        with open(self.labels[index], "r") as label_txt:
            line_content = label_txt.readline()
        label = line_content.split(' ')[1]

        if self.img_transform:
            # img_transform only acts as an on/off switch for this pipeline.
            transform = transforms.Compose([
                transforms.Resize((400,400)),
                transforms.CenterCrop(400),
                transforms.ToTensor(),
                transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
            ])
            image = transform(image)
        if self.target_transform:
            label = int(label)
        return image,label

文件重命名

import glob
import shutil
import os

old_dir = "./label"


def rename_to_txt(directory):
    """Rename every entry directly inside *directory* to ``<stem>.txt``.

    ``<stem>`` is the part of the entry name before its FIRST dot, so
    ``a.jpg.txt`` becomes ``a.txt``. Prints ``ok`` once per processed entry.

    Raises:
        FileExistsError: if the target name is already taken by another entry.
    """
    # Join with os.path.join: concatenating directory + "./*" yields
    # "<directory>./*", which matches nothing on POSIX systems.
    for file in glob.glob(os.path.join(directory, "*")):
        stem = os.path.basename(file).split(".")[0]
        target = os.path.join(directory, stem + ".txt")
        if os.path.abspath(target) != os.path.abspath(file):
            # os.rename silently overwrites existing files on POSIX, so
            # refuse explicitly instead of destroying another file.
            if os.path.exists(target):
                raise FileExistsError(target)
            # Rename in place; the file never leaves its directory, so a
            # failure cannot strand it in the current working directory.
            os.rename(file, target)
        print("ok")


rename_to_txt(old_dir)

移动文件

import glob
import shutil
import os

# Flatten a two-level tree: every entry found under <filePath>/<sub-dir>/ is
# moved directly into out_path.
filePath = "./data/anno"
out_path = './data/dark_anno_train'


def _ensure_dir(path):
    """Create *path* with os.mkdir unless something already exists there."""
    if not os.path.exists(path):
        os.mkdir(path)


_ensure_dir(out_path)

for class_name in os.listdir(filePath):
    entries = glob.glob(f"{filePath}/{class_name}/*")
    move_dir = out_path
    _ensure_dir(move_dir)
    print(entries)
    for entry in entries:
        shutil.move(entry, move_dir)

文件按条件筛选

from PIL import Image
import glob
import numpy as np
import shutil
import os

# Move "unusual" images out of <filePath>/<class>/ into <out_path>/<class>/.
# An image is moved when its decoded array is not 3-dimensional, when either
# side exceeds 2000 pixels, or when its mode is not "RGB".
filePath = "./data/dark"
out_path ='./data/out'
if not os.path.exists(out_path):
    os.mkdir(out_path)

for class_name in os.listdir(filePath):
    candidates = glob.glob(filePath+"/"+class_name+"/*")
    move_dir = os.path.join(out_path, class_name)
    if not os.path.exists(move_dir):
        os.mkdir(move_dir)
    print(candidates)
    for input_file in candidates:
        # Collect everything needed inside the context manager so the file
        # handle is closed before the move: an open handle leaks descriptors
        # and makes shutil.move fail on Windows.
        with Image.open(input_file) as image1:
            size = image1.size
            mode = image1.mode
            ndim = len(np.array(image1).shape)

        if ndim != 3 or size[0] > 2000 or size[1] > 2000 or mode != "RGB":
            print(ndim)
            print(size)
            shutil.move(input_file, move_dir)

文件夹切分

import os
import shutil
 
def mv_file(img, num,class_name):
    """Copy the files of directory *img* into numbered chunks of *num* files.

    Chunk ``n`` (1-based) is written to ``<grandparent of img>/temp/
    <class_name>_<n>``; the source files are left in place. An empty sibling
    directory ``<img>_<n>`` is also created for every chunk.
    NOTE(review): those empty ``<img>_<n>`` directories are kept only to
    preserve the existing on-disk result -- confirm whether they are wanted.

    Args:
        img: Directory whose files are split.
        num: Maximum number of files per chunk; must be positive.
        class_name: Prefix of the chunk directory names under ``temp``.

    Raises:
        ValueError: if *num* is not positive.
        SystemExit: if *num* exceeds the number of entries in *img*, or if
            any target directory already exists (nothing is created then).
    """
    if num <= 0:
        raise ValueError('num must be a positive integer')

    # Sort so chunk membership does not depend on the arbitrary order in
    # which os.listdir reports entries.
    list_ = sorted(os.listdir(img))
    if num > len(list_):
        print('长度需小于:', len(list_))
        raise SystemExit

    # Ceiling division: an exact multiple of num must not yield an extra,
    # empty trailing chunk.
    num_file = -(-len(list_) // num)

    # os.path.join keeps the path relative when the grandparent is ''; plain
    # concatenation with "/temp/" would point at the filesystem root.
    temp_root = os.path.join(os.path.dirname(os.path.dirname(img)), 'temp')
    targets = []
    for n in range(1, num_file + 1):
        new_file = img + '_' + str(n)
        move_file = os.path.join(temp_root, class_name + '_' + str(n))
        targets.append((new_file, move_file))

    # Check every target up front so a conflict is reported before anything
    # has been created on disk.
    for new_file, move_file in targets:
        if os.path.exists(new_file):
            print('该路径已存在,请解决冲突', new_file)
            raise SystemExit
        if os.path.exists(move_file):
            print('该路径已存在,请解决冲突', move_file)
            raise SystemExit

    os.makedirs(temp_root, exist_ok=True)
    for cnt, (new_file, move_file) in enumerate(targets):
        print('创建文件夹:', new_file)
        print('创建文件夹:', move_file)
        os.mkdir(new_file)
        os.mkdir(move_file)
        for m in list_[num*cnt:num*(cnt+1)]:
            # Copy straight to the destination; staging the copy in new_file
            # and moving it afterwards gives the same end state.
            shutil.copy(os.path.join(img, m), os.path.join(move_file, m))
    print('============task OK!===========')




if __name__ == "__main__":
    # Split every sub-directory of ./data/dark into chunks of 10 files.
    filePath = "./data/dark"
    for class_name in os.listdir(filePath):
        print(class_name)
        class_path = f"{filePath}/{class_name}"
        print(class_path)
        # Arguments: directory to split, files per chunk, chunk name prefix.
        mv_file(class_path, 10, class_name)
    
    

目标检测 数据集转换


from PIL import Image
import glob
import numpy as np
import shutil
import os

# Input locations for the conversion, and the list of entries in img_dir.
img_dir = "./data/darktrain"
anno_dir = "./data/dark_anno_train"

file_list = glob.glob(f"{img_dir}/*")


def str2num(s):
    """Return the integer class id for class name *s*.

    Ids follow the position of the name in the tuple below, starting at 0.

    Raises:
        KeyError: if *s* is not one of the known class names.
    """
    class_names = (
        'Bicycle', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
        'Chair', 'Cup', 'Dog', 'Motorbike', 'People', 'Table',
    )
    name_to_id = {name: idx for idx, name in enumerate(class_names)}
    return name_to_id[s]

# Rewrite each image's annotation file IN PLACE. Every kept line changes from
#   <class name> <left> <top> <width> <height> ...
# to
#   <class id> <x center / W> <y center / H> <width / W> <height / H>
# where W and H are the pixel size of the image.
# NOTE: the rewrite is not repeatable. A converted file starts each line with
# a numeric id, which str2num() rejects with KeyError before anything is
# written again.
for file in file_list:
    # Only the dimensions are needed; the context manager closes the image
    # file instead of leaking one handle per image.
    with Image.open(file) as img:
        w, h = img.size

    anno = anno_dir + "/" + os.path.split(file)[1] + ".txt"

    converted = []
    with open(anno, "r") as f:
        for line in f:
            splited = line.strip().split()
            # Skip '%' header/comment lines, and blank lines (which would
            # otherwise fail on splited[0]).
            if not splited or line.startswith('%'):
                continue
            c = str2num(splited[0])  # class id
            left, top, width, height = (float(v) for v in splited[1:5])
            x = (left + width / 2) / w  # box center x, as a fraction of W
            y = (top + height / 2) / h  # box center y, as a fraction of H
            converted.append(
                str(c) + ' ' + str(x) + ' ' + str(y) + ' '
                + str(width / w) + ' ' + str(height / h) + '\n'
            )

    with open(anno, 'w') as f:
        f.writelines(converted)


按 CSV 文件名移动

import glob
import shutil
import os
import numpy as np
import pandas as pd


# Move the image/label pairs listed in a CSV file into separate directories.
# Column 0 of each row is an image file name, column 1 a label file name.
images_path = "./images"
labels_path = "./labels"
out_images_path = './test_images'
out_labels_path = './test_labels'

csv_path = "./test.csv"

for out_dir in (out_images_path, out_labels_path):
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

with open(csv_path,encoding = 'utf-8') as f:
    # ndmin=2 keeps the result two-dimensional even when the CSV has a single
    # row; otherwise row[0] would index characters of a string.
    train_data = np.loadtxt(f, str, delimiter=",", ndmin=2)

# The CSV is fully loaded and closed before any file is moved.
for row in train_data:
    image, label = row[0], row[1]
    shutil.move(os.path.join(images_path, image), out_images_path)
    shutil.move(os.path.join(labels_path, label), out_labels_path)


# file_list = os.listdir(filePath)
#
# for class_name in file_list:
#     # matrix[i] = "filename"
#     # print(filePath)
#     # print(file_name)
#     file_list = glob.glob(filePath + "/" + class_name + "/*")
#     move_dir = out_path
#     if not os.path.exists(move_dir):
#         os.mkdir(move_dir)
#     print(file_list)
#     for input_file in file_list:
#         #temp = os.path.split(input_file)[1]
#         shutil.move(input_file,move_dir)


写csv

import os
import csv

# Write data.csv with one "<image name>,<label name>" line per image.
anno_path = "./data/ExDark/labels"
exdark_path = "./data/ExDark/images"

# os.listdir returns entries in arbitrary order, so sort both listings before
# pairing them by position.
# NOTE(review): assumes an image and its label sort to the same position
# (e.g. the label name extends or mirrors the image name) -- confirm.
exdark_file_list = sorted(os.listdir(exdark_path))
file_anno_list = sorted(os.listdir(anno_path))

# Differing counts mean positional pairing cannot be right; stop before
# writing a CSV with mismatched rows.
if len(exdark_file_list) != len(file_anno_list):
    raise ValueError(
        "image/label count mismatch: %d images vs %d labels"
        % (len(exdark_file_list), len(file_anno_list))
    )

with open("./data/ExDark/data.csv","w",newline="") as f:
    for file_name, file_anno_name in zip(exdark_file_list, file_anno_list):
        f.write(file_name+","+file_anno_name+"\n")

切分csv

#随机切开资料
import csv
import os
import numpy as np

'''将iris.csv中的数据分成train_iris和test_iris两个csv文件,其中train_iris.csv中有120个数据,test_iris.csv中有30个数据'''
# Randomly split the rows of data.csv into train.csv and test.csv.
a_train_file = './data/ExDark/train.csv'
a_test_file = './data/ExDark/test.csv'
a_file = './data/ExDark/data.csv'

# Fraction of rows that go to the training set. With 7263 rows this yields
# int(7263 * 0.8) == 5810 training rows.
TRAIN_RATIO = 0.8

with open(a_file)as afile:
    # The file has no header row, so every row is data.
    data = list(csv.reader(afile))

seed = 3
np.random.seed(seed)  # fixed seed keeps the split reproducible

# Derive both sizes from the data actually read, so the indices can neither
# run past the end of the file nor silently skip rows.
total = len(data)
train_count = int(total * TRAIN_RATIO)
train_indices = np.random.choice(total, train_count, replace=False)
# Every index that was not drawn for training belongs to the test set.
test_indices = np.array(list(set(range(total)) - set(train_indices)), dtype=int)

rows = np.array(data)
for out_file, indices in ((a_train_file, train_indices), (a_test_file, test_indices)):
    # An existing output file is left untouched.
    if not os.path.exists(out_file):
        with open(out_file, "w", newline='') as out:
            csv.writer(out).writerows(rows[indices])

dataset制作

import pandas as pd
from PIL import Image
import json
import os
import shutil

train_file = "train_list.txt"

# Space-separated table without a header; the first row is dropped.
file = pd.read_csv(train_file, sep=' ', header=None).iloc[1:]

# Parsed content of garbage_classification.json, filled on first use so the
# file is not re-read and re-parsed for every lookup.
_label_names = {}


def label_to_name(num):
    """Return the entry of ``garbage_classification.json`` stored under *num*.

    Args:
        num: Label id; it is converted to ``str`` to form the JSON key.

    Raises:
        KeyError: if the JSON object has no such key.
    """
    if not _label_names:
        # Explicit UTF-8 so non-ASCII names decode the same way regardless of
        # the platform's default encoding.
        with open("garbage_classification.json", 'r', encoding='utf-8') as f:
            _label_names.update(json.load(f))
    return _label_names[str(num)]

# Column 0 holds the file path, column 1 the label id; each file is moved
# into train/<label name>/.
for path, label_num in zip(file[0], file[1]):
    move_dir = 'train/' + label_to_name(label_num)
    if not os.path.exists(move_dir):
        os.makedirs(move_dir)
    shutil.move(path, move_dir)

image-clef 处理

import glob
import shutil
import os
import numpy as np

# Sort the files in filePath into per-class sub-directories, driven by a list
# file whose rows are "<path> <class id>" separated by a single space.
filePath = "./c"

file_class_path = "./list/cList.txt"

num2dir = {0:"aeroplane",
1:"bike",
2:"bird",
3:"boat",
4:"bottle",
5:"bus",
6:"car",
7:"dog",
8:"horse",
9:"monitor",
10:"motorbike",
11:"people"}


with open(file_class_path,encoding = 'utf-8') as f:
    # ndmin=2 keeps the result two-dimensional even for a one-line list file;
    # otherwise row[0] would index characters of a string.
    file = np.loadtxt(f, str, delimiter=" ", ndmin=2)

# The list file is fully loaded and closed before any file is moved.
for row in file:
    # Only the final path component is used; the file is looked up directly
    # inside filePath.
    temp = row[0].split("/")[-1]
    file_path = os.path.join(filePath, temp)
    out_path = os.path.join(filePath, num2dir[int(row[1])])
    if not os.path.exists(out_path):
        os.mkdir(out_path)
    if not os.path.exists(file_path):
        continue  # listed file is not present; nothing to move
    shutil.move(file_path,out_path)





  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值