python的基本文件操作

最新推荐文章于 2024-04-24 18:51:57 发布

halo_小怪兽

最新推荐文章于 2024-04-24 18:51:57 发布

阅读量240

点赞数

分类专栏：数据处理、工具　文章标签：python、xml

本文链接：https://blog.csdn.net/qq_37937847/article/details/110450432

版权

工具同时被 2 个专栏收录

4 篇文章 0 订阅

订阅专栏

数据处理

3 篇文章 0 订阅

订阅专栏

1. python文件操作

1.1 批量对文件进行改名

import os

src_path = ''  # folder that contains the files to rename
dst_path = ''  # folder that receives the renamed files


def rename_files(src_dir, dst_dir, ext='.txt'):
    """Move every regular file in *src_dir* to *dst_dir* as ``<index><ext>``.

    Files are processed in sorted name order so the numbering is
    reproducible (``os.listdir`` returns entries in arbitrary order).
    Sub-directories of *src_dir* are left untouched.

    Args:
        src_dir: Folder holding the files to rename.
        dst_dir: Folder that receives the renamed files; created if missing.
            May be the same folder as *src_dir* for an in-place rename.
        ext: Extension (including the leading dot) given to every new name.

    Returns:
        The number of files found in *src_dir*.

    Raises:
        FileExistsError: If a target name is already taken by another file.
            ``os.rename`` would silently overwrite it on POSIX systems.
    """
    src_dir = os.path.abspath(src_dir)
    names = sorted(
        name for name in os.listdir(src_dir)
        if os.path.isfile(os.path.join(src_dir, name))
    )
    print("文件数量：", len(names))
    os.makedirs(dst_dir, exist_ok=True)
    for index, name in enumerate(names):
        src = os.path.join(src_dir, name)  # current location of the file
        dst = os.path.join(dst_dir, str(index) + ext)  # new location and name
        if os.path.abspath(dst) == src:
            continue  # the file already carries its target name
        if os.path.exists(dst):
            raise FileExistsError(dst)
        os.rename(src, dst)
    return len(names)


if __name__ == "__main__":
    rename_files(src_path, dst_path)

##1.2 批量更改图片数据集格式

# Example: convert a folder of .jpg images to .png.
import os

import PIL.Image as Image

src_path = ''  # folder that contains the images to convert
dst_path = ''  # folder that receives the converted images


def convert_images(src_dir, dst_dir, ext='.png'):
    """Re-encode every image in *src_dir* and save it to *dst_dir* with *ext*.

    Each file is opened with PIL and saved again; PIL picks the output
    format from the new extension, so the pixel data is really re-encoded
    rather than the file merely being renamed.

    Args:
        src_dir: Folder holding the source images.
        dst_dir: Folder that receives the converted images; created if
            missing.
        ext: Target extension including the leading dot, e.g. ``'.png'``.

    Returns:
        The number of images processed.
    """
    names = sorted(os.listdir(src_dir))
    print("图片数量：", len(names))
    os.makedirs(dst_dir, exist_ok=True)
    for i, name in enumerate(names):
        if i % 100 == 0:
            print("第{}张".format(i))  # progress report every 100 images
        # splitext handles extensions of any length (.jpg, .jpeg, ...).
        stem = os.path.splitext(name)[0]
        with Image.open(os.path.join(src_dir, name)) as img:
            img.save(os.path.join(dst_dir, stem + ext))
    return len(names)


if __name__ == "__main__":
    convert_images(src_path, dst_path)

# Note (from the original author): changing only the file extension without
# going through the Image library also appears to work, but the resulting
# picture may fail to display on Ubuntu.

##1.3 将一个文件夹下的文件名写到一个.txt文件下

import os

folder_path = ''  # folder whose file names should be listed
list_path = '文件路径'  # text file that receives the names, one per line


def write_file_names(folder, list_file):
    """Write the extension-less name of every entry in *folder* to *list_file*.

    Names are written in sorted order, one per line. Only the last
    extension is removed (``a.b.txt`` becomes ``a.b``), so names that
    contain dots are not truncated.

    Args:
        folder: Folder to list.
        list_file: Path of the text file to (over)write, encoded as UTF-8.

    Returns:
        The number of names written.
    """
    names = sorted(os.listdir(folder))
    print("文件夹下的文件数量：", len(names))
    # The with-block guarantees the list is flushed and closed.
    with open(list_file, 'w', encoding='utf-8') as out:
        out.writelines(os.path.splitext(name)[0] + '\n' for name in names)
    return len(names)


if __name__ == "__main__":
    write_file_names(folder_path, list_path)

1.4 将文件下的某个文件删除

import os

file_path = ''  # path of the file to delete


def delete_file(path):
    """Delete the file at *path* and report whether anything was removed.

    A missing file is not treated as an error, so the function can be
    called repeatedly while cleaning a dataset. Other failures (for
    example a permission problem, or *path* being a directory) still
    raise ``OSError``.

    Args:
        path: Path of the file to delete.

    Returns:
        True if the file was removed, False if it did not exist.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        return False
    return True


if __name__ == "__main__":
    delete_file(file_path)

# Note: this function can be used to clean a dataset by deleting unwanted
# samples.

1.5 json文件转换为单个的.xml文件

#问题描述
将BDD整个数据集的标签文件-json文件（一个包含了70000张图片的信息），转换成voc数据中的.xml文件（为每张图片生成单独的.xml标注文件）

#json文件结构描述
整个大json文件是一个列表，每张图片对应其中一个用{}括起来的字典，包含name、attributes、labels等键；其中labels的值是一个列表，列表中的每个{}描述一个目标物体，包含该物体的类别（category）和坐标信息（box2d）。


[
    {
        "name": "0000f77c-6257be58.jpg",
        "attributes": {
            "weather": "clear",
            "scene": "city street",
            "timeofday": "daytime"
        },
        "timestamp": 10000,
        "labels": [
            {
                "category": "traffic light",
                "attributes": {
                    "occluded": false,
                    "truncated": false,
                    "trafficLightColor": "green"
                },
                "manualShape": true,
                "manualAttributes": true,
                "box2d": {
                    "x1": 1125.902264,
                    "y1": 133.184488,
                    "x2": 1156.978645,
                    "y2": 210.875445
                },
                "id": 0
            },
            {
                "category": "traffic sign",
                "attributes": {
                    "occluded": false,
                    "truncated": false,
                    "trafficLightColor": "none"
                },
                "manualShape": true,
                "manualAttributes": true,
                "box2d": {
                    "x1": 1101.731743,
                    "y1": 211.122087,
                    "x2": 1170.79037,
                    "y2": 233.566141
                },
                "id": 2
            },
            
            {
                "category": "car",
                "attributes": {
                    "occluded": false,
                    "truncated": false,
                    "trafficLightColor": "none"
                },
                "manualShape": true,
                "manualAttributes": true,
                "box2d": {
                    "x1": 45.240919,
                    "y1": 254.530367,
                    "x2": 357.805838,
                    "y2": 487.906215
                },
                "id": 4
            }, 
        ]
    },
    {
        "name": "0000f77c-62c2a288.jpg",
        "attributes": {
            "weather": "clear",
            "scene": "highway",
            "timeofday": "dawn/dusk"
        },
        "timestamp": 10000,
        "labels": [
            {
                "category": "traffic sign",
                "attributes": {
                    "occluded": false,
                    "truncated": false,
                    "trafficLightColor": "none"
                },
                "manualShape": true,
                "manualAttributes": true,
                "box2d": {
                    "x1": 250.549246,
                    "y1": 294.619798,
                    "x2": 308.085801,
                    "y2": 357.05308
                },
                "id": 11
            },
            {
                "category": "traffic sign",
                "attributes": {
                    "occluded": true,
                    "truncated": false,
                    "trafficLightColor": "none"
                },
                "manualShape": true,
                "manualAttributes": true,
                "box2d": {
                    "x1": 206.478694,
                    "y1": 299.516526,
                    "x2": 221.16888,
                    "y2": 321.551802
                },
                "id": 12
            }
        ]
    },
    ……（其余图片的标注结构相同，此处省略）
]

import os, sys
import json
import cv2
import random
import numpy as np


src_label_dir = 'G:/dataset/wm/Labels/bdd100k_labels_images_val.json'  # BDD label JSON file

out_xml_dir = 'G:/dataset/wm/BDD_to_VOC/val2_xml'  # VOC-style Annotations output folder

# BDD category -> class name written to the XML file. Labels whose category
# is not listed here are ignored.
CLASS_MAP = {
    'traffic sign': 'road_sign',
    'car': 'vehicle',
    'bus': 'vehicle',
    'person': 'pedestrian',
    'bike': 'bicycle',
    'motor': 'bicycle',
}


def _image_stem(image):
    """Return the image file name without directory and extension."""
    return os.path.splitext(os.path.basename(image['name']))[0]


def _object_xml(cls, box):
    """Return the ``<object>`` element for one box; coordinates are truncated to int."""
    return (
        '    <object>\n'
        '        <name>{}</name>\n'
        '        <pose>Unspecified</pose>\n'
        '        <truncated>0</truncated>\n'
        '        <difficult>0</difficult>\n'
        '        <bndbox>\n'
        '            <xmin>{}</xmin>\n'
        '            <ymin>{}</ymin>\n'
        '            <xmax>{}</xmax>\n'
        '            <ymax>{}</ymax>\n'
        '        </bndbox>\n'
        '    </object>\n'
    ).format(cls, int(box['x1']), int(box['y1']), int(box['x2']), int(box['y2']))


def build_annotation_xml(image, class_map=None, image_ext='.jpg'):
    """Build the VOC-style XML text for one entry of the BDD label list.

    Args:
        image: One dict from the label list; reads ``name`` and ``labels``.
        class_map: Mapping from BDD category to output class name; defaults
            to ``CLASS_MAP``.
        image_ext: Extension written into ``<filename>`` and ``<path>``.

    Returns:
        The XML document as a string, or None when the image has no label
        of a wanted category (no annotation file should be produced then).
    """
    if class_map is None:
        class_map = CLASS_MAP
    objects = [
        _object_xml(class_map[label['category']], label['box2d'])
        for label in image.get('labels', [])
        if label.get('category') in class_map and 'box2d' in label
    ]
    if not objects:
        return None
    # The JSON name already carries an extension; replace it instead of
    # appending a second one (the original produced "xxx.jpg.jpg").
    path = os.path.splitext(image['name'])[0] + image_ext
    header = (
        '<annotation>\n'
        '    <folder>VOC2007</folder>\n'
        '    <filename>' + _image_stem(image) + image_ext + '</filename>\n'
        '    <path>' + path + '</path>\n'
        '    <segmented>0</segmented>\n'
    )
    # The <size> element is omitted: filling it would require reading every
    # image from disk to obtain its width and height.
    return header + ''.join(objects) + '</annotation>'


def convert_bdd_to_voc(label_file, xml_dir, class_map=None, image_ext='.jpg'):
    """Split one BDD label JSON file into per-image VOC-style XML files.

    An XML file is written only for images that contain at least one
    object of a wanted category, so nothing has to be deleted afterwards.

    Args:
        label_file: Path of the BDD JSON file (a list with one dict per image).
        xml_dir: Output folder for the ``.xml`` files; created if missing.
        class_map: Mapping from BDD category to output class name; defaults
            to ``CLASS_MAP``.
        image_ext: Extension written into ``<filename>`` and ``<path>``.

    Returns:
        The number of XML files written.
    """
    with open(label_file, encoding='utf-8') as f:
        content = json.load(f)
    print(len(content))  # number of images described by the label file
    os.makedirs(xml_dir, exist_ok=True)
    written = 0
    for image in content:
        xml_text = build_annotation_xml(image, class_map, image_ext)
        if xml_text is None:
            continue
        xml_path = os.path.join(xml_dir, _image_stem(image) + '.xml')
        with open(xml_path, 'w', encoding='utf-8') as xml_file:
            xml_file.write(xml_text)
        written += 1
    return written


if __name__ == "__main__":
    convert_bdd_to_voc(src_label_dir, out_xml_dir)

##1.6利用文件名从某个文件夹下复制对应文件


import os
import shutil
file_path = 'G:/dataset/VOCdevkit/VOC2007/ImageSets/Main/bicycle_val.txt'  # text file listing the image names
images_path = 'G:/dataset/VOCdevkit/VOC2007/JPEGImages/'  # folder holding the source images
dst_path = 'G:/dataset/VOCdevkit/VOC2007/bicycle_val'  # folder that receives the copies


def copy_listed_images(list_file, images_dir, dst_dir, wanted_flags=('1', '0'), ext='.jpg'):
    """Copy the images named in *list_file* from *images_dir* to *dst_dir*.

    Each line of *list_file* is split on whitespace. The first field is the
    image name without extension and the last field is a flag; a line is
    selected when its flag is one of *wanted_flags*. Blank lines are skipped.

    Args:
        list_file: Text file with one ``<name> <flag>`` entry per line.
        images_dir: Folder holding the source images.
        dst_dir: Folder that receives the copies; created if missing.
        wanted_flags: Flag values that select a line for copying.
        ext: Extension appended to each listed name.

    Returns:
        The number of images copied.

    Raises:
        FileNotFoundError: If a listed image is missing from *images_dir*.
    """
    os.makedirs(dst_dir, exist_ok=True)
    count = 0
    with open(list_file) as f:
        for line in f:
            # split() without arguments tolerates repeated and trailing
            # whitespace, which split(' ') does not.
            fields = line.split()
            if not fields or fields[-1] not in wanted_flags:
                continue
            img_name = fields[0] + ext
            img_path = os.path.join(images_dir, img_name)
            print(img_path)
            shutil.copyfile(img_path, os.path.join(dst_dir, img_name))
            count += 1  # count only files that were actually copied
    return count


if __name__ == "__main__":
    count = copy_listed_images(file_path, images_path, dst_path)
    print("一共有{}张自行车图片".format(count))