python操作处理数据
一、 批量读取文件名并保存至txt
参考博客: Python批量读取文件名并保存至txt
1. 批量读取文件名,并将读取的文件名保存到指定路径下的txt中(带.*** 后缀)
# P01 批量读取文件名,并将读取的文件名保存到指定路径下的txt中(带.*** 后缀)
import os
def ListFilesToTxt(dir, file, wildcard, recursion):
    """Write the names of entries under ``dir`` that match ``wildcard`` to ``file``.

    Args:
        dir: Directory whose entries are listed.
        file: Writable text file object; one matching name is written per line
            (the bare name, not the full path).
        wildcard: Space-separated suffixes to keep, e.g. ``".jpg .txt"``.
            An empty string keeps every name.
        recursion: Truthy to descend into sub-directories, falsy to stay at
            the top level.
    """
    # split() without an argument drops empty tokens, so repeated spaces in
    # ``wildcard`` cannot produce an empty suffix that would match every name.
    exts = tuple(wildcard.split())
    for name in os.listdir(dir):
        fullname = os.path.join(dir, name)
        # Logical ``and`` instead of bitwise ``&``: ``True & 2`` is 0, so a
        # truthy flag other than 1/True used to disable recursion silently.
        if recursion and os.path.isdir(fullname):
            ListFilesToTxt(fullname, file, wildcard, recursion)
        elif not exts or name.endswith(exts):
            # str.endswith accepts a tuple, replacing the inner suffix loop.
            file.write(name + "\n")
def ReadName():
    """Export the names of the ``.jpg`` files under the image folder to ``Image.txt``."""
    dir = "C:/本地路径/JPEGImages"  # folder to scan
    outfile = "Image.txt"  # text file that receives the names
    wildcard = ".jpg"  # suffixes to keep; several may be given, e.g. ".jpg .txt .exe .dll .lib"
    # open() raises OSError on failure instead of returning a falsy object, so
    # the former ``if not file`` check could never trigger. ``with`` closes the
    # file even when the directory walk raises.
    with open(outfile, "w") as file:
        ListFilesToTxt(dir, file, wildcard, 1)


if __name__ == "__main__":
    ReadName()
2. 只读取文件名,并将读取的文件名保存到指定路径下的txt中(不带文件后缀)
# P02 批量读取文件名(不带后缀)
# coding:utf-8
import os
file_path = "D:/code/cluster/gt_map/"  # folder whose entries are listed
path_list = os.listdir(file_path) # os.listdir(path) returns the names of the entries in the folder as a list
print(path_list)
path_name = [] # NOTE(review): not used by the code below; the names are written to id.txt by saveList
def saveList(pathName):
    """Append each name in ``pathName``, without its extension, to ``id.txt``.

    Args:
        pathName: Iterable of file names (bare names, not full paths).
    """
    # splitext removes only the final extension, so "a.b.jpg" becomes "a.b";
    # split(".")[0] cut such names down to "a".
    stems = [os.path.splitext(file_name)[0] for file_name in pathName]
    if not stems:
        # Nothing to record: do not create an empty id.txt.
        return
    # Open the output once instead of re-opening it for every single name.
    with open("id.txt", "a") as f:
        f.writelines(stem + "\n" for stem in stems)
def dirList(path_list):
    """Record the entries of every sub-directory named in ``path_list``.

    Each name is resolved against the module-level ``file_path``; for the ones
    that are directories, their entry names are handed to ``saveList``.
    """
    for entry in path_list:
        candidate = os.path.join(file_path, entry)
        if not os.path.isdir(candidate):
            continue
        saveList(os.listdir(candidate))
# Record the entries of each sub-directory first, then the top-level entries themselves.
dirList(path_list)
saveList(path_list)
运行结果:
需要提取的文件名称:
提取出来的txt文件:
3. 在txt文件中,为读取的文件名批量添加前缀/后缀
# P03批量添加 ****/ 前缀
import pandas as pd
def add_prefix(src_path, dst_path, prefix="data/obj/"):
    """Prepend ``prefix`` to every entry of a name list and save the result.

    Args:
        src_path: Text file holding one name per line.
        dst_path: File that receives the prefixed names, one per line.
        prefix: Text placed in front of every name.

    Returns:
        The DataFrame of prefixed names that was written to ``dst_path``.
    """
    # dtype=str keeps purely numeric names such as "0001" as text; otherwise
    # pandas parses them as integers and the string concatenation fails.
    data = pd.read_csv(src_path, header=None, dtype=str)
    # One vectorised concatenation replaces the row-by-row loop.
    data = prefix + data
    # header=False: the frame was read without a header, so the default would
    # write the synthetic column label "0" as the first line of the output.
    data.to_csv(dst_path, index=False, header=False)
    return data


if __name__ == "__main__":
    data = add_prefix("C:/本地路径/Image.txt", './addSuffixImage.txt')
    print(data)
二、根据txt文件中的id名,在另一个文件夹下提取出相应的文件
参考博客:如何根据txt中图片的名字批量提取对应的图片并保存到另一个文件夹
# -*- coding: UTF-8 -*-
#!/usr/bin/env python
import sys
import re
from PIL import Image
sys.path.append('D:/code')
import numpy as np
# Read the ids, one per line. rstrip("\r\n") removes only the line ending, so
# a final line without a trailing newline keeps its last character (the fixed
# a[:-1] slice cut it off). Blank lines are skipped. ``with`` closes the file.
with open("D:/code/cluster/id.txt", "r") as id_file:
    data = [line.rstrip("\r\n") for line in id_file if line.strip()]

for a in data:
    file_name = a + ".jpg"
    # Open the image named by the id and save it under the same name in the
    # target folder. NOTE: saving through PIL re-encodes the JPEG; a byte-exact
    # copy would need shutil.copyfile instead.
    with Image.open('D:/code/cluster/img/{}'.format(file_name)) as im:
        im.save('D:/code/cluster/gt_img/{}'.format(file_name))
运行结果:
三、 计算函数运行时间
import time
def a():
    """Print the time, in seconds, elapsed between the two timestamps.

    Put the code to be timed between the ``start_time`` and ``end_time`` lines.
    """
    # perf_counter() is monotonic and high-resolution. time.time() follows the
    # wall clock, which can be adjusted during the measurement and yield a
    # skewed or even negative interval.
    start_time = time.perf_counter()
    end_time = time.perf_counter()
    print(end_time - start_time)
四、复制图片文件到另一个文件夹下
import os
import copy
import shutil
# Copy the images from the VisDrone training folder into images/train.
image_old = "/home/jjliao/Visdrone_yolo_cluster/VisDrone2019-DET-train/images_cluster/"
image_new = "/home/jjliao/Visdrone_yolo_cluster/images/train/"
# exist_ok=True avoids the check-then-create race of exists() + makedirs().
os.makedirs(image_new, exist_ok=True)
for file_name in os.listdir(image_old):
    full_file = os.path.join(image_old, file_name)
    # listdir also returns sub-directories, which shutil.copy cannot copy.
    if not os.path.isfile(full_file):
        continue
    shutil.copy(full_file, os.path.join(image_new, file_name))
五、获取annotations文件夹中,格式为txt的某些内容
# coding:utf-8
import os
# Annotation folder. The original literal began with a full-width "。" instead
# of ".", which does not name the current directory.
path_name = "./annotations/"
# Both files are managed by ``with`` so they are closed even if a line fails
# to parse. Records are appended to visdrone_test_gt.txt.
with open("visdrone_test_gt.txt", 'a') as f1:
    for dir_item in os.listdir(path_name):
        stem = dir_item.split('.')[0]  # annotation file name up to the first dot
        with open(os.path.join(path_name, dir_item)) as f:
            for line in f:
                if not line.strip():
                    continue  # skip blank lines instead of failing on a[5]
                a = line.split(',')
                # Output record: name, field 5, field 0, field 1,
                # field 0 + field 2, field 1 + field 3.
                # NOTE(review): presumably fields 0-3 are left, top, width,
                # height and field 5 is the category — confirm against the
                # annotation format.
                b1 = int(a[0]) + int(a[2])
                b2 = int(a[1]) + int(a[3])
                # Compute everything first so a malformed line cannot leave a
                # half-written record in the output.
                f1.write(' '.join((stem, a[5], a[0], a[1], str(b1), str(b2))) + '\n')
六、从整个文件夹中提取出txt所需要的文件
# coding:utf-8
import os
import shutil
from tqdm import tqdm
# Pick the training and validation samples listed in trainval.txt and test.txt
# under SPLIT_PATH, copy their images and XML annotations into per-split
# folders, and write the list of copied image paths for each split.
SPLIT_PATH = "/data/data/UAVDT_voc/ImageSets/Layout"
IMGS_PATH = "/data/data/UAVDT_voc/JPEGImages"
TXTS_PATH = "/data/data/UAVDT_voc/Annotations"
TO_IMGS_PATH = '/data/data/UAVDT_coco/images'
TO_TXTS_PATH = '/data/data/UAVDT_coco/voc_annotations'
data_split = ['trainval.txt', 'test.txt']
to_split = ['train', 'val']  # trainval.txt -> train, test.txt -> val
train_file = '/data/data/UAVDT_yolo/images_train.txt'
val_file = '/data/data/UAVDT_yolo/images_val.txt'

# Destination image paths per split; joined once at the end instead of growing
# a string inside the loop.
listed_images = {'train': [], 'val': []}

for split, target in zip(data_split, to_split):
    split_path = os.path.join(SPLIT_PATH, split)
    to_imgs_path = os.path.join(TO_IMGS_PATH, target)
    os.makedirs(to_imgs_path, exist_ok=True)
    to_txts_path = os.path.join(TO_TXTS_PATH, target)
    os.makedirs(to_txts_path, exist_ok=True)

    # ``with`` closes the split file; blank lines are dropped so they do not
    # turn into a bogus ".jpg" / ".xml" path.
    with open(split_path, 'r') as f:
        sample_ids = [line.strip() for line in f if line.strip()]

    for sample_id in tqdm(sample_ids, desc="{} is copying".format(target)):
        # Copy the image.
        src_img_path = os.path.join(IMGS_PATH, sample_id + '.jpg')
        dst_img_path = os.path.join(to_imgs_path, sample_id + '.jpg')
        if os.path.exists(src_img_path):
            shutil.copyfile(src_img_path, dst_img_path)
        else:
            print("error file: {}".format(src_img_path))
        # NOTE(review): the path is listed even when the source image was
        # missing, as before — confirm whether missing images should be
        # left out of the list files.
        listed_images[target].append(dst_img_path)

        # Copy the XML annotation file.
        src_txt_path = os.path.join(TXTS_PATH, sample_id + '.xml')
        dst_txt_path = os.path.join(to_txts_path, sample_id + '.xml')
        if os.path.exists(src_txt_path):
            shutil.copyfile(src_txt_path, dst_txt_path)
        else:
            print("error file: {}".format(src_txt_path))

train_file_txt = ''.join(path + '\n' for path in listed_images['train'])
val_file_txt = ''.join(path + '\n' for path in listed_images['val'])

# Make sure the folder for the list files exists before writing them.
os.makedirs(os.path.dirname(train_file), exist_ok=True)
with open(train_file, 'w') as out_train:
    out_train.write(train_file_txt)
os.makedirs(os.path.dirname(val_file), exist_ok=True)
with open(val_file, 'w') as out_val:
    out_val.write(val_file_txt)
七、PR曲线拼接代码
pinjie.py
'''
import glob
import numpy as np
from PIL import Image
def stitch_pairs(images):
    """Tile images two per row and stack the rows into one array.

    Args:
        images: Sequence of arrays. Images are paired in order (0 with 1,
            2 with 3, ...); with an odd count the last image has no partner
            and is left out.

    Returns:
        numpy.ndarray with the rows stacked from top to bottom.

    Raises:
        ValueError: If fewer than two images are given, or if the shapes do
            not line up for concatenation.
    """
    rows = [
        np.concatenate((left, right), axis=1)
        for left, right in zip(images[0::2], images[1::2])
    ]
    if not rows:
        raise ValueError("need at least two images to build a row")
    # A single concatenate over all rows replaces the try/bare-except used to
    # initialise the accumulator; that except also swallowed shape mismatches
    # and silently restarted the result from the current row.
    return np.concatenate(rows, axis=0)


if __name__ == "__main__":
    # Alternative: pic_list = glob.glob("*.png")
    pic_list = ['1pedestrian PR Curve.png', '2people PR Curve.png', '3bicycle PR Curve.png', '4car PR Curve.png', '5van PR Curve.png', '6truck PR Curve.png', '7tricycle PR Curve.png', '8awning-tricycle PR Curve.png', '9bus PR Curve.png', 'motor PR Curve.png']
    image_list = [np.array(Image.open(pic)) for pic in pic_list]
    big_image = Image.fromarray(stitch_pairs(image_list))
    big_image.save("f.png")
八、比较两个txt文件中的不同元素
cmp.py
# import difflib
def read_lines(path):
    """Return the lines of ``path`` without their line endings.

    Stripping the endings makes a last line that lacks a trailing newline
    compare equal to the same text elsewhere.
    """
    with open(path) as handle:
        return handle.read().splitlines()


def only_in_first(first, second):
    """Return the items of ``first`` that do not occur in ``second``.

    Order and duplicates of ``first`` are preserved.
    """
    # Build the set once: O(1) membership instead of scanning a list per item.
    excluded = set(second)
    return [item for item in first if item not in excluded]


if __name__ == "__main__":
    file1 = "gt.txt"
    file2 = "allids1.txt"
    a = read_lines(file1)
    b = read_lines(file2)
    c = only_in_first(b, a)  # in file2 but not in file1
    print("c:", c)
    d = only_in_first(a, b)  # in file1 but not in file2
    print("d:", d)
九、把字符串列表[“x,x,x”]分成每一个元素一行
cmp.py
import os
def split_fields(line, sep=","):
    """Split one delimited line into its fields.

    The line ending is removed first so the last field does not carry it.
    A blank line yields an empty list.
    """
    text = line.rstrip("\r\n")
    return text.split(sep) if text else []


if __name__ == "__main__":
    file_path = "allids.txt"
    # ``with`` closes both files, so the output is flushed to disk.
    with open(file_path) as src, open("./allids1.txt", "w") as fp:
        for name1 in src:
            # Write every field of the line. The former fixed range(5340)
            # raised IndexError on shorter lines and dropped fields on longer ones.
            fp.writelines(field + "\n" for field in split_fields(name1))
十、python中sys.stdout、sys.stdin
1. sys.stdout与print:
在python中调用print时,事实上调用了sys.stdout.write(obj+'\n')。
print 将需要的内容打印到控制台,然后追加一个换行符。
以下两行代码等价:
sys.stdout.write('hello' + '\n')
等价于
print('hello')
2. sys.stdin与input
sys.stdin.readline( )
每次从标准输入读取一行,返回的结果包含末尾的'\n',因此用len计算长度时会把换行符'\n'算进去;而input( )
获取输入时返回的结果不包含末尾的换行符'\n'。
因此平时使用sys.stdin.readline( )获取输入时,不要忘了去掉末尾的换行符:可以用strip( )函数,即sys.stdin.readline( ).strip('\n'),
或用切片sys.stdin.readline( )[:-1](注意:若最后一行末尾没有换行符,切片会误删最后一个字符,因此更推荐strip)。
这两种方法都可以去掉换行。