很多时候,我们在进行检测之前,需要先对数据集进行分析。
这个py文件是将训练数据的label文件,统一转换成csv格式,输出长、宽、面积、ratios(长宽比)等
如果不考虑批量操作,当然你也可以直接用 Excel 导入数据(data)。
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 7 11:11:15 2018
分析原始数据
@author: ygx
"""
import os
import pandas as pd
# Root directory that holds the raw label files to analyse.
# NOTE: this name shadows the builtin ``dir``; it is kept so the
# module-level name stays the same.
dir = '/home/ygx/dotatools/原始数据分析/样本'

# Full path of every file found under ``dir`` (sub-directories included).
allfiles = []
for root, dirs, files in os.walk(dir):
    for filespath in files:
        filepath = os.path.join(root, filespath)
        allfiles.append(filepath)
        # Progress trace: the list collected so far and the file just added.
        # A single pre-formatted argument prints the same text on Python 2
        # and Python 3 (the old ``print a,b`` statement is Python 2 only).
        print('%s %s' % (allfiles, filepath))
def parse_dota_poly(filename, scale=1.5):
    """Parse one label file into a list of per-object measurement dicts.

    Every non-blank line must hold at least five whitespace-separated
    fields, ``name x1 y1 x2 y2``; any further fields are ignored. Blank
    lines are skipped.

    Args:
        filename: Path of the label file to read.
        scale: Factor applied to the raw width and height before the
            derived values are computed. Defaults to 1.5, the factor that
            used to be hard-coded.

    Returns:
        A list with one dict per object, holding:
            ``name``  - first field of the line,
            ``bbox``  - ``[x1, y1, x2, y2]`` truncated to int (not scaled),
            ``w``     - absolute scaled width,
            ``h``     - absolute scaled height,
            ``area``  - absolute value of scaled width * scaled height,
            ``ratio`` - absolute value of scaled width / scaled height.

    Raises:
        IndexError: A non-blank line has fewer than five fields.
        ValueError: A coordinate field is not a number.
        ZeroDivisionError: The scaled height of a box is zero.
    """
    objects = []
    with open(filename, 'r') as f:
        for line in f:
            # split() without arguments tolerates tabs and repeated spaces
            # and yields an empty list for blank lines.
            fields = line.split()
            if not fields:
                continue

            x1 = float(fields[1])
            y1 = float(fields[2])
            x2 = float(fields[3])
            y2 = float(fields[4])

            # "+ 1" makes the extent inclusive of both end coordinates.
            w = scale * (x2 - x1 + 1)
            h = scale * (y2 - y1 + 1)

            objects.append({
                'name': fields[0],
                'bbox': [int(x1), int(y1), int(x2), int(y2)],
                # abs() keeps the values positive even if the two corners
                # are listed in reverse order.
                'area': abs(w * h),
                'w': abs(w),
                'h': abs(h),
                'ratio': abs(w / h),
            })
    return objects
if __name__ == '__main__':
    # Destination folder for the generated CSV files (one per label file).
    csv_dir = '/home/ygx/dotatools/原始数据分析/csv文本'
    # Make sure the target folder exists before anything is written to it.
    if not os.path.isdir(csv_dir):
        os.makedirs(csv_dir)

    for files in allfiles:
        objects = parse_dota_poly(files)
        # NOTE: only the base name is kept, so label files that share a name
        # in different sub-folders overwrite each other's CSV.
        filename = os.path.splitext(os.path.basename(files))[0]
        b = pd.DataFrame(objects)
        newpath = os.path.join(csv_dir, '%s.csv' % filename)
        b.to_csv(newpath)