import codecs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import seaborn as sns
# from pandas_profiling import ProfileReport
#from numpy import *
import operator
from os import listdir
def readTXT(path):
    """Print the UTF-8 text stored at *path* and return it.

    Args:
        path: Location of the text file to read.

    Returns:
        The complete file contents as a string (the same text that is
        echoed to stdout).
    """
    # The context manager guarantees the handle is closed even if read() fails.
    with open(path, encoding='utf-8') as f:
        text = f.read()
    print(text)
    return text
def file2matrix(filename):
    """Parse a tab-separated file into a numeric matrix and a label list.

    Every non-blank line is split on tabs. The first field is converted to
    an ``int`` class label; the remaining fields are converted to floats and
    become one row of the returned matrix.

    NOTE(review): the previous version returned ``returnMat`` without ever
    defining it, so the intended matrix layout is not visible here. Building
    it from the fields after the label is an assumption -- confirm against
    the expected file format.

    Args:
        filename: Path of the tab-separated file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float ``numpy`` array with one row per parsed line and
        ``classLabelVector`` is the list of integer labels in file order.

    Raises:
        ValueError: If a label is not an integer, a remaining field is not
            numeric, or the lines have differing numbers of fields.
    """
    classLabelVector = []  # labels to return, one per parsed line
    featureRows = []
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Blank lines carry no label; int('') would raise.
                continue
            listFromLine = line.split('\t')
            classLabelVector.append(int(listFromLine[0]))
            featureRows.append([float(value) for value in listFromLine[1:]])
    returnMat = np.array(featureRows, dtype=float)
    return returnMat, classLabelVector
# Lookup table from an object type name to a numeric id stored as a string.
# A nested dict means the id depends on one attribute of the object
# (e.g. "Subtype", "Material"); a "None" key gives the id to use when that
# attribute is absent.
# NOTE(review): key spellings such as "Whellchair" and "Unknow" are kept
# verbatim -- presumably they must match the annotation data; confirm before
# renaming.
mmp = {
    "Passerby": "0",
    "HumanBody": "0",
    "MotorVehicle": "1",
    "NonMotorVehicle": {
        "Subtype": {
            "Motorcycle": "2",
            "Bicycle": "3",
            "Tricycle": "4",
            "Others": "5",
            "Unknow": "28",
        },
    },
    "Package": {
        "Subtype": {
            "Handbag": "6",
            "Backpack": "6",
            "ShoulderBag": "6",
            "Suitcase": "7",
        },
    },
    "Box": {
        "Material": {
            "Paper": "8",
            "Plastic": "8",
            "Metal": "8",
            "Others": "8",
        },
    },
    "Bag": {
        "Material": {
            "Paper": "9",
            "Plastic": "9",
            "Cloth": "9",
            "Others": "9",
        },
    },
    "Book": "10",
    "Bottle": "11",
    "CellPhone": "12",
    "Pillow": "13",
    "Toybike": {
        "Subtype": {
            "Childrenbicycle": "14",
            "Electrictoycar": "15",
            "Others": "16",
            "Childrenscooter": "37",
        },
    },
    "Stroller": "17",
    "Whellchair": "18",
    "Handcart": "19",
    "RoadCone": "20",
    "Tables": "21",
    "Couch": "22",
    "Television": "23",
    "Computer": {
        "Subtype": {
            "Desktop": "24",
            "Laptop": "24",
        },
    },
    "MicrowaveOven": "25",
    "Refrigerator": "27",
    "SafeHelmet": "29",
    "Barrel": "30",
    "FireExtinguisher": "31",
    # Animal, human head, head-and-shoulders.
    "Animal": "32",
    "Head": "33",
    "HeadShoulder": "34",
    # Only used for objects spilled/thrown onto the road.
    "Clutter": "35",
    # Without a subtype the umbrella is labelled 36.
    "Umbrella": {
        "None": "36",
        "Application": {
            "RainShade": "47",
            "SunShade": "48",
        },
    },
    "GasTank": "38",
    # Newer non-motor-vehicle related types.
    "Motorcycle_Basket": "39",
    "Motorcycle_Wheel": "40",
    "Motorcycle_Trunk": "41",
    "Bicycle_Basket": "42",
    "Bicycle_Wheel": "43",
    "Cape": "44",
    "DangerousGoods": {
        "Type": {
            "Sword": "45",
            "Stick": "46",
        },
    },
    # Pile of sundries, billboard, laundry hung along the street.
    "Sundries": "49",
    "BillBoard": "50",
    "StreetHang": "51",
    # Garbage can.
    "GarbageCan": {
        "Status": {
            "NotFull": "53",
            "Overflow": "52",
        },
    },
    # Broom, freezer.
    "Broom": "54",
    "Freezer": "55",
    # With no attribute the id is "56" (stall); otherwise "57" (basket stall).
    "Stall": {
        "None": "56",
        "Modality": {
            "BasketStall": "57",
        },
    },
}
def _read_label_paths(list_file):
    """Return the second ';'-separated field of each usable line in *list_file*.

    Lines without a ';' separator, and lines whose second field is empty
    after trimming, are skipped.

    Args:
        list_file: Path of the UTF-8 index file to read.

    Returns:
        List of the extracted fields, in file order, with surrounding
        whitespace (including line terminators) removed.
    """
    paths = []
    # `with` closes the file even when reading or decoding fails.
    with codecs.open(list_file, mode='r', encoding='utf-8') as f:
        for line in f:
            fields = line.split(";")
            if len(fields) < 2:
                # Blank or malformed line: there is no second field to take.
                continue
            # strip() instead of dropping the last character: the old slice
            # left '\r' behind on CRLF files and cut a real character off
            # the final line when the file had no trailing newline.
            path = fields[1].strip()
            if path:
                paths.append(path)
    return paths


# Read the index file as UTF-8 and collect one label-file path per line.
list1 = _read_label_paths('/extraStore/secondStorePath/groupdata/basicAlgorithm/05.Article_Elevator/city/素材/label_list/AIcity_20210622.txt')
# Build the counter dictionary: integer keys 0-57, every count starting at 0.
keyList = list(range(58))
valuesList = [0] * len(keyList)
resDict = dict.fromkeys(keyList, 0)
print("原始resDict")
print(resDict)
# Column names given to every label file when it is parsed.
_LABEL_COLUMNS = ['label', 'x', 'y', 'w', 'h']

# One de-duplicated frame per label file; concatenated after the loop.
df_csvs = []
for label_path in list1:  # path of each label file
    df_tmp = pd.read_csv(label_path, sep=' ', names=_LABEL_COLUMNS)
    # Keep the first row of each label without modifying df_tmp, so a file
    # contributes at most one row (and one count) per label.
    new_df_tmp = df_tmp.drop_duplicates(subset='label', keep='first', inplace=False)
    # Count every label that is present. The earlier check
    # `label is np.nan` was an identity test that is never true for values
    # parsed by pandas, so nothing was ever counted; dropna() skips missing
    # labels, which cannot be converted to int.
    for label in new_df_tmp['label'].dropna():
        key = int(label)
        # .get() so a label outside the pre-seeded 0-57 range is still
        # counted instead of raising KeyError.
        resDict[key] = resDict.get(key, 0) + 1
    df_csvs.append(new_df_tmp)

# Merge everything into one DataFrame; pd.concat raises on an empty list,
# so fall back to an empty frame with the same columns.
if df_csvs:
    df_total = pd.concat(df_csvs)
else:
    df_total = pd.DataFrame(columns=_LABEL_COLUMNS)
totalRes = df_total.groupby('label').size()
print('totalRes:')
print(totalRes)
print('最后resDict:')
print(resDict)
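# Script for collecting label statistics.
# (Scraped page footer, not part of the script: "latest recommended article published 2021-12-22 18:44:31".)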