统计数据的脚本

最新推荐文章于 2021-12-22 18:44:31 发布

nealys

最新推荐文章于 2021-12-22 18:44:31 发布

阅读量363

点赞数

分类专栏： Python

本文链接：https://blog.csdn.net/nealys/article/details/118486887

版权

Python 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

import codecs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# import seaborn as sns	
# from pandas_profiling import ProfileReport
#from numpy import *
import operator
from os import listdir

def readTXT(path):
    """Print the entire contents of the UTF-8 text file at ``path``.

    Args:
        path: Path of the file to read.

    Returns:
        None. The file's text is written to standard output.
    """
    with open(path, encoding='utf-8') as handle:
        print(handle.read())
        
        
        
def file2matrix(filename):
    """Parse a tab-separated label file into a feature matrix and label list.

    Every non-blank line is split on tab characters. The first field is
    converted to ``int`` and collected as that line's class label; the
    remaining fields are converted to ``float`` and form one matrix row.

    The previous version returned a ``returnMat`` that was never assigned, so
    every call failed with ``NameError``; it also left the file open.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple. ``returnMat`` is a
        ``numpy.ndarray`` of floats with one row per parsed line, and
        ``classLabelVector`` is the list of integer labels in file order.

    Raises:
        ValueError: If a field is not numeric, or if the lines do not all
            have the same number of fields.
    """
    rows = []
    classLabelVector = []                       # labels, in file order
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # A trailing empty line would otherwise crash int('').
                continue
            listFromLine = line.split('\t')
            classLabelVector.append(int(listFromLine[0]))
            # NOTE(review): assumes every field after the label is numeric.
            rows.append([float(value) for value in listFromLine[1:]])
    returnMat = np.array(rows, dtype=float)
    return returnMat, classLabelVector
        
        
# Lookup table keyed by object type name. A plain entry maps straight to an id
# stored as a string; a nested entry is keyed first by an attribute name
# (e.g. "Subtype", "Material") and then by that attribute's value.
mmp = {
    "Passerby": '0',
    "HumanBody": '0',
    "MotorVehicle": '1',
    "NonMotorVehicle": {
        "Subtype": {
            "Motorcycle": '2',
            "Bicycle": '3',
            "Tricycle": '4',
            "Others": '5',
            "Unknow": '28',
        },
    },
    "Package": {
        "Subtype": {
            "Handbag": '6',
            "Backpack": '6',
            "ShoulderBag": '6',
            "Suitcase": '7',
        },
    },
    "Box": {
        "Material": {
            "Paper": '8',
            "Plastic": '8',
            "Metal": '8',
            "Others": '8',
        },
    },
    "Bag": {
        "Material": {
            "Paper": '9',
            "Plastic": '9',
            "Cloth": '9',
            "Others": '9',
        },
    },
    'Book': '10',
    'Bottle': '11',
    'CellPhone': '12',
    'Pillow': '13',
    'Toybike': {
        "Subtype": {
            "Childrenbicycle": '14',
            "Electrictoycar": '15',
            "Others": '16',
            'Childrenscooter': '37',
        },
    },
    "Stroller": '17',
    "Whellchair": '18',
    "Handcart": '19',
    "RoadCone": '20',

    'Tables': '21',
    'Couch': '22',
    'Television': '23',
    'Computer': {
        "Subtype": {
            'Desktop': '24',
            'Laptop': '24',
        },
    },
    'MicrowaveOven': '25',
    "Refrigerator": '27',
    'SafeHelmet': '29',
    'Barrel': '30',
    'FireExtinguisher': '31',
    # animal, human head, head-and-shoulders
    'Animal': '32',
    'Head': '33',
    'HeadShoulder': '34',
    # only used for traffic littering (objects scattered on the road)
    'Clutter': '35',
    # labelled 36 when no sub-attribute is given
    'Umbrella': {
        'None': '36',
        "Application": {
            "RainShade": '47',
            "SunShade": '48',
        },
    },
    'GasTank': '38',
    # newer non-motor-vehicle related types
    'Motorcycle_Basket': '39',
    'Motorcycle_Wheel': '40',
    'Motorcycle_Trunk': '41',
    'Bicycle_Basket': '42',
    'Bicycle_Wheel': '43',
    'Cape': '44',
    'DangerousGoods': {
        "Type": {
            "Sword": '45',
            "Stick": '46',
        },
    },
    # pile of sundries, billboard, laundry hung along the street
    "Sundries": '49',
    "BillBoard": '50',
    "StreetHang": '51',
    # garbage can
    "GarbageCan": {
        "Status": {
            "NotFull": '53',
            "Overflow": '52',
        },
    },
    # broom, freezer
    "Broom": '54',
    "Freezer": '55',
    # '56' (stall) when the attribute is empty, otherwise '57' (basket stall)
    "Stall": {
        "None": '56',
        "Modality": {
            "BasketStall": '57',
        },
    },
}

# Read the index file: each line holds ';'-separated fields, and the second
# field of every line is collected into list1.
list1 = []
with open('/extraStore/secondStorePath/groupdata/basicAlgorithm/05.Article_Elevator/city/素材/label_list/AIcity_20210622.txt', mode='r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            # Blank lines have no second field; skip them instead of crashing.
            continue
        a = line.split(";")
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate the final entry when the file does
        # not end with a newline.
        b = a[1].rstrip("\r\n")
        list1.append(b)

# Build a counter dict with integer keys 0-57, every count starting at 0.
keyList = list(range(58))
valuesList = [0] * len(keyList)
resDict = dict(zip(keyList, valuesList))
print("原始resDict")
print(resDict)

#df_total = pd.DataFrame(columns=['label', 'x', 'y' ,'w', 'h'])
# Per-file label statistics: within one file each distinct label is counted
# at most once, so the totals are "number of files containing the label".
df_csvs = []
for label_path in list1:  # each entry is the path of one label file
    # Files are read as space-separated rows without a header; the columns
    # are named label, x, y, w, h.
    df_tmp = pd.read_csv(label_path, sep=' ', names=['label', 'x', 'y', 'w', 'h'])

    # Keep the first row of every distinct label; df_tmp itself is untouched.
    new_df_tmp = df_tmp.drop_duplicates(subset='label', keep='first')

    # Count every label present in this file. Missing labels (NaN) are
    # skipped. The previous check (`... is np.nan`) was inverted: valid labels
    # were never counted, and a NaN label would have crashed in int().
    for label in new_df_tmp['label'].dropna():
        label_id = int(label)
        # .get() tolerates ids outside the pre-seeded key range.
        resDict[label_id] = resDict.get(label_id, 0) + 1

    df_csvs.append(new_df_tmp)

# Merge all per-file frames into one DataFrame. pd.concat raises on an empty
# list, so fall back to an empty frame when no label files were listed.
if df_csvs:
    df_total = pd.concat(df_csvs)
else:
    df_total = pd.DataFrame(columns=['label', 'x', 'y', 'w', 'h'])
totalRes = df_total.groupby('label').size()
print('totalRes：')
print(totalRes)

print('最后resDict：')
print(resDict)

#profile = ProfileReport(df_total)
#profile.to_file("my_report.html")
#print(df_total)

nealys

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
统计数据的脚本

import codecs import numpy as np import pandas as pd import matplotlib.pyplot as plt # import seaborn as sns # from pandas_profiling import ProfileReport # from numpy import * import operator from os import listdir def readTXT(path): # print(path) ...
复制链接

扫一扫