python中读取特定字符串,文件自然排序，单列时间前后时间差，批量读取csv文件

本文链接：https://blog.csdn.net/garra_/article/details/105239859

python3.6

1，读取特定字符串

对字符串list筛选特定字符串，可以用`in`做子串判断，再用`split`拆分（更复杂的模式可以借助正则表达式），很方便，举例说明：

# Sample names: every id appears once plain and once with a "_cont" suffix.
cli = [
    'cm_00102_cont', 'cm_00102', 'cm_00103_cont',
    'cm_00103', 'cm_00104', 'cm_00104_cont',
]

# Keep the names that contain "cm_" and do not contain "_cont".
match = list(filter(lambda s: not ('cm_' not in s or '_cont' in s), cli))

# Break each kept name into its underscore-separated parts.
mats = list(map(lambda s: s.split('_'), match))
print(mats)

输出结果

[['cm', '00102'], ['cm', '00103'], ['cm', '00104']]

如果cli是bytes格式，而非str，则可以先将bytes转换为str，得到str格式的cli后再按同样的方法处理。

# Same sample names as raw bytes.
clim = [
    b'cm_00102_cont', b'cm_00102', b'cm_00103_cont',
    b'cm_00103', b'cm_00104', b'cm_00104_cont',
]

# Decode once up front so the remaining steps work on str only.
cli = [raw.decode("utf-8") for raw in clim]

# Keep the names that contain "cm_" and do not contain "_cont".
match = list(filter(lambda s: 'cm_' in s and '_cont' not in s, cli))

# Break each kept name into its underscore-separated parts.
mats = list(map(lambda s: s.split('_'), match))
print(mats)

2，对文件夹下文件进行自然排序（文件有编号）

def natural_sort(l):
    """Return the strings in *l* sorted in natural (human) order.

    Runs of ASCII digits are compared as integers and everything else is
    compared case-insensitively, so ``'f2.csv'`` sorts before ``'f10.csv'``.

    Args:
        l: An iterable of strings (for example file names).

    Returns:
        A new list with the same strings in natural order.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import re`` being present.
    import re

    digit_run = re.compile(r'([0-9]+)')

    def alphanum_key(key):
        # Splitting on a capturing group always yields text, digits, text, ...
        # so the odd positions are exactly the ASCII digit runs.  Deciding by
        # position instead of str.isdigit() avoids int() being applied to
        # non-ASCII digit characters (e.g. a superscript two) found in text.
        return [int(chunk) if idx % 2 else chunk.lower()
                for idx, chunk in enumerate(digit_run.split(key))]

    return sorted(l, key=alphanum_key)

调用该函数，对csvfilepath下的所有文件进行自然排序：

 # List every entry under csvfilepath and put the names in natural order.
 allfile_list=natural_sort(os.listdir(csvfilepath))

3，矩阵中时间错位，计算前后两行的时间差。时间的格式为'%Y-%m-%d %H:%M:%S'，c_dett存放前后时间差，newtim存放以0开始的时间累计（函数只返回c_dett）

def get_dettime(dataori):
    """Return the gap in seconds between each timestamp and the previous one.

    Args:
        dataori: An iterable of timestamp strings in the format
            ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        A list of ints with one entry per timestamp.  The first entry is 0;
        entry ``i`` is ``dataori[i] - dataori[i-1]`` in whole seconds, and is
        negative when a timestamp is earlier than its predecessor.  Empty
        input yields an empty list.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    import datetime

    time_format = '%Y-%m-%d %H:%M:%S'
    # Parse every timestamp once instead of twice per neighbouring pair.
    stamps = [datetime.datetime.strptime(t, time_format) for t in dataori]
    if not stamps:
        return []

    c_dett = [0]
    # Running total of seconds since the first timestamp.  It is built
    # alongside the gaps but is not part of the return value.
    newtim = [0]
    for earlier, later in zip(stamps, stamps[1:]):
        gap = later - earlier
        # timedelta.seconds alone drops the day component, so combine it with
        # .days to get the true (possibly negative) number of seconds.
        gap_seconds = gap.days * 86400 + gap.seconds
        c_dett.append(gap_seconds)
        newtim.append(newtim[-1] + gap_seconds)
    return c_dett

# Convert the textual timestamps in the 'time' column to datetime objects.
newdfa['ntime'] = [datetime.strptime(x, '%Y-%m-%d %H:%M:%S') for x in newdfa['time']]

# Seconds elapsed since the first row; .days is included because .seconds
# alone only holds the sub-day remainder.
sectim = newdfa['ntime'] - newdfa['ntime'].iloc[0]
newdfa['timeseconds'] = [(x.days*86400 + x.seconds) for x in sectim]

# Gap between each row and the one before it: line up rows 0..n-2 with rows
# 1..n-1 (indexes reset so the subtraction pairs them positionally).
dettim1 = newdfa['ntime'].iloc[:-1].reset_index(drop=True)
dettim2 = newdfa['ntime'].iloc[1:].reset_index(drop=True)
# Same days+seconds conversion as above, so gaps of a day or more (and
# negative gaps) are reported correctly.
det = [(x.days*86400 + x.seconds) for x in (dettim2 - dettim1)]
# The first row has no predecessor, so its gap is 0.
det.insert(0, 0)
newdfa['dettime'] = det

4，批量读取csv文件

def get_csvdatas(csvfilepath):
    """Read every ``.csv`` file in a directory into a list of DataFrames.

    The files are read in natural order of their names, and each DataFrame
    gets a ``'dettime'`` column computed by ``get_dettime`` from its
    ``'time'`` column.

    Args:
        csvfilepath: Path of the directory holding the CSV files.  A trailing
            path separator is optional.

    Returns:
        A list of DataFrames, one per ``.csv`` file, in natural file-name
        order.
    """
    import os
    import pandas as pd

    csv_list = [name for name in natural_sort(os.listdir(csvfilepath))
                if name.endswith('.csv')]

    cs_data = []
    for name in csv_list:
        # os.path.join inserts the separator only when csvfilepath lacks one,
        # so both 'dir' and 'dir/' work.
        frame = pd.read_csv(os.path.join(csvfilepath, name))
        frame['dettime'] = get_dettime(frame['time'])
        cs_data.append(frame)
    return cs_data