python 使用yield进行数据或文件的流式处理

demo:重写某个文件夹下的所有分发文件,
要求:
1、时间路径中的空格改为“T”
2、所有日志内容的datetime字段带有6位微秒(小数秒部分),如果位数不足,可用0补齐
3、所有日志内容的datetime字段都为UTC时间,且带“Z”后缀

import os
import sys
import json
from datetime import datetime, timedelta

def read_lines(path):
    """Lazily yield the non-blank lines of a text file.

    Each line is stripped of leading and trailing whitespace; lines that
    are empty after stripping are skipped. The file is read incrementally,
    so only one line is held in memory at a time.

    Args:
        path: Path of the text file to read.

    Yields:
        str: The stripped content of each non-blank line, in file order.
    """
    with open(path, 'r') as handle:
        # map() strips lazily; filter(None, ...) drops the empty results.
        for text in filter(None, map(str.strip, handle)):
            yield text

def read_objects(path):
    """Lazily yield one decoded JSON value per non-blank line of a file.

    Args:
        path: Path of a file holding one JSON document per line.

    Yields:
        The result of ``json.loads`` for each non-blank line, in file order.

    Raises:
        ValueError: Propagated from ``json.loads`` when a line is not valid
            JSON (raised when that line is reached, not up front).
    """
    for record in map(json.loads, read_lines(path)):
        yield record

def write_lines(path,all_data):
    """Replace the file at ``path`` with one JSON document per line.

    The records are first written to a sibling temporary file
    (``path + '.tmp'``), which is then moved over ``path``. Compared with
    deleting ``path`` up front and appending record by record, this means:

    * an empty ``all_data`` leaves an empty file instead of no file at all;
    * a failure part-way through (e.g. an unserialisable record) leaves the
      previous content of ``path`` untouched;
    * ``all_data`` may be a lazy iterable that still reads from ``path``;
    * the output file is opened once rather than once per record.

    Args:
        path: Destination file. It does not have to exist beforehand.
        all_data: Iterable of JSON-serialisable objects, written in order.

    Raises:
        TypeError: If a record cannot be serialised by ``json.dumps``.
        OSError: If the temporary file cannot be written or moved into place.
    """
    tmp_path = path + '.tmp'
    try:
        with open(tmp_path, 'w') as out:
            for file_line in all_data:
                out.write(json.dumps(file_line) + "\n")
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file no longer exists; it is
        # only still around if something above failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def _rename_without_spaces(root):
    """Rename directory ``root`` so its own name has 'T' instead of spaces.

    Only the last path component is changed; parent components are left
    alone. Returns the path under which the directory can be reached
    afterwards (``root`` itself when nothing had to be renamed).
    """
    separators = os.sep + (os.altsep or '')
    # A walk root given with a trailing separator would otherwise have an
    # empty last component.
    trimmed = root.rstrip(separators) or root
    parent, name = os.path.split(trimmed)
    if ' ' not in name:
        return root
    new_root = os.path.join(parent, name.replace(' ', 'T'))
    os.rename(root, new_root)
    return new_root


def _parse_log_datetime(text):
    """Parse 'YYYY-MM-DDTHH:MM:SS' with an optional '.ffffff' fraction."""
    if '.' in text:
        return datetime.strptime(text, '%Y-%m-%dT%H:%M:%S.%f')
    return datetime.strptime(text, '%Y-%m-%dT%H:%M:%S')


def _normalize_sale_datetime(old_datetime, local_hour):
    """Return ``old_datetime`` with a 6-digit fraction and a 'Z' suffix.

    ``local_hour`` is the two-character hour taken from the name of the
    directory that holds the log file.
    """
    out_format = '%Y-%m-%dT%H:%M:%S.%fZ'

    if old_datetime.endswith('Z'):
        # Already marked as UTC: only pad the fraction to 6 digits. A value
        # that does not match the expected layout is left untouched.
        try:
            return _parse_log_datetime(old_datetime[:-1]).strftime(out_format)
        except ValueError:
            return old_datetime

    timestamp = _parse_log_datetime(old_datetime)
    # Heuristic kept from the original code: when the record's hour plus 8
    # exceeds the directory hour, the value is shifted back by 8 hours;
    # otherwise it is kept as-is and only gains the 'Z' suffix.
    # NOTE(review): presumably the directory hour is UTC+8 local time and
    # this tells local-time records from UTC ones -- confirm. The plain
    # integer comparison ignores wrap-around at midnight.
    if timestamp.hour + 8 > int(local_hour):
        timestamp -= timedelta(hours=8)
    return timestamp.strftime(out_format)


def overwirte(path):
    """Rewrite every ``.log`` file under ``path`` with normalized datetimes.

    For each directory at or below ``path``:

    * spaces in the directory's own name are replaced by 'T';
    * every ``*.log`` file in it is read as JSON lines, the
      ``sale['datetime']`` field of each record that has a truthy ``sale``
      is rewritten with a 6-digit fraction and a trailing 'Z', and the
      file is written back.

    The tree is walked bottom-up. Renaming a directory during a top-down
    walk makes ``os.walk`` look for its children under the old name, so
    everything below a renamed directory would be skipped.

    Records without a ``sale`` entry are written back unchanged rather
    than being dropped from the file.

    (The misspelled function name is kept for existing callers.)

    Args:
        path: Root directory to process.

    Raises:
        KeyError: If a ``sale`` object has no ``datetime`` key.
        ValueError: If a line is not valid JSON, a datetime without 'Z' does
            not match the expected layout, or the directory name does not
            yield a numeric hour.
        OSError: If a directory cannot be renamed or a file cannot be
            read or written.
    """
    for root, _dirs, files in os.walk(path, topdown=False):
        new_root = _rename_without_spaces(root)

        log_names = [name for name in files if name.endswith('.log')]
        if not log_names:
            continue

        # Hour encoded in the directory name, e.g. '2020-01-01T10' -> '10'.
        dir_name = os.path.basename(os.path.normpath(new_root))
        local_hour = dir_name.split("T")[-1][:2]

        for file_name in log_names:
            file_path = os.path.join(new_root, file_name)

            # Materialize all records before writing: the file being read
            # is the one that is about to be replaced.
            all_data = []
            for tran in read_objects(file_path):
                sale = tran.get('sale')
                if sale:
                    sale['datetime'] = _normalize_sale_datetime(
                        sale['datetime'], local_hour)
                all_data.append(tran)
            write_lines(file_path, all_data)

if __name__ == '__main__':
    # Exactly one argument is expected: the root directory whose .log files
    # are rewritten in place (it is handed to os.walk by overwirte).
    if len(sys.argv) != 2:
        # Usage goes to stderr so it does not mix with regular output.
        sys.stderr.write("Usage: overwrite.py <log_dir>\n")
        sys.exit(1)
    overwirte(sys.argv[1])
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值