0630py

import os
import shutil
import time
from datetime import datetime
from zipfile import ZipFile
import pandas as pd

# All three paths are relative, so they resolve against the current working
# directory of the process.
# Directory tree walked by rename_files_and_update_contents().
INPUT_PATH = 'tmp/test_data_multiple_run/input_files'
# Directory tree walked by rename_output_files() and compare_files().
OUTPUT_PATH = 'tmp/test_data_multiple_run/output_files'
# Directory in which compare_files() looks up the same-named reference file.
PROD_OUTPUT_PATH = 'tmp/test_data_multiple_run/output_prod_files'


def prepare_input_files(date_str):
    """Prepare the input files from the file path for the given date.

    Placeholder: nothing is implemented yet, so ``date_str`` is ignored and
    the call has no effect.
    """
    return None


def rename_files_and_update_contents(input_path=None, old_date='2024-06-01'):
    """Re-date every input file so that it carries today's date.

    Walks ``input_path`` and handles each file it finds:

    * ``.zip`` archives are extracted next to the archive, each member whose
      name contains ``old_date`` is renamed on disk, and the renamed members
      are packed into a new archive whose own file name is re-dated the same
      way. The extracted members are left on disk.
    * Every other file is renamed, loaded as CSV, and the date part of its
      ``time`` column is set to today (the time of day is kept).

    In names, ``old_date`` is replaced with today's date formatted as
    ``YYYYMMDD`` (no dashes).

    NOTE(review): archive members are renamed one path at a time, so archives
    whose *directory* names contain the date may not rename cleanly -- confirm
    the archives are flat.

    Args:
        input_path: Directory tree to process. Defaults to ``INPUT_PATH``.
        old_date: Date string to look for in file and member names.

    Raises:
        KeyError: If a non-zip file has no ``time`` column.
    """
    if input_path is None:
        input_path = INPUT_PATH

    # Take one timestamp so every file (and every row) gets the same date,
    # even if the run crosses midnight.
    now = datetime.now()
    current_date = now.strftime('%Y%m%d')

    for root, _dirs, files in os.walk(input_path):
        for file in files:
            old_path = os.path.join(root, file)
            # Only the file name is re-dated; directory components of the
            # path are left untouched so the rename target always exists.
            new_path = os.path.join(root, file.replace(old_date, current_date))

            if file.endswith('.zip'):
                with ZipFile(old_path, 'r') as zip_ref:
                    zip_ref.extractall(root)
                    member_names = zip_ref.namelist()

                renamed_members = []
                for name in member_names:
                    new_name = name.replace(old_date, current_date)
                    if new_name != name:
                        os.rename(os.path.join(root, name),
                                  os.path.join(root, new_name))
                    renamed_members.append(new_name)

                # The member list comes from the source archive: a freshly
                # opened 'w' archive has an empty namelist().
                with ZipFile(new_path, 'w') as zip_ref:
                    for name in renamed_members:
                        zip_ref.write(os.path.join(root, name), name)

                # If the archive name had no date, the new archive replaced
                # the old one in place and must not be deleted.
                if new_path != old_path:
                    os.remove(old_path)
            else:
                if new_path != old_path:
                    os.rename(old_path, new_path)
                df = pd.read_csv(new_path)
                df['time'] = pd.to_datetime(df['time']).apply(
                    lambda ts: ts.replace(year=now.year, month=now.month, day=now.day)
                )
                df.to_csv(new_path, index=False)


def wait_for_minute():
    """Pause execution for sixty seconds."""
    one_minute_in_seconds = 60
    time.sleep(one_minute_in_seconds)


def rename_output_files(date_str, output_path=None):
    """Replace today's date in output file names with ``date_str``.

    Walks ``output_path`` and renames every file whose name contains today's
    date (formatted ``YYYY-MM-DD``), substituting ``date_str`` for it. Files
    without the date in their name are left alone.

    Args:
        date_str: Text that replaces today's date in each file name.
        output_path: Directory tree to process. Defaults to ``OUTPUT_PATH``.
    """
    if output_path is None:
        output_path = OUTPUT_PATH

    current_date = datetime.now().strftime('%Y-%m-%d')
    for root, _dirs, files in os.walk(output_path):
        for file in files:
            if current_date not in file:
                continue
            # Substitute in the file name only: rewriting the whole path
            # would also alter dated directory names and point the rename at
            # a directory that does not exist.
            new_name = file.replace(current_date, date_str)
            os.rename(os.path.join(root, file), os.path.join(root, new_name))


def compare_files(*fields, output_path=None, prod_output_path=None):
    """Compare selected columns of each output CSV with its reference CSV.

    Walks ``output_path``; for every file that has a same-named counterpart
    directly inside ``prod_output_path``, both are loaded as CSV and each
    column in ``fields`` is compared with ``Series.equals``. Files without a
    counterpart are skipped. Every mismatch is printed.

    A field that is missing from either file is reported as a difference
    rather than aborting the whole comparison.

    Args:
        *fields: Column names to compare.
        output_path: Directory tree holding the files under test. Defaults
            to ``OUTPUT_PATH``.
        prod_output_path: Directory holding the reference files. Defaults to
            ``PROD_OUTPUT_PATH``.

    Returns:
        A list of ``(file_name, field)`` tuples, one per reported difference.
    """
    if output_path is None:
        output_path = OUTPUT_PATH
    if prod_output_path is None:
        prod_output_path = PROD_OUTPUT_PATH

    differences = []
    for root, _dirs, files in os.walk(output_path):
        for file in files:
            # The reference file is looked up by bare file name, regardless
            # of how deep the output file sits under output_path.
            prod_file = os.path.join(prod_output_path, file)
            if not os.path.exists(prod_file):
                continue

            output_df = pd.read_csv(os.path.join(root, file))
            prod_output_df = pd.read_csv(prod_file)
            for field in fields:
                if field not in output_df.columns or field not in prod_output_df.columns:
                    print(f"Field {field} missing for file {file}")
                    differences.append((file, field))
                elif not output_df[field].equals(prod_output_df[field]):
                    print(f"Difference found in field {field} for file {file}")
                    differences.append((file, field))
    return differences
 

  • 5
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值