import os
import csv
import zipfile
from datetime import datetime
from itertools import zip_longest
from shutil import move


class FileProcessor:
    """Renames input files, rewrites their date columns to today's date, and compares QA and production CSV output."""

    BASE_PATH = os.path.abspath(os.path.dirname(__file__))
    INPUT_PATH = os.path.join(BASE_PATH, '../../tmp/inputdata')
    OUTPUT_QA_PATH = os.path.join(BASE_PATH, '../../tmp/output_qa')
    OUTPUT_PROD_PATH = os.path.join(BASE_PATH, '../../tmp/output_prod')

    def __init__(self):
        self.today_str = datetime.now().strftime("%Y%m%d")

    def update_csv_date(self, file_path, date_column):
        """Rewrite each value in `date_column` so its date becomes today while keeping the original time of day."""
        with open(file_path, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            rows = list(reader)
            fieldnames = reader.fieldnames
        if date_column not in fieldnames:
            raise ValueError(f"The CSV file {file_path} does not contain a '{date_column}' field.")
        for row in rows:
            timestamp = datetime.strptime(row[date_column], "%Y-%m-%d %H:%M:%S")
            new_timestamp = timestamp.replace(
                year=int(self.today_str[:4]),
                month=int(self.today_str[4:6]),
                day=int(self.today_str[6:8]),
            )
            row[date_column] = new_timestamp.strftime("%Y-%m-%d %H:%M:%S")
        with open(file_path, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    def process_zip_file(self, zip_path, date_column):
        """Extract the CSV from the zip (expected under an 'opt/' folder), update its dates, and repack it."""
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall('tmp')
        extracted_csv_path = os.path.join('tmp', 'opt', os.path.basename(zip_path).replace('.zip', '.csv'))
        self.update_csv_date(extracted_csv_path, date_column)
        new_zip_path = os.path.join(os.path.dirname(zip_path), self.today_str + '_a3.zip')
        with zipfile.ZipFile(new_zip_path, 'w') as zip_ref:
            zip_ref.write(extracted_csv_path, arcname=os.path.join('opt', os.path.basename(extracted_csv_path)))
        os.remove(extracted_csv_path)
        return new_zip_path

    def rename_and_process_files(self, input_files):
        """Rename each input to a today-prefixed name, update its date column, and return the new paths."""
        new_files = []
        for file_path in input_files:
            if file_path.endswith('.zip'):
                # The zipped CSV uses the 'update time' column and is repacked as <today>_a3.zip.
                new_file_path = self.process_zip_file(file_path, 'update time')
            else:
                new_file_path = os.path.join(
                    os.path.dirname(file_path),
                    self.today_str + '_' + os.path.basename(file_path).split('_')[-1],
                )
                move(file_path, new_file_path)
                date_column = 'time' if 'a1' in new_file_path else 'timestamp'
                self.update_csv_date(new_file_path, date_column)
            new_files.append(new_file_path)
        return new_files

    def compare_files(self, qa_file, prod_file):
        """Return True if the two CSV files are row-for-row identical; print the first difference otherwise."""
        with open(qa_file, mode='r', newline='', encoding='utf-8') as file1, open(prod_file, mode='r', newline='', encoding='utf-8') as file2:
            reader1 = csv.reader(file1)
            reader2 = csv.reader(file2)
            # zip_longest makes a length mismatch (extra rows in one file) show up as a difference.
            for row1, row2 in zip_longest(reader1, reader2):
                if row1 != row2:
                    print("Difference found in files")
                    print(f"Row in QA: {row1}")
                    print(f"Row in Prod: {row2}")
                    return False
        print("No differences found in files")
        return True

    def cleanup_output_qa(self):
        for file_name in os.listdir(FileProcessor.OUTPUT_QA_PATH):
            if file_name.endswith('.csv'):
                os.remove(os.path.join(FileProcessor.OUTPUT_QA_PATH, file_name))
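

# Illustrative usage sketch (not part of the BDD flow): how FileProcessor could be driven by hand.
# The input file names below are assumptions for demonstration purposes only.
if __name__ == "__main__":
    processor = FileProcessor()
    sample_inputs = [
        os.path.join(FileProcessor.INPUT_PATH, name)
        for name in ("base_a1.csv", "base_a2.csv", "base_a3.zip")  # assumed names
    ]
    renamed = processor.rename_and_process_files(sample_inputs)
    print("Processed files:", renamed)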


# --- pytest-bdd step definitions; the FileProcessor class above lives in backend/test/steps/base.py ---
import os
import time

import pytest
from pytest_bdd import scenarios, given, when, then, parsers

from backend.test.steps.base import FileProcessor

# Bind the scenarios in the feature file to the step implementations in this module.
scenarios('../features/file_processing.feature')

file_processor = FileProcessor()


@given(parsers.parse('the input files {input_a1}, {input_a2}, and {input_a3} are available for iteration {iteration:d}'))
def input_files_available(input_a1, input_a2, input_a3, iteration):
    global input_files, prod_output
    input_files = [
        os.path.join(FileProcessor.INPUT_PATH, input_a1),
        os.path.join(FileProcessor.INPUT_PATH, input_a2),
        os.path.join(FileProcessor.INPUT_PATH, input_a3),
    ]
    prod_output = os.path.join(FileProcessor.OUTPUT_PROD_PATH, f"{iteration}_b.csv")


@when(parsers.parse("I rename and update the input files with today's date for iteration {iteration:d}"))
def rename_and_update_files():
    global processed_files
    # Keep the returned (renamed) paths so later steps act on files that still exist on disk.
    processed_files = file_processor.rename_and_process_files(input_files)


@when('I wait for manual upload')
def wait_for_manual_upload():
    # Allow ten minutes for the manual upload to complete.
    time.sleep(600)


@then(parsers.parse('I rename the input files to reflect execution count {iteration:d}'))
def rename_input_files_to_execution_count(iteration):
    # Rename the processed (today-prefixed) files, which are the paths that actually exist at this point.
    for i, file_path in enumerate(processed_files):
        extension = '.zip' if file_path.endswith('.zip') else '.csv'
        new_file_path = os.path.join(os.path.dirname(file_path), f"{iteration}_a{i + 1}{extension}")
        os.rename(file_path, new_file_path)


@then(parsers.parse('I compare the QA and production output files with {prod_output} for iteration {iteration:d}'))
def compare_files(iteration):
    qa_file = os.path.join(FileProcessor.OUTPUT_QA_PATH, file_processor.today_str + '_b.csv')
    comparison_result = file_processor.compare_files(qa_file, prod_output)
    assert comparison_result, f"Output comparison failed for iteration {iteration}"


@then(parsers.parse('I verify the outputs are identical for iteration {iteration:d}'))
def verify_outputs_are_identical():
    # The comparison step already asserts equality; this step keeps the feature wording explicit.
    pass


@then('I clean up the QA output files')
def clean_up_qa_output_files():
    file_processor.cleanup_output_qa()
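

# For reference, a scenario outline compatible with the steps above might look like the sketch below.
# This is an assumption about the shape of file_processing.feature; the concrete file names and
# iteration values in the Examples table are illustrative only, not taken from the real feature file.
#
#   Scenario Outline: Refresh input files and compare QA output against production
#     Given the input files <input_a1>, <input_a2>, and <input_a3> are available for iteration <iteration>
#     When I rename and update the input files with today's date for iteration <iteration>
#     And I wait for manual upload
#     Then I rename the input files to reflect execution count <iteration>
#     And I compare the QA and production output files with <prod_output> for iteration <iteration>
#     And I verify the outputs are identical for iteration <iteration>
#     And I clean up the QA output files
#
#     Examples:
#       | input_a1    | input_a2    | input_a3    | prod_output | iteration |
#       | base_a1.csv | base_a2.csv | base_a3.zip | 1_b.csv     | 1         |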