# End-to-end check of the file-processing pipeline.
# Each Examples row supplies an iteration number and the three input file
# paths (two CSV files and one zip archive) used for that run.
Feature: File processing

  Scenario Outline: Process and validate input files
    Given the input files are available
    When I rename and update the input files with today's date for <iteration>
    And I wait for 10 minutes
    Then I compare the QA and production output files for <iteration>
    And I verify the outputs are identical for <iteration>
    And I clean up the QA output files

    Examples:
      | iteration | a1_path                   | a2_path                   | a3_zip_path               |
      | 1         | ../tmp/inputdata/1_a1.csv | ../tmp/inputdata/1_a2.csv | ../tmp/inputdata/1_a3.zip |
      | 2         | ../tmp/inputdata/2_a1.csv | ../tmp/inputdata/2_a2.csv | ../tmp/inputdata/2_a3.zip |
      | 3         | ../tmp/inputdata/3_a1.csv | ../tmp/inputdata/3_a2.csv | ../tmp/inputdata/3_a3.zip |
"""Step definitions for the "File processing" feature (pytest-bdd)."""

import csv
import os
import time
import zipfile
from datetime import datetime
from itertools import zip_longest
from shutil import move

import pytest
from pytest_bdd import scenarios, given, when, then, parsers

scenarios('../features/file_processing.feature')

# Seconds slept by the "I wait for 10 minutes" step.
WAIT_SECONDS = 600


class FileProcessor:
    """Re-date input files and compare the QA output with the production one.

    Attributes:
        output_qa_path: Directory holding the QA output ``<today>_b.csv``.
        output_prod_path: Directory holding the production ``<iteration>_b.csv``.
        today_str: Today's date as ``YYYYMMDD``, captured at construction.
        last_comparison_identical: Result of the most recent
            ``compare_files`` call, or ``None`` if it has not run yet.
    """

    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, output_qa_path, output_prod_path):
        self.output_qa_path = output_qa_path
        self.output_prod_path = output_prod_path
        self.today_str = datetime.now().strftime("%Y%m%d")
        self.last_comparison_identical = None

    def _qa_output_file(self):
        """Return the path of today's QA output file."""
        return os.path.join(self.output_qa_path, self.today_str + '_b.csv')

    def update_csv_date(self, file_path, date_column):
        """Rewrite ``file_path`` in place with ``date_column`` moved to today.

        Only the year, month and day of every timestamp are replaced; the
        time-of-day part is preserved.

        Raises:
            ValueError: If the file has no ``date_column`` header (including
                a completely empty file), or a value does not match
                ``TIMESTAMP_FORMAT``.
        """
        with open(file_path, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            rows = list(reader)
            fieldnames = reader.fieldnames

        # ``fieldnames`` is None for an empty file; report that the same way
        # as a missing column instead of failing with a TypeError.
        if not fieldnames or date_column not in fieldnames:
            raise ValueError(f"The CSV file {file_path} does not contain a '{date_column}' field.")

        today = datetime.strptime(self.today_str, "%Y%m%d")
        for row in rows:
            timestamp = datetime.strptime(row[date_column], self.TIMESTAMP_FORMAT)
            new_timestamp = timestamp.replace(year=today.year, month=today.month, day=today.day)
            row[date_column] = new_timestamp.strftime(self.TIMESTAMP_FORMAT)

        with open(file_path, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    def process_zip_file(self, zip_path, date_column):
        """Re-date the CSV inside ``zip_path`` and write ``<today>_a3.zip``.

        The archive is extracted under ``tmp`` (relative to the current
        working directory) and is expected to contain
        ``opt/<zip base name>.csv``. The new archive is written next to
        ``zip_path``.

        Returns:
            The path of the newly written zip archive.
        """
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall('tmp')

        # Swap only the extension; a plain str.replace would also touch a
        # '.zip' occurring elsewhere in the name.
        csv_name = os.path.splitext(os.path.basename(zip_path))[0] + '.csv'
        extracted_csv_path = os.path.join('tmp', 'opt', csv_name)
        self.update_csv_date(extracted_csv_path, date_column)

        new_zip_path = os.path.join(os.path.dirname(zip_path), self.today_str + '_a3.zip')
        with zipfile.ZipFile(new_zip_path, 'w') as zip_ref:
            zip_ref.write(extracted_csv_path, arcname=os.path.join('opt', csv_name))

        os.remove(extracted_csv_path)
        return new_zip_path

    def rename_and_process_files(self, iteration, input_files):
        """Rename every input file to today's date and re-date its contents.

        Zip inputs go through ``process_zip_file``; every other input is
        moved to ``<today>_<suffix>`` in the same directory and its date
        column is updated.

        Args:
            iteration: Scenario iteration; unused here, kept for interface
                compatibility with existing callers.
            input_files: Paths of the input files to process.

        Returns:
            The list of new file paths, in input order.
        """
        # Comparison and cleanup are deliberately NOT done here: the feature
        # runs them as separate steps after the wait. Doing them right after
        # renaming would compare before new output exists and then delete the
        # QA file the later compare step needs.
        new_files = []
        for file_path in input_files:
            if file_path.endswith('.zip'):
                new_file_path = self.process_zip_file(file_path, 'update time')
            else:
                suffix = os.path.basename(file_path).split('_')[-1]
                new_file_path = os.path.join(os.path.dirname(file_path), self.today_str + '_' + suffix)
                move(file_path, new_file_path)
                # Decide on the file name only, so a directory that happens
                # to contain 'a1' cannot select the wrong column.
                date_column = 'time' if 'a1' in os.path.basename(new_file_path) else 'timestamp'
                self.update_csv_date(new_file_path, date_column)
            new_files.append(new_file_path)
        return new_files

    def compare_files(self, iteration):
        """Compare today's QA output with the production output row by row.

        Prints the first differing pair of rows, if any. A row missing from
        the shorter file is shown as ``None``.

        Returns:
            True when both files contain exactly the same rows, else False.
            The result is also stored in ``last_comparison_identical``.
        """
        qa_file = self._qa_output_file()
        prod_file = os.path.join(self.output_prod_path, f"{iteration}_b.csv")

        with open(qa_file, mode='r', newline='', encoding='utf-8') as file1, \
                open(prod_file, mode='r', newline='', encoding='utf-8') as file2:
            # zip_longest (not zip) so extra trailing rows in either file
            # count as a difference instead of being silently dropped.
            for row1, row2 in zip_longest(csv.reader(file1), csv.reader(file2)):
                if row1 != row2:
                    print(f"Difference found in iteration {iteration}")
                    print(f"Row in QA: {row1}")
                    print(f"Row in Prod: {row2}")
                    self.last_comparison_identical = False
                    return False

        print(f"No differences found in iteration {iteration}")
        self.last_comparison_identical = True
        return True

    def cleanup_output_qa(self):
        """Delete today's QA output file; do nothing if it does not exist."""
        try:
            os.remove(self._qa_output_file())
        except FileNotFoundError:
            pass


@pytest.fixture
def file_processor():
    """Provide a FileProcessor bound to the QA and production output dirs."""
    # os.path.join keeps the paths valid on POSIX as well as on Windows.
    output_qa_path = os.path.join('..', 'tmp', 'output_qa')
    output_prod_path = os.path.join('..', 'tmp', 'output_prod')
    return FileProcessor(output_qa_path, output_prod_path)


# NOTE(review): parsers.parse treats "<iteration>" as literal text (parse
# fields are written "{name}"). The steps below therefore rely on the
# installed pytest-bdd matching outline steps by their template text and
# exposing Examples columns as fixtures - confirm against the version in use.


@given("the input files are available")
def input_files_available():
    """No-op precondition; the input files are assumed to be in place."""
    pass


@when(parsers.parse("I rename and update the input files with today's date for <iteration>"))
def rename_and_update_files(file_processor, iteration, request):
    """Rename and re-date the three input files of the current iteration."""
    input_paths = [
        request.getfixturevalue("a1_path"),
        request.getfixturevalue("a2_path"),
        request.getfixturevalue("a3_zip_path"),
    ]
    file_processor.rename_and_process_files(iteration, input_paths)


@when("I wait for 10 minutes")
def wait_ten_minutes():
    """Sleep for WAIT_SECONDS before the outputs are compared."""
    time.sleep(WAIT_SECONDS)


@then(parsers.parse("I compare the QA and production output files for <iteration>"))
def compare_output_files(file_processor, iteration):
    """Run the comparison; the result is kept on ``file_processor``."""
    file_processor.compare_files(iteration)


@then(parsers.parse("I verify the outputs are identical for <iteration>"))
def verify_outputs_identical(file_processor):
    """Fail the scenario unless the preceding comparison found no difference."""
    assert file_processor.last_comparison_identical is True, (
        "QA and production outputs differ, or the comparison step did not run"
    )


@then("I clean up the QA output files")
def cleanup_qa_files(file_processor):
    """Remove today's QA output file."""
    file_processor.cleanup_output_qa()
"""Shared pytest fixtures for the file-processing BDD tests."""

import os

import pytest

# Import the class the fixture instantiates. Importing the ``file_processor``
# fixture instead left ``FileProcessor`` undefined (NameError) and was
# immediately shadowed by the definition below.
from backend.test.steps.file_processing_steps import FileProcessor


@pytest.fixture(scope="module")
def file_processor():
    """Provide one FileProcessor per test module.

    The instance points at the QA and production output directories under
    ``../tmp``.
    """
    # os.path.join keeps the paths valid on POSIX as well as on Windows.
    output_qa_path = os.path.join('..', 'tmp', 'output_qa')
    output_prod_path = os.path.join('..', 'tmp', 'output_prod')
    return FileProcessor(output_qa_path, output_prod_path)
"""Step definitions for the "File processing" feature (pytest-bdd)."""

import csv
import os
import time
import zipfile
from datetime import datetime
from itertools import zip_longest
from shutil import move

import pytest
from pytest_bdd import scenarios, given, when, then, parsers

scenarios('../features/file_processing.feature')

# Seconds slept by the "I wait for 10 minutes" step.
WAIT_SECONDS = 600


class FileProcessor:
    """Re-date input files and compare the QA output with the production one.

    Attributes:
        output_qa_path: Directory holding the QA output ``<today>_b.csv``.
        output_prod_path: Directory holding the production ``<iteration>_b.csv``.
        today_str: Today's date as ``YYYYMMDD``, captured at construction.
        last_comparison_identical: Result of the most recent
            ``compare_files`` call, or ``None`` if it has not run yet.
    """

    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, output_qa_path, output_prod_path):
        self.output_qa_path = output_qa_path
        self.output_prod_path = output_prod_path
        self.today_str = datetime.now().strftime("%Y%m%d")
        self.last_comparison_identical = None

    def _qa_output_file(self):
        """Return the path of today's QA output file."""
        return os.path.join(self.output_qa_path, self.today_str + '_b.csv')

    def update_csv_date(self, file_path, date_column):
        """Rewrite ``file_path`` in place with ``date_column`` moved to today.

        Only the year, month and day of every timestamp are replaced; the
        time-of-day part is preserved.

        Raises:
            ValueError: If the file has no ``date_column`` header (including
                a completely empty file), or a value does not match
                ``TIMESTAMP_FORMAT``.
        """
        with open(file_path, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            rows = list(reader)
            fieldnames = reader.fieldnames

        # ``fieldnames`` is None for an empty file; report that the same way
        # as a missing column instead of failing with a TypeError.
        if not fieldnames or date_column not in fieldnames:
            raise ValueError(f"The CSV file {file_path} does not contain a '{date_column}' field.")

        today = datetime.strptime(self.today_str, "%Y%m%d")
        for row in rows:
            timestamp = datetime.strptime(row[date_column], self.TIMESTAMP_FORMAT)
            new_timestamp = timestamp.replace(year=today.year, month=today.month, day=today.day)
            row[date_column] = new_timestamp.strftime(self.TIMESTAMP_FORMAT)

        with open(file_path, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

    def process_zip_file(self, zip_path, date_column):
        """Re-date the CSV inside ``zip_path`` and write ``<today>_a3.zip``.

        The archive is extracted under ``tmp`` (relative to the current
        working directory) and is expected to contain
        ``opt/<zip base name>.csv``. The new archive is written next to
        ``zip_path``.

        Returns:
            The path of the newly written zip archive.
        """
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall('tmp')

        # Swap only the extension; a plain str.replace would also touch a
        # '.zip' occurring elsewhere in the name.
        csv_name = os.path.splitext(os.path.basename(zip_path))[0] + '.csv'
        extracted_csv_path = os.path.join('tmp', 'opt', csv_name)
        self.update_csv_date(extracted_csv_path, date_column)

        new_zip_path = os.path.join(os.path.dirname(zip_path), self.today_str + '_a3.zip')
        with zipfile.ZipFile(new_zip_path, 'w') as zip_ref:
            zip_ref.write(extracted_csv_path, arcname=os.path.join('opt', csv_name))

        os.remove(extracted_csv_path)
        return new_zip_path

    def rename_and_process_files(self, iteration, input_files):
        """Rename every input file to today's date and re-date its contents.

        Zip inputs go through ``process_zip_file``; every other input is
        moved to ``<today>_<suffix>`` in the same directory and its date
        column is updated.

        Args:
            iteration: Scenario iteration; unused here, kept for interface
                compatibility with existing callers.
            input_files: Paths of the input files to process.

        Returns:
            The list of new file paths, in input order.
        """
        # Comparison and cleanup are deliberately NOT done here: the feature
        # runs them as separate steps after the wait. Doing them right after
        # renaming would compare before new output exists and then delete the
        # QA file the later compare step needs.
        new_files = []
        for file_path in input_files:
            if file_path.endswith('.zip'):
                new_file_path = self.process_zip_file(file_path, 'update time')
            else:
                suffix = os.path.basename(file_path).split('_')[-1]
                new_file_path = os.path.join(os.path.dirname(file_path), self.today_str + '_' + suffix)
                move(file_path, new_file_path)
                # Decide on the file name only, so a directory that happens
                # to contain 'a1' cannot select the wrong column.
                date_column = 'time' if 'a1' in os.path.basename(new_file_path) else 'timestamp'
                self.update_csv_date(new_file_path, date_column)
            new_files.append(new_file_path)
        return new_files

    def compare_files(self, iteration):
        """Compare today's QA output with the production output row by row.

        Prints the first differing pair of rows, if any. A row missing from
        the shorter file is shown as ``None``.

        Returns:
            True when both files contain exactly the same rows, else False.
            The result is also stored in ``last_comparison_identical``.
        """
        qa_file = self._qa_output_file()
        prod_file = os.path.join(self.output_prod_path, f"{iteration}_b.csv")

        with open(qa_file, mode='r', newline='', encoding='utf-8') as file1, \
                open(prod_file, mode='r', newline='', encoding='utf-8') as file2:
            # zip_longest (not zip) so extra trailing rows in either file
            # count as a difference instead of being silently dropped.
            for row1, row2 in zip_longest(csv.reader(file1), csv.reader(file2)):
                if row1 != row2:
                    print(f"Difference found in iteration {iteration}")
                    print(f"Row in QA: {row1}")
                    print(f"Row in Prod: {row2}")
                    self.last_comparison_identical = False
                    return False

        print(f"No differences found in iteration {iteration}")
        self.last_comparison_identical = True
        return True

    def cleanup_output_qa(self):
        """Delete today's QA output file; do nothing if it does not exist."""
        try:
            os.remove(self._qa_output_file())
        except FileNotFoundError:
            pass


@pytest.fixture
def file_processor():
    """Provide a FileProcessor bound to the QA and production output dirs."""
    # os.path.join keeps the paths valid on POSIX as well as on Windows.
    output_qa_path = os.path.join('..', 'tmp', 'output_qa')
    output_prod_path = os.path.join('..', 'tmp', 'output_prod')
    return FileProcessor(output_qa_path, output_prod_path)


# NOTE(review): parsers.parse treats "<iteration>" as literal text (parse
# fields are written "{name}"). The steps below therefore rely on the
# installed pytest-bdd matching outline steps by their template text and
# exposing Examples columns as fixtures - confirm against the version in use.


@given("the input files are available")
def input_files_available():
    """No-op precondition; the input files are assumed to be in place."""
    pass


@when(parsers.parse("I rename and update the input files with today's date for <iteration>"))
def rename_and_update_files(file_processor, iteration, request):
    """Rename and re-date the three input files of the current iteration."""
    input_paths = [
        request.getfixturevalue("a1_path"),
        request.getfixturevalue("a2_path"),
        request.getfixturevalue("a3_zip_path"),
    ]
    file_processor.rename_and_process_files(iteration, input_paths)


@when("I wait for 10 minutes")
def wait_ten_minutes():
    """Sleep for WAIT_SECONDS before the outputs are compared."""
    time.sleep(WAIT_SECONDS)


@then(parsers.parse("I compare the QA and production output files for <iteration>"))
def compare_output_files(file_processor, iteration):
    """Run the comparison; the result is kept on ``file_processor``."""
    file_processor.compare_files(iteration)


@then(parsers.parse("I verify the outputs are identical for <iteration>"))
def verify_outputs_identical(file_processor):
    """Fail the scenario unless the preceding comparison found no difference."""
    assert file_processor.last_comparison_identical is True, (
        "QA and production outputs differ, or the comparison step did not run"
    )


@then("I clean up the QA output files")
def cleanup_qa_files(file_processor):
    """Remove today's QA output file."""
    file_processor.cleanup_output_qa()
【无标题】123
最新推荐文章于 2024-11-17 20:38:28 发布