#Import Modulesfromdatetimeimportdatetime,timedeltafromairflowimportDAGfromairflow.hooks.base_hookimportBaseHookfromairflow.modelsimportVariablefromairflow.operators.python_operatorimportPythonOperatorfromairflow.hooks.postgres_hookimportPostgresHookfromsshtunnelimportSSHTunnelForwarder,create_loggerfromioimportStringIOimportloggingimportparamikoimportMySQLdbassqlimportpsycopg2importpsycopg2.extensionsfrompsycopg2.extrasimportRealDictCursorimportpsycopg2.extras
# Configuration: the master metadata table and the default lookup keys
# used to locate the ETL record(s) that drive the Redshift COPY.
my_master_table = 'schema1'
database_name = 'db1'
schema_name = 'schema1'
table_name = 'table1'


def get_etl(master_table=None, db_name=None, sch_name=None, tbl_name=None):
    """Fetch ETL metadata rows from the Postgres master table.

    Every argument defaults to the module-level constant of the same
    meaning, so existing no-argument callers keep working while
    ``copy_into_redshift`` can override the lookup keys per run.

    Returns:
        list[dict]: one dict per matching row, keyed by column name.
    """
    master_table = master_table if master_table is not None else my_master_table
    db_name = db_name if db_name is not None else database_name
    sch_name = sch_name if sch_name is not None else schema_name
    tbl_name = tbl_name if tbl_name is not None else table_name

    # BUGFIX: kwarg was misspelled 'postgre_conn_id', which PostgresHook
    # silently ignored (it only worked because the default happens to be
    # 'postgres_default').
    pg_hook = PostgresHook(postgres_conn_id="postgres_default", schema='schema1')
    connection = pg_hook.get_conn()
    try:
        cursor = connection.cursor()
        # The table name is an identifier and cannot be bound as a query
        # parameter, so it is still formatted in; the three filter VALUES
        # are parameterized to avoid SQL injection and quoting bugs.
        col_query = """
            select * from {0}
            where database_name = %s
              and schema_name = %s
              and table_name = %s ;
        """.format(master_table)
        cursor.execute(col_query, (db_name, sch_name, tbl_name))
        column_names = [col[0] for col in cursor.description]
        data = [dict(zip(column_names, row)) for row in cursor.fetchall()]
        connection.commit()
    finally:
        # Always release the connection, even when the query fails.
        connection.close()
    return data


def copy_into_redshift(**kwargs):
    """Issue a Redshift COPY for every table returned by ``get_etl``.

    Lookup keys may be supplied via the task's kwargs (``op_kwargs`` /
    templated context); missing keys fall back to the module defaults
    inside ``get_etl``.

    BUGFIXES vs. original:
      * ``get_etl`` previously took no parameters but was called with
        four positional arguments (TypeError).
      * ``record`` was referenced without ever being defined
        (NameError); the intent — one COPY per fetched metadata row —
        is restored with an explicit loop.
    """
    records = get_etl(
        kwargs.get('my_master_table'),
        kwargs.get('database_name'),
        kwargs.get('schema_name'),
        kwargs.get('table_name'),
    )
    pg_hook = PostgresHook(postgres_conn_id="postgres_default", schema='schema1')
    connection = pg_hook.get_conn()
    try:
        cursor = connection.cursor()
        for record in records:
            # NOTE(review): 'bucket_name' and 'aws role' look like
            # placeholders — confirm the real S3 path and IAM role ARN.
            copy_sql = '''COPY schema1.{0}
FROM 'bucket_name'
iam_role 'aws role'
REGION 'us-east-1' ESCAPE DELIMITER ',' ACCEPTANYDATE;'''.format(record['table_name'])
            cursor.execute(copy_sql)
        connection.commit()
    finally:
        connection.close()


# Default task arguments shared by every task in the DAG.
default_args = {
    'owner': 'mae',
    'depends_on_past': False,
    'start_date': datetime(2019, 12, 15),
}

# Instantiate the DAG: hourly schedule, no backfill of missed intervals.
dag = DAG(
    'testing_1',
    description='This is a test ',
    default_args=default_args,
    schedule_interval='@hourly',
    catchup=False,
)

t1 = PythonOperator(
    task_id='new_oneee',
    python_callable=get_etl,
    dag=dag,
)

# BUGFIX: provide_context=True is required on Airflow 1.x for the task
# context to be passed into copy_into_redshift's **kwargs; without it
# the original raised KeyError at runtime.
t2 = PythonOperator(
    task_id='s3_update',
    python_callable=copy_into_redshift,
    provide_context=True,
    depends_on_past=True,
    dag=dag,
)

t1 >> t2