有两种方式:第一种通过调用 boto3 使用,第二种通过 pyathenajdbc 使用。相对而言,两种方式各有优劣。
第一种:通过 boto3
import boto3
import pandas as pd
import io
import time
# Module-level AWS clients, created once at import time.
# The Athena client is pinned to the ap-southeast-1 region.
athena = boto3.client(service_name='athena', region_name='ap-southeast-1')
# The S3 client is created without an explicit region argument.
# NOTE(review): presumably used to fetch Athena result objects — its use is
# not visible in this part of the file; confirm.
s3 = boto3.client(service_name='s3')
def get_data_by_boto3(sql):
bucket='bigdata-athena'
print(sql)
s3_output="s3://bigdata-athena/staging/parquet/mysql/"
response = athena.start_query_execution(
QueryString=sql,
QueryExecutionContext={
'Database': 'systemlogs'
},
ResultConfiguration={
'OutputLocation': s3_output
}
)
print(response)
file_name = response['QueryExecutionId'] + '.csv'
file_name = response['QueryExecutionId'] + '.txt'
key="staging/parquet/mysql/"+file_name
print(key)
obj = None
for i in range(2):