使用的是impala
包。
如何安装impala
:https://blog.csdn.net/weixin_43713105/article/details/111921221
def read_sql(sql):
'''
读取hive的sql,二维列表
参数:
sql:sql语句
返回:
DataFrame
'''
from impala.dbapi import connect
import pandas as pd
import time
start = time.time() # 开始时间
host_name = '主机地址'
port = 10000
user = '用户名'
password = '密码'
conn = connect(host=host_name, port=port, auth_mechanism='PLAIN', user=user, password=password)
cur = conn.cursor()
cur.execute(sql) # 执行查询语句
sql_data = cur.fetchall() # 获取查询结果:二维列表
end = time.time() # 结束时间
print('耗时%s秒'%(end - start)) # 计算耗时
columns = [i[0] for i in cur.description] # dataframe的列名
sql_data = pd.DataFrame(sql_data, columns=columns) # 将二维列表转为dataframe,并设置列名
cur.close()
conn.close()
return sql_data # 返回DataFrame
方法二(没试过,看起来很简单)
import pandas as pd
from sqlalchemy import create_engine
sql = """
SELECT
*
FROM ...
"""
egine = create_engine('mysql+pymysql://root:123456@localhost:3306/{0}?charset=utf8'.format(db))
df = pd.read_sql(sql.engine)