# 使用Python连接kerberos认证的Hive (Connecting to a Kerberos-authenticated Hive from Python)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Hive utility for a CDH cluster secured with Kerberos
from impala.dbapi import connect
from krbcontext import krbcontext
import pandas as pd
from config.conf import HiveConfig
class HiveClient:
    """Wrapper around a Kerberos-authenticated HiveServer2 connection.

    Use :meth:`instance` to obtain a shared client built from
    ``HiveConfig``; the constructor accepts an already-open DB-API
    connection directly.
    """

    # Cached client created by instance(); reset by close().
    __instance = None

    @classmethod
    def instance(cls, *args, **kwargs):
        """Return the shared client, connecting on the first call.

        The connection is opened inside a ``krbcontext`` initialised from
        the keytab and principal found in ``HiveConfig``. Positional and
        keyword arguments are accepted for backward compatibility and are
        not used.

        Raises:
            Exception: whatever ``HiveConfig``, ``krbcontext`` or
                ``connect`` raise; nothing is caught here.
        """
        if cls.__instance is None:
            hive_conf = HiveConfig.instance()
            with krbcontext(using_keytab=True,
                            principal=hive_conf.principal,
                            keytab_file=hive_conf.keytab):
                conn = connect(host=hive_conf.host,
                               port=hive_conf.port,
                               auth_mechanism='GSSAPI',
                               kerberos_service_name='hive')
            # Remember the client so later calls reuse the same connection
            # (the original never stored it and reconnected every time).
            cls.__instance = cls(conn)
        return cls.__instance

    def __init__(self, conn):
        """Store the DB-API connection used by every other method."""
        self.conn = conn

    # Execute
    def execute(self, sql):
        """Run *sql* without fetching any result.

        The cursor is closed when the statement finishes. Errors are
        printed and then re-raised unchanged.
        """
        try:
            with self.conn.cursor() as cur:
                cur.execute(sql)
        except Exception as err:
            print(err)
            raise

    # Query
    def query(self, sql):
        """Run *sql* and return all rows from ``cursor.fetchall()``.

        Errors are printed and then re-raised. (The original returned from
        a ``finally`` clause, which discarded the exception and handed the
        caller ``None`` instead.)
        """
        try:
            with self.conn.cursor() as cur:
                cur.execute(sql)
                return cur.fetchall()
        except Exception as err:
            print("Hive 查询失败, %s" % err)
            raise

    # Query into a DataFrame
    def query_to_df(self, sql):
        """Run *sql* and return the result as a ``pandas.DataFrame``.

        Column names come from ``cursor.description``. An empty result set
        yields an empty frame that still carries the column names, and
        duplicate column names are kept. A statement with no result set
        (``description`` is ``None``) yields an empty frame.
        """
        with self.conn.cursor() as cursor:
            cursor.execute(sql)
            if cursor.description is None:
                return pd.DataFrame()
            columns = [col[0] for col in cursor.description]
            rows = cursor.fetchall()
        # Passing the columns to the constructor keeps empty results and
        # repeated column names from breaking the frame's shape.
        return pd.DataFrame(list(rows), columns=columns)

    def close(self):
        """Close the connection and drop the cached instance if it is this one."""
        self.conn.close()
        # Without this, instance() would keep returning a closed client.
        if type(self).__instance is self:
            type(self).__instance = None
if __name__ == '__main__':
    # Demo: run two sample queries against placeholder tables and print the results.
    hive_client = HiveClient.instance()
    try:
        print(hive_client.query_to_df('SELECT * from table1 limit 10'))
        # execute() returns nothing, so printing its result only ever showed
        # "None"; query() returns the fetched rows for a SELECT.
        print(hive_client.query('SELECT * from table2 limit 10'))
    finally:
        # Release the connection even if one of the queries fails.
        hive_client.close()