在 Python 中连接 Hive 和 Impala 有很多种方式,有 pyhive、impyla、pyspark、ibis 等等,本篇我们就逐一介绍如何使用这些包连接 Hive 或 Impala,以及如何通过 Kerberos 认证。
Kerberos
如果集群没有开启 Kerberos 认证,或者已经在系统环境内通过 kinit 命令完成了认证,则不需要这部分代码。
krbcontext.context_shell
# -*- coding: utf-8 -*-
__all__ = [ 'krbcontext', 'KRB5KinitError', ]
import os, sys
# import pwd
import subprocess
from contextlib import contextmanager
class KRB5KinitError(Exception):
    '''Raised when the kinit command finishes with a failure status.

    The exception argument is whatever kinit wrote to stderr.
    '''
# def get_login():
# ''' Get current effective user name '''
#
# return pwd.getpwuid(os.getuid()).pw_name
def init_ccache_as_regular_user(principal=None, ccache_file=None):
    '''Initialize credential cache as a regular user

    Runs ``kinit`` as a child process, inheriting this process's stdin so
    that kinit can interact with the user on the terminal.

    principal: principal name handed to kinit. When omitted (None or
        empty) no principal argument is passed and kinit falls back to its
        own default.
    ccache_file: credential cache location handed to kinit via ``-c``.
        When omitted (None or empty) the ``-c`` option is not passed at
        all and kinit uses its default cache.

    Return ccache_file exactly as it was passed in.

    Raise IOError when stdin is not a TTY, and KRB5KinitError (carrying
    kinit's stderr output) when kinit does not exit with status 0.
    '''
    if not sys.stdin.isatty():
        raise IOError('This is not running on console. So, you need to run kinit '
                      'with your principal manually before anything goes.')

    # Build the argument vector directly instead of formatting a string and
    # splitting it: values containing whitespace stay intact, and omitted
    # arguments are really omitted rather than rendered as the text "None".
    cmd = ['kinit']
    if ccache_file:
        cmd.extend(['-c', str(ccache_file)])
    if principal:
        cmd.append(str(principal))

    kinit_proc = subprocess.Popen(cmd, stderr=subprocess.PIPE)
    # stdout is not piped, so the first element of the result is always None.
    _, stderr_data = kinit_proc.communicate()

    # Any non-zero status is a failure; a negative value means the child
    # was terminated by a signal (POSIX).
    if kinit_proc.returncode != 0:
        raise KRB5KinitError(stderr_data)
    return ccache_file
def init_ccache_with_keytab(principal, keytab_file, ccache_file):
    '''Initialize credential cache using keytab file

    Runs ``kinit -kt <keytab_file> -c <ccache_file> <principal>`` as a
    child process.

    principal: principal name handed to kinit.
    keytab_file: keytab path handed to kinit via ``-kt``.
    ccache_file: credential cache location handed to kinit via ``-c``.

    Return ccache_file exactly as it was passed in.

    Raise KRB5KinitError (carrying kinit's stderr output) when kinit does
    not exit with status 0.
    '''
    # Pass an argument list rather than splitting a formatted string, so a
    # keytab or cache path containing whitespace reaches kinit as a single
    # argument.
    cmd = [
        'kinit',
        '-kt', str(keytab_file),
        '-c', str(ccache_file),
        str(principal),
    ]

    kinit_proc = subprocess.Popen(cmd, stderr=subprocess.PIPE)
    # stdout is not piped, so the first element of the result is always None.
    _, stderr_data = kinit_proc.communicate()

    # Any non-zero status is a failure; a negative value means the child
    # was terminated by a signal (POSIX).
    if kinit_proc.returncode != 0:
        raise KRB5KinitError(stderr_data)
    return ccache_file
@contextmanager
def krbcontext(using_keytab=False, **kwargs):
'''A context manager for Kerberos-related actions
using_keytab: specify to use Keytab file in Kerberos context if True,
or be as a regular user.
kwargs: contains the nec