在 Python 中连接 Hive 和 Impala 有很多种方式,有 pyhive、impyla、pyspark、ibis 等等,本篇我们就逐一介绍如何使用这些包连接 Hive 或 Impala,以及如何通过 Kerberos 认证。
Kerberos
如果集群没有开启 Kerberos 认证,则不需要这里的代码;如果已经在系统环境内通过 kinit 命令完成了认证,也不需要这部分代码。
krbcontext.context_shell
# Kerberos credential-cache helpers that shell out to the `kinit` command.
# __all__ exports `krbcontext` and `KRB5KinitError`.
#
# NOTE(review): this snippet appears to have lost its line breaks and
# indentation (e.g. "sys# import pwdimport subprocess", "@contextmanagerdef");
# it has to be re-split into separate lines before it can be executed.
#
# KRB5KinitError
#     Exception raised by both helpers below when kinit finishes with a
#     return code greater than zero; it carries what kinit wrote to stderr.
#
# init_ccache_as_regular_user(principal=None, ccache_file=None)
#     Raises IOError when sys.stdin is not a TTY (presumably because kinit
#     needs to prompt interactively -- confirm). Otherwise runs
#     "kinit -c <ccache_file> <principal>" with stderr piped and returns
#     ccache_file unchanged.
#     NOTE(review): both parameters default to None, and None is formatted
#     into the command as the literal text "None" -- confirm that callers
#     always pass real values.
#
# init_ccache_with_keytab(principal, keytab_file, ccache_file)
#     Runs "kinit -kt <keytab_file> -c <ccache_file> <principal>" with stderr
#     piped and returns ccache_file unchanged.
#
# Notes that apply to both helpers:
#   * The command is built as one string and then split on whitespace, so a
#     principal or path containing whitespace reaches kinit as several
#     separate arguments.
#   * stdout_data is assigned from communicate() but never used.
#   * The failure check is "returncode > 0".
#     NOTE(review): a negative return code would not raise KRB5KinitError --
#     confirm whether that is intended.
# -*- coding: utf-8 -*- __all__ = [ 'krbcontext', 'KRB5KinitError', ] import os, sys# import pwdimport subprocess from contextlib import contextmanager class KRB5KinitError(Exception): pass # def get_login():# ''' Get current effective user name '''## return pwd.getpwuid(os.getuid()).pw_name def init_ccache_as_regular_user(principal=None, ccache_file=None): '''Initialize credential cache as a regular user Return the filename of newly initialized credential cache ''' if not sys.stdin.isatty(): raise IOError('This is not running on console. So, you need to run kinit ' 'with your principal manually before anything goes.') cmd = 'kinit %(ccache_file)s %(principal)s' args = {} args['principal'] = principal args['ccache_file'] = '-c %s' % ccache_file kinit_proc = subprocess.Popen( (cmd % args).split(), stderr=subprocess.PIPE) stdout_data, stderr_data = kinit_proc.communicate() if kinit_proc.returncode > 0: raise KRB5KinitError(stderr_data) return ccache_file def init_ccache_with_keytab(principal, keytab_file, ccache_file): '''Initialize credential cache using keytab file Return the filename of newly initialized credential cache ''' cmd = 'kinit -kt %(keytab_file)s -c %(ccache_file)s %(principal)s' args = {} args['principal'] = principal args['ccache_file'] = ccache_file args['keytab_file'] = keytab_file kinit_proc = subprocess.Popen( (cmd % args).split(), stderr=subprocess.PIPE) stdout_data, stderr_data = kinit_proc.communicate() if kinit_proc.returncode > 0: raise KRB5KinitError(stderr_data) return ccache_file @contextmanagerdef krbcontext(using_keytab=False, **kwargs): '''A context manager for Kerberos-related actions using_keytab: specify to use Keytab file in Kerberos context if True,