HiveServer2 为客户端远程执行 Hive 查询提供了接口,基于 Thrift RPC 实现,并支持多用户并发和认证。目前 Python 用户可以通过 pyhs2 模块连接 HiveServer2,执行查询并取回结果。
hive_client.py
#!/usr/bin/env python
#coding:utf-8
import pyhs2
'''
hive client
'''
class HiveClient(object):
    '''
    Thin wrapper around a pyhs2 connection to HiveServer2.

    The connection is opened in the constructor and stays open until
    close() is called.  The client can also be used as a context manager,
    in which case the connection is closed when the ``with`` block exits,
    even if a query raised:

        with HiveClient(host, user, password, database) as client:
            rows = client.query(sql)
    '''

    def __init__(self, db_host, user, password, database, port=10000, authMechanism="PLAIN"):
        '''
        Open the connection.

        All arguments are forwarded unchanged to pyhs2.connect();
        db_host is passed as its ``host`` keyword.
        '''
        self.conn = pyhs2.connect(host=db_host,
                                  port=port,
                                  authMechanism=authMechanism,
                                  user=user,
                                  password=password,
                                  database=database)

    def __enter__(self):
        '''
        Return the client itself so it can be bound by ``with ... as``.
        '''
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        '''
        Close the connection on leaving the ``with`` block.

        Returns False so that an exception raised inside the block is
        propagated to the caller rather than swallowed.
        '''
        self.close()
        return False

    def query(self, sql):
        '''
        Execute sql on a fresh cursor and return the result of
        cursor.fetch().

        The cursor is used as a context manager, so it is released before
        this method returns.
        '''
        with self.conn.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetch()

    def queryNoResult(self, sql):
        '''
        Execute sql on a fresh cursor without fetching anything.

        Returns None.
        '''
        with self.conn.cursor() as cursor:
            cursor.execute(sql)

    def close(self):
        '''
        Close the underlying connection.
        '''
        self.conn.close()
hive_conf.py
#!/usr/bin/env python
#coding:utf-8
import sys
import os
class HiveConnAttribute(object):
    '''
    Holder for the settings needed to connect to HiveServer2.

    Every setting can be supplied to the constructor.  Any setting left
    out keeps the value that used to be hard-coded in the matching getter
    ('**' placeholders, port 10000, 'PLAIN' authentication), so
    HiveConnAttribute() with no arguments behaves exactly as before.
    '''

    def __init__(self, host='**', user='**', passwd='**', database='**',
                 port=10000, authMechanism='PLAIN'):
        '''
        Store the connection settings returned by the getters below.
        '''
        self._host = host
        self._user = user
        self._passwd = passwd
        self._database = database
        self._port = port
        self._authMechanism = authMechanism

    def getHost(self):
        '''Return the configured host.'''
        return self._host

    def getUser(self):
        '''Return the configured user name.'''
        return self._user

    def getPasswd(self):
        '''Return the configured password.'''
        return self._passwd

    def getDatabase(self):
        '''Return the configured database name.'''
        return self._database

    def getPort(self):
        '''Return the configured port (10000 by default).'''
        return self._port

    def getAuthMechanism(self):
        '''Return the configured authentication mechanism ('PLAIN' by default).'''
        return self._authMechanism
query.py
#!/usr/bin/env python
#coding:utf-8
from hive_client import HiveClient
from hive_conf import HiveConnAttribute
if __name__ == '__main__':
    # Read the connection settings from the configuration object.
    hiveConn = HiveConnAttribute()
    hiveHost = hiveConn.getHost()
    hiveUser = hiveConn.getUser()
    hivePasswd = hiveConn.getPasswd()
    hiveDatabase = hiveConn.getDatabase()
    hivePort = hiveConn.getPort()
    hiveAuthMechanism = hiveConn.getAuthMechanism()

    # Open the connection; from here on it must be closed on every path.
    hive_client = HiveClient(db_host=hiveHost, port=hivePort, user=hiveUser, password=hivePasswd,
                             database=hiveDatabase, authMechanism=hiveAuthMechanism)

    # Placeholder statement; replace the dots with the HQL to run.
    hql = '''
.........
'''
    try:
        # query() returns the fetched rows (discarded here);
        # queryNoResult() only executes the statement.
        hive_client.query(hql)
        hive_client.queryNoResult(hql)
    finally:
        # Close the connection even if one of the statements above raised,
        # so a failing query does not leak the connection.
        hive_client.close()