环境配置说明
- Python 版本及依赖库:Python 3.6.7
JayDeBeApi(源码见 GitHub)
- 环境变量设置
export LD_LIBRARY_PATH=/hadoop/hadoop-2.7.3/lib/native/:$LD_LIBRARY_PATH
export CLASSPATH=$CLASSPATH:/home/log-analyse/test/jdbc/*
- java jar包依赖
# hadoop==2.7.3,hive==2.1.1
commons-logging-1.2.jar
hive-jdbc-2.1.1.jar
httpclient-4.4.jar
slf4j-api-1.7.26.jar
hadoop-common-2.7.3.jar
hive-metastore-2.1.1.jar
httpcore-4.4.jar
hive-exec-2.1.1.jar
hive-service-2.1.1.jar
libthrift-0.12.0.jar
Kerberos 认证:
先完成认证,再运行脚本
kinit -k -t /etc/krbX.keytab hadoop/xxx@EXAMPLE.COM
klist
kdestroy # 销毁会话;会话本身有有效期
玩具代码供参考:
from concurrent import futures
import pysnooper
# Upper bound on worker threads, passed as max_workers to the ThreadPoolExecutor
# created in the __main__ section.
MAX_WORKERS = 5
@pysnooper.snoop()
def get_jdbc_connection():
    """Open a JDBC connection to HiveServer2 through JayDeBeApi.

    The JDBC URL carries the Kerberos service principal. This function does
    not read a keytab itself; Kerberos authentication is expected to have been
    done beforehand (e.g. ``kinit -k -t /etc/krbX.keytab hadoop/xxx@EXAMPLE.COM``).

    Returns:
        The connection object returned by ``jaydebeapi.connect``.
    """
    import jaydebeapi
    import jpype

    # If the JVM is already running but the calling thread is not attached to
    # it, attach the thread and set its context class loader to the system
    # class loader before connecting.
    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
        jpype.java.lang.Thread.currentThread().setContextClassLoader(
            jpype.java.lang.ClassLoader.getSystemClassLoader())

    DATABASE = 'default'
    DRIVER = 'org.apache.hive.jdbc.HiveDriver'
    SERVER = 'localhost'
    PRINCIPAL = 'hadoop/xxx@EXAMPLE.COM'
    PORT = 10000
    JARSPATH = ["/home/log-analyse/test/jdbc/hive-jdbc-2.1.1.jar"]
    # Username and password are passed as empty strings; the URL's principal
    # parameter is the only authentication-related setting supplied here.
    USERNAME = ''
    PASSWORD = ''

    # JDBC connection string: jdbc:hive2://<host>:<port>/<db>;principal=<principal>;
    URL = "jdbc:hive2://{}:{}/{};principal={};".format(
        SERVER, PORT, DATABASE, PRINCIPAL)

    # Connect to HiveServer2
    return jaydebeapi.connect(DRIVER, URL, [USERNAME, PASSWORD], JARSPATH)
def mainAction():
    """Run a sample query against Hive and return all fetched rows.

    Opens a connection via ``get_jdbc_connection``, executes
    ``select * from log_test limit 10`` and returns the result of
    ``cursor.fetchall()``.

    The cursor and the connection are closed even when executing the query or
    fetching the rows raises, so a failed query does not leak the connection.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the query.
    """
    conn = get_jdbc_connection()
    try:
        cursor = conn.cursor()
        try:
            # Execute SQL query
            sql = "select * from log_test limit 10"
            cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()
if __name__ == '__main__':
    # Submit mainAction to a thread pool and print the future together with
    # its result once it has completed.
    with futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        pending = [pool.submit(mainAction)]
        for done in futures.as_completed(pending):
            print('{} result: {!r}'.format(done, done.result()))