- 说明:Impala 使用的元数据就是 Hive 的元数据(Hive Metastore),并且 Impala 和 Hive 都基于 HiveServer2 服务(协议)完成对数据的操作,因此在代码操作层面二者之间没有太大的区别
- Python连接impala配置相关见:Python连接impala相关配置_MY测试之道的博客-CSDN博客
- Python操作hive工具类见:Python使用pyhive操作hive工具类_MY测试之道的博客-CSDN博客
1、Python操作impala工具类代码
"""
impala数据库相关操作
"""
from impala.dbapi import connect
class ImpalaUtils():
    """Small helper that owns one Impala connection and one cursor.

    The connection is opened in ``__init__`` through ``connect`` (imported
    from ``impala.dbapi`` at the top of the file) and must be released with
    :meth:`close`. The instance can also be used as a context manager, in
    which case :meth:`close` is called automatically on exit.
    """

    def __init__(self, host, port=10000, user=None, password=None, database=None, auth_mechanism=None):
        """Open the connection and create the cursor used by this helper.

        All arguments are forwarded unchanged, as keyword arguments, to
        ``connect``.

        Raises:
            RuntimeError: if connecting or creating the cursor fails. The
                original exception is available as ``__cause__``.
        """
        # Pre-set both attributes so they exist even when connecting fails.
        self.conn = None
        self.cur = None
        try:
            self.conn = connect(
                host=host,
                port=port,
                user=user,
                password=password,
                database=database,
                auth_mechanism=auth_mechanism,
            )
            self.cur = self.conn.cursor()
        except Exception as e:
            # If the connection was opened but the cursor could not be
            # created, do not leak the half-initialised connection.
            if self.conn is not None:
                try:
                    self.conn.close()
                except Exception:
                    # Best effort only: the primary error is reported below.
                    pass
            # A plain string cannot be raised in Python; wrap the message in
            # a real exception and keep the original as the cause.
            raise RuntimeError(f"初始化连接impala失败,失败原因: {e}") from e
        print("初始化连接impala数据库成功!")

    def __enter__(self):
        """Return the helper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the cursor and connection; never suppresses exceptions."""
        self.close()
        return False

    def execute_sql(self, sql: str):
        """Execute one SQL statement and return its rows, if it produced any.

        Args:
            sql: the statement text to pass to the cursor's ``execute``.

        Returns:
            The result of the cursor's ``fetchall()`` when the statement
            produced a result set, otherwise ``None``.

        Raises:
            ValueError: if ``sql`` is not a ``str``.
            RuntimeError: if executing, fetching or committing fails. The
                original exception is available as ``__cause__``.
        """
        if not isinstance(sql, str):
            raise ValueError("参数:sql 数据类型错误!应是str类型")
        try:
            self.cur.execute(sql)
            # Per the DB-API, ``description`` is None when the last statement
            # returned no rows. This is more reliable than searching the SQL
            # text for keywords (case, DESCRIBE/WITH, "insert ... select").
            results = self.cur.fetchall() if self.cur.description is not None else None
            self.conn.commit()
        except Exception as e:
            raise RuntimeError(f"SQL语句执行失败!,失败原因: {e}") from e
        print("SQL语句执行成功!")
        return results

    def close(self):
        """Close the cursor and then the Impala connection.

        The connection is closed even if closing the cursor raises.
        """
        try:
            self.cur.close()
        finally:
            self.conn.close()