通过python连接presto拉取数据库数据,突然有一天查询的sql要嵌入可变参数了,折腾了一下,发现原来是参数没处理成SQL能解读的形式。
现有本地dataframe数据如下:
我想要将id列的值传入SQL中,作为筛选条件取出其他数据。 id列共有9206个值:
先将id列的数值转成list,再把它处理成str类型,但样子还是list的样子,如下:
处理代码为:
sale_code=test['id'].values.tolist()
sale_code1="'" + "','".join(sale_code) + "'"
接下来就是sql传参了,核心代码:
sql = f"""xxxx({参数名})"""
案例代码:
# 传入可变参数
from dccdb import Presto
_presto = Presto()
data2=_presto.prestoSelect(
sql=f"""select all_count,l_sale_name,l_sale_code
from huting_dm.csr_web_policy_extend
where l_sale_code in ({sale_code1})
group by l_sale_name,l_sale_code
""",
username = '******',
withcolumns=0)
y=list(data2)
df2=pd.DataFrame(y[1])
传参取出数据如下:
其他:
针对presto链接字符串的语法,是我们大数据同事封装了该包 from dccdb import Presto,所以直接拿来就用,问了下具体的封装代码如下,供参考:
class Presto:
try:
from pyhive import presto
except Exception as err_msg:
print (err_msg)
def __init__(self):
self._logbase = DccLog()
self._configfile = ConfigFile()
self.__config = self._configfile.get_all_config_file()
def _presto_connect(self,host,port,username):
'''
presto connect
:return:
'''
return self.presto.connect(host = host,port = port,username = username)
def prestoTableColumns(self,table,username):
# connect sqlite
try:
cursor = self._presto_connect(host = self.__config['presto']['coordinator'], port = self.__config['presto']['port'],username = username).cursor()
#cursor = self.presto.connect(self.__config['presto']['coordinator'], self.__config['presto']['port']).cursor()
except Exception as e:
self._logbase.info('presto connection failure:%s - %s' % (Exception, e),func = sys._getframe().f_code.co_name)
self._logbase.raiseError(sys._getframe().f_code.co_name, e, traceback.format_exc(), config=self.__config, table=table)
# select data
try:
sql_stmt = '''select * from {} where 1=2 '''.format(table)
cursor.execute(sql_stmt)
columns_list = [tuple[0] for tuple in cursor.description]
return columns_list
except Exception as e:
self._logbase.info('presto select failure:%s - %s' % (Exception, e),func = sys._getframe().f_code.co_name)
self._logbase.raiseError(sys._getframe().f_code.co_name, e, traceback.format_exc(),table=table)
finally:
cursor.close()
def prestoTableColumnsType(self,table,username):
# connect sqlite
try:
cursor = self._presto_connect(host=self.__config['presto']['coordinator'], port=self.__config['presto']['port'], username = username).cursor()
#cursor = self.presto.connect(self.__config['presto']['coordinator'], self.__config['presto']['port']).cursor()
except Exception as e:
self._logbase.info('presto connection failure:%s - %s' % (Exception, e),func = sys._getframe().f_code.co_name)
self._logbase.raiseError(sys._getframe().f_code.co_name, e, traceback.format_exc(), config=self.__config, table=table)
# select data
try:
sql_stmt = '''select * from {} where 1=2 '''.format(table)
cursor.execute(sql_stmt)
column_type = {}
for col in cursor.description:
column_type[col[0]] = col[1]
return column_type
except Exception as e:
self._logbase.info('presto select failure:%s - %s' % (Exception, e),func = sys._getframe().f_code.co_name)
self._logbase.raiseError(sys._getframe().f_code.co_name, e, traceback.format_exc(), config=self.__config, table=table)
finally:
cursor.close()
def prestoSelect(self,sql,username,withcolumns=None):
cursor = None
data_list = []
try:
cursor = self._presto_connect(host=self.__config['presto']['coordinator'], port=self.__config['presto']['port'], username = username).cursor()
#cursor = self.presto.connect(self.__config['presto']['coordinator'], self.__config['presto']['port']).cursor()
cursor.execute(sql)
data_cursor = cursor.fetchall()
# 如果没有输入列,则返回列表元组
if withcolumns is None:
for row in data_cursor:
data_list.append(row)
else:
# 如果输入列名,则返回列表字典
# tuple to dict
columns_list = [tuple[0] for tuple in cursor.description]
for row in data_cursor:
row_dict = {}
i = 0
for i in range(len(columns_list)):
row_dict[columns_list[i]] = row[i]
data_list.append(row_dict)
return 0,data_list
except Exception as err_msg:
# print log
self._logbase.printError(sys._getframe().f_code.co_name,err_msg,traceback.format_exc(), config=self.__config, sql=sql, withcolumns=withcolumns)
return returncode(200,err_msg)
finally:
if cursor is not None:
cursor.close()
def main():
_presto = Presto()
print (_presto.prestoSelect('select 1',username = '****'))
if __name__ =='__main__':
main()