# -*- coding: utf-8 -*-
import datetime
from clickhouse_driver import Client
import heapq
import threading
# 返回clickhouse的客户端
def getClickHouseClient():
try:
host_name = 'xxx.xxx.host'
client = Client(
host=host_name,
database='your db',
user='default',
password='root123',
send_receive_timeout=20
)
return client
except Exception as e:
print("Error: "+str(e))
return None
# 多线程主函数
add = 0
def thread_solve_row(SQL, thread_heap_project_rows, insert_sql):
# 变量属于方法的本地变量,相当于每个sub_thread独占一份thread_client,msg_list
thread_client = getClickHouseClient()
msg_list = []
thread_name = threading.currentThread().name
while 1:
if len(thread_heap_project_rows) == 0:
break
con_addr = heapq.heappop(thread_heap_project_rows)
global add #声明我们在函数内部使用的是在函数外部定义的全局变量add
add = add - 1
project_symbol = con_addr[0]
ct_address = con_addr[1]
TMP_SQL = SQL.format(ct_address)
all_contract_users = thread_client.execute(TMP_SQL)
if len(all_contract_users) == 0:
print(add, 'skip', ct_address, project_symbol, thread_name)
continue
for user_addr in all_contract_users:
msg_list.append((project_symbol, user_addr[0], user_addr[1], user_addr[2], user_addr[3], user_addr[4], data_time))
if len(msg_list) >= 5000:
thread_client.execute(insert_sql, msg_list)
print('{0}___{1}___写入数据{2}条__{3}___总剩余:{4}:'.format(add, ct_address, len(msg_list), thread_name, len(thread_heap_project_rows)))
msg_list.clear()
else:
print(add, '当前list.size()=', len(msg_list), thread_name, project_symbol)
thread_client.execute(insert_sql, msg_list)
if __name__ == '__main__':
query_contracts_sql = '''
select
project_symbol,
contract_address
from tmp_table_xxx_1
group by project_symbol,contract_address
order by project_symbol,contract_address
'''
ch_client = getClickHouseClient()
all_project_contracts = ch_client.execute(query_contracts_sql)
SQL = '''
select
to_address as contract_address,
from_address,
minMerge(first_call_time) as first_call_time,
maxMerge(last_call_time) as last_call_time,
countMerge(total_count) as total_count
from tmp_table_xxx_2
where to_address='{0}'
group by to_address,from_address
'''
data_time = datetime.datetime.now().strftime("%Y%m%d")
insert_sql = 'INSERT INTO tmp_talbe_xxx_3(project_symbol, contract_address, from_address, first_call_time, last_call_time, total_count, update_time) VALUES'
# 创建一个空堆,并加入数据, 供多线程同时调用
thread_heap_project_rows = []
for item in all_project_contracts:
heapq.heappush(thread_heap_project_rows, item)
add = len(all_project_contracts)
t1 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t1.start()
t2 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t2.start()
t3 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t3.start()
t4 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t4.start()
t5 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t5.start()
t6 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t6.start()
t7 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t7.start()
t8 = threading.Thread(target=thread_solve_row, args=[SQL, thread_heap_project_rows, insert_sql])
t8.start()