#!/Users/zuojingang/python_venv/bin/python3.6
# -*- coding: UTF-8 -*-
import sys
import time
import os
from datetime import timedelta,datetime
from cassandra.cluster import Cluster
# from cassandra.query import SimpleStatement
# from cassandra.query import tuple_factory
# from cassandra.query import dict_factory
# from cassandra.policies import DCAwareRoundRobinPolicy
# from cassandra.policies import DowngradingConsistencyRetryPolicy
from cassandra.auth import PlainTextAuthProvider
# from cassandra import ConsistencyLevel
import threading
# Contact points used to bootstrap the driver's connection to the cluster.
cluster_list = ['ip1', 'ip2', 'ip3']

# load_balancing_policy selects the load-balancing strategy and
# default_retry_policy selects the retry strategy. Both are left at the
# driver defaults here; to opt in, import the policies and pass e.g.
#   load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='DC2'),
#   default_retry_policy=DowngradingConsistencyRetryPolicy()
#
# Pass auth_provider when the cluster requires authentication; when it does
# not, Cluster(cluster_list) is sufficient.
cluster = Cluster(
    cluster_list,
    auth_provider=PlainTextAuthProvider(username='', password=''),
)
session = cluster.connect('keyspace')
# Page size applied to queries issued through this session.
session.default_fetch_size = 10000

# Cassandra data consistency: uncomment (and import ConsistencyLevel) to
# override the driver's default consistency level.
# session.default_consistency_level = ConsistencyLevel.LOCAL_QUORUM
# Root directory under which one dated sub-directory per run day is created.
base_dir = '/Users/zuojingang/file/output/'
print('base_dir=' + base_dir)

# Output for this run goes to <base_dir>/<YYYY-MM-DD>/output_file.csv.
result_dir = base_dir + time.strftime('%Y-%m-%d') + '/'
# exist_ok=True replaces the racy "check, then create" sequence. It still
# raises if the path exists but is not a directory.
os.makedirs(result_dir, exist_ok=True)

file_path = result_dir + 'output_file.csv'
# Remove any file left by an earlier run on the same day. Attempting the
# removal directly avoids a race between the existence check and the delete.
try:
    os.remove(file_path)
except FileNotFoundError:
    pass
#异步线程
class PagedResultHandler(object):
    """Consume every page of an asynchronous query through callbacks.

    The handler registers itself on ``future``, passes each row of each page
    to ``process_row``, requests the next page while more are available and
    sets ``finished_event`` when the last page has been handled or an error
    has been recorded.

    Attributes:
        error: The exception that ended the query early, or ``None``.
        finished_event: ``threading.Event`` set once paging has ended.
        future: The response future whose pages are consumed.
        process_row: Callable invoked once for every row.
    """

    def __init__(self, future, process_row):
        """Store the collaborators and register the page/error callbacks.

        Args:
            future: Object exposing ``add_callbacks``, ``has_more_pages`` and
                ``start_fetching_next_page``.
            process_row: Callable taking a single row.
        """
        self.error = None
        self.finished_event = threading.Event()
        self.future = future
        self.process_row = process_row
        self.future.add_callbacks(
            callback=self.handle_page,
            errback=self.handle_error,
        )

    def handle_page(self, rows):
        """Process one page of rows, then fetch the next page or finish.

        Any exception raised while handling the page is recorded through
        ``handle_error`` instead of propagating, because this method runs as
        a callback: an escaped exception would leave ``finished_event``
        unset and block the waiting thread forever.

        Args:
            rows: The rows of the current page; may be empty.
        """
        try:
            print('len current_rows = ' + str(len(rows)))
            # An empty page has no first row to report.
            if rows:
                print('first obtain_time = ' + str(rows[0].obtain_time))
            for row in rows:
                self.process_row(row)
            if self.future.has_more_pages:
                self.future.start_fetching_next_page()
            else:
                self.finished_event.set()
        except Exception as exc:
            self.handle_error(exc)

    def handle_error(self, exc):
        """Record ``exc`` and release anyone waiting on ``finished_event``."""
        self.error = exc
        self.finished_event.set()
class Process:
    """Aggregate ``num`` per day from ``test_table1`` query results.

    All state is kept on the class itself and every method is a classmethod,
    so the class is used directly and never instantiated.
    """

    # Output file, opened in append mode when the class body executes and
    # closed by destory().
    file_output = open(file_path, 'a')
    # Row types whose num is additionally counted in 'types_num'.
    filter_types = (0, 31, 32, 33)
    # Maps a day number (row.key // 1000) to its running totals
    # {'sum_num': ..., 'types_num': ...}. Despite the name, this is a dict of
    # results, not a directory path.
    result_dir = {}

    @classmethod
    def init(cls):
        """Write the CSV header line to the output file."""
        file_output_header = 'c_header1,c_header2\n'
        cls.file_output.write(file_output_header)

    @classmethod
    def destory(cls):
        """Close the output file.

        NOTE(review): the totals collected in ``result_dir`` are never
        written to the file; only the header is. The original carried an
        unfinished sketch for emitting one line per day:
            row_str = str(date) + ',' + types_num + ',' + sum_num + '\\n'
        Confirm the intended columns before implementing it.
        """
        cls.file_output.close()

    @classmethod
    def toProcess(cls, date):
        """Query the 256 keys derived from ``date`` and aggregate their rows.

        The base key is ``YYYYMMDD * 1000``; keys ``base`` through
        ``base + 255`` are queried one after another, each query being paged
        to completion before the next one starts.

        Args:
            date: Object with ``year``, ``month`` and ``day`` attributes.

        Raises:
            Exception: Whatever error the page handler recorded for a query.
        """
        key = date.year * 10000000 + date.month * 100000 + date.day * 1000
        for i in range(256):
            future = session.execute_async(
                'select * from test_table1 where key={}'.format(str(key + i))
            )
            handler = PagedResultHandler(future, cls.process_row)
            # Block until every page of this query has been processed.
            handler.finished_event.wait()
            if handler.error:
                raise handler.error

    @classmethod
    def process_row(cls, row):
        """Add one row's ``num`` to the totals of the day it belongs to.

        Args:
            row: Object with ``key``, ``num`` and ``type`` attributes.
        """
        print('row=' + str(row) + '\n')
        # Floor division keeps the arithmetic in integers; true division
        # would go through a float and lose precision for very large keys.
        day = row.key // 1000
        day_result = cls.result_dir.get(day, {'sum_num': 0, 'types_num': 0})
        print('day_result=' + str(day_result) + '\n')
        day_result['sum_num'] += row.num
        if row.type in cls.filter_types:
            day_result['types_num'] += row.num
        cls.result_dir[day] = day_result
        print('this_result_dir=' + str(cls.result_dir))
# Aggregate ten consecutive days, starting at s_time.
try:
    Process.init()
    s_time = datetime.strptime('2018-05-21 00:00:00', '%Y-%m-%d %H:%M:%S')
    for i in range(10):
        _datetime_s = s_time + timedelta(days=i)
        Process.toProcess(_datetime_s)
finally:
    # Close the output file even when a query fails part-way through.
    Process.destory()
# ++++++++++++++++++++++++++++++++++++++++++++++++++++
# query = 'select * from test_table1 where key=20180501000'
# statement = SimpleStatement(query, fetch_size=5000)
# results = session.execute(statement)
# print('len current_rows = ' + str(len(results.current_rows)))
# print('has_more_pages = ' + str(results.has_more_pages))
# paging_state=results.paging_state
# print('paging_state = ' + str(paging_state))
# print('first obtain_time = ' + str(results[0].day))
# while paging_state:
# # statement = SimpleStatement(query, fetch_size=10000)
# rs = session.execute(statement, paging_state=paging_state);
# print('\n')
# print('len current_rows = ' + str(len(rs.current_rows)))
# print('has_more_pages = ' + str(rs.has_more_pages))
# paging_state=rs.paging_state
# print('paging_state = ' + str(paging_state))
# print('first day = ' + str(rs[0].day))
# Exit explicitly with status 0 once the statements above have completed.
sys.exit(0)
# Simple Cassandra querying with Python 3
# (Source page footer: latest recommended article published 2020-11-25 18:07:51)