python3简单查询Cassandra

最新推荐文章于 2020-11-25 18:07:51 发布

以十

最新推荐文章于 2020-11-25 18:07:51 发布

阅读量1.9k

点赞数

本文链接：https://blog.csdn.net/u010746357/article/details/80662363

版权

Python3 同时被 3 个专栏收录

3 篇文章 0 订阅

订阅专栏

Data Statistics&Analysis

3 篇文章 0 订阅

订阅专栏

Cassandra

1 篇文章 0 订阅

订阅专栏

#!/Users/zuojingang/python_venv/bin/python3.6
# -*- coding: UTF-8 -*-

import sys
import time
import os
from datetime import timedelta,datetime
from cassandra.cluster import Cluster
# from cassandra.query import SimpleStatement
# from cassandra.query import tuple_factory
# from cassandra.query import dict_factory
# from cassandra.policies import DCAwareRoundRobinPolicy
# from cassandra.policies import DowngradingConsistencyRetryPolicy
from cassandra.auth import PlainTextAuthProvider
# from cassandra import ConsistencyLevel
import threading

# Contact points used to bootstrap the connection to the Cassandra cluster.
cluster_list = ['ip1','ip2','ip3']

# load_balancing_policy is the load-balancing policy and default_retry_policy
# is the retry policy; both are optional and shown (disabled) below.
# Pass auth_provider when the cluster requires authentication; when it does
# not, cluster = Cluster(cluster_list) is sufficient.
cluster=Cluster(cluster_list, auth_provider=PlainTextAuthProvider(username='', password=''))
# Optional extra arguments for Cluster(...):
#   load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='DC2'),
#   default_retry_policy=DowngradingConsistencyRetryPolicy()
session=cluster.connect('keyspace')
# Number of rows fetched per page for queries issued through this session.
session.default_fetch_size=10000
# Cassandra consistency level (left at the driver default unless enabled).
#session.default_consistency_level  = ConsistencyLevel.LOCAL_QUORUM

# Root directory under which one dated result folder per run day is created.
base_dir = '/Users/zuojingang/file/output/'
print('base_dir=' + base_dir)

# Dated sub-directory (base_dir already ends with a slash).
result_dir = '{}{}/'.format(base_dir, time.strftime('%Y-%m-%d'))
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)

# Start from a clean CSV: remove any file left over from an earlier run.
file_path = os.path.join(result_dir, 'output_file.csv')
if os.path.exists(file_path):
    os.remove(file_path)

# Asynchronous, callback-driven handling of paged query results
class PagedResultHandler(object):
    """Drive an asynchronous, paged query to completion through callbacks.

    The handler registers itself on ``future`` so that each fetched page is
    passed to ``handle_page``; every row of the page is handed to
    ``process_row``. ``finished_event`` is set once the last page has been
    handled or as soon as anything fails. On failure the exception is stored
    in ``error`` so the waiting caller can inspect or re-raise it.

    Args:
        future: object exposing ``add_callbacks``, ``has_more_pages`` and
            ``start_fetching_next_page`` (here, the value returned by
            ``session.execute_async``).
        process_row: callable invoked once with each row.
    """

    def __init__(self, future, process_row):
        self.error = None
        self.finished_event = threading.Event()
        self.future = future
        self.process_row = process_row
        # Registered last so that the callbacks can rely on the attributes
        # above even if they are invoked straight away.
        self.future.add_callbacks(
            callback=self.handle_page,
            errback=self.handle_error
            )

    def handle_page(self, rows):
        """Process one page of rows, then request the next page or finish.

        Any exception raised while handling the page is routed to
        ``handle_error`` instead of escaping the callback; otherwise
        ``finished_event`` would never be set and the waiter would block
        forever.
        """
        try:
            print('len current_rows = ' + str(len(rows)))
            # A page may be empty (no matching rows), so only peek at the
            # first row when there is one.
            if rows:
                print('first obtain_time = ' + str(rows[0].obtain_time))
            for row in rows:
                self.process_row(row)

            if self.future.has_more_pages:
                self.future.start_fetching_next_page()
            else:
                self.finished_event.set()
        except Exception as exc:
            # Callback boundary: record the failure and release the waiter.
            self.handle_error(exc)

    def handle_error(self, exc):
        """Record ``exc`` and signal completion so the waiter wakes up."""
        self.error = exc
        self.finished_event.set()


class Process:
    """Class-level aggregator for rows read from ``test_table1``.

    All state lives on the class itself: the open CSV handle, the tuple of
    type codes counted separately, and the per-day running totals.
    """

    # Output CSV, opened in append mode when the class body executes.
    file_output = open(file_path, 'a')
    # Type codes whose ``num`` is additionally added to 'types_num'.
    filter_types = (0,31,32,33)
    # Maps day (row key with its last three digits dropped) to its totals.
    result_dir = {}

    @classmethod
    def init(cls):
        """Write the CSV header line to the output file."""
        cls.file_output.write('c_header1,c_header2\n')

    @classmethod
    def destory(cls):
        """Close the output file; the collected totals are not written."""
        cls.file_output.close()

    @classmethod
    def toProcess(cls, date):
        """Query the 256 keys derived from ``date`` and aggregate their rows.

        The base key is the date encoded as YYYYMMDD followed by three
        zeros; offsets 0..255 are added to it. Each query is waited on until
        all of its pages are handled, and a recorded error is re-raised.
        """
        base_key = date.year*10000000 + date.month*100000 + date.day*1000
        for partition_key in range(base_key, base_key + 256):
            statement = 'select * from test_table1 where key={}'.format(str(partition_key))
            pending = session.execute_async(statement)
            handler = PagedResultHandler(pending, cls.process_row)
            # Block until the handler reports the last page or an error.
            handler.finished_event.wait()
            if handler.error:
                raise handler.error

    @classmethod
    def process_row(cls, row):
        """Add one row's ``num`` to the running totals of its day."""
        print('row=' + str(row) + '\n')
        day = int(row.key/1000)

        totals = cls.result_dir.get(day)
        if totals is None:
            totals = {'sum_num':0,'types_num':0}
        print('day_result=' + str(totals) + '\n')

        totals['sum_num'] += row.num
        if row.type in cls.filter_types:
            totals['types_num'] += row.num
        cls.result_dir[day] = totals

        print('this_result_dir=' + str(cls.result_dir))


# Write the CSV header, then aggregate ten consecutive days starting at
# 2018-05-21.
try:
    Process.init()

    s_time = datetime.strptime('2018-05-21 00:00:00', '%Y-%m-%d %H:%M:%S')
    for i in range(10):
        _datetime_s = s_time + timedelta(days=i)
        Process.toProcess(_datetime_s)
finally:
    # Close the output file even when a query fails part-way through.
    Process.destory()

# ++++++++++++++++++++++++++++++++++++++++++++++++++++
# query = 'select * from test_table1 where key=20180501000'
# statement = SimpleStatement(query, fetch_size=5000)
# results = session.execute(statement)
# print('len current_rows = ' + str(len(results.current_rows)))
# print('has_more_pages = ' + str(results.has_more_pages))
# paging_state=results.paging_state
# print('paging_state = ' + str(paging_state))
# print('first obtain_time = ' + str(results[0].day))

# while paging_state:
#   # statement = SimpleStatement(query, fetch_size=10000)
#   rs = session.execute(statement, paging_state=paging_state);
#   print('\n')
#   print('len current_rows = ' + str(len(rs.current_rows)))
#   print('has_more_pages = ' + str(rs.has_more_pages))
#   paging_state=rs.paging_state
#   print('paging_state = ' + str(paging_state))
#   print('first day = ' + str(rs[0].day))

# Terminate the script with exit status 0.
sys.exit(0)

以十

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
python3简单查询Cassandra

#!/Users/zuojingang/python_venv/bin/python3.6# -*- coding: UTF-8 -*-import sysimport timeimport osfrom datetime import timedelta,datetimefrom cassandra.cluster import Cluster# from cassandra.q...
复制链接

扫一扫

专栏目录