# Required import: import kafka [as alias]
# Or: from kafka import SimpleConsumer [as alias]
def __init__(self, *args, **kwargs):
    """Set up the Kafka consumer and pre-fill the sampling reservoir.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer. The ``brokers``, ``group`` and ``topic`` options
    are then read through ``read_option`` and used to build a
    ``kafka.KafkaClient`` and a ``kafka.SimpleConsumer`` with auto-commit
    switched off.

    Attributes assigned here:
        consumer: the ``kafka.SimpleConsumer`` for the configured topic.
        decompress_fun: ``zlib.decompress``.
        processor: whatever ``create_processor()`` returns.
        partitions: partition ids the client reports for the topic.
        _reservoir: whatever ``fill_reservoir()`` returns; it is computed
            before this initializer returns.
    """
    # Function-scope import: kafka is loaded when a reader is constructed.
    import kafka

    super(KafkaRandomReader, self).__init__(*args, **kwargs)

    broker_list = self.read_option('brokers')
    consumer_group = self.read_option('group')
    topic_name = self.read_option('topic')

    # NOTE(review): bytes(<str>) without an encoding raises TypeError on
    # Python 3, so this conversion presumes Python 2 strings -- confirm
    # the supported interpreter before porting.
    broker_hosts = map(bytes, broker_list)
    kafka_client = kafka.KafkaClient(broker_hosts)

    # TODO: Remove these comments when next steps are decided.
    # If resume is set to true, then child should not load initial offsets
    # child_loads_initial_offsets = False if settings.get('RESUME') else True
    # self.consumer = kafka.MultiProcessConsumer(
    #     kafka_client, consumer_group, topic_name, num_procs=1,
    #     child_loads_initial_offsets=child_loads_initial_offsets,
    #     auto_commit=False)
    consumer_options = {'auto_commit': False}
    self.consumer = kafka.SimpleConsumer(
        kafka_client, consumer_group, topic_name, **consumer_options)

    self.decompress_fun = zlib.decompress
    self.processor = self.create_processor()
    self.partitions = kafka_client.get_partition_ids_for_topic(topic_name)

    # NOTE(review): topic and group are looked up again via read_option
    # here; the locals above presumably hold the same values -- confirm
    # read_option has no side effects before reusing them.
    startup_template = (
        'KafkaRandomReader has been initiated. Topic: {}. Group: {}')
    self.logger.info(
        startup_template.format(
            self.read_option('topic'), self.read_option('group')))

    # The reservoir is filled eagerly, so construction does not finish
    # until fill_reservoir() has returned.
    self.logger.info('Running random sampling')
    self._reservoir = self.fill_reservoir()
    self.logger.info('Random sampling completed, ready to process batches')