from pyspark import SparkContext
from pyspark.streaming import StreamingContext

# Create a StreamingContext with a 1-second batch interval
sc = SparkContext("yarn", "stream_test")
ssc = StreamingContext(sc, 1)

# Read lines from a TCP socket and print each micro-batch to the console
lines = ssc.socketTextStream('localhost', 9999)
lines.pprint()

ssc.start()
# Block for up to 100 seconds, then return control to the caller
ssc.awaitTerminationOrTimeout(100)
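# The socket source above needs a process writing newline-terminated text to
# localhost:9999 (the Spark quick start uses `nc -lk 9999`). A minimal test
# sender is sketched below; it is a hypothetical helper, not part of the
# original example.
import socket
import time

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as srv:
    srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    srv.bind(('localhost', 9999))
    srv.listen(1)
    conn, _ = srv.accept()  # wait for the Spark socket receiver to connect
    with conn:
        for i in range(10):
            conn.sendall('line {}\n'.format(i).encode('utf-8'))
            time.sleep(1)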
from kafka import KafkaConsumer, KafkaProducer
import logging
import json


def kafka_consumer(kafkatopic, groupid, server):
    # Consume messages from the given topic as part of a consumer group,
    # auto-committing offsets every second
    consumer = KafkaConsumer(
        kafkatopic,
        group_id=groupid,
        bootstrap_servers=server,
        session_timeout_ms=15000,
        auto_commit_interval_ms=1000,
        enable_auto_commit=True
    )
    # Blocks and iterates over incoming messages indefinitely
    for message in consumer:
        logging.info(message)
        print(message)
def kafka_producer(kafkatopic, server):
    # Serialize each value as JSON before sending
    producer = KafkaProducer(
        bootstrap_servers=server,
        value_serializer=lambda v: json.dumps(v).encode('utf-8')
    )
    # Send ten small JSON messages; send() is asynchronous, so flush()
    # before reading metrics and closing the client
    for i in range(10):
        producer.send(kafkatopic, {i: i ** 2})
    producer.flush()
    print(producer.metrics())
    producer.close()
if __name__ == '__main__':
    kafka_producer(
        server=['10.1.3.xxx:9092', '10.1.4.xxx:9092', '10.1.5.xxx:9092'],
        kafkatopic='topic_producer_xxx'
    )
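    # For completeness, the matching consumer call for the topic produced
    # above might look like the following (the group id is a made-up
    # placeholder; note that kafka_consumer blocks while polling):
    # kafka_consumer(
    #     kafkatopic='topic_producer_xxx',
    #     groupid='group_consumer_xxx',
    #     server=['10.1.3.xxx:9092', '10.1.4.xxx:9092', '10.1.5.xxx:9092']
    # )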