#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Word count with Kafka.

Consume data from the Kafka topic "test" and compute word counts with a
batch duration of 1 second.
"""
from __future__ import print_function
import os
from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
# Configure the PySpark launch environment in one step:
#   * PYSPARK_PYTHON / PYSPARK_DRIVER_PYTHON select the Python interpreter
#     (both are pinned to the same python3 binary here).
#   * PYSPARK_SUBMIT_ARGS carries the spark-submit arguments; here it
#     requests the spark-streaming-kafka-0-8 package (Scala 2.11 build,
#     version 2.3.1) and ends with the "pyspark-shell" resource.
os.environ.update({
    "PYSPARK_PYTHON": "/usr/bin/python3",
    "PYSPARK_DRIVER_PYTHON": "/usr/bin/python3",
    "PYSPARK_SUBMIT_ARGS": (
        '--packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.3.1 '
        'pyspark-shell'
    ),
})
# Obtain the SparkSession for the "word_count" application, using the
# "local[*]" master URL. getOrCreate() returns an already-active session
# if one exists, otherwise it builds a new one from these settings.
spark = (
    SparkSession.builder
    .appName("word_count")
    .master("local[*]")
    .getOrCreate()
)