无界表模型:新到达的流数据相当于在旧的 DataFrame 上追加(并上)新的数据行
微批处理:先把偏移量写入预写日志(write-ahead log),再处理数据
持续处理:日志异步写入,无需等待,延迟更低
操作步骤
# 1. Import the PySpark modules used below.
#    NOTE: the keyword is lowercase `from` — the original capital `From`
#    is a syntax error in Python.
from pyspark.sql import SparkSession
from pyspark.sql.functions import split, explode
# 2.创建sparksession对象
if __name__ =='__main__':
spark = SparkSession\
.builder\
.appName("StructuredNetworkWordCount")\
.getOrCreate()
spark.sparkContext.setLogLevel('WARN')
# 3.创建输入源
lines = spark\
.readStream\
.format('socket')\
.option('host','localhost')\
.option('port','9999')\
.load()
# 4.定义流计算过程
word = lines.select(
explode(
split(lines.value,