Continuing from the previous post on Spark 2.4 with CDH.
Demo: monitoring an HDFS directory in real time
a. Start with an initial file already in the directory
b. Add a new file while the query is running (see the sketch after this list)
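To drive step b while the streaming query below is running, drop another semicolon-separated file into the monitored directory. Here is a minimal sketch using the Hadoop FileSystem API; the object name AddTestFile, the file name users2.csv, and the sample rows are hypothetical, while the NameNode address and directory match the streaming code:

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object AddTestFile {
  def main(args: Array[String]): Unit = {
    val fs = FileSystem.get(new URI("hdfs://192.168.50.135:8020"), new Configuration())
    // users2.csv is a hypothetical name; rows follow the name;age schema
    val out = fs.create(new Path("/user/hdfs/yanke_data/data3/users2.csv"))
    out.write("tom;20\njerry;18\n".getBytes("UTF-8"))
    out.close()
    fs.close()
  }
}

In production the file should be written to a temporary directory first and then moved into the monitored one, since Spark's file source assumes files appear in the directory atomically.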
Code
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StructType

object FileInputStructuredStreaming {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .master("local")
      .appName("FileInputStructuredStreaming")
      .getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    // Schema of the semicolon-separated CSV files: name;age
    val userSchema = new StructType().add("name", "string").add("age", "integer")

    // Treat the HDFS directory as a streaming source; every file that appears
    // in it is read as part of the next micro-batch
    val lines = spark.readStream
      .option("sep", ";")
      .schema(userSchema)
      .csv("hdfs://192.168.50.135:8020/user/hdfs/yanke_data/data3/")

    // Print each micro-batch to the console
    val query = lines.writeStream
      .outputMode("append")
      .format("console")
      .start()

    query.awaitTermination()
  }
}
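With the query running, each new file in the directory produces one micro-batch on the console. Assuming the hypothetical users2.csv from the sketch above, the output looks roughly like this:

-------------------------------------------
Batch: 0
-------------------------------------------
+-----+---+
| name|age|
+-----+---+
|  tom| 20|
|jerry| 18|
+-----+---+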
Kafka
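The same pattern works with a Kafka source instead of a directory. A minimal sketch; the broker address 192.168.50.135:9092 and the topic test_topic are assumptions, and the spark-sql-kafka-0-10 connector must be on the classpath for Spark 2.4:

import org.apache.spark.sql.SparkSession

object KafkaInputStructuredStreaming {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .master("local")
      .appName("KafkaInputStructuredStreaming")
      .getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    // Subscribe to one topic; broker and topic are placeholders
    val df = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "192.168.50.135:9092")
      .option("subscribe", "test_topic")
      .load()

    // Kafka delivers key/value as binary; cast both to strings for the console
    val messages = df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")

    val query = messages.writeStream
      .outputMode("append")
      .format("console")
      .start()

    query.awaitTermination()
  }
}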