// Environment: Spark 2.2.0, Kafka 0.10.0, Scala 2.11.0
// Streaming entry point: consumes comma-separated transaction records from the
// Kafka topic "train", keeps rows with positive (post-negation) amounts,
// sums the amount per merchant name, and streams the result to MySQL through
// a project-local JDBCSink (a ForeachWriter; its definition is not visible here).
def main(args: Array[String]): Unit = {
// Windows-only Hadoop workaround (winutils.exe lookup). Hard-coded local path
// makes this build machine-specific — consider guarding by OS or externalizing.
System.setProperty("hadoop.home.dir", "D:\\Program Files\\hadoop\\hadoop-2.7.7")
val spark = SparkSession.builder()
.appName("socketStreaming")
.master("spark://bigdata.slave01.com:7077")
.getOrCreate()
// NOTE(review): plain-text DB credentials hard-coded in source — move to
// configuration / environment variables before this leaves a sandbox.
val url = "jdbc:mysql://bigdata.master.com:3306/mytrain"
val username = "root"
val password = "123456"
// JDBCSink is declared elsewhere in the project (presumably extends
// ForeachWriter[Row] — required by .foreach(writer) below; TODO confirm).
val writer = new JDBCSink(url, username, password)
import spark.implicits._
// Subscribe to the "train" topic and keep only the message payload as String.
val lines = spark.readStream
.format("kafka")
.option("kafka.bootstrap.servers", "bigdata.master.com:9092,bigdata.slave01.com:9092,bigdata.slave02.com:9092")
.option("subscribe", "train")
.load()
.selectExpr("CAST(value AS STRING)").as[String]
// Split each record on "," into a 17-element tuple.
// NOTE(review): assumes every record has at least 17 comma-separated fields
// and that fields 13 and 14 parse as Int — a single malformed record will
// throw (ArrayIndexOutOfBounds / NumberFormatException) and fail the query.
val words = lines.map(_.split(",")).map(x => {
(x(0), x(1), x(2), x(3)
, x(4), x(5), x(6), x(7)
, x(8), x(9), x(10), x(11)
// Field 13 ("jyje", transaction amount) is sign-flipped here; field 14
// ("kye", balance) is parsed as Int.
, x(12), -1*(x(13).toInt), x(14).toInt,
x(15), x(16))
})
.toDF("shdm", "shmc",
"xflb", "jydd",
"jylx", "digitalsign",
"sfrzh", "xm",
"kh", "jyrq",
"jysj", "jylsh",
"zdjh", "jyje",
"kye", "xtdm",
"xtmc")
// Keep positive amounts, then sum jyje per merchant name (shmc).
// NOTE(review): jyje was negated above, so this filter keeps records whose
// RAW field 13 was negative (debits?) — confirm this sign convention is intended.
val wordCounts = words.filter(words("jyje") > 0).groupBy("shmc").agg(("jyje", "sum")).withColumnRenamed("sum(jyje)", "jyje")
wordCounts.printSchema()
// `query` here is still a DataStreamWriter — .start() (returning a
// StreamingQuery) and awaitTermination() are presumably called below this
// visible chunk; without them nothing executes. TODO confirm.
val query = wordCounts.writeStream.foreach(writer).outputMode("update")