Maven POM dependency configuration
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.1.0</version>
</dependency>
1. Basic data source support
package com.spark.test.Structured

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession

/**
  * Created by Administrator on 2019/5/24.
  */
object wordCount {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    val spark = SparkSession
      .builder()
      .master("local")
      .appName("ss")
      .getOrCreate()
    import spark.implicits._

    // lines (a DataFrame) represents an unbounded table containing the streaming text data.
    // The table has a single column named "value"; each line of the stream becomes one row.
    val lines = spark.readStream
      .format("socket")
      .option("host", "172.16.28.224")
      .option("port", 19999)
      .load()

    // Split the lines into words.
    // The DataFrame must first be converted to a Dataset with as[String] before flatMap can be applied.
    val words = lines.as[String].flatMap(_.split(" "))

    // Start the streaming query, writing each word to the console in append mode.
    val query = words.writeStream.outputMode("append").format("console").start()
    query.awaitTermination()
  }
}
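The query above only writes each individual word to the console; it does not aggregate anything yet. Below is a minimal sketch of the actual running word count, assuming the same spark session and words Dataset as above: group on the word column and switch to complete output mode, since append mode does not allow this kind of streaming aggregation without a watermark.

// Running word count: group on the single "value" column and count occurrences.
val wordCounts = words.groupBy("value").count()

// Once an aggregation is involved, "complete" (or "update") output mode must be used.
val countQuery = wordCounts.writeStream
  .outputMode("complete")
  .format("console")
  .start()

countQuery.awaitTermination()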
2. Custom console output for Structured Streaming (consoleSink): extend the ForeachWriter class and override its methods
package com.spark.test.Structured

import org.apache.spark.sql.ForeachWriter

/**
  * Created by Administrator on 2019/5/24.
  */
class consoleSink extends ForeachWriter[String] {
  // Called once per partition for each trigger; returning true means the partition's rows should be processed.
  def open(partitionId: Long, version: Long): Boolean = {
    true
  }

  // Called once for every row in the partition.
  def process(value: String): Unit = {
    println(value)
  }

  // Called after the partition has been processed, with the error if one occurred (null otherwise).
  def close(errorOrNull: Throwable): Unit = {
  }
}
3. Custom JDBC sink: extend the ForeachWriter class and override its methods
import java.sql._

import org.apache.spark.sql.ForeachWriter

/**
  * Created by Administrator on 2019/5/24.
  */
class JDBCSink(url: String, user: String, password: String, table: String) extends ForeachWriter[(String, Int)] {
  val driver = "com.mysql.jdbc.Driver"
  var conn: Connection = _
  var stat: Statement = _

  def open(partitionId: Long, version: Long): Boolean = {
    Class.forName(driver)
    conn = DriverManager.getConnection(url, user, password)
    stat = conn.createStatement()
    true
  }

  def process(value: (String, Int)): Unit = {
    // The word is a string, so it must be quoted in the SQL literal
    // (a PreparedStatement would be the safer choice in production code).
    stat.executeUpdate("INSERT INTO " + table + " VALUES('" + value._1 + "'," + value._2 + ")")
  }

  def close(errorOrNull: Throwable): Unit = {
    if (stat != null) stat.close()
    if (conn != null) conn.close()
  }
}
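A minimal sketch of wiring this JDBCSink into a streaming query, assuming the words Dataset from the demo below, the MySQL JDBC driver (mysql-connector-java) on the classpath, and an existing two-column table; the JDBC URL, credentials, and the table name word_count are placeholders.

// Aggregate into (word, count) pairs and narrow the Long count to Int
// to match ForeachWriter[(String, Int)].
val wordCountPairs = words.groupBy("value").count()
  .map(row => (row.getString(0), row.getLong(1).toInt))

val jdbcSink = new JDBCSink("jdbc:mysql://localhost:3306/test", "root", "pass", "word_count")

val jdbcQuery = wordCountPairs.writeStream
  .outputMode("complete")   // every trigger emits the full result table
  .foreach(jdbcSink)
  .start()

jdbcQuery.awaitTermination()

Note that with plain INSERT statements each trigger re-inserts the full result, so in practice an upsert (for example INSERT ... ON DUPLICATE KEY UPDATE) would be needed to keep the table consistent.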
4. Demo
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession

/**
  * Created by Administrator on 2019/5/24.
  */
object wordCount {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    val spark = SparkSession
      .builder()
      .master("local")
      .appName("ss")
      .getOrCreate()
    import spark.implicits._

    // lines (a DataFrame) represents an unbounded table containing the streaming text data.
    // The table has a single column named "value"; each line of the stream becomes one row.
    val lines = spark.readStream
      .format("socket")
      .option("host", "172.16.28.224")
      .option("port", 19999)
      .load()

    // Split the lines into words.
    // The DataFrame must first be converted to a Dataset with as[String] before flatMap can be applied.
    val words = lines.as[String].flatMap(_.split(" "))

    // JDBC sink (expects (String, Int) rows; see the wiring sketch after section 3)
    val write = new JDBCSink("url", "user", "pass", "table")
    // console sink
    val cons = new consoleSink()

    // Start the streaming query, pushing each word to the custom console sink in append mode.
    val query = words.writeStream.outputMode("append").foreach(cons).start()
    query.awaitTermination()
  }
}
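To run either demo, a process must already be listening on the configured host and port before the query starts, for example nc -lk 19999 on 172.16.28.224; lines typed into that session are split into words and, in the demo above, printed by consoleSink for every micro-batch.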