先创建一个连接池,提高效率:
MysqlPool:
/**
 * A minimal pool of JDBC connections to the MySQL instance used by this example.
 *
 * Connections are opened lazily in small batches and handed out one at a time.
 * Every access to the idle list and to the connection counter happens while
 * holding the idle list's monitor, so `getJdbcConn` and `releaseConn` may be
 * called from several threads at once.
 */
object MysqlPool {
  /** Upper bound on the number of connections this pool will ever open. */
  private val max = 8
  /** Number of connections opened at once when the pool runs dry (capped by `max`). */
  private val connectionNum = 10
  /** Number of connections opened so far; only touched while holding `pool`'s monitor. */
  private var conNum = 0

  import java.util

  // Load the JDBC driver as a plain statement of the object initializer. It is
  // deliberately NOT written as a brace block right after a `new` expression,
  // because Scala would parse such a block as an anonymous-subclass body.
  Class.forName("com.mysql.jdbc.Driver")

  /** Idle connections waiting to be borrowed; also serves as the lock object. */
  private val pool = new util.LinkedList[Connection]()

  /**
   * Returns a previously borrowed connection to the pool and wakes up one
   * caller that may be blocked in `getJdbcConn`.
   *
   * @param conn the connection to hand back; `null` is ignored so that a
   *             `null` entry can never be handed to a later borrower
   */
  def releaseConn(conn: Connection): Unit =
    if (conn != null) {
      pool.synchronized {
        pool.push(conn)
        pool.notify()
      }
    }

  /**
   * Borrows a connection from the pool.
   *
   * If no idle connection is available, a new batch of at most `connectionNum`
   * connections is opened, never exceeding `max` connections in total. Once
   * `max` connections exist and all of them are borrowed, the caller blocks
   * until another thread calls `releaseConn`.
   *
   * @return an open JDBC connection; the caller must give it back with `releaseConn`
   */
  def getJdbcConn(): Connection =
    pool.synchronized {
      // At the cap with nothing idle: wait for a release instead of opening more.
      while (pool.isEmpty && conNum >= max) {
        pool.wait()
      }
      if (pool.isEmpty) {
        val batch = math.min(connectionNum, max - conNum)
        for (_ <- 1 to batch) {
          val conn = DriverManager.getConnection("jdbc:mysql://hadoop:13306/test", "root", "root")
          pool.push(conn)
          conNum += 1
        }
      }
      pool.poll()
    }
}
将socket获取到的数据经过处理后,使用foreachRDD算子持久化到数据库:
ForeachRDDTest:
/**
 * Spark Streaming example: reads comma-separated words from a socket, keeps a
 * running count per word with `updateStateByKey`, and writes the counts of
 * every batch to the MySQL table `test.wordcount`.
 */
object ForeachRDDTest {
  /**
   * Builds the streaming pipeline, starts it and blocks until it terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val checkpointDirectory = "D:\\tmp\\checkPoint"

    // Program entry point: streaming context with a 10-second batch interval.
    val conf = new SparkConf().setAppName(s"${this.getClass.getSimpleName}").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(10))
    // updateStateByKey below requires a checkpoint directory.
    ssc.checkpoint(checkpointDirectory)

    val myDStream = ssc.socketTextStream("hadoop", 9999)
    val wordAndOneDStream = myDStream.flatMap(_.split(","))
      .map((_, 1))

    // State update: new total = occurrences in this batch + previous total (0 if none).
    val wordCount = (values: Seq[Int], state: Option[Int]) => {
      val currentCount = values.sum
      val lastCount = state.getOrElse(0)
      Some(currentCount + lastCount)
    }
    val resultDStream: DStream[(String, Int)] = wordAndOneDStream.updateStateByKey(wordCount)

    /*
     * Nesting of the output operations:
     *   Spark Core      -> RDD:     foreachPartition -> foreach
     *   Spark Streaming -> DStream: foreachRDD -> foreachPartition -> foreach
     */
    resultDStream.foreachRDD(rdd => {
      rdd.foreachPartition(partition => {
        // Do not borrow a connection for a partition that has nothing to write.
        if (partition.nonEmpty) {
          val connection = MysqlPool.getJdbcConn()
          try {
            // Parameterized insert: words come from the socket and must never be
            // spliced into the SQL text (quotes would break it or inject SQL).
            val statement = connection.prepareStatement(
              "insert into test.wordcount values(now(), ?, ?)")
            try {
              partition.foreach { case (word, count) =>
                statement.setString(1, word)
                statement.setInt(2, count)
                statement.addBatch()
              }
              // Persist the whole partition to MySQL in one batch.
              statement.executeBatch()
            } finally {
              statement.close()
            }
          } finally {
            // Always hand the connection back, even when an insert failed.
            MysqlPool.releaseConn(connection)
          }
        }
      })
    })

    // Start the computation and block until it is stopped.
    ssc.start()
    ssc.awaitTermination()
  }
}