Spark Streaming: persisting streaming results to MySQL (sparkStreaming 存储数据到 MySQL)

package sparkStreaming

import org.apache.spark.SparkConf

import org.apache.spark.streaming.Seconds

import org.apache.spark.streaming.StreamingContext

import org.apache.spark.streaming.kafka.KafkaUtils

import org.apache.spark.HashPartitioner

import org.apache.spark.streaming.Duration

import org.apache.spark.sql.SQLContext

import org.apache.spark.{SparkContext, SparkConf}

import spark.bean.orders

import java.util.Properties

import java.sql.{DriverManager, PreparedStatement, Connection}

import org.apache.spark.{SparkContext, SparkConf}

object WebPagePopularityValueCalculator {

  /** Entry point.
    *
    * Consumes page-view events from Kafka (via the receiver-based
    * `KafkaUtils.createStream` API), maintains a running popularity score per
    * page with `updateStateByKey`, and on every batch writes the top-10
    * non-zero pages to MySQL.
    */
  def main(args: Array[String]): Unit = {
    // Fixed connection settings for the local Kafka/ZooKeeper test setup;
    // `args` is intentionally ignored in this demo.
    val Array(zkQuorum, group, topics) = Array("localhost:2181", "1", "sun_test_topic")

    val sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(2))
    // Checkpointing is mandatory for updateStateByKey.
    ssc.checkpoint("checkpoint")

    // Each topic is read with 2 receiver threads.
    val topicMap = topics.split(",").map((_, 2)).toMap
    // Keep only the message payload (._2); the Kafka key is unused.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)

    // Expected message format: pageID|f1|f2|f3 (pipe separated).
    // NOTE(review): the 0.8 / 0.8 / 1 weights are copied from the original;
    // confirm they match the intended popularity model.
    val popularityData = lines.map { msgLine =>
      val fields: Array[String] = msgLine.split("\\|")
      val popValue: Double =
        fields(1).toFloat * 0.8 + fields(2).toFloat * 0.8 + fields(3).toFloat * 1
      (fields(0), popValue)
    }

    // Sum this batch's popularity deltas with the previously accumulated state.
    // (The original wrapped each result in Some(...).map(...) inside flatMap;
    // a plain map over the iterator is equivalent and clearer.)
    val updatePopularityValue = (iterator: Iterator[(String, Seq[Double], Option[Double])]) => {
      iterator.map { case (pageId, newValues, state) =>
        (pageId, newValues.sum + state.getOrElse(0.0))
      }
    }

    val initialRDD = ssc.sparkContext.parallelize(List(("page1", 0.00)))
    val stateDstream = popularityData.updateStateByKey[Double](updatePopularityValue,
      new HashPartitioner(ssc.sparkContext.defaultParallelism), true, initialRDD)

    // Checkpoint every 16 s (8 x 2 s batches) to avoid too-frequent data
    // checkpointing, which may significantly reduce operation throughput.
    stateDstream.checkpoint(Duration(8 * 2 * 1000))

    // After calculation, sort descending by score and keep only the top 10
    // hot pages; persist the non-zero ones in a single MySQL round-trip.
    stateDstream.foreachRDD { rdd =>
      val topKData = rdd
        .map { case (k, v) => (v, k) }
        .sortByKey(false)
        .take(10)
        .map { case (v, k) => (k, v) }

      val nonZero = topKData.filter { case (_, v) => v != 0 }
      nonZero.foreach { case (k, v) =>
        println("page" + k + " " + "value" + v)
      }
      // One connection for the whole batch instead of one per record
      // (the original called toMySql once per row).
      if (nonZero.nonEmpty) {
        toMySql(nonZero.iterator)
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }

  /** Inserts every (page, score) pair into the `userbehavior` table.
    *
    * Opens one JDBC connection and one PreparedStatement for the whole
    * iterator. The original code created a fresh PreparedStatement per row
    * inside the loop and closed only the last one, leaking the rest.
    *
    * Any exception is logged and swallowed (best-effort sink, as in the
    * original); the connection and statement are always closed.
    *
    * @param iterator (page id, popularity value) pairs to persist
    */
  def toMySql(iterator: Iterator[(String, Double)]): Unit = {
    var conn: Connection = null
    var ps: PreparedStatement = null
    val sql = "insert into userbehavior(page, number) values (?, ?)"
    try {
      Class.forName("com.mysql.jdbc.Driver")
      conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/spark", "root", "Sun@123")
      // Prepare once, bind and execute per row.
      ps = conn.prepareStatement(sql)
      iterator.foreach { case (page, value) =>
        ps.setString(1, page)
        ps.setDouble(2, value)
        ps.executeUpdate()
      }
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      if (ps != null) {
        ps.close()
      }
      if (conn != null) {
        conn.close()
      }
    }
  }

}

sparkStreaming 存储数据到 MySQL

标签:exce   value   tor   cut   set   opera   order   man   nec

本条技术文章来源于互联网,如果无意侵犯您的权益请点击此处反馈版权投诉

本文系统来源:http://www.cnblogs.com/sunyaxue/p/6544033.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值