Create the table:
(screenshot)
1) JDBCUtil
package com.atguigu.my_realtime.util

import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet, ResultSetMetaData}

object JDBCUtil {
  def query(url: String, sql: String, args: List[Object]): List[Map[String, Object]] = {
    val conn: Connection = DriverManager.getConnection(url)
    val ps: PreparedStatement = conn.prepareStatement(sql)
    // bind the positional placeholders
    args.zipWithIndex.foreach {
      // def setObject(parameterIndex: Int, x: Any): Unit
      // @param parameterIndex the first parameter is 1, the second is 2, ...
      case (arg, i) =>
        ps.setObject(i + 1, arg)
    }
    val rs: ResultSet = ps.executeQuery()
    var ans = List[Map[String, Object]]()
    val meta: ResultSetMetaData = rs.getMetaData
    while (rs.next()) {
      var map = Map[String, Object]()
      // JDBC column indexes are 1-based
      for (i <- 1 to meta.getColumnCount) {
        val key: String = meta.getColumnName(i)
        val value: AnyRef = rs.getObject(i)
        map += key -> value
      }
      // ans = map :: ans   // prepend: reverses the row order
      // ans +:= map        // same as above
      ans :+= map // append, preserving row order
    }
    rs.close()
    ps.close()
    conn.close()
    ans
  }

  def main(args: Array[String]): Unit = {
    val url = "jdbc:mysql://hadoop162:3306/gmall?user=root&password=aaaaaa"
    val ans: List[Map[String, Object]] = query(url, "select * from spu_info", Nil)
    ans.foreach(println)
  }
}
(screenshot)
2) Reading offset(s) from MySQL
Add this method to the OffsetManager utility class:
(screenshots)
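The method body is only in the lost screenshots. A minimal sketch of what it may look like, reusing the JDBCUtil above; the URL and the ads_offset column names (group_id, topic, partition_id, partition_offset) are assumptions inferred from the replace-into statement in step 6:

package com.atguigu.my_realtime.util

import org.apache.kafka.common.TopicPartition

object OffsetManager {
  // hypothetical URL, mirroring the one in JDBCUtil.main but pointing at gmall_result
  val url = "jdbc:mysql://hadoop162:3306/gmall_result?user=root&password=aaaaaa"

  // read the offsets previously saved for this group/topic from ads_offset
  // (the table layout is an assumption)
  def readOffsetsFromMysql(groupId: String, topic: String): Map[TopicPartition, Long] = {
    val sql =
      """
        |select partition_id, partition_offset
        |from ads_offset
        |where group_id = ? and topic = ?
        |""".stripMargin
    JDBCUtil.query(url, sql, List(groupId, topic))
      .map(row => new TopicPartition(topic, row("partition_id").toString.toInt) ->
        row("partition_offset").toString.toLong)
      .toMap
  }
}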
The offsets of the two partitions:
(screenshots)
Code:
package com.atguigu.my_realtime.ads

import com.atguigu.my_realtime.base.BaseApp___

object AdsOrderWideApp extends BaseApp___ {

  import org.apache.spark.streaming.StreamingContext
  import org.apache.spark.streaming.dstream.DStream
  import org.apache.spark.streaming.kafka010.OffsetRange

  import scala.collection.mutable.ListBuffer

  override val master: String = "local[2]"
  override val appName: String = "ABC"
  override val groupId: String = "xyz"
  override val topic: String = "dws_order_wide"
  override val bachTime: Int = 3 // (sic: the field is named bachTime in BaseApp___)

  override def run(ssc: StreamingContext, sourceStream: DStream[String], offsetRanges: ListBuffer[OffsetRange]): Unit = {
    sourceStream.map(str => {
      import com.atguigu.my_realtime.bean.OrderWide
      import org.json4s.jackson.JsonMethods
      implicit val f = org.json4s.DefaultFormats
      JsonMethods.parse(str).extract[OrderWide]
    }).foreachRDD( // one RDD per 3-second batch
      rdd => {
        import com.atguigu.my_realtime.bean.OrderWide
        val arr: Array[OrderWide] = rdd.collect()
        arr.foreach(println)
      }
    )
  }
}
3) Aggregating the actual split amount by trademark
(screenshot)
To avoid excess decimal places:
(screenshot)
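Both screenshots are gone, but the same trick appears verbatim in the full code in step 6: round each operand to cents before adding, so Double error can never accumulate. A standalone illustration:

// plain Double addition accumulates binary floating-point error:
// 0.1 + 0.2 == 0.30000000000000004
def addMoney(x: Double, y: Double): Double =
  (math.round(x * 100).toDouble + math.round(y * 100).toDouble) / 100

addMoney(0.1, 0.2) // 0.3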
4) Using ScalikeJDBC
First, create the table:
CREATE TABLE tm_amount(
    stat_time DATETIME,
    tm_id VARCHAR(20),
    tm_name VARCHAR(200),
    amount DECIMAL(16,2),
    PRIMARY KEY (stat_time, tm_id, tm_name)
) ENGINE=INNODB DEFAULT CHARSET=utf8;
(screenshot)
Configuration file:
db.default.driver="com.mysql.jdbc.Driver"
db.default.url="jdbc:mysql://hadoop162:3306/gmall_result?characterEncoding=utf-8&useSSL=false"
db.default.user="root"
db.default.password="aaaaaa"
(screenshot)
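A quick way to verify that the configuration is picked up (a sketch; it assumes the file above sits on the classpath as application.conf, which is what scalikejdbc-config reads by default):

import scalikejdbc._
import scalikejdbc.config.DBs

object ConfigCheck {
  def main(args: Array[String]): Unit = {
    DBs.setup() // builds the 'default connection pool from the db.default.* keys
    val one = DB.readOnly { implicit session =>
      SQL("select 1").map(_.int(1)).single().apply()
    }
    println(one) // Some(1) if the connection works
    DBs.close()
  }
}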
5) Inserting data into MySQL
Test insert, successful:
(screenshot)
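The test code itself is not preserved; a minimal sketch of what it may have looked like (the sample row is made up):

import scalikejdbc._
import scalikejdbc.config.DBs

object InsertTest {
  def main(args: Array[String]): Unit = {
    DBs.setup()
    DB.autoCommit { implicit session =>
      SQL("insert into tm_amount values(?, ?, ?, ?)")
        .bind("2021-06-18 10:30:00", "1", "tm_test", 999.00)
        .update()
        .apply()
    }
    DBs.close()
  }
}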
Usage of : _* (the two screenshots below):
: _*, taken as a whole, tells the compiler to treat the given argument as a sequence of individual arguments. For example, val s = sum(1 to 4: _*) passes the elements of 1 to 4 as separate arguments.
(screenshots)
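A minimal example of the syntax:

def sum(nums: Int*): Int = nums.sum

sum(1, 2, 3, 4)         // ordinary varargs call
val s = sum(1 to 4: _*) // : _* expands the Range into individual arguments; s == 10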
Data inserted successfully (offsets not yet written to MySQL):
(screenshots)
Comparing with the console output shows that the same data was written to MySQL:
(screenshots)
6) Writing offsets to MySQL
Either of the following two approaches can update rows that already exist:
(screenshot)
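The screenshot is lost; in MySQL the two usual options are REPLACE INTO (what the code below uses) and INSERT ... ON DUPLICATE KEY UPDATE. A sketch, assuming ads_offset is keyed on (group_id, topic, partition_id) and has a partition_offset column:

-- option 1: REPLACE deletes the old row (if any), then inserts the new one
REPLACE INTO ads_offset VALUES (?, ?, ?, ?);

-- option 2: update in place when the key already exists
INSERT INTO ads_offset VALUES (?, ?, ?, ?)
ON DUPLICATE KEY UPDATE partition_offset = VALUES(partition_offset);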
Key code:
(screenshot)
package com.atguigu.my_realtime.ads

import com.atguigu.my_realtime.base.BaseApp___

object AdsOrderWideApp extends BaseApp___ {

  import org.apache.spark.streaming.StreamingContext
  import org.apache.spark.streaming.dstream.DStream
  import org.apache.spark.streaming.kafka010.OffsetRange

  import scala.collection.mutable.ListBuffer

  override val master: String = "local[2]"
  override val appName: String = "ABC"
  override val groupId: String = "xyz"
  override val topic: String = "dws_order_wide"
  override val bachTime: Int = 3

  override def run(ssc: StreamingContext, sourceStream: DStream[String], offsetRanges: ListBuffer[OffsetRange]): Unit = {
    import scalikejdbc.config.DBs
    DBs.setup()
    sourceStream.map(str => {
      // println("inside AdsOrderWideApp.run; STR: " + str)
      import com.atguigu.my_realtime.bean.OrderWide
      import org.json4s.jackson.JsonMethods
      implicit val f = org.json4s.DefaultFormats
      val wide: OrderWide = JsonMethods.parse(str).extract[OrderWide]
      // println("=================>" + wide)
      // if (wide.tm_id == 2) println("~~~~~~~~~~~" + wide.final_detail_amount)
      ((wide.tm_id -> wide.tm_name), wide.final_detail_amount)
    }) // .reduceByKey(_ + _) would accumulate Double rounding error
      .reduceByKey((x, y) => {
        // round both operands to cents before adding, so the sum stays at 2 decimals
        val sum = (math.round(x * 100).toDouble + math.round(y * 100).toDouble) / 100
        sum
      })
      .foreachRDD( // one RDD per 3-second batch
        rdd => {
          import java.time.LocalDateTime
          import java.time.format.DateTimeFormatter
          import scalikejdbc.DB
          val now: LocalDateTime = LocalDateTime.now()
          // an explicit pattern is safer than toString.substring(0, 8):
          // LocalTime.toString omits the seconds when they are zero
          val dateTime: String = now.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))
          val tm_amount = rdd.collect()
            .map {
              case ((tm_id, tm_name), amount) =>
                Seq(dateTime, tm_id, tm_name, amount)
            }
          val ads_offsets: ListBuffer[Seq[Any]] = offsetRanges.map(or => {
            Seq(groupId, topic, or.partition, or.untilOffset)
          })
          DB.localTx(implicit session => {
            // everything below executes in a single transaction
            println("entering session~~~")
            import scalikejdbc.SQL
            val dataSql =
              """
                |insert into tm_amount values(?, ?, ?, ?)
                |""".stripMargin
            SQL(dataSql).batch(tm_amount: _*).apply()
            // throw new UnsupportedOperationException // uncomment to verify the rollback
            val offsetSql =
              """
                |replace into ads_offset values(?, ?, ?, ?)
                |""".stripMargin
            SQL(offsetSql).batch(ads_offsets: _*).apply()
            println("finished the writes on the DBSession...")
          })
        }
      )
  }
}
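Because the aggregated rows and the offsets are written inside the same DB.localTx block, they commit or roll back together: if the job dies mid-batch, the offsets stored in MySQL still match the last batch whose data actually landed, and that batch is simply reprocessed after a restart.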
7) Publishing the interface
(screenshot)
TMMapper.xml:
(screenshot)
TMMapper.java:
(screenshot)
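Neither screenshot survived. The mapper presumably runs an aggregation along these lines (a sketch; the per-day filter and the aliases are assumptions):

SELECT tm_id, tm_name, SUM(amount) AS amount
FROM tm_amount
WHERE DATE(stat_time) = CURDATE()
GROUP BY tm_id, tm_name;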
Querying data through the interface:
(screenshot)
After a while, the data has visibly changed:
(screenshot)