计算不同商品的销售总价
package com.ws.orderCount
import java.sql.{Connection, DriverManager}
import com.google.gson.Gson
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.{Logger, LoggerFactory}
/**
 * Spark job that sums order amounts per goods category and stores the
 * totals in MySQL.
 *
 * Pipeline: read `data/order.log` (one JSON order per line) -> parse each line
 * into `(cid, money)` -> sum money per `cid` -> join with the category-name
 * table -> insert `(name, total)` rows into the `order_count` table.
 */
object OrderCount {
  import scala.util.control.NonFatal

  val logger: Logger = LoggerFactory.getLogger(OrderCount.getClass)

  /** Number of rows accumulated before a JDBC batch is sent to the database. */
  private final val BatchSize = 100

  /**
   * Runs the job on a local Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("OrderCount").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val orders: RDD[String] = sc.textFile("data/order.log")

      // Gson is not Serializable: creating it on the driver and using it inside
      // an RDD closure fails with "SparkException: Task not serializable".
      // It is therefore created inside the closure, once per partition.
      // Lines that cannot be parsed are logged and dropped by parseOrder.
      val orderDetail: RDD[(String, Double)] = orders.mapPartitions { lines =>
        val gson = new Gson
        lines.flatMap(line => parseOrder(gson, line).iterator)
      }

      // Aggregate first, join afterwards: the join then only sees one row per cid.
      val sumedvalue: RDD[(String, Double)] = orderDetail.reduceByKey(_ + _)

      // Category id -> category name lookup table.
      val goodsType: RDD[(String, String)] =
        sc.parallelize(List(("1", "家具"), ("2", "手机"), ("3", "服装")), 2)

      val joinR: RDD[(String, (String, Double))] = goodsType.join(sumedvalue)
      // Drop the cid key, keeping (category name, total money).
      val r: RDD[(String, Double)] = joinR.map(_._2)

      r.foreachPartition(writeToMysql)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }

  /**
   * Parses one JSON line into a `(cid, money)` pair.
   *
   * @param gson parser instance to use
   * @param line raw input line
   * @return `Some((cid, money))` on success; `None` if the line could not be
   *         parsed (the failure is logged together with the offending line)
   */
  private def parseOrder(gson: Gson, line: String): Option[(String, Double)] =
    try {
      // Parse once and read both fields from the same object.
      val order = gson.fromJson(line, classOf[Order])
      val cid: String = order.cid
      val money: Double = order.money
      Some((cid, money))
    } catch {
      // Covers malformed JSON as well as a null parse result (e.g. blank line).
      case NonFatal(e) =>
        logger.error("pass json err => " + line, e)
        None
    }

  /**
   * Writes one partition of `(category name, total money)` rows to the
   * `order_count` table, sending the inserts in batches of `BatchSize` rows.
   *
   * Empty partitions are skipped without opening a connection. Failures while
   * inserting are logged and swallowed (best effort); failures while opening
   * the connection or preparing the statement propagate to the caller.
   */
  val writeToMysql: Iterator[(String, Double)] => Unit = (it: Iterator[(String, Double)]) => {
    if (it.hasNext) {
      val conn: Connection = DriverManager.getConnection("jdbc:mysql://dream3:3306/bigdata?characterEncoding=UTF-8", "root", "root")
      try {
        val statement = conn.prepareStatement("insert into order_count(id,name,money) values(null,?,?)")
        try {
          // grouped() yields chunks of at most BatchSize rows, including the
          // final partial chunk, so every row is flushed exactly once.
          it.grouped(BatchSize).foreach { batch =>
            batch.foreach { case (name, money) =>
              statement.setString(1, name)
              statement.setDouble(2, money)
              statement.addBatch()
            }
            statement.executeBatch()
          }
        } catch {
          case NonFatal(e) =>
            logger.error("insert err", e)
        } finally {
          statement.close()
        }
      } finally {
        // Nested finally: the connection is closed even if closing the
        // statement (or preparing it) throws.
        conn.close()
      }
    }
  }
}
数据(data/order.log;其中第 2 条记录故意缺少开头的“{”,用来验证解析失败时的容错处理,该条记录不计入结果)
{"oid":"o123","cid": 1, "money": 600.0, "longitude":116.397128,"latitude":39.916527}
oid":"o112", "cid": 3, "money": 200.0, "longitude":118.396128,"latitude":35.916527}
{"oid":"o124", "cid": 2, "money": 200.0, "longitude":117.397128,"latitude":38.916527}
{"oid":"o125", "cid": 3, "money": 100.0, "longitude":118.397128,"latitude":35.916527}
{"oid":"o127", "cid": 1, "money": 100.0, "longitude":116.395128,"latitude":39.916527}
{"oid":"o128", "cid": 2, "money": 200.0, "longitude":117.396128,"latitude":38.916527}
{"oid":"o129", "cid": 3, "money": 300.0, "longitude":115.398128,"latitude":35.916527}
{"oid":"o130", "cid": 2, "money": 100.0, "longitude":116.397128,"latitude":39.916527}
{"oid":"o131", "cid": 1, "money": 100.0, "longitude":117.394128,"latitude":38.916527}
{"oid":"o132", "cid": 3, "money": 200.0, "longitude":118.396128,"latitude":35.916527}
结果
ArrayBuffer((手机,500.0), (服装,600.0), (家具,800.0))