package com._51doit.spark.pers
import java.sql.{Connection, Date, DriverManager, PreparedStatement, SQLException}
import com.alibaba.fastjson.{JSON, JSONException}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object Order {
// private val logger: Logger = LoggerFactory.getLogger(Order.getClass)
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("Order").setMaster("local[*]")
val sc = new SparkContext(conf)
//读取FastJion文件
val lines: RDD[String] = sc.textFile("C:\\Users\\admin\\Desktop\\FastJion.txt")
//读取文件
val rdd: RDD[OrderBean] = lines.map(line => {
var orderBean: OrderBean = null
try {
//解析文件
orderBean = JSON.parseObject(line, classOf[OrderBean])
} catch {
case e: JSONException => {
// e.printStackTrace()
// logger.error("parse json error=>",line)
}
}
orderBean
})
//过滤掉坏文件,确保文件由内容
val rdd2NotNull: RDD[OrderBean] = rdd.filter(_ != null)
//获取pojo内所需要的变量
val cidAndMoney: RDD[(Int, Double)] = rdd2NotNull.map(i => (i.cid, i.money))
//进行聚合
val rdd4: RDD[(Int, Double)] = cidAndMoney.reduceByKey(_ + _)
//读取order文件
val commodity = sc.textFile("C:\\Users\\admin\\Desktop\\commodity.txt")
//切割成数组,并获取文件文件具体内容
val cidAndCity: RDD[(Int, String)] = commodity.map(it => {
val strings = it.split(",")
val cid = strings(0).toInt
val city = strings(1)
(cid, city)
}
)
//合并文件,相同的key的value形成元祖,翻转一下value好看
val value = rdd4.join(cidAndCity)
val rdd5: RDD[(String, Double)] = value.map(it =>
it._2.swap
)
//数据量多不要写到Driver端,会造成内存溢出,经过网络传输,效率低
//数量大写入mysql或者HBase
/*
rdd5.foreach(it=> {
var con: Connection =null
var statement: PreparedStatement =null
try {
con = DriverManager.getConnection(
"jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8", "root", "root")
statement = con.prepareStatement("INSERT INTO tb_result values (2,?, ?, ?)")
statement.setString(1, it._1)
statement.setDouble(2, it._2)
statement.setDate(3, new Date(System.currentTimeMillis()))
statement.executeLargeUpdate()
} catch {
case e:SQLException =>
}finally {
if(statement!=null){
statement.close()
}
if(con!=null){
con.close()
}
}
}
)
*/
rdd5.foreachPartition(dataToMYSQL)
sc.stop()
}
val dataToMYSQL: Iterator[(String, Double)] => Unit = (it:Iterator[(String,Double)])=>{
var statement: PreparedStatement=null
var conn: Connection =null
try {
conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8", "root", "root")
statement = conn.prepareStatement("INSERT INTO tb_result values(3,?,?,?)")
it.foreach(i => {
statement.setString(1, i._1)
statement.setDouble(2, i._2)
statement.setDate(3, new Date(System.currentTimeMillis()))
statement.addBatch()
})
statement.executeBatch()
()//返回值要有即使为空,没有就是Any
} catch {
case e:SQLException =>
}finally {
if(statement!=null){
statement.close()
}
if(conn!=null){
conn.close()
}
}
}
}
{"cid": 1, "money": 600.0, "longitude":116.397128,"latitude":39.916527,"oid":"o123", }
"oid":"o112", "cid": 3, "money": 200.0, "longitude":118.396128,"latitude":35.916527}
{"oid":"o124", "cid": 2, "money": 200.0, "longitude":117.397128,"latitude":38.916527}
{"oid":"o125", "cid": 3, "money": 100.0, "longitude":118.397128,"latitude":35.916527}
{"oid":"o127", "cid": 1, "money": 100.0, "longitude":116.395128,"latitude":39.916527}
{"oid":"o128", "cid": 2, "money": 200.0, "longitude":117.396128,"latitude":38.916527}
{"oid":"o129", "cid": 3, "money": 300.0, "longitude":115.398128,"latitude":35.916527}
{"oid":"o130", "cid": 2, "money": 100.0, "longitude":116.397128,"latitude":39.916527}
{"oid":"o131", "cid": 1, "money": 100.0, "longitude":117.394128,"latitude":38.916527}
{"oid":"o132", "cid": 3, "money": 200.0, "longitude":118.396128,"latitude":35.916527}
数据2
oid:订单id,String类型
cid: 商品分类id,Int类型
money: 订单金额,Double类型
longitude: 经度,Double类型
latitude: 纬度,Double类型
分类信息
1,家具
2,手机
3,服装