// Spark data, day t
package Spark
import java.text.SimpleDateFormat
import org.apache.spark.{SparkConf, SparkContext}
object zuoye1 {
  def main(args: Array[String]): Unit = {
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd") // declared but not used below
    System.setProperty("hadoop.home.dir", "D:/")
    val conf: SparkConf = new SparkConf().setAppName("login").setMaster("local[*]")
    // Create the SparkContext; all RDDs below are built from it
    val sc: SparkContext = new SparkContext(conf)
    val lines = sc.textFile("data1.txt")
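    // Assumed input layout (inferred from the split below): each line of data1.txt is a
    // comma-separated record of  name,device,time  -- e.g. (hypothetical):
    //   zhangsan,device01,2020-03-01 10:20:30
    // The name field may be empty; the guid logic at the end falls back to the device id.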
    val line = lines.map(line => {
      val strings: Array[String] = line.split(",")
      val name = strings(0)
      val device = strings(1)
      val time = strings(2)
      (device, name, time)
    })
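    // Group by device, then score each record within a device: the counter starts at 110
    // and drops by 10 before each record, so records are scored 100, 90, 80, ... in order.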
    val value = line.groupBy(_._1)
    val value1 = value.flatMapValues(x => {
      var a = 110
      x.map(k => {
        a = a - 10
        (k._2, k._3, a)
      })
    })
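    // Re-key each record by (device, name), keep its (time, score) pairs, and replace
    // every time with the first time seen for that (device, name) pair.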
    val value2 = value1.map(x => {
      ((x._1, x._2._1), x._2._2, x._2._3)
    }).groupBy(_._1).map(x => {
      (x._1, x._2.map(k => {
        (k._2, k._3)
      }))
    }).map(x => {
      val time = x._2.head._1
      (x._1, x._2.map(k => {
        (time, k._2)
      }))
    })
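    // Collapse each (device, name) group to (device, name, first time, total score),
    // regroup by device, sort by device id, and attach a guid: the user name when one
    // exists, otherwise the device id itself.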
    val value3 = value2.map(x => {
      // group each (device, name)'s (time, score) pairs by time
      (x._1, x._2.groupBy(_._1))
    }).map(x => {
      // flatten to (device, name, times, all scores)
      (x._1._1,
        x._1._2,
        x._2.map(j => j._2.map(y => y._1)).map(s => s.head),
        x._2.map(k => k._2.map(g => g._2)).flatMap(l => l))
    }).map(x => {
      // (device, name, first time, total score)
      (x._1, x._2, x._3.head, x._4.reduce(_ + _))
    }).map(x => {
      (x._1, (x._2, x._3, x._4))
    }).groupByKey().sortBy(_._1).map(x => {
      // guid is the user name when present, otherwise the device id
      val guid = if (x._2.head._1 != "") "guid:" + x._2.head._1 else "guid:" + x._1
      ("deviceId:" + x._1, guid, x._2)
    })
    println(value3.collect().toBuffer)
    sc.stop()
  }
}
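// A more compact sketch of the same idea, kept in this file so it shares the imports
// above. Everything here is illustrative rather than the original author's code: it
// assumes the same name,device,time layout, scores each device's records 100, 90, 80, ...
// in file order, totals the score per (device, name), and labels the device with
// "guid:<name>" when a name exists, otherwise "guid:<deviceId>".
object zuoye1Sketch {
  def main(args: Array[String]): Unit = {
    System.setProperty("hadoop.home.dir", "D:/")
    val sc = new SparkContext(new SparkConf().setAppName("login-sketch").setMaster("local[*]"))

    // (device, (name, time)) parsed from the assumed comma-separated layout
    val records = sc.textFile("data1.txt")
      .map(_.split(","))
      .map(f => (f(1), (f(0), f(2))))

    val perDevice = records.groupByKey().map { case (device, rows) =>
      // score the rows 100, 90, 80, ... in iteration order
      val scored = rows.zipWithIndex.map { case ((name, time), i) => (name, time, 100 - 10 * i) }
      // total the score per name, keeping the first time seen for that name
      val perName = scored.groupBy(_._1).map { case (name, xs) =>
        (name, xs.head._2, xs.map(_._3).sum)
      }
      val firstName = scored.head._1
      val guid = if (firstName.nonEmpty) "guid:" + firstName else "guid:" + device
      ("deviceId:" + device, guid, perName)
    }.sortBy(_._1)

    perDevice.collect().foreach(println)
    sc.stop()
  }
}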