司机ID 上车区域ID 下车区域ID 上车时间 下车时间
A 龙华区 宝安区 2020-07-15 10:05:10 2020-07-15 10:25:02
B 宝安区 福田区 2020-07-15 11:43:22 2020-07-15 11:55:45
A 龙岗区 宝安区 2020-07-15 11:55:55 2020-07-15 12:12:23
B 福田区 宝安区 2020-07-15 12:05:05 2020-07-15 12:22:33
A 龙岗区 龙华区 2020-07-15 11:02:08 2020-07-15 11:17:15
A 宝安区 龙岗区 2020-07-15 10:35:15 2020-07-15 10:40:50
B 龙华区 龙岗区 2020-07-15 10:45:25 2020-07-15 10:50:00
A 龙华区 龙岗区 2020-07-15 11:33:12 2020-07-15 11:45:35
B 宝安区 龙岗区 2020-07-15 12:27:20 2020-07-15 12:43:31
A 宝安区 龙岗区 2020-07-15 12:17:10 2020-07-15 12:33:21
B 福田区 龙华区 2020-07-15 10:15:21 2020-07-15 10:35:12
B 龙岗区 宝安区 2020-07-15 11:12:18 2020-07-15 11:27:25
需求: 每个区域的平均等客时间[每个区域所有司机的等客时间平均值]
package com.atguigu.chapter09
import java.text.SimpleDateFormat
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import scala.io.Source
/**
 * Computes the average passenger-waiting time per region from a ride log.
 *
 * Each input line holds five tab-separated columns:
 * driver id, pick-up region, drop-off region, pick-up time, drop-off time
 * (both times formatted as `yyyy-MM-dd HH:mm:ss`).
 *
 * A "wait" is the gap between a driver's drop-off and that same driver's next
 * pick-up; it is attributed to the region where the passenger was dropped off.
 * The program prints one `(region,averageWaitInWholeSeconds)` tuple per region.
 */
object $03_Home {

  /** Input file read when no path is passed on the command line. */
  private val DefaultPath = "datas/aa"

  /** Layout of the pick-up / drop-off timestamp columns. */
  private val TimePattern = "yyyy-MM-dd HH:mm:ss"

  /** One ride; `pickUp` and `dropOff` are epoch milliseconds. */
  private final case class Trip(driver: String, from: String, to: String, pickUp: Long, dropOff: Long)

  /**
   * Reads the ride log and prints the average waiting time (seconds) per region.
   *
   * @param args optional; `args(0)` overrides the input path (defaults to `datas/aa`)
   */
  def main(args: Array[String]): Unit = {
    val path = args.headOption.getOrElse(DefaultPath)

    // Materialise the lines before closing so the file handle is always released.
    val source = Source.fromFile(path, "utf-8")
    val lines = try source.getLines().toList finally source.close()

    averageWaitSeconds(lines).foreach(println(_))
  }

  /**
   * Parses the raw lines and averages the waiting time per drop-off region.
   *
   * @param lines raw tab-separated records; blank lines are ignored
   * @return region -> average wait in whole seconds (integer division, as before)
   */
  private def averageWaitSeconds(lines: List[String]): Map[String, Long] = {
    // One formatter is enough: parsing happens sequentially on this thread.
    val format = new SimpleDateFormat(TimePattern)

    // 1. Split every record and convert both timestamps to epoch milliseconds.
    val trips = lines.filter(_.trim.nonEmpty).map { line =>
      val cols = line.split("\t")
      Trip(cols(0), cols(1), cols(2), format.parse(cols(3)).getTime, format.parse(cols(4)).getTime)
    }

    // 2-4. Per driver: order rides by pick-up time and pair each ride with the next one.
    // zip/drop(1) yields nothing for a driver with a single ride, whereas
    // sliding(2) would emit a one-element window and produce a bogus negative wait.
    val waits = trips.groupBy(_.driver).toList.flatMap { case (_, rides) =>
      val ordered = rides.sortBy(_.pickUp)
      ordered.zip(ordered.drop(1)).map { case (previous, next) =>
        // wait = next pick-up time - previous drop-off time, spent in the drop-off region
        (previous.to, next.pickUp - previous.dropOff)
      }
    }

    // 5-6. Group the waits by region and average them (milliseconds -> seconds).
    waits.groupBy(_._1).map { case (region, samples) =>
      val totalMillis = samples.map(_._2).sum
      (region, totalMillis / samples.size / 1000)
    }
  }
}