1 有数据文件如下,每行以逗号分隔,各字段依次代表:
name 用户名称
age 年龄
temp 温度
ct 测试时间(毫秒级时间戳)
c4ijk,24,99,1597550000000
1351f,45,51,1597550000000
9m2bg,81,55,1597550000000
nbchj,58,58,1597550000000
...
需求:scala语言实现下列功能
1 产生报表结果
时间(天) 用户数量 最高温度 ...
20201010 100 60
答案
package homework
import java.text.SimpleDateFormat
import java.util.Date
import scala.collection.mutable
import scala.io.BufferedSource
/**
 * Homework 6: builds a per-day report (distinct user count and maximum
 * temperature) from CSV rows of the form `name,age,temp,ct`.
 */
object HW6 {

  /** One parsed CSV row; `partition` is the day string derived from `ct`. */
  case class TempInfo(partition: String, name: String, age: Int, temp: Int, ct: Long)

  /** Aggregated figures for one day: number of distinct user names and highest temperature. */
  case class TempInfoMean(userCount: Int, maxTemp: Int)

  /**
   * Formats an epoch-millisecond timestamp.
   *
   * @param tm       timestamp in milliseconds since the epoch
   * @param formater a `SimpleDateFormat` pattern, e.g. `"yyyyMMdd"`
   * @return the timestamp rendered with the JVM's default time zone
   */
  def tranTimeToString(tm: Long, formater: String): String =
    new SimpleDateFormat(formater).format(new Date(tm))

  /**
   * Aggregates raw CSV lines into a per-day report.
   *
   * Blank lines are skipped. Any other line that does not have exactly four
   * comma-separated fields fails with a `MatchError`, and non-numeric
   * age/temp/ct fields fail with a `NumberFormatException`.
   *
   * @param lines raw lines in `name,age,temp,ct` order
   * @return a strict map from day (`yyyyMMdd`) to that day's aggregate
   */
  def buildReport(lines: Iterator[String]): Map[String, TempInfoMean] = {
    val records: List[TempInfo] = lines
      .filter(_.trim.nonEmpty)
      .map { line =>
        val Array(name, ages, temps, cts) = line.split(",")
        val ct = cts.toLong
        TempInfo(tranTimeToString(ct, "yyyyMMdd"), name, ages.toInt, temps.toInt, ct)
      }
      .toList

    // `map` instead of `mapValues`: mapValues yields a lazy view, which prints
    // as "MapView(<not computed>)" on Scala 2.13 and recomputes on each access
    // on 2.12.
    records.groupBy(_.partition).map { case (day, dayRecords) =>
      day -> TempInfoMean(
        userCount = dayRecords.map(_.name).toSet.size,
        // groupBy never produces an empty group, so `max` is safe here, and
        // unlike seeding with 0 it is correct for all-negative temperatures.
        maxTemp = dayRecords.map(_.temp).max
      )
    }
  }

  /**
   * Reads the hard-coded local CSV file, prints the per-day report, and
   * always closes the file afterwards.
   */
  def readFile(): Unit = {
    // Data source: local file
    val fileDatas: BufferedSource = scala.io.Source.fromFile("D:/data/spark/csv/temp.csv")
    val datas =
      try buildReport(fileDatas.getLines()) // fully materialised before the source is closed
      finally fileDatas.close()
    println(s"datas=${datas}")
  }

  /** Entry point: produces and prints the report. */
  def main(args: Array[String]): Unit = {
    readFile()
  }
}
我自己写的
import java.nio.file.Files.{lines, list}
import java.sql.Date
import java.text.SimpleDateFormat
import scala.io.BufferedSource
import scala.collection.mutable.ListBuffer
/**
 * Weekly quiz solution: prints, for every day found in the CSV file,
 * the number of distinct users and the highest temperature.
 *
 * @Author 12654
 * @Create 2020/10/16 - 14:34
 */
object zhoukao {

  /**
   * Aggregates raw CSV lines (`name,age,temp,ct`) into one row per day.
   *
   * Blank lines are skipped. Lines with fewer than four fields fail with an
   * `ArrayIndexOutOfBoundsException`; non-numeric temp/ct fields fail with a
   * `NumberFormatException`.
   *
   * @param csvLines raw lines of the data file
   * @return `(day as yyyyMMdd, distinct user count, max temperature)`, sorted by day
   */
  def dailyReport(csvLines: Iterator[String]): List[(String, Int, Int)] = {
    // One formatter for the whole pass instead of one per row.
    val fm = new SimpleDateFormat("yyyyMMdd")
    csvLines
      .filter(_.trim.nonEmpty)
      .map(_.split(","))
      // (day, user name, temperature)
      .map(arr => (fm.format(new java.util.Date(arr(3).toLong)), arr(0), arr(2).toInt))
      .toList
      .groupBy(_._1)
      .map { case (day, rows) =>
        // Count distinct names, not rows: a user measured twice on the same
        // day is still one user.
        (day, rows.map(_._2).distinct.size, rows.map(_._3).max)
      }
      .toList
      .sortBy(_._1)
  }

  /** Entry point: reads the hard-coded CSV file, prints the report, and closes the file. */
  def main(args: Array[String]): Unit = {
    val fileDatas: BufferedSource = scala.io.Source.fromFile("D:/code/test_10_15/src/temp.csv")
    val list1 =
      try dailyReport(fileDatas.getLines()) // materialised into a List before the source is closed
      finally fileDatas.close()
    println(list1)
  }
}