三个集合的小练习
wordCount简单
/**
* 对一个List进行wordCount,返回出现次数最多的三个单词,及其出现次数
*/
object WordCountSimple {
def main(args: Array[String]): Unit = {
// WordCount
val list = List("Hello", "world", "Hello", "world", "Hello World", "Hello Hadoop", "Hello spark")
// 将词组转换单词,1=>将一个String转换为一个字符数组,再将数组扁平化,可以直接使用flatMap()
val list1: List[String] = list.flatMap(_.split(" "))
// 将单词分组
val stringToStrings: Map[String, List[String]] = list1.groupBy(s => s)
// 将Map(hellp->list(hello,hello))转为Map(hello,2)
val list2: List[(String, Int)] = stringToStrings.map(s => (s._1, s._2.length)).toList
// 排序取前三
list2.sortBy(_._2).reverse.take(3)
}
}
wordtoCount复杂
方法一,同上
package cn.lpc
/**
* 对一个List进行wordCount,返回出现次数最多的三个单词,及其出现次数
* 不过list之间又是一个新的tuple
*/
object WordCountHard {
def main(args: Array[String]): Unit = {
val list = List(("Hello Hadoop Hive Kafka", 4), ("Hello Hadoop Hive", 3), ("Hello Hadoop", 2), ("Hello", 1))
println(list.map(s => (s._1 + " ") * s._2).flatMap(s => s.split(" ")).groupBy(s => s).toList.map(s=>(s._1,s._2.size)).sortBy(_._2).reverse.take(3))
}
}
方法二,
在map时也可以用函数做临时变量
object WordCountHard2 {
def main(args: Array[String]): Unit = {
val list = List(("Hello Hadoop Hive Kafka", 4), ("Hello Hadoop Hive", 3), ("Hello Hadoop", 2), ("Hello", 1))
val tuples: List[(String, Int)] = list.flatMap(
t => {
val strings: Array[String] = t._1.split(" ")
strings.map(s => (s, t._2))
}
)
val stringToTuples: Map[String, List[(String, Int)]] = tuples.groupBy(s => s._1)
val tuples1: List[(String, Int)] = stringToTuples.map(
t => {
(t._1, t._2.map(l => l._2))
}
).map(m => (m._1, m._2.sum)).toList.sortBy(s => s._2).reverse.take(3)
println(tuples1)
}
}
两个map的合并
将两个map相同的key的value相加,没有的key新增
object MapUnion {
def main(args: Array[String]): Unit = {
val map = Map(("a", 1), ("b", 2), ("c", 3))
val map1 = Map(("a", 4), ("d", 5), ("c", 6))
val map3: Map[String, Int] = map1.foldLeft(map)((mapTemp, kv) => {
val map2: Map[String, Int] = mapTemp.updated(kv._1, mapTemp.getOrElse(kv._1, 0) + kv._2)
map2
})
println(map3)
}
}
java
求每个地区单日的平均降雨量,一行到位
object ScalaTest8 {
def main(args: Array[String]): Unit = {
val time1 = List(
( "anheqiao", 1549044122, 10.0 ),
( "shengbeilu", 1549044122, 32.0 ),
( "pinganjie", 1549044122,25.0 )
)
val time2 = List(
( "anheqiao", 1549044122, 13.0 ),
( "shengbeilu", 1549044122, 34.0 ),
( "pinganjie", 1549044122,27.0 )
)
val time3 = List(
( "anheqiao", 1549130531, 14.0 ),
( "shengbeilu", 1549130522, 33.0 ),
( "pinganjie", 1549130522,26.0 )
)
val time4 = List(
( "anheqiao", 1549130542, 11.0 ),
( "shengbeilu", 154913032, 32.0 ),
( "pinganjie", 1549130545,23.0 )
)
val data = time1:::time2:::time3:::time4
val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
println(data.map(
m => {
(m._1 + "_" + dateFormat.format((m._2 * 10).toLong), m._3)
}
).groupBy(
t => t._1
).mapValues(
t => {
t.map(
t1 => {
t1._2
}
)
}
).toList.map(
m => {
(m._1, m._2.sum / m._2.size)
}
).map(
t => {
val strings: Array[String] = t._1.split("_")
(strings(0), strings(1), t._2)
}
).groupBy(
t => {
t._1
}
).mapValues(
t => {
t.map(
t1=>{
(t1._2,t1._3)
}
)
}
))
}
}