数据准备
allprovince.txt、product.txt
需求一:统计没有农贸市场的省份有哪些?
- 对农产品数据进行处理(过滤脏数据)
- 对第一步处理后的数据列裁剪,只获取省份信息
- 对第二步获取到的所有省份信息去重
- 对省份数据与第三步获取到的去重的省份信息求差集,得到的就是没有农贸市场的省份
package com.atguigu.demo
import scala.io.Source
/** Requirement 1: list the provinces that have no farm-produce market. */
object demo1 {

  /**
   * Reads all lines of a text file and always releases the file handle.
   *
   * @param path path of the file to read
   * @return the lines of the file, in file order
   */
  private def readLines(path: String): List[String] = {
    val source = Source.fromFile(path)
    // Materialise the lines before closing: getLines() is a lazy iterator.
    try source.getLines().toList
    finally source.close()
  }

  /**
   * Returns the provinces that never occur in the product records.
   *
   * A product record is used only when splitting it on a tab yields exactly
   * six fields; any other line is treated as dirty data and ignored. The
   * province is taken from the fifth field.
   *
   * @param allProvinces every known province, one per element
   * @param productLines raw tab-separated product records
   * @return the elements of `allProvinces` (original order preserved) that do
   *         not appear as the province of any well-formed product record
   */
  def provincesWithoutMarket(allProvinces: List[String], productLines: List[String]): List[String] = {
    val provincesWithMarket = productLines
      .map(_.split("\t"))
      .collect { case Array(_, _, _, _, province, _) => province }
      .toSet
    // A set lookup (rather than a multiset `diff`) removes every occurrence of
    // a province that has a market, even if the province list has duplicates.
    allProvinces.filterNot(provincesWithMarket.contains)
  }

  /**
   * Loads `datas/allprovince.txt` and `datas/product.txt` and prints each
   * province without a market on its own line.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val allProvinces = readLines("datas/allprovince.txt")
    val productLines = readLines("datas/product.txt")
    provincesWithoutMarket(allProvinces, productLines).foreach(println)
  }
}
需求二:获取农产品种类数最多的三个省份?
- 对农产品数据进行处理(过滤掉脏数据)
- 对第一步处理后的数据列裁剪,只保留农产品和省份信息
- 对第二步处理后的数据进行去重
- 对第三步处理后的数据按照省份进行分组(groupBy)
- 对第四步处理后的数据进行map,获取每个省份的农产品种类数
- 对第五步处理后的数据进行排序(取前三)
package com.atguigu.demo
import scala.io.Source
/** Requirement 2: find the three provinces with the most distinct products. */
object demo2 {

  /**
   * Reads all lines of a text file and always releases the file handle.
   *
   * @param path path of the file to read
   * @return the lines of the file, in file order
   */
  private def readLines(path: String): List[String] = {
    val source = Source.fromFile(path)
    // Materialise the lines before closing: getLines() is a lazy iterator.
    try source.getLines().toList
    finally source.close()
  }

  /**
   * Ranks provinces by the number of distinct products recorded for them.
   *
   * A product record is used only when splitting it on a tab yields exactly
   * six fields; any other line is treated as dirty data and ignored. The
   * product name is the first field and the province is the fifth field.
   *
   * @param productLines raw tab-separated product records
   * @param n            how many provinces to return (defaults to 3)
   * @return up to `n` `(province, distinctProductCount)` pairs, highest count
   *         first; equal counts are ordered by province name so the result is
   *         deterministic
   */
  def topProvincesByVariety(productLines: List[String], n: Int = 3): List[(String, Int)] = {
    val provinceProductPairs = productLines
      .map(_.split("\t"))
      .collect { case Array(name, _, _, _, province, _) => (province, name) }
      .distinct // the same product sold in several markets counts once per province

    provinceProductPairs
      .groupBy { case (province, _) => province }
      .toList
      .map { case (province, pairs) => (province, pairs.size) }
      .sortBy { case (province, count) => (-count, province) }
      .take(n)
  }

  /**
   * Loads `datas/product.txt` and prints the top three provinces, one
   * `(province,count)` tuple per line.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val productLines = readLines("datas/product.txt")
    // println (not print) so each result lands on its own line.
    topProvincesByVariety(productLines).foreach(println)
  }
}
需求三:获取每个省份农产品种类数最多的三个农贸市场?
- 统计每个省份每个农贸市场的菜的种类数
- 按照省份进行分组
- 对每个省所有农贸市场数据排序取前三
package com.atguigu.demo
import scala.io.Source
/** Requirement 3: for each province, find the three markets with the most distinct products. */
object demo3 {

  /**
   * Reads all lines of a text file and always releases the file handle.
   *
   * @param path path of the file to read
   * @return the lines of the file, in file order
   */
  private def readLines(path: String): List[String] = {
    val source = Source.fromFile(path)
    // Materialise the lines before closing: getLines() is a lazy iterator.
    try source.getLines().toList
    finally source.close()
  }

  /**
   * For every province, ranks its markets by number of distinct products.
   *
   * A product record is used only when splitting it on a tab yields exactly
   * six fields; any other line is treated as dirty data and ignored. The
   * product name is the first field, the market the fourth and the province
   * the fifth.
   *
   * @param productLines raw tab-separated product records
   * @param n            how many markets to keep per province (defaults to 3)
   * @return one `(province, topMarkets)` entry per province, sorted by
   *         province name; `topMarkets` holds up to `n`
   *         `(market, distinctProductCount)` pairs, highest count first, with
   *         equal counts ordered by market name so the result is deterministic
   */
  def topMarketsPerProvince(productLines: List[String], n: Int = 3): List[(String, List[(String, Int)])] = {
    val distinctRecords = productLines
      .map(_.split("\t"))
      .collect { case Array(name, _, _, market, province, _) => (province, market, name) }
      .distinct // repeated listings of one product in one market count once

    // Step 1: number of distinct products per (province, market).
    val countsPerMarket = distinctRecords
      .groupBy { case (province, market, _) => (province, market) }
      .toList
      .map { case ((province, market), records) => (province, market, records.size) }

    // Step 2: regroup by province and keep each province's best `n` markets.
    countsPerMarket
      .groupBy { case (province, _, _) => province }
      .toList
      .map { case (province, rows) =>
        val topMarkets = rows
          .sortBy { case (_, market, count) => (-count, market) }
          .take(n)
          .map { case (_, market, count) => (market, count) }
        (province, topMarkets)
      }
      .sortBy { case (province, _) => province }
  }

  /**
   * Loads `datas/product.txt` and prints one `(province,List((market,count),...))`
   * line per province.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val productLines = readLines("datas/product.txt")
    topMarketsPerProvince(productLines).foreach(println)
  }
}