package com.qf.gp1707.day06
import java.sql.{Connection, Date, DriverManager, PreparedStatement}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
* 求区域用户访问量(每个省的访问量)
*/
object IPSearch {
def main(args: Array[String]): Unit = {
val conf = new SparkConf()
.setAppName("IPSearch")
.setMaster("local[2]")
val sc = new SparkContext(conf)
//獲取全國IP的分配的基礎數據
val ipInfo: Array[(String, String, String)] = sc.textFile("./src/main/scala/com/qf/gp1707/day06/ipsearch/ip.txt").map(line => {
//切分
val fields = line.split("\\|")
//開始IP
val startIP = fields(2)
//結束IP
val endIP = fields(3)
//省份
val province = fields(6)
(startIP, endIP, province)
}).collect()
//將需要廣播的數據廣播到集群中相應的Executor。广播到所有Executor可避免大量网络IO,但会占用各Executor内存,数据过大时可能导致内存溢出
// 利用Spark求区域用户访问量(每个省的访问量)
// (以下为网页抓取残留文本,非源代码)最新推荐文章于 2022-04-17 16:29:10 发布