Continuing from the previous post: IP geolocation count II, optimization with broadcast variables.
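The listing below relies on a helper, IpFromUtils.generalRules, that is defined elsewhere and is not part of this excerpt. For reference only, a minimal sketch of such a parser, assuming each rule line is pipe-delimited with the range start and end already in decimal (Long) form and the province in a fixed column, might look like this (the field positions are assumptions, not taken from the original):

object IpFromUtils {
  // Hypothetical parser: turns one rule line into (startIp, endIp, province).
  // Assumed layout (pipe-delimited): ...|startIpDecimal|endIpDecimal|...|province|...
  def generalRules(line: String): (Long, Long, String) = {
    val fields = line.split("[|]")
    val startIp = fields(2).toLong // assumed: start of the IP range in decimal form
    val endIp = fields(3).toLong   // assumed: end of the IP range in decimal form
    val province = fields(6)       // assumed: province the range is attributed to
    (startIp, endIp, province)
  }
}

Returning the range bounds as Longs lets the rules be sorted and searched numerically later on.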
package com.ws.spark
import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Count how many times each IP's geolocation (province) appears in the log and import the result into MySQL
 */
object IpFromCountToMysql {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("IpFromCount2").setMaster("local[4]")
    val sc = new SparkContext(conf)

    // Read the IP rules from HDFS
    val rulesHDFS: RDD[String] = sc.textFile(args(0))
    val rules: RDD[(Long, Long, String)] = rulesHDFS.map(line => IpFromUtils.generalRules(line))

    // Gather the IP rules from the Executors onto the Driver side
    val allRules: Array[(Long, Long, String)] = rules.collect()

    // Broadcast the Driver-side data to the Executors; the broadcast variable's reference (still on the Driver side)
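The listing is cut off at this point in the excerpt. In the usual pattern for this kind of job, the next step broadcasts the collected rules and each task reads them through the broadcast reference; a sketch, reusing sc and allRules from above and not necessarily the author's exact continuation:

    val broadcastRef: Broadcast[Array[(Long, Long, String)]] = sc.broadcast(allRules)
    // Executor-side code (inside a transformation) would obtain the local copy with .value:
    // val rulesOnExecutor: Array[(Long, Long, String)] = broadcastRef.value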