一、需求:把最终结果存储在 MySQL 中
1、UrlGroupCount1类
import java.net.URL
import java.sql.DriverManager
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
* Stores the final result in MySQL.
*/
object UrlGroupCount1 {
def main(args: Array[String]): Unit = {
//1.创建spark程序入口
val conf: SparkConf = new SparkConf().setAppName("UrlGroupCount1").setMaster("local[2]")
val sc: SparkContext = new SparkContext(conf)
//2.加载数据
val rdd1: RDD[String] = sc.textFile("e:/access.log")
//3.将数据切分
val rdd2: RDD[(String, Int)] = rdd1.map(line => {
val s: Array[String] = line.split("\t")
//元组输出
(s(1), 1)
})
//4.累加求和
val rdd3: RDD[(String, Int)] = rdd2.reduceByKey(_+_)