Spark读取本地数据到数据库

主程序

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark job that counts access-log hits per province.
 *
 * Steps:
 *  1. Read the IP rule file and extract (startIp, endIp, province) triples.
 *  2. Collect the rules on the driver and broadcast them to the executors.
 *  3. Map every access-log line to (province, 1) by binary-searching its IP
 *     in the broadcast rules.
 *  4. Sum the counts per province, sort descending, print them and write
 *     them to MySQL through `MyUtils.ipLocationData2MySQL`.
 */
object ProvinceAndIpDemo {

  // Default input locations, used when no command-line arguments are given.
  // To read from HDFS instead, pass an HDFS path (the cluster must be running).
  private val DefaultIpRulesPath = "E:\\study\\Date\\ip.txt"
  private val DefaultAccessLogPath = "E:\\study\\Date\\access.log"

  /**
   * Entry point.
   *
   * @param args optional overrides: args(0) = IP rule file path,
   *             args(1) = access-log path. Missing arguments fall back to
   *             the built-in default paths.
   */
  def main(args: Array[String]): Unit = {
    val ipRulesPath = if (args.length > 0) args(0) else DefaultIpRulesPath
    val accessLogPath = if (args.length > 1) args(1) else DefaultAccessLogPath

    val conf = new SparkConf().setMaster("local[*]").setAppName("ProvinceAndIpDemo")
    val sc = new SparkContext(conf)

    // Stop the context even if any stage below fails.
    try {
      // Parse the rule file: fields 2 and 3 are the numeric range bounds,
      // field 6 is the province name ('|'-separated).
      val partIpRules: RDD[(Long, Long, String)] = sc.textFile(ipRulesPath).map { line =>
        val fields = line.split("[|]")
        (fields(2).toLong, fields(3).toLong, fields(6))
      }

      // Pull all rules to the driver. The lookup below is a binary search,
      // which needs the ranges ordered by start address, so sort explicitly
      // rather than relying on the order of lines in the file.
      val allIpRules: Array[(Long, Long, String)] = partIpRules.collect().sortBy(_._1)

      // Ship one read-only copy of the rules to each executor.
      val broadCastIp: Broadcast[Array[(Long, Long, String)]] = sc.broadcast(allIpRules)

      // Map each access-log line to (province, 1); field 1 is the client IP.
      val provinceAndOne: RDD[(String, Int)] = sc.textFile(accessLogPath).map { line =>
        val ipLong = MyUtils.ip2Long(line.split("[|]")(1))
        val rules = broadCastIp.value
        val index = MyUtils.binarySearch(rules, ipLong)
        // -1 means the IP is not covered by any rule.
        val province = if (index != -1) rules(index)._3 else "未知省份"
        (province, 1)
      }

      // Aggregate per province and order by hit count, highest first.
      val reduced: RDD[(String, Int)] = provinceAndOne.reduceByKey(_ + _)
      val result: RDD[(String, Int)] = reduced.sortBy(_._2, ascending = false)

      // `result` feeds two actions below; cache it so it is computed once.
      result.cache()
      try {
        // Print to the console.
        result.collect().foreach(println)

        // Write to the database, one connection per partition.
        result.foreachPartition(MyUtils.ipLocationData2MySQL)
      } finally {
        result.unpersist()
      }
    } finally {
      // Release Spark resources.
      sc.stop()
    }
  }
}

工具代码

import java.sql.DriverManager

/**
 * Helpers for the IP-to-province job: IPv4 parsing, range lookup and
 * persistence of the per-province counts to MySQL.
 */
object MyUtils {

  /**
   * Converts a dotted IP string to its numeric value by folding the
   * fragments left to right, shifting the accumulator 8 bits each step.
   *
   * @param ip dotted-decimal address, e.g. "1.2.3.4"
   * @return the numeric value (16909060 for "1.2.3.4")
   * @throws NumberFormatException if a fragment is not a valid number
   */
  def ip2Long(ip: String): Long =
    ip.split("[.]").foldLeft(0L)((acc, fragment) => fragment.toLong | (acc << 8))

  /**
   * Binary search for the rule whose [start, end] range contains `ip`.
   *
   * @param lines rules as (startIp, endIp, province); the search assumes
   *              they are ordered by startIp with non-overlapping ranges
   * @param ip    numeric IP to look up
   * @return index of the matching rule, or -1 if no range contains `ip`
   */
  def binarySearch(lines: Array[(Long, Long, String)], ip: Long): Int = {
    @scala.annotation.tailrec
    def search(low: Int, high: Int): Int =
      if (low > high) -1
      else {
        // low + (high - low) / 2 cannot overflow, unlike (low + high) / 2.
        val middle = low + (high - low) / 2
        val rule = lines(middle)
        if (ip < rule._1) search(low, middle - 1)
        else if (ip > rule._2) search(middle + 1, high)
        else middle
      }

    search(0, lines.length - 1)
  }

  /**
   * Inserts every (province, count) pair of one partition into the
   * `ip_local` table. Intended to be passed to `foreachPartition`, so one
   * connection is opened per partition rather than per record.
   *
   * Empty partitions are skipped without opening a connection. The
   * statement and connection are closed even if an insert fails.
   *
   * NOTE(review): the JDBC URL and credentials are hard-coded here;
   * consider moving them to configuration.
   *
   * @param it the (province, count) records of one partition
   */
  def ipLocationData2MySQL(it: Iterator[(String, Int)]): Unit = {
    if (it.hasNext) {
      // Open the database connection.
      val conn = DriverManager.getConnection("jdbc:mysql://hadoop01:3306/xiaoniu?characterEncoding=UTF-8", "root", "123456")
      try {
        val statement = conn.prepareStatement("insert into ip_local values(?,?)")
        try {
          it.foreach { case (province, count) =>
            statement.setString(1, province)
            statement.setInt(2, count)
            // executeUpdate instead of executeLargeUpdate: the latter's
            // default JDBC implementation throws
            // UnsupportedOperationException on drivers that do not
            // override it, and a single-row insert never needs a long count.
            statement.executeUpdate()
          }
        } finally {
          statement.close()
        }
      } finally {
        conn.close()
      }
    }
  }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值