使用广播变量统计各省访问量并将结果写入 MySQL

package com._51doit.spark.Day07

import java.sql.{Connection, Driver, DriverManager, PreparedStatement, SQLException}

import com._51doit.spark.utils.IpUtils
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Spark job that counts access-log hits per province and writes the totals to MySQL.
  *
  * The IP rule table is collected on the driver and broadcast, so each task resolves an
  * access IP to a province with a local binary search instead of a join between RDDs.
  */
object LocalToMysql {

  /**
    * Entry point: loads the IP rules, broadcasts them, maps every access-log line to
    * `(province, 1)`, sums per province and writes each partition to MySQL.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(this.getClass.getCanonicalName).setMaster("local[*]")
    val sc = new SparkContext(conf)

    // Stop the context even when the job fails, so the local executor is always released.
    try {
      // Read the IP rule data first. Sample line:
      // 1.0.32.0|1.0.63.255|16785408|16793599|亚洲|中国|广东|广州||电信|440100|China|CN|113.280637|23.125178
      val ipRuleLines: RDD[String] = sc.textFile("data\\ip.txt")
      val ipRanges: RDD[(Long, Long, String, String)] = ipRuleLines.map { line =>
        val fields = line.split("[|]")
        // Fields 2 and 3 are the range bounds in decimal form, 6 and 7 the province and city.
        (fields(2).toLong, fields(3).toLong, fields(6), fields(7))
      }

      // The lookup below is a binary search, which only works on ranges ordered by their
      // start value, so sort on the driver instead of relying on the file's line order.
      val sortedRanges: Array[(Long, Long, String, String)] = ipRanges.collect().sortBy(_._1)
      val rulesBroadcast: Broadcast[Array[(Long, Long, String, String)]] = sc.broadcast(sortedRanges)

      // Access log. Sample line:
      // 20090121000132095572000|125.213.100.123|show.51.com|/shoplist.php?phpfile=shoplist2.php&style=1&sex=137|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Mozilla/4.0(Compatible Mozilla/4.0(Compatible-EmbeddedWB 14.59 http://bsalsa.com/ EmbeddedWB- 14.59  from: http://bsalsa.com/ )|http://show.51.com/main.php|
      val accessLines = sc.textFile("data\\ipaccess.log")

      val provinceAndOne: RDD[(String, Int)] = accessLines.map { line =>
        // Field 1 of the log line is the client IP in dotted notation.
        val ip = line.split("[|]")(1)
        val ipNum: Long = IpUtils.ip2Long(ip)

        val rules: Array[(Long, Long, String, String)] = rulesBroadcast.value
        val index: Int = IpUtils.bir(rules, ipNum)

        // -1 means no rule covers this IP; such hits are counted under "未知".
        val province = if (index != -1) rules(index)._3 else "未知"
        (province, 1)
      }

      val provinceCounts: RDD[(String, Int)] = provinceAndOne.reduceByKey(_ + _)
      provinceCounts.foreachPartition(dataToMysql)
    } finally {
      sc.stop()
    }
  }

  /**
    * Writes one partition of `(province, count)` pairs to the MySQL table `city`.
    *
    * One connection and one prepared statement are opened per partition. All rows are
    * sent as a single batch inside one transaction, so a failure leaves no partial rows
    * behind. On a `SQLException` the transaction is rolled back and the exception is
    * rethrown, so the failure is visible to the caller instead of being dropped silently.
    */
  val dataToMysql: Iterator[(String, Int)] => Unit = (f: Iterator[(String, Int)]) => {
    // JDBC handles start as null so the finally block can tell what was actually opened.
    var conn: Connection = null
    var stat: PreparedStatement = null

    try {
      conn = DriverManager.getConnection(
        "jdbc:mysql://localhost:3306/spark?characterEncoding=UTF-8",
        "root",
        "root"
      )
      // Commit explicitly so the whole batch is applied atomically.
      conn.setAutoCommit(false)
      stat = conn.prepareStatement("INSERT INTO city values(?,?)")
      f.foreach { case (province, count) =>
        stat.setString(1, province)
        stat.setInt(2, count)
        stat.addBatch()
      }
      stat.executeBatch()
      conn.commit()
    } catch {
      case e: SQLException =>
        if (conn != null) {
          // Keep the original failure as the primary error even if the rollback fails too.
          try conn.rollback()
          catch { case rollbackError: SQLException => e.addSuppressed(rollbackError) }
        }
        throw e
    } finally {
      if (stat != null) {
        stat.close()
      }
      if (conn != null) {
        conn.close()
      }
    }
  }

}


工具类
package com._51doit.spark.utils

import scala.collection.mutable.ArrayBuffer

/**
  * Helpers for converting dotted IP addresses to numbers and for locating the IP range
  * that contains a given address.
  *
  * Every range is a tuple `(start, end, province, city)` with inclusive `start` and `end`.
  * The search methods use binary search, so the ranges must be ordered by `start` and
  * must not overlap.
  */
object IpUtils {

  /**
    * Converts a dotted IP address to its decimal value.
    *
    * Each dot-separated fragment is shifted in as the next lower 8 bits, so
    * `"1.0.32.0"` yields `16785408`. The number and range of fragments are not validated.
    *
    * @param ip the address in dotted notation, e.g. `"125.213.100.123"`
    * @return the numeric value of the address
    * @throws NumberFormatException if a fragment is not a valid integer
    */
  def ip2Long(ip: String): Long =
    ip.split("[.]").foldLeft(0L)((acc, fragment) => (acc << 8) | fragment.toLong)

  /**
    * Binary search over ranges held in an `ArrayBuffer`.
    *
    * @param lines ranges `(start, end, province, city)`, ordered by `start`
    * @param ip    the numeric IP to look up
    * @return the index of the range containing `ip`, or `-1` if none does
    */
  def binarySearch(lines: ArrayBuffer[(Long, Long, String, String)], ip: Long): Int =
    searchRanges(lines, ip)

  /**
    * Binary search over ranges held in an `Array`.
    *
    * @param lines ranges `(start, end, province, city)`, ordered by `start`
    * @param ip    the numeric IP to look up
    * @return the index of the range containing `ip`, or `-1` if none does
    */
  def binarySearch(lines: Array[(Long, Long, String, String)], ip: Long): Int =
    searchRanges(lines, ip)

  /**
    * Same lookup as the `Array` overload of `binarySearch`, kept under its original
    * name for existing callers.
    *
    * @param lines ranges `(start, end, province, city)`, ordered by `start`
    * @param line  the numeric IP to look up
    * @return the index of the range containing `line`, or `-1` if none does
    */
  def bir(lines: Array[(Long, Long, String, String)], line: Long): Int =
    searchRanges(lines, line)

  /** Shared binary search used by all public lookup methods. */
  private def searchRanges(
      ranges: scala.collection.IndexedSeq[(Long, Long, String, String)],
      ip: Long
  ): Int = {
    var low = 0
    var high = ranges.length - 1
    var found = -1 // stays -1 until a containing range is hit
    while (found == -1 && low <= high) {
      // low + (high - low) / 2 cannot overflow Int, unlike (low + high) / 2.
      val middle = low + (high - low) / 2
      val range = ranges(middle)
      if (ip >= range._1 && ip <= range._2) {
        found = middle
      } else if (ip < range._1) {
        high = middle - 1
      } else {
        low = middle + 1
      }
    }
    found
  }

}

 

 

 

 

 

 

 

 

 

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值