spark之broadcast后分析数据并行分区保存到mysql

package cn.bw.spark.day03

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}
import java.util.Date

import org.apache.spark.{SparkConf, SparkContext}

object IpLocaltion extends App {

  //二分查找
  def binarySearch(lines: Array[String], ip: Long): Int = { //用你这个ip传到array中去找
    var low = 0
    var high = lines.length - 1  //最大是数组长度减去一 长度从0开始
    while (low <= high) {   //当low 小于 high的时候
      val middle = (low + high) / 2  //我就取中间的值
      //lines(middle) 意思是取array数组中间的值的第二个值 转换成long类型
      //如果当前的ip大于你数组中的最小值 并且小于你数组中的最大值 就返回它了
      if ((ip >= lines(middle).split("\\|")(2).toLong) && (ip <= lines(middle).split("\\|")(3).toLong))
        return middle
      if (ip < lines(middle).split("\\|")(2).toLong)
        high = middle - 1
      else {
        low = middle + 1
      }
    }
    -1
  }

  //转换ip为数字
  def ip2Long(ip: String): Long = {
    val fragments = ip.split("[.]")
    var ipNum = 0L
    for (i <- 0 until fragments.length) {
      ipNum = fragments(i).toLong | ipNum << 8L
    }
    ipNum
  }

  val conf = new SparkConf().setAppName("IpLocaltion").setMaster("local[2]")
  val sc = new SparkContext(conf)
  val rdd1 = sc.textFile("e:/IPLog").map(line => {
    line.split("\\|")(1) //相当于缩表 另一种优化方式
  }).distinct() //把IP去重
  //println(rdd1.collect().toBuffer)

  //读取规则库 到driver
  val rules = sc.textFile("e:/ip.txt").collect()
  val broadCast = sc.broadcast(rules) //广播变量


  //方法二:计算完成的结果放到数据库   driver去写 效率低
  //  def data2Mysql(arr: Array[(String, Int)]) = {
  //    var con: Connection = null
  //    var psmt: PreparedStatement = null //Statement  PrepareStatement   1防止sql 注入  2.效率高
  //    //加载驱动
  //    try {
  //      Class.forName("com.mysql.jdbc.Driver")
  //      //获取连接
  //      con = DriverManager.getConnection("jdbc:mysql://localhost:3306/bigdata?user=root&password=123456&characterEncoding=utf-8")
  //      //sql 模版
  //      val sql: String = "insert into localtion_info values(null,?,?,?)"
  //      arr.foreach(t => {
  //        psmt = con.prepareStatement(sql)
  //        //设置参数
  //        psmt.setString(1, t._1)
  //        psmt.setInt(2, t._2)
  //        //  psmt.setDate(3,new Date(System.currentTimeMillis()))
  //        psmt.setObject(3, new Date())
  //        //发送sql语句
  //        psmt.executeUpdate()
  //      })
  //    }
  //    catch {
  //      case e: SQLException => e.printStackTrace()
  //    } finally {
  //      if (con != null) {
  //        con.close()
  //      }
  //      if (psmt != null) {
  //        psmt.close()
  //      }
  //    }
  //  }

  def data2Mysql2(it: Iterator[(String, Int)]) = {
    var con: Connection = null
    var psmt: PreparedStatement = null //Statement  PrepareStatement   1防止sql 注入  2.效率高
    //加载驱动
    try {
      Class.forName("com.mysql.jdbc.Driver")
      //获取连接
      con = DriverManager.getConnection("jdbc:mysql://localhost:3306/bigdata?user=root&password=123456&characterEncoding=utf-8")
      //sql 模版
      val sql: String = "insert into localtion_info values(null,?,?,?)"
      it.foreach(t => {
        psmt = con.prepareStatement(sql)
        //设置参数
        psmt.setString(1, t._1)
        psmt.setInt(2, t._2)
        //  psmt.setDate(3,new Date(System.currentTimeMillis()))
        psmt.setObject(3, new Date())
        //发送sql语句
        psmt.executeUpdate()
      })
    }
    catch {
      case e: SQLException => e.printStackTrace()
    } finally {
      if (con != null) {
        con.close()
      }
      if (psmt != null) {
        psmt.close()
      }
    }
  }

  val rdd2 = rdd1.map(ips => {
    //将Ip换成数字
    val ipLong = ip2Long(ips)
    //调用二分查找
    val index = binarySearch(broadCast.value, ipLong) //返回此元素在规则库中第几个元素 就是下标
    val row = broadCast.value(index) //得到每一行
    val provice = row.split("\\|")(6) //下标从0开始 取省份
    (provice, 1)
  }).reduceByKey(_ + _)


  rdd2.foreachPartition(data2Mysql2(_))
  // data2Mysql(rdd2.collect())
  // println(rdd2.collect().toBuffer)   //返回的是array
  // data2Mysql(rdd2.collect())
  sc.stop() //记得关闭
}

e:IPLog
20090121000132095572000|125.213.100.123|show.51.com|/shoplist.php?phpfile=shoplist2.php&style=1&sex=137|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Mozilla/4.0(Compatible Mozilla/4.0(Compatible-EmbeddedWB 14.59 http://bsalsa.com/ EmbeddedWB- 14.59  from: http://bsalsa.com/ )|http://show.51.com/main.php|
20090121000132124542000|117.101.215.133|www.jiayuan.com|/19245971|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TencentTraveler 4.0)|http://photo.jiayuan.com/index.php?uidhash=d1c3b69e9b8355a5204474c749fb76ef|__tkist=0; myloc=50%7C5008; myage=2009; PROFILE=14469674%3A%E8%8B%A6%E6%B6%A9%E5%92%96%E5%95%A1%3Am%3Aphotos2.love21cn.com%2F45%2F1b%2F388111afac8195cc5d91ea286cdd%3A1%3A%3Ahttp%3A%2F%2Fimages.love21cn.com%2Fw4%2Fglobal%2Fi%2Fhykj_m.jpg; last_login_time=1232454068; SESSION_HASH=8176b100a84c9a095315f916d7fcbcf10021e3af; RAW_HASH=008a1bc48ff9ebafa3d5b4815edd04e9e7978050; COMMON_HASH=45388111afac8195cc5d91ea286cdd1b; pop_1232093956=1232468896968; pop_time=1232466715734; pop_1232245908=1232469069390; pop_1219903726=1232477601937; LOVESESSID=98b54794575bf547ea4b55e07efa2e9e; main_search:14469674=%7C%7C%7C00; registeruid=14469674; REG_URL_COOKIE=http%3A%2F%2Fphoto.jiayuan.com%2Fshowphoto.php%3Fuid_hash%3D0319bc5e33ba35755c30a9d88aaf46dc%26total%3D6%26p%3D5; click_count=0%2C3363619
20090121000132406516000|117.101.222.68|gg.xiaonei.com|/view.jsp?p=389|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; CIBA)|http://home.xiaonei.com/Home.do?id=229670724|_r01_=1; __utma=204579609.31669176.1231940225.1232462740.1232467011.145; __utmz=204579609.1231940225.1.1.utmccn=(direct)
20090121000132581311000|115.120.36.118|tj.tt98.com|/tj.htm|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TheWorld)|http://www.tt98.com/|
20090121000132864647000|123.197.64.247|cul.sohu.com|/20071227/n254338813_22.shtml|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TheWorld)|http://cul.sohu.com/20071227/n254338813_22.shtml|ArticleTab=visit:1; IPLOC=unknown; SUV=0901080709152121; vjuids=832dd37a1.11ebbc5d590.0.b20f858f14e918; club_chat_ircnick=JaabvxC4aaacQ; spanel=%7B%22u%22%3A%22%22%7D; vjlast=1232467312,1232467312,30
20090121000133296729000|222.55.57.176|down.chinaz.com|/|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; iCafeMedia; TencentTraveler 4.0)||cnzz_a33219=0; vw33219=%3A18167791%3A; sin33219=http%3A//www.itxls.com/wz/wyfx/it.html; rtime=0; ltime=1232464387281; cnzz_eid=6264952-1232464379-http%3A//www.itxls.com/wz/wyfx/it.html
20090121000133331104000|123.197.66.93|www.pkwutai.cn|/down/downLoad-id-45383.html|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 1.7)|http://www.baidu.com/s?tn=b1ank_pg&ie=gb2312&bs=%C3%C0%C6%BC%B7%FE%D7%B0%B9%DC%C0%ED%C8%ED%BC%FE&sr=&z=&cl=3&f=8&wd=%C6%C6%BD%E2%C3%C0%C6%BC%B7%FE%D7%B0%B9%DC%C0%ED%C8%ED%BC%FE&ct=0|
20090121000133446262000|115.120.12.157|v.ifeng.com|/live/|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; CIBA)|http://www.ifeng.com/|userid=1232466610953_4339; location=186; sclocationid=10002; vjuids=22644b162.11ef4bc1624.0.63ad06717b426; vjlast=1232466614,1232467297,13
20090121000133456256000|115.120.7.240|cqbbs.soufun.com|/3110502342~-1~2118/23004348_23004348.htm|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; CIBA)||new_historysignlist=%u534E%u6DA6%u4E8C%u5341%u56DB%u57CE%7Chttp%3A//cqbbs.soufun.com/board/3110502342/%7C%7C%u9A8F%u9038%u7B2C%u4E00%u6C5F%u5CB8%7Chttp%3A//cqbbs.soufun.com/board/3110169184/%7C%7C%u793E%u533A%u4E4B%u661F%7Chttp%3A//cqbbs.soufun.com/board/sqzx/%7C%7C; SoufunSessionID=2y5xyr45kslc0zbdooqnoo55; viewUser=1; vjuids=-870e9088.11ee89aba57.0.be9c3d988def8; vjlast=1232263101,1232380806,11; new_viewtype=1; articlecolor=#000000; usersms_pop_type=1; articlecount=186; __utma=101868291.755195653.1232450942.1232450942.1232450942.1; __utmz=101868291.1232450942.1.1.utmccn=(referral)
20090121000133586141000|117.101.219.241|12.zgwow.com|/launcher/index.htm|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)||
20090121000133744103000|123.197.49.171|2.82yyy.com|/32/webpage/L/2.Html|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; QQDownload 1.7; TencentTraveler ; Maxthon; .NET CLR 1.1.4322)|http://2.82yyy.com/32/webpage/L/1.Html|cnzz_a998284=3; vw998284=%3A52225577%3A68566865%3A68566789%3A68566815%3A; sin998284=none; rtime=0; ltime=1232466017187; cnzz_eid=1870962-1232464084-; cnzz_a1021073=3; vw1021073=%3A34926533%3A; sin1021073=none; 61kkk=1,1232464210281
20090121000133757842000|117.101.213.104|game.7679.com|/scroll.php|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; CIBA)|http://game.7679.com/games/1021/|cnzz_a30008507=7; rtime=2; ltime=1232466389781; cnzz_eid=12877395-http%3A//apps.51.com/application.php%3Fapp_key%3D4a99277cca695a34ba39719399030076; 4a99277cca695a34ba39719399030076_user=tangqingqing33; 4a99277cca695a34ba39719399030076_session_key=1b203792173c71e961fd8cafdf011f9d; 4a99277cca695a34ba39719399030076_time=1232466378; 4a99277cca695a34ba39719399030076=cf9441753f0b3312fd76b18b68261287
20090121000134038848000|115.120.10.205|bf.bearcn.com|/user.asp?userid=9795|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TencentTraveler ; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 1.1.4322; .NET CLR 2.0.50727)|http://bf.bearcn.com/Photo.asp?page=7|ASPSESSIONIDCATABTSR=MDIFCDPCMMGJBDEBMJCPCGHF; BearCN=viewid=20944; ASPSESSIONIDCAQDCQSR=OEDPCHPCOJIKCGECBIFLAOGI
20090121000134178887000|117.101.218.147|www.baidu.com|/|test||BAIDUID=4221AC111420E40EFA125AEC596813B7:FG=1
20090121000134259104000|115.120.17.80|www.sjshu.com|/bookdown/ShowSoftDown.asp?UrlID=1&SoftID=22222|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 1.7; .NET CLR 2.0.50727)|http://www.sjshu.com/bookdown/200803/22222.shtml|ASPSESSIONIDQQASTRAD=CEJGLDLCADCNKJOPLAMEKDJJ; AJSTAT_ok_pages=3; AJSTAT_ok_times=1; ppad_cookie_0=1
20090121000134372468000|117.101.220.175|www.zhaodll.com|/dll/softdown.asp?softid=306|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)|http://www.zhaodll.com/dll/k/200607/306.html|ASPSESSIONIDQSTACDQD=HGCADNMCGHEGNPAFENHNDHKM; cnzz_a206791=0; vw206791=%3A60592519%3A; sin206791=http%3A//www.baidu.com/s%3Fwd%3Dksuser.dll%26tn%3Dyjhy_dg%26bar%3D; rtime=0; ltime=1232467247934; cnzz_eid=75465914-1232467247-http%3A//www.baidu.com/s%3Fwd%3Dksuser.dll%26tn%3Dyjhy_dg%26bar%3D
20090121000134389671000|123.197.66.12|video.baidu.com|/v?ct=301989888&rn=20&pn=0&db=0&s=8&word=%C9%AB%BC%B4%CA%C7%BF%D52%CC%F0%D0%D4%C9%AC%B0%AE|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)|http://web.gougou.com/bfs?search=%e8%89%b2%e5%8d%b3%e6%98%af%e7%a9%ba%32%e7%94%9c%e6%80%a7%e6%b6%a9%e7%88%b1&page=1&id=10000001&f=1&t=-1|BAIDUID=7AEB83E2A2E24FE200CE048A70DCBD9E:FG=1; BDSTAT=cd2b93848358eb180fb30f2442a7d933c895d143ad4bd11373f0820258afe324
20090121000134422762000|125.213.100.236|longma168.com|/al/468x60-1.htm|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)|http://www.longwang.biz/pm.php?action=view&pmid=1985410|
20090121000134802198000|123.197.66.208|webim.51.com|/webim/main.php?to=|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; CIBA)|http://my.51.com/webim/index.php?refer=/|

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值