package com._51doit.spark.Day07

import java.sql.{Connection, Driver, DriverManager, PreparedStatement, SQLException}

import com._51doit.spark.utils.IpUtils
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
 * Local Spark job that counts access-log hits per province and writes the
 * totals to the MySQL table `city`.
 *
 * Steps performed by [[main]]:
 *  1. read the IP rule file and keep (startIp, endIp, province, city) per line,
 *  2. collect the rules to the driver and broadcast them,
 *  3. map every access-log line to (province, 1) via `IpUtils.bir`,
 *  4. sum the counts per province with `reduceByKey`,
 *  5. insert each partition of the result through [[dataToMysql]].
 */
object LocalToMysql {

  /**
   * Entry point. Runs the whole pipeline with a `local[*]` master.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName(this.getClass.getCanonicalName).setMaster("local[*]")
    val sc = new SparkContext(conf)

    // Stop the context even when a stage fails, so the local executor threads are released.
    try {
      // Read the IP rule data first. Sample rule line:
      // 1.0.32.0|1.0.63.255|16785408|16793599|亚洲|中国|广东|广州||电信|440100|China|CN|113.280637|23.125178
      val ipRuleLines: RDD[String] = sc.textFile("data\\ip.txt")
      val ipRules: RDD[(Long, Long, String, String)] = ipRuleLines.map { line =>
        val fields = line.split("[|]")
        // Fields 2/3 are the numeric range bounds, 6/7 the province and city.
        (fields(2).toLong, fields(3).toLong, fields(6), fields(7))
      }

      // NOTE(review): the lookup below is a binary search, so it presumably relies on
      // ip.txt already being ordered by start address — confirm against the data file.
      val rulesOnDriver: Array[(Long, Long, String, String)] = ipRules.collect()
      val rulesBroadcast: Broadcast[Array[(Long, Long, String, String)]] = sc.broadcast(rulesOnDriver)

      // Sample access-log line:
      // 20090121000132095572000|125.213.100.123|show.51.com|/shoplist.php?phpfile=shoplist2.php&style=1&sex=137|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Mozilla/4.0(Compatible Mozilla/4.0(Compatible-EmbeddedWB 14.59 http://bsalsa.com/ EmbeddedWB- 14.59 from: http://bsalsa.com/ )|http://show.51.com/main.php|
      val accessLines = sc.textFile("data\\ipaccess.log")
      val provinceAndOne: RDD[(String, Int)] = accessLines.map { line =>
        val fields = line.split("[|]")
        val ipAsLong: Long = IpUtils.ip2Long(fields(1))
        val rules: Array[(Long, Long, String, String)] = rulesBroadcast.value
        val index: Int = IpUtils.bir(rules, ipAsLong)
        // -1 means no rule covers this address; such hits are counted under "未知".
        val province = if (index != -1) rules(index)._3 else "未知"
        (province, 1)
      }

      val countsByProvince: RDD[(String, Int)] = provinceAndOne.reduceByKey(_ + _)
      countsByProvince.foreachPartition(dataToMysql)
    } finally {
      sc.stop()
    }
  }

  /**
   * Writes one partition of (province, count) pairs to MySQL as a single JDBC batch.
   *
   * One connection is opened per non-empty partition; empty partitions return
   * immediately without touching the database. An `SQLException` raised while
   * connecting, preparing or executing the batch is reported on stderr and not
   * rethrown, so the Spark task still completes (the original code swallowed
   * the exception without any trace).
   */
  val dataToMysql: Iterator[(String, Int)] => Unit = (f: Iterator[(String, Int)]) => {
    if (f.hasNext) {
      var conn: Connection = null
      var stat: PreparedStatement = null
      try {
        conn = DriverManager.getConnection(
          "jdbc:mysql://localhost:3306/spark?characterEncoding=UTF-8", "root", "root")
        stat = conn.prepareStatement("INSERT INTO city values(?,?)")
        f.foreach { case (province, count) =>
          stat.setString(1, province)
          stat.setInt(2, count)
          stat.addBatch()
        }
        stat.executeBatch()
        ()
      } catch {
        case e: SQLException =>
          // Keep the best-effort behaviour, but make the failure visible.
          System.err.println(s"LocalToMysql: failed to write partition to MySQL: ${e.getMessage}")
          e.printStackTrace()
      } finally {
        // Nested try/finally: the connection is closed even if closing the statement throws.
        try {
          if (stat != null) stat.close()
        } finally {
          if (conn != null) conn.close()
        }
      }
    }
  }
}

// Utility class (labelled "工具类" in the original listing): the IpUtils helper follows.
// NOTE(review): the listing below declares its own package, so it presumably belongs
// in a separate source file — confirm before compiling both parts as one file.
package com._51doit.spark.utils

import scala.collection.mutable.ArrayBuffer

/**
 * Helpers for converting dotted IP addresses to numbers and for locating the
 * address range that contains a given numeric IP.
 *
 * Every range table is a sequence of `(start, end, province, city)` tuples.
 * The searches are binary searches, so the table must be ordered by `start`.
 */
object IpUtils {

  /**
   * Converts a dotted IP address to its decimal value.
   *
   * Each dot-separated fragment is folded in from left to right: the running
   * value is shifted left by 8 bits and OR-ed with the fragment, so
   * `"1.0.32.0"` yields `16785408`.
   *
   * @param ip dotted address such as `"125.213.100.123"`
   * @return the address as a `Long`
   * @throws NumberFormatException if a fragment is not a valid integer
   */
  def ip2Long(ip: String): Long =
    ip.split("[.]").foldLeft(0L)((acc, fragment) => fragment.toLong | acc << 8L)

  /**
   * Binary search over a range table held in an `ArrayBuffer`.
   *
   * @param lines range table ordered by start address
   * @param ip    numeric IP to look up
   * @return index of the range with `start <= ip <= end`, or -1 if none matches
   */
  def binarySearch(lines: ArrayBuffer[(Long, Long, String, String)], ip: Long): Int =
    findRange(lines, ip)

  /**
   * Binary search over a range table held in an `Array`.
   *
   * @param lines range table ordered by start address
   * @param ip    numeric IP to look up
   * @return index of the range with `start <= ip <= end`, or -1 if none matches
   */
  def binarySearch(lines: Array[(Long, Long, String, String)], ip: Long): Int =
    findRange(lines, ip)

  /**
   * Same lookup as the `Array` overload of `binarySearch`, kept under its
   * original name for existing callers.
   *
   * @param lines range table ordered by start address
   * @param line  numeric IP to look up
   * @return index of the range with `start <= line <= end`, or -1 if none matches
   */
  def bir(lines: Array[(Long, Long, String, String)], line: Long): Int =
    findRange(lines, line)

  /** Shared binary search used by every public lookup; returns -1 when no range contains `ip`. */
  private def findRange(lines: scala.collection.IndexedSeq[(Long, Long, String, String)], ip: Long): Int = {
    var low = 0
    var high = lines.length - 1
    var found = -1
    while (found == -1 && low <= high) {
      // Unsigned shift keeps the midpoint correct even if low + high overflows Int.
      val middle = (low + high) >>> 1
      val range = lines(middle)
      if (ip < range._1) high = middle - 1
      else if (ip > range._2) low = middle + 1
      else found = middle
    }
    found
  }
}