mysql跳过解析ip_ip解析存放MySQL

package ipAndAccress

import org.apache.spark.rdd.RDD

import org.apache.spark.{SparkConf, SparkContext}

import scala.io.Source

case object EtlIp {

def main(args: Array[String]): Unit = {

val conf: SparkConf = new SparkConf().setAppName("ipAndAccress").setMaster("local")

val sc: SparkContext = new SparkContext(conf)

//广播文件

val strings: Iterator[String] = Source.fromFile("D:\\bigdata\\project\\scala2\\src\\main\\scala\\ipAndAccress\\ip.txt").getLines()

val arr = strings.map(t=>{

val arr: Array[String] = t.split("\\|")

val start_time: Long = arr(2).toLong

val end_time: Long = arr(3).toLong

val provice: String = arr(6)

(start_time,end_time,provice)

}).toArray

val b = sc.broadcast(arr)

val file: RDD[String] = sc.textFile("D:\\bigdata\\project\\scala2\\src\\main\\scala\\ipAndAccress\\access.log")

val value: RDD[Array[String]] = file.map(l =>l.split("\\|"))

val num = file.map(t=>{

val classed = t.split("\\|")

val num = (MyUtils.ip2Long(classed(1)))

num

})

val vl = b.value

val tuples = num.map(t=>{

//二分查找

var provice = "未知"

val index = MyUtils.binarySearch(vl,t)

if (index != -1){

provice = vl(index)._3

}

(provice,1)

})

将数据进行聚合

val reduce: RDD[(String, Int)] = tuples.reduceByKey(_+_)

//降序排列

val sort: RDD[(String, Int)] = reduce.sortBy(_._2,false)

//写道数据库

sort.foreachPartition(it=>MyUtils.data2MySQL(it))

sort.foreach(println)

sc.stop()

}

}

package ipAndAccress

import java.sql.{Connection, DriverManager, PreparedStatement}

import scala.io.{BufferedSource, Source}

object MyUtils {

// 算法解析Ip转换十进制

def ip2Long(ip: String): Long = {

val fragments = ip.split("[.]")

var ipNum = 0L

for (i

ipNum = fragments(i).toLong | ipNum << 8L

}

ipNum

}

// 获取需要的起始Ip和结束Ip和地域分布

def readRules(path: String): Array[(Long, Long, String)] = {

//读取ip规则

val bf: BufferedSource = Source.fromFile(path)

val lines: Iterator[String] = bf.getLines()

//对ip规则进行整理

val rules: Array[(Long, Long, String)] = lines.map(line => {

val fileds = line.split("[|]")// 需要转义

val startNum = fileds(2).toLong

val endNum = fileds(3).toLong

val province = fileds(6)

(startNum, endNum, province)

}).toArray

rules

}

def binarySearch(lines: Array[(Long, Long, String)], ip: Long) : Int = {

var low = 0

var high = lines.length - 1

while (low <= high) {

val middle = (low + high) / 2

if ((ip >= lines(middle)._1) && (ip <= lines(middle)._2))

return middle

if (ip < lines(middle)._1)

high = middle - 1

else {

low = middle + 1

}

}

-1

}

def data2MySQL(it: Iterator[(String, Int)]): Unit = {

//一个迭代器代表一个分区,分区中有多条数据

//先获得一个JDBC连接

val conn: Connection = DriverManager.getConnection(

"jdbc:mysql://localhost:3306/spark?characterEncoding=UTF-8", "root", "123")

//将数据通过Connection写入到数据库

val pstm: PreparedStatement = conn.prepareStatement(

"INSERT INTO access_log VALUES (?, ?)")

//将分区中的数据一条一条写入到MySQL中

it.foreach(tp => {

pstm.setString(1, tp._1)

pstm.setInt(2, tp._2)

pstm.executeUpdate()

})

//将分区中的数据全部写完之后,在关闭连接

if(pstm != null) {

pstm.close()

}

if (conn != null) {

conn.close()

}

}

}

xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"

xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

4.0.0

SouGouProject

SouGouProject

1.0-SNAPSHOT

1.8

1.8

UTF-8

2.10.6

1.6.3

2.6.4

org.scala-lang

scala-library

${scala.version}

com.typesafe.akka

akka-actor_2.10

2.3.14

mysql

mysql-connector-java

5.1.42

com.typesafe.akka

akka-remote_2.10

2.3.14

org.apache.spark

spark-core_2.10

${spark.version}

org.apache.spark

spark-sql_2.10

${spark.version}

org.apache.hadoop

hadoop-client

${hadoop.version}

src/main/spark

net.alchim31.maven

scala-maven-plugin

3.2.2

compile

testCompile

-dependencyfile

${project.build.directory}/.scala_dependencies

org.apache.maven.plugins

maven-shade-plugin

2.4.3

package

shade

*:*

META-INF/*.SF

META-INF/*.DSA

META-INF/*.RSA

access.log 数据样例

20090121000132095572000|125.213.100.123|show.51.com|/shoplist.php?phpfile=shoplist2.php&style=1&sex=137|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Mozilla/4.0(Compatible Mozilla/4.0(Compatible-EmbeddedWB 14.59 http://bsalsa.com/ EmbeddedWB- 14.59 from: http://bsalsa.com/ )|http://show.51.com/main.php|

20090121000132124542000|117.101.215.133|www.jiayuan.com|/19245971|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TencentTraveler 4.0)|http://photo.jiayuan.com/index.php?uidhash=d1c3b69e9b8355a5204474c749fb76ef|__tkist=0; myloc=50%7C5008; myage=2009; PROFILE=14469674%3A%E8%8B%A6%E6%B6%A9%E5%92%96%E5%95%A1%3Am%3Aphotos2.love21cn.com%2F45%2F1b%2F388111afac8195cc5d91ea286cdd%3A1%3A%3Ahttp%3A%2F%2Fimages.love21cn.com%2Fw4%2Fglobal%2Fi%2Fhykj_m.jpg; last_login_time=1232454068; SESSION_HASH=8176b100a84c9a095315f916d7fcbcf10021e3af; RAW_HASH=008a1bc48ff9ebafa3d5b4815edd04e9e7978050; COMMON_HASH=45388111afac8195cc5d91ea286cdd1b; pop_1232093956=1232468896968; pop_time=1232466715734; pop_1232245908=1232469069390; pop_1219903726=1232477601937; LOVESESSID=98b54794575bf547ea4b55e07efa2e9e; main_search:14469674=%7C%7C%7C00; registeruid=14469674; REG_URL_COOKIE=http%3A%2F%2Fphoto.jiayuan.com%2Fshowphoto.php%3Fuid_hash%3D0319bc5e33ba35755c30a9d88aaf46dc%26total%3D6%26p%3D5; click_count=0%2C3363619

20090121000132406516000|117.101.222.68|gg.xiaonei.com|/view.jsp?p=389|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; CIBA)|http://home.xiaonei.com/Home.do?id=229670724|_r01_=1; __utma=204579609.31669176.1231940225.1232462740.1232467011.145; __utmz=204579609.1231940225.1.1.utmccn=(direct)

20090121000132581311000|115.120.36.118|tj.tt98.com|/tj.htm|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TheWorld)|http://www.tt98.com/|

20090121000132864647000|123.197.64.247|cul.sohu.com|/20071227/n254338813_22.shtml|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TheWorld)|http://cul.sohu.com/20071227/n254338813_22.shtml|ArticleTab=visit:1; IPLOC=unknown; SUV=0901080709152121; vjuids=832dd37a1.11ebbc5d590.0.b20f858f14e918; club_chat_ircnick=JaabvxC4aaacQ; spanel=%7B%22u%22%3A%22%22%7D; vjlast=1232467312,1232467312,30

20090121000133296729000|222.55.57.176|down.chinaz.com|/|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; iCafeMedia; TencentTraveler 4.0)||cnzz_a33219=0; vw33219=%3A18167791%3A; sin33219=http%3A//www.itxls.com/wz/wyfx/it.html; rtime=0; ltime=1232464387281; cnzz_eid=6264952-1232464379-http%3A//www.itxls.com/wz/wyfx/it.html

20090121000133331104000|123.197.66.93|www.pkwutai.cn|/down/downLoad-id-45383.html|Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 1.7)|http://www.baidu.com/s?tn=b1ank_pg&ie=gb2312&bs=%C3%C0%C6%BC%B7%FE%D7%B0%B9%DC%C0%ED%C8%ED%BC%FE&sr=&z=&cl=3&f=8&wd=%C6%C6%BD%E2%C3%C0%C6%BC%B7%FE%D7%B0%B9%DC%C0%ED%C8%ED%BC%FE&ct=0|

20090121000133446262000|115.120.12.157|v.ifeng.com|/live/|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; CIBA)|http://www.ifeng.com/|userid=1232466610953_4339; location=186; sclocationid=10002; vjuids=22644b162.11ef4bc1624.0.63ad06717b426; vjlast=1232466614,1232467297,13

ip.txt

1.0.1.0|1.0.3.255|16777472|16778239|亚洲|中国|福建|福州||电信|350100|China|CN|119.306239|26.075302

1.0.8.0|1.0.15.255|16779264|16781311|亚洲|中国|广东|广州||电信|440100|China|CN|113.280637|23.125178

1.0.32.0|1.0.63.255|16785408|16793599|亚洲|中国|广东|广州||电信|440100|China|CN|113.280637|23.125178

1.1.0.0|1.1.0.255|16842752|16843007|亚洲|中国|福建|福州||电信|350100|China|CN|119.306239|26.075302

1.1.2.0|1.1.7.255|16843264|16844799|亚洲|中国|福建|福州||电信|350100|China|CN|119.306239|26.075302

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值