package utils
import java.sql.DriverManager
object UtilsDemo {

  /**
   * Converts a dotted IPv4 string into its decimal (Long) representation.
   *
   * The string is split on `.` and every fragment is shifted into the result
   * eight bits at a time, so `"1.2.3.4"` becomes `16909060`.
   * No validation is performed: the number of fragments and their range are
   * not checked.
   *
   * @param ip dotted address, e.g. `"192.168.1.1"`
   * @return the numeric value of the address
   * @throws NumberFormatException if a fragment is not a valid integer
   */
  def ip2Long(ip: String): Long =
    ip.split("[.]").foldLeft(0L)((acc, fragment) => fragment.toLong | (acc << 8L))

  /**
   * Binary search for the range that contains `ip`.
   *
   * `lines` must be sorted in ascending order by range start (`_1`) and the
   * ranges must not overlap; otherwise the result is undefined.
   *
   * @param lines rules as `(rangeStart, rangeEnd, label)`, both bounds inclusive
   * @param ip    numeric address to look up
   * @return index of the matching rule in `lines`, or `-1` when no range contains `ip`
   */
  def binarySearch(lines: Array[(Long, Long, String)], ip: Long): Int = {
    @scala.annotation.tailrec
    def search(low: Int, high: Int): Int =
      if (low > high) -1
      else {
        // Unsigned shift keeps the midpoint correct even if low + high overflows Int.
        val middle = (low + high) >>> 1
        val rule = lines(middle)
        if (ip < rule._1) search(low, middle - 1)
        else if (ip > rule._2) search(middle + 1, high)
        else middle
      }

    search(0, lines.length - 1)
  }

  /**
   * Writes every `(key, count)` pair of the iterator into the MySQL table
   * `access_log`, one `INSERT` per element.
   *
   * The statement and the connection are always closed, statement first,
   * even when an insert fails.
   *
   * NOTE(review): the default credentials are kept only for backward
   * compatibility; they should come from configuration rather than source code.
   *
   * @param part     rows to insert
   * @param url      JDBC URL of the target database
   * @param user     database user
   * @param password database password
   * @throws java.sql.SQLException if connecting or inserting fails
   */
  def data2Mysql(
      part: Iterator[(String, Int)],
      url: String = "jdbc:mysql://localhost:3306/bigdata?serverTimezone=Asia/Shanghai&useSSL=false",
      user: String = "root",
      password: String = "123456"): Unit = {
    val conn = DriverManager.getConnection(url, user, password)
    try {
      val st = conn.prepareStatement("insert into access_log values(?,?)")
      try {
        part.foreach { case (key, count) =>
          st.setString(1, key)
          st.setInt(2, count)
          st.executeUpdate()
        }
      } finally {
        st.close()
      }
    } finally {
      conn.close()
    }
  }
}
package IP
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{SparkConf, SparkContext}
import utils.UtilsDemo
/**
 * Spark job that resolves the IP address of every access record to a province
 * and stores the number of accesses per province in MySQL.
 *
 * Arguments:
 *  - `args(0)`: path of the `|`-separated IP rule file; field index 2 is the
 *    range start, index 3 the range end (both decimal) and index 6 the province.
 *  - `args(1)`: path of the `|`-separated access log; field index 1 is the
 *    dotted IP address.
 */
object IpTest {

  def main(args: Array[String]): Unit = {
    require(args.length >= 2, "Usage: IpTest <ip-rules-path> <access-log-path>")

    // NOTE(review): the master is hard-coded to local mode; the JDBC sink in
    // UtilsDemo.data2Mysql also targets localhost, so confirm before running on a cluster.
    val conf = new SparkConf().setAppName("IpTest").setMaster("local[*]")
    val sc = new SparkContext(conf)

    try {
      // Parse the rule file into (rangeStart, rangeEnd, province) triples.
      val ipRules = sc.textFile(args(0)).map { line =>
        val fields = line.split("[|]")
        (fields(2).toLong, fields(3).toLong, fields(6))
      }

      // Bring the rules to the driver. binarySearch needs them ordered by range
      // start, so sort explicitly instead of relying on the order of the file.
      val sortedRules = ipRules.collect().sortBy(_._1)

      // Ship the rule table to every executor once.
      val broadcast: Broadcast[Array[(Long, Long, String)]] = sc.broadcast(sortedRules)

      // Map every access record to (province, 1).
      val ipToProvince = sc.textFile(args(1)).map { line =>
        val fields = line.split("[|]")
        val ipNum = UtilsDemo.ip2Long(fields(1))
        val rules = broadcast.value
        val index = UtilsDemo.binarySearch(rules, ipNum)
        // Records whose IP matches no rule are counted under a placeholder province.
        val province = if (index != -1) rules(index)._3 else "未知省份"
        (province, 1)
      }

      // Count per province, highest count first.
      val result = ipToProvince.reduceByKey(_ + _).sortBy(_._2, ascending = false)

      // One JDBC connection per partition.
      result.foreachPartition(partition => UtilsDemo.data2Mysql(partition))
    } finally {
      // Release the Spark context even when the job fails.
      sc.stop()
    }
  }
}