根据IP地址计算访问日志中每一个归属地的出现的次数
# 根据IP地址计算访问日志中每一个归属地出现的次数(用累加器统计各个省份的访问次数)
## 实现

```scala
/**
 * Counts how many access-log records fall into each province and stores the
 * per-province counts in MySQL.
 *
 * The IP range table is collected to the driver and broadcast; every access
 * record's IP is resolved to a province with IPUtils.binarySearch, and the
 * provinces are tallied with the custom MyAccumulator.
 */
object _09_IP {
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = SparkUtils.getSparkContext
    try {
      // Sample line:
      // 1.0.1.0|1.0.3.255|16777472|16778239|亚洲|中国|福建|福州||电信|350100|China|CN|119.306239|26.075302
      val ipRDD1 = sc.textFile("D:\\hdp_demo\\spark\\ip\\ip.txt")
      // Sample line: 20090121000132095572000|125.213.100.123...
      val accessRDD1 = sc.textFile("D:\\hdp_demo\\spark\\ip\\ipaccess.log")

      // Keep only (range start IP, range end IP, province) from each rule line.
      val ipRDD2 = ipRDD1.map(line => {
        val arr: Array[String] = line.split("\\|")
        (arr(0), arr(1), arr(6))
      })
      val list: List[(String, String, String)] = ipRDD2.collect().toList
      val bc: Broadcast[List[(String, String, String)]] = sc.broadcast(list)

      // The client IP is the second '|'-separated field of an access record.
      val accessRDD2 = accessRDD1.map(line => line.split("\\|")(1))

      // Resolve every IP to its province using the broadcast range table.
      val proRDD = accessRDD2.map(ip => IPUtils.binarySearch(bc.value, ip))

      // Create and register the custom accumulator.
      val acc = new MyAccumulator
      sc.register(acc)

      // Tally the number of accesses per province (foreach is an action, so
      // the accumulator holds the final counts once it returns).
      proRDD.foreach(province => acc.add(province))
      val value: mutable.Map[String, Int] = acc.value

      // Write the result to MySQL.
      // Reference the JDBC driver class (kept from the original).
      // NOTE(review): classOf alone likely does not trigger class
      // initialization — confirm the driver registers itself automatically.
      classOf[Driver]
      val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/sql_01?characterEncoding=utf8",
        "root", "123456")
      try {
        // Prepare the statement once and reuse it for every row. Previously a
        // new statement was prepared per row and only the last one was closed,
        // and closing failed with a NullPointerException when no row qualified.
        val ps = conn.prepareStatement("insert into spark_city (name,count) values (? ,?)")
        try {
          for ((province, count) <- value if province != "未知") {
            ps.setString(1, province)
            ps.setInt(2, count)
            ps.executeUpdate()
          }
        } finally {
          ps.close()
        }
      } finally {
        conn.close()
      }
    } finally {
      // Always release the Spark context, even when the job or the DB write fails.
      sc.stop()
    }
  }
}
```

## 工具类

```scala
/**
 * Helpers for converting dotted IPv4 strings to numbers and for looking up
 * the province of an IP in a sorted range table.
 */
object IPUtils {

  /**
   * Converts a dotted-decimal IP string such as "192.168.100.100" into its
   * numeric value, e.g. 3232261220.
   *
   * @param ip the IP address as a dotted-decimal String
   * @return the numeric value of the address as a Long
   */
  def ipToLong(ip: String): Long =
    // Shift the accumulated value one byte to the left and OR the next
    // segment into the freed low byte; starting from 0L the first shift is a no-op.
    ip.split("[.]").foldLeft(0L)((acc, segment) => (acc << 8) | Integer.parseInt(segment))

  /**
   * Finds the province of an IP by binary search over a list of IP ranges.
   *
   * The search assumes the ranges are ordered by address — it narrows the
   * window based on comparisons with the middle entry.
   *
   * @param listIp range table of (start IP, end IP, province) tuples
   * @param ip     the IP address to look up, as a dotted-decimal String
   * @return the province of the range containing `ip`, or "未知" if none does
   */
  def binarySearch(listIp: List[(String, String, String)], ip: String): String = {
    // Convert the looked-up IP once instead of on every comparison.
    val target = ipToLong(ip)

    @scala.annotation.tailrec
    def search(left: Int, right: Int): String =
      if (left > right) "未知"
      else {
        val middle = left + (right - left) / 2
        // Indexing a List is linear, so fetch the middle entry only once per step.
        val (startIp, endIp, province) = listIp(middle)
        val start = ipToLong(startIp)
        val end = ipToLong(endIp)
        if (target >= start && target <= end) province
        else
          search(
            if (target > end) middle + 1 else left,
            if (target < start) middle - 1 else right
          )
      }

    search(0, listIp.length - 1)
  }
}
```

## 自定义累加器

```scala
/**
 * Custom accumulator that counts how many times each String key was added.
 *
 * Input type is the key (String); the accumulated value is a mutable map
 * from key to its occurrence count.
 */
class MyAccumulator extends AccumulatorV2[String, mutable.Map[String, Int]] {
  private val map: mutable.Map[String, Int] = mutable.Map[String, Int]()

  /** The accumulator is in its zero state when nothing has been counted yet. */
  override def isZero: Boolean = map.isEmpty

  /**
   * Creates a copy of this accumulator that carries the current counts.
   *
   * Previously this returned an empty accumulator, so the "copy" lost all
   * accumulated state.
   */
  override def copy(): AccumulatorV2[String, mutable.Map[String, Int]] = {
    val duplicate = new MyAccumulator
    duplicate.map ++= map
    duplicate
  }

  /** Resets the accumulator by clearing all counts. */
  override def reset(): Unit = map.clear()

  /** Adds one occurrence of `v` to the local counts. */
  override def add(v: String): Unit =
    map.update(v, map.getOrElse(v, 0) + 1)

  /** Merges the counts of another accumulator into this one, key by key. */
  override def merge(other: AccumulatorV2[String, mutable.Map[String, Int]]): Unit =
    other.value.foreach { case (key, count) =>
      map.update(key, map.getOrElse(key, 0) + count)
    }

  /** The current key -> count map (the live internal map, not a snapshot). */
  override def value: mutable.Map[String, Int] = map
}
```