利用 GeoLite2-City 将服务器日志中的IP 信息,解析成更加直观有效的地域信息。
话不多说,直接上代码:
实际使用时需要根据自己的需求对代码稍作改动;本例读取的是 Parquet 格式的日志文件,解析后将结果写入 MySQL。
package come.prince.spark.demo
import java.io.File
import java.net.InetAddress
import java.util.UUID
import com.maxmind.db.CHMCache
import com.maxmind.geoip2.DatabaseReader
import com.maxmind.geoip2.model.CityResponse
import com.prince.spark.util.ConnectionPool
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.json4s.DefaultFormats
/**
 * Converts the client IPs found in server logs into geographic locations.
 *
 * Reads one parquet log file, counts the requests per `client_IP`, resolves every IP
 * against the GeoLite2-City database and inserts one row per IP into the `ip_position`
 * table through `ConnectionPool`.
 *
 * Created by prince on 2018/3/27.
 */
object BXD_IPConversion {
  import scala.util.control.NonFatal

  // Not referenced anywhere in this object; kept because it is a public member.
  implicit val formats = DefaultFormats
  Logger.getLogger("org").setLevel(Level.WARN)

  /**
   * Job entry point.
   *
   * @param args command-line arguments (unused; input paths and the date are hard-coded)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").getOrCreate()
    try {
      val input = spark.read.parquet("E:\\NB\\log\\bxd\\parquet\\20180523.parquet")
      input.createOrReplaceTempView("table")

      // One CSV line per distinct IP: "<date>,<client_IP>,<request count>".
      val ip = spark
        .sql("select 20180523 as date,client_IP,count(client_IP) from table group by client_IP")
        .rdd
        .map(line => line.mkString(",").replace("[", "").replace("]", ""))

      ip.foreachPartition { p =>
        // Placeholders instead of string concatenation: values containing quotes can no
        // longer break (or inject into) the statement.
        val insertSql =
          "insert into ip_position(id,date,ip,country,province,city,num) values(?,?,?,?,?,?,?)"
        // Location stored for every IP that cannot be resolved ("局域网" = LAN).
        val lanLocation = ("局域网", "局域网", "局域网")

        // Looks up the zh-CN display name. A missing name is stored as the text "null",
        // which is what the previous string-concatenated SQL produced.
        def zhName(names: java.util.Map[String, String]): String =
          Option(names.get("zh-CN")).getOrElse("null")

        // Extracts (country, province, city) from a GeoIP city response.
        // NOTE(review): `getSubdivisions.get(0)` presumably throws when the response has no
        // subdivision, so such public IPs also end up as "局域网" — confirm this is intended.
        def resolveIp(resp: CityResponse): (String, String, String) =
          (
            zhName(resp.getCountry.getNames),
            zhName(resp.getSubdivisions.get(0).getNames),
            zhName(resp.getCity.getNames)
          )

        // The reader, connection and statement are opened once per partition and are
        // always released, even when a row fails.
        val geoDB = new File("E:\\NB\\GeoLite2-City.mmdb\\GeoLite2-City.mmdb")
        val geoIPResolver = new DatabaseReader.Builder(geoDB).withCache(new CHMCache()).build()
        try {
          val conn = ConnectionPool.getConnection
          try {
            val stmt = conn.prepareStatement(insertSql)
            try {
              p.foreach { x =>
                if (x != null && x.nonEmpty) {
                  // Split once; a line with fewer than three fields still fails the task,
                  // as it did before.
                  val fields = x.split(",")
                  val date = fields(0)
                  val clientIp = fields(1)
                  val num = fields(2)

                  // Only the lookup is guarded. A database error now propagates instead of
                  // being recorded as a second, misleading "局域网" row.
                  val (country, province, city) =
                    try resolveIp(geoIPResolver.city(InetAddress.getByName(clientIp)))
                    catch { case NonFatal(_) => lanLocation }

                  stmt.setString(1, UUID.randomUUID.toString)
                  stmt.setString(2, date)
                  stmt.setString(3, clientIp)
                  stmt.setString(4, country)
                  stmt.setString(5, province)
                  stmt.setString(6, city)
                  stmt.setString(7, num)
                  stmt.executeUpdate()
                }
              }
            } finally stmt.close()
          } finally ConnectionPool.returnConnection(conn)
        } finally geoIPResolver.close()
      }
    } finally spark.stop()
  }
}
日志格式(即上面读取的 Parquet 日志文件的 schema):
root
|-- request_time: string (nullable = true)
|-- request_ID: string (nullable = true)
|-- app_ID: string (nullable = true)
|-- client_IP: string (nullable = true)
|-- request_host: string (nullable = true)
|-- request_method: string (nullable = true)
|-- request_url: string (nullable = true)
|-- http_status: integer (nullable = true)
|-- bytes: integer (nullable = true)
|-- response_time: integer (nullable = true)
|-- request_parameter: string (nullable = true)
|-- result_parameter: string (nullable = true)
|-- resultNum_parameter: integer (nullable = true)
|-- resultMessage_parameter: string (nullable = true)
附上 GeoLite2-City 的下载链接:https://download.csdn.net/download/qq_39869388/10433881