处理离线批量数据时,频繁调用在线接口来匹配 IP 所属国家并不合适,因此这里改用离线 GeoIP 库的方式。
GeoIP2下载: https://dev.maxmind.com/geoip/geoip2/downloadable/
本次使用的库文件下载地址:https://download.csdn.net/download/qq_36470898/16103168
由于需要以定时任务的方式执行,我把库文件上传到了 HDFS,并在 mapPartitions 中按分区加载该库来匹配出国家。
依赖:
<dependency>
<groupId>com.maxmind.geoip2</groupId>
<artifactId>geoip2</artifactId>
<version>2.14.0</version>
</dependency>
代码
package com.sm.test
import java.io.InputStream
import java.net.InetAddress
import com.maxmind.db.CHMCache
import com.maxmind.geoip2.DatabaseReader
import com.sm.common.constants.Constants
import com.sm.utils.SparkUtil
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.log4j.{Level, Logger}
import org.slf4j.LoggerFactory
/**
 * Spark batch job that maps the `ip` column of a Hive query result to an ISO
 * country code using an offline GeoLite2 City database stored on HDFS.
 *
 * The database is loaded once per partition (inside `mapPartitions`) so that
 * the reader is created on the JVM that processes the rows instead of being
 * serialized from the driver.
 */
object TestGeolite2 {
  import scala.util.control.NonFatal

  // Lazy so the logger is also available on JVMs where `main` never runs;
  // the previous `var` assigned in `main` would be null there.
  private lazy val logger: org.slf4j.Logger = LoggerFactory.getLogger(this.getClass.getSimpleName)
  private val fs = FileSystem.get(new Configuration())

  /** HDFS location of the GeoLite2 City database. */
  private val GeoDbUrl = "hdfs://BigdataCluster/flink/user-libs/GeoLite2-City_2021-02-04.mmdb"

  /**
   * Opens the GeoLite2 database from HDFS and builds a cached reader.
   *
   * The HDFS stream is closed as soon as the reader is built: per the MaxMind
   * documentation, the InputStream-based builder loads the database into
   * memory, so the stream is not needed afterwards.
   *
   * @return a reader backed by an in-memory copy of the database
   */
  private def openGeoReader(): DatabaseReader = {
    val geoDbInputStream: InputStream = fs.open(new Path(GeoDbUrl))
    try new DatabaseReader.Builder(geoDbInputStream).withCache(new CHMCache()).build()
    finally geoDbInputStream.close()
  }

  /**
   * Resolves an IP string to its ISO country code.
   *
   * @param reader GeoIP2 reader used for the lookup
   * @param ip     textual IP address; may be empty
   * @return the ISO country code, or "" when the IP is empty, cannot be
   *         resolved, or the database has no country code for it
   */
  private def lookupCountry(reader: DatabaseReader, ip: String): String =
    if (ip.isEmpty) ""
    else {
      try {
        val geoResponse = reader.city(InetAddress.getByName(ip))
        // getIsoCode may be null when the record carries no country.
        Option(geoResponse.getCountry.getIsoCode).getOrElse("")
      } catch {
        // Best effort: a failed lookup yields an empty country, not a failed task.
        case NonFatal(e) =>
          logger.warn(s"Failed to resolve country for ip '$ip'", e)
          ""
      }
    }

  /**
   * Entry point: reads a sample of rows from Hive, resolves each row's IP to a
   * country, prints the result and logs the elapsed time.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.hadoop").setLevel(Level.WARN)
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.spark_project.jetty").setLevel(Level.WARN)

    val start = System.currentTimeMillis()
    // Initialize Spark.
    val spark = SparkUtil.getSparkSession(this.getClass.getSimpleName, Constants.SPARK_LOCAL_MODE)
    try {
      import spark.implicits._
      import spark.sql

      // Read the Hive table.
      val ds = sql("select * from hw_data.ods_cp_hw_role_pay where date = '2021-03-11' limit 40")

      val dff = ds.mapPartitions { rows =>
        // One reader per partition; the iterator below is lazy, so the reader
        // must stay usable until the partition has been fully consumed.
        val geoIPResolver = openGeoReader()
        rows.map { item =>
          // Explicit type argument: without it the element type is inferred as
          // Nothing. A null ip is treated as an empty string.
          val ipStr = Option(item.getAs[Any]("ip")).map(_.toString).getOrElse("")
          (ipStr, lookupCountry(geoIPResolver, ipStr))
        }
      }.toDF("ip", "country")

      dff.show(40)
    } finally {
      // Release the session even when the job fails.
      spark.stop()
    }

    val end = System.currentTimeMillis()
    logger.warn(s"=================== 耗时: ${(end - start) / 1000} 秒 ===================")
  }
}
效果
至此,就完成了将 IP 批量转换成国家的处理,希望对你有帮助。