// Author: 小涛
object Legend01 {
/** Logger registered under the name "Legend"; used to report input lines that fail to parse. */
private val logger: Logger = LoggerFactory.getLogger("Legend")
def main(args: Array[String]): Unit = {
// Exactly one argument is required: the input path of the applist data.
// Otherwise print the usage text and terminate with exit code 1.
if (args.length != 1) {
  println(
    """
      |请输入applist数据的输入路径:
      |input:
      |""".stripMargin)
  sys.exit(1)
}
// Destructure the single argument into the input path.
val Array(input) = args
// Build (or reuse) the SparkSession: local master on all cores, Kryo
// serialization, and the Elasticsearch connection settings ("es.*").
val session = SparkSession.builder()
  .master("local[*]")
  .config("spark.testing.memory", "2147480000")
  .appName(Legend01.getClass.getSimpleName)
  .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  // NOTE(review): "ip" looks like a placeholder for the real Elasticsearch host — confirm.
  .config("es.nodes", "ip")
  .config("es.port", "9200")
  .config("es.index.auto.create", "true")
  .getOrCreate()
// Read the applist data as lines of text and drop duplicate lines.
// Declared as a val because the reference is never reassigned.
val Get = session.read.textFile(input).distinct()
// Import the implicit conversions (encoders) provided by the session
import session.implicits._
// Turn each raw JSON line into a flat "imei,imsi,app1,app2,...," string.
// Lines that cannot be processed (malformed JSON, missing "extras" or
// "app_list") are logged together with the cause and mapped to null, so the
// null filter that follows can drop them.
// NOTE(review): assumes `JSON` is fastjson's com.alibaba.fastjson.JSON — confirm.
val jsonj = Get.map { line =>
  try {
    val record = JSON.parseObject(line)
    val imei = record.getString("imei")
    val imsi = record.getString("imsi")
    val apps = record.getJSONObject("extras").getJSONArray("app_list")
    // Read the entries through the array API rather than slicing the array's
    // JSON text on ",": slicing broke on entries that contain a comma and
    // threw on an empty list. Every entry keeps its trailing "," as before.
    val appCsv = (0 until apps.size()).map(i => apps.getString(i) + ",").mkString
    s"$imei,$imsi,$appCsv"
  } catch {
    // NonFatal lets JVM-level failures (e.g. OutOfMemoryError) propagate.
    case scala.util.control.NonFatal(e) =>
      logger.error("数据为不合法数据请检查不合法数据是否有用 : " + line, e)
      null
  }
}
// Drop the null placeholders emitted for unparseable lines and cache the
// surviving rows.
val dewq = jsonj.filter(b => b != null).cache()
// Expose the same rows as an RDD. They are already null-free and cached via
// dewq, so the filter and cache are not repeated here.
val mj = dewq.rdd
// Keep only the rows that contain, anywhere in the line, at least one of the
// finance-related keywords or app package names listed below.
// An alternative package set was used here previously and is left disabled:
//   com.aifeng.woshichuanqi.yingyongbao, com.tencent.cqsj, com.tencent.tmgp.rxcq,
//   com.tencent.tmgp.shenghewzcq, com.tencent.tmgp.xymobile, com.tmcq.anzhi
val dewr = {
  val keywords = Seq(
    "金融",
    "贷款",
    "信用卡",
    "com.hejgjs.shiyuantouzi",
    "net.metaquotes.metatrader4",
    "disk.micro.com.microdisk",
    "com.pme91.guoxin.app.android",
    "com.gwtsz.gts2.bj",
    "com.pme91.app.android",
    "io.dcloud.com.FKqihuo.app",
    "com.qhdhcd.cn",
    "com.gwtsz.gts2mj1.hx",
    "com.baidao.ytxmobile",
    "com.jin91.baiyintong",
    "com.wattforex"
  )
  mj.filter(x => keywords.exists(k => x.contains(k)))
}
// Split each matching row on "," and keep its first two fields as a pair.
val Arrt: RDD[(String, String)] = dewr.map { row =>
  val fields = row.split(",")
  (fields(0), fields(1))
}
/** Record holding the two string fields, imei and imsi, of one output row. */
case class Ao(imei: String, imsi: String)
// Wrap every pair in an Ao and write the result to the Elasticsearch
// resource "sparktes/test02".
Arrt
  .map { case (imei, imsi) => Ao(imei, imsi) }
  .saveToEs("sparktes/test02")