/**
 * Flink distributed-cache demo.
 * step 1: register a local/HDFS file with the execution environment as a cached file
 * step 2: read the cached file back inside the RichMapFunction's open() method
 */
object IPLocation {

  def main(args: Array[String]): Unit = {
    import org.apache.flink.api.scala._

    val env = ExecutionEnvironment.getExecutionEnvironment

    // step 1: register a local/HDFS file under the logical cache name "iplocation";
    // Flink ships it to every TaskManager before the job runs
    val path = "./data/iplocation"
    env.registerCachedFile(path, "iplocation")

    val dataSet = env.fromCollection(createData())

    dataSet
      .map(new RichMapFunction[(String, String, Int, Int, Int, String), (String, String, Int, Int, Int, String)] {

        // ip -> location lookup table, populated once per task instance in open()
        var iplocations = scala.collection.mutable.Map.empty[String, String]

        // step 2: fetch the distributed-cache file and build the lookup table
        override def open(parameters: Configuration): Unit = {
          import scala.collection.JavaConverters._
          val dcFile = getRuntimeContext().getDistributedCache().getFile("iplocation")
          val lines = FileUtils.readLines(dcFile)
          for (ele <- lines.asScala) {
            // each line is expected to be "<ip><whitespace><location>"
            val ss = ele.split("\\s")
            iplocations(ss(0)) = ss(1)
          }
        }

        // Replace the raw ip (field 2) with its resolved location, or "--" when unknown.
        override def map(x: (String, String, Int, Int, Int, String)): (String, String, Int, Int, Int, String) = {
          var address = "--"
          if (iplocations.contains(x._2)) {
            address = iplocations(x._2)
          }
          (x._1, address, x._3, x._4, x._5, x._6)
        }
      })
      .filter(x => {
        x._6.equals("异常") // keep only records labelled "异常" (abnormal)
      })
      .map(x => (x._1, x._2)) // emit (timestamp, location)
      .print()
  }

  /**
   * Generates 10000 random test records of the shape
   * (timestamp, ip, metric1, metric2, metric3, label).
   *
   * Indexes 8 and 9 of the ip pool are deliberately replaced by "xxxx" so that
   * roughly 20% of records carry an ip absent from the lookup file.
   */
  def createData(): Seq[(String, String, Int, Int, Int, String)] = {
    val ips = List[String]("192.168.100.1", "192.168.100.2", "192.168.100.3", "192.168.100.4", "192.168.100.5", "192.168.100.6", "192.168.100.7", "192.168.100.8", "192.168.100.9", "192.168.100.10")
    val labels = List("正常", "异常")
    val r = scala.util.Random
    val list = (1 to 10000).map(x => {
      val index = r.nextInt(10)
      val s = 10 + r.nextInt(50)
      val month = r.nextInt(10)
      val day = 10 + r.nextInt(18)
      val hour = r.nextInt(10)
      val minute = 10 + r.nextInt(50)
      // NOTE(review): month may be 0, producing "2019-00-…" — presumably acceptable
      // for synthetic test data; confirm if real date parsing is ever applied.
      val time = 2019 + "-0" + month + "-" + day + " 0" + hour + ":" + minute + ":" + s
      var ip = ips(index)
      if (index > 7) {
        ip = "xxxx" // unknown ip: exercises the "--" fallback in the map function
      }
      (time, ip, r.nextInt(101), r.nextInt(101), r.nextInt(101), labels(r.nextInt(2)))
    })
    list
  }
}