Integrating Flink with Kafka and Exposing Evaluation State via Queryable State
First, add two getter methods to the utility class to extract the application name and the login user name from a log line; concatenated, the two values serve as the state key.
For details on this utility class, see the earlier post 《接风控责任链之测试与数据抽取工具类》 (the risk-control chain-of-responsibility tests and the data-extraction utility class).
//Extract the application name of the login record (group 2 of LEGAL_PATTERN).
//Note: despite its name, this method returns the application name; the state key
//"张三:QQ" built downstream relies on this behavior, so the names are kept as-is.
public static String getUserIdentify(String input){
    Matcher matcher = EvaluateUtil.LEGAL_PATTERN.matcher(input);
    if(matcher.find()){
        return matcher.group(2);
    }
    return null;
}
//Extract the login user name (group 4 of LEGAL_PATTERN).
public static String getApplicationName(String input){
    Matcher matcher = EvaluateUtil.LEGAL_PATTERN.matcher(input);
    if(matcher.find()){
        return matcher.group(4);
    }
    return null;
}
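As a quick sanity check, the getters can be run against one of the SUCCESS lines from the test data at the end of this post. This is a hedged sketch: the expected outputs assume LEGAL_PATTERN captures the application name in group 2 and the user name in group 4, as the code above implies; adjust if your pattern's groups are ordered differently.

import com.baizhi.until.EvaluateUtil

object GetterSanityCheck {
  def main(args: Array[String]): Unit = {
    //Log line taken from the test section below (user agent shortened here; use the complete line when running)
    val line = "INFO 2020-04-03 10:12:01 QQ SUCCESS [张三] 6ebaf4ac780f40f486359f3ea6934620 \"103158\" jinan \"116.4,39.5\" [1250,14000,2000] \"Mozilla/5.0 ...\""
    println(EvaluateUtil.getUserIdentify(line))    //expected: QQ (the application name, despite the method name)
    println(EvaluateUtil.getApplicationName(line)) //expected: 张三 (the user name)
  }
}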
Next, add the previously built evaluation-model project as a dependency of this Flink integration project:
<!-- The evaluation model built in the earlier posts -->
<dependency>
    <groupId>com.baizhi</groupId>
    <artifactId>EvaluateModel</artifactId>
    <version>1.0-SNAPSHOT</version>
</dependency>
Other dependencies are added as needed:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.10.0</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.10.0</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>2.2.0</version>
</dependency>
<build>
    <plugins>
        <!-- Scala compiler plugin -->
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
            <version>4.0.1</version>
            <executions>
                <execution>
                    <id>scala-compile-first</id>
                    <phase>process-resources</phase>
                    <goals>
                        <goal>add-source</goal>
                        <goal>compile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <!-- Shade plugin to build the fat jar -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.4.3</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                    </configuration>
                </execution>
            </executions>
        </plugin>
        <!-- Java compiler plugin -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.2</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
            </configuration>
            <executions>
                <execution>
                    <phase>compile</phase>
                    <goals>
                        <goal>compile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
Integrating the Flink compute engine and connecting to Kafka
import java.util.Properties
import com.baizhi.enties.{EvaluateReport, HistoryData}
import com.baizhi.evaluate.impl.{AreaEvaluate, DeviceEvaluate, InputFeatureEvaluate, SimilarityEvaluate, SpeedEvaluate, TimeSlotEvaluate, TotalEvaluate}
import com.baizhi.evaluate.{Evaluate, EvaluateChain}
import com.baizhi.until.EvaluateUtil
import com.baizhi.update.impl.{GeoPointsUpdate, HistoryCities, HistoryDeviceInformations, HistoryLoginTime, HistoryLoginTimeSlot, HistoryOrdernessPasswords, LatestInputFeatures}
import com.baizhi.update.{Updater, UpdaterChain}
import org.apache.flink.api.common.functions.{ReduceFunction, RichMapFunction}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.StateTtlConfig.{StateVisibility, UpdateType}
import org.apache.flink.api.common.state.{MapState, MapStateDescriptor, ReducingState, ReducingStateDescriptor, StateTtlConfig, ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.time.Time
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}
import java.util.{ArrayList => JList}
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.Semantic
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchema
import org.codehaus.jackson.map.ObjectMapper
object UserLoginEvaluateTopology {
  def main(args: Array[String]): Unit = {
    //1. Create the stream execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    //Checkpoint every 5s with exactly-once semantics
    env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE)
    //Checkpoint timeout: 4s
    env.getCheckpointConfig.setCheckpointTimeout(4000)
    //At least 2s between the end of one checkpoint and the start of the next; takes precedence over the checkpoint interval
    env.getCheckpointConfig.setMinPauseBetweenCheckpoints(2000)
    //Fail the job on any checkpoint failure (equivalent to setFailOnCheckpointingErrors(true))
    env.getCheckpointConfig.setTolerableCheckpointFailureNumber(0)
    //What happens to checkpoint data when the job is cancelled:
    //RETAIN_ON_CANCELLATION: keep the checkpoint data unless the job was cancelled with --savepoint
    //DELETE_ON_CANCELLATION: delete checkpoints on cancellation (not recommended)
    env.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
    //2. Set up the Kafka consumer properties
    val props = new Properties()
    props.setProperty("bootstrap.servers", "SparkTwo:9092") //Kafka brokers
    props.setProperty("group.id", "g1") //consumer group
    //3. Read the source (topic, deserializer, consumer properties)
    val lines = env.addSource(new FlinkKafkaConsumer[String]("userlogin", new SimpleStringSchema(), props))
    //Sink that writes evaluation reports back to Kafka
    val kafkaSink = new FlinkKafkaProducer[EvaluateReport]("default_topic",
      new UserDefinedKeyedSerializationSchema, props, Semantic.AT_LEAST_ONCE)
    //4. Apply the DataStream transformations
    lines.filter(EvaluateUtil.isLegal(_))
      .map(line =>
        (EvaluateUtil.getApplicationName(line) + ":" + EvaluateUtil.getUserIdentify(line), line)
      )
      .keyBy(t => t._1) //the tuple's first element is both the stream key and the KeyedState key
      .map(new UserHistoryDataMapFunction)
      .filter(_.getApplicationName != null)
      .addSink(kafkaSink)
    //5. Execute the streaming job
    env.execute("UserLoginEvaluateTopology")
  }
  //Serialization schema that routes reports to Kafka topics (see the note after the topology for the non-deprecated alternative)
  class UserDefinedKeyedSerializationSchema extends KeyedSerializationSchema[EvaluateReport] {
    override def serializeKey(t: EvaluateReport): Array[Byte] = null
    override def serializeValue(t: EvaluateReport): Array[Byte] = t.toString.getBytes()
    override def getTargetTopic(t: EvaluateReport): String = {
      if (t.getApplicationName.contains("QQ")) {
        return "topic_qq"
      } else if (t.getApplicationName.contains("wx")) {
        return "topic_wx"
      }
      "topic_all"
    }
  }
  class UserHistoryDataMapFunction extends RichMapFunction[(String, String), EvaluateReport] {
    //Holds the user's history state as a JSON String, so the object travels as JSON and stays decoupled from Flink's type system
    var historyDataState: ValueState[String] = _
    var evaluateReportState: MapState[String, String] = _
    //Counts the user's logins within one day
    var numLoginCountState: ReducingState[Int] = _
    var updates: JList[Updater] = _
    var evaluates: JList[Evaluate] = _
    override def open(parameters: Configuration): Unit = {
      //Descriptor for the history state
      val historyDataDescriptor = new ValueStateDescriptor[String]("HistoryData", createTypeInformation[String])
      //Make the history state queryable
      historyDataDescriptor.setQueryable("queryHistoryData")
      //Descriptor for the evaluation-report state
      val evaluateReportStateDescriptor = new MapStateDescriptor[String, String]("EvaluateReport", createTypeInformation[String], createTypeInformation[String])
      //Make the evaluation state queryable
      evaluateReportStateDescriptor.setQueryable("queryEvaluateReport")
      //Descriptor for the daily login counter
      val numLoginCountDescriptor = new ReducingStateDescriptor[Int]("numLoginCount", new ReduceFunction[Int] {
        override def reduce(value1: Int, value2: Int): Int = value1 + value2
      }, createTypeInformation[Int])
      //Enable the TTL feature on numLoginCount, scoped to one day
      val ttlConfig = StateTtlConfig.newBuilder(Time.days(1))
        .cleanupFullSnapshot() //drop expired entries when taking full snapshots
        .cleanupInRocksdbCompactFilter(1000) //cleanup strategy for the RocksDB backend
        .setUpdateType(UpdateType.OnCreateAndWrite) //the TTL timer is refreshed only on writes
        .setStateVisibility(StateVisibility.NeverReturnExpired) //never return expired values
        .build()
      //Attach the TTL config
      numLoginCountDescriptor.enableTimeToLive(ttlConfig)
      historyDataState = getRuntimeContext.getState(historyDataDescriptor) //history state
      numLoginCountState = getRuntimeContext.getReducingState(numLoginCountDescriptor) //daily login count
      evaluateReportState = getRuntimeContext.getMapState(evaluateReportStateDescriptor) //evaluation state
      //Build the update chain for the history data
      updates = new JList[Updater]()
      updates.add(new GeoPointsUpdate)
      updates.add(new HistoryCities)
      updates.add(new HistoryDeviceInformations)
      updates.add(new HistoryLoginTime)
      updates.add(new HistoryLoginTimeSlot)
      updates.add(new HistoryOrdernessPasswords)
      updates.add(new LatestInputFeatures)
      //Build the evaluation chain
      evaluates = new JList[Evaluate]()
      evaluates.add(new AreaEvaluate)
      evaluates.add(new DeviceEvaluate)
      evaluates.add(new InputFeatureEvaluate)
      evaluates.add(new SimilarityEvaluate(0.9))
      evaluates.add(new SpeedEvaluate(750.0))
      evaluates.add(new TimeSlotEvaluate(1))
      evaluates.add(new TotalEvaluate(2))
    }
    override def map(value: (String, String)): EvaluateReport = {
      //Fetch the user's history state
      val historyDataJson = historyDataState.value()
      //Jackson handles (de)serialization, so other applications can read the same stored state
      val mapper = new ObjectMapper()
      var historyData: HistoryData = null
      if (historyDataJson == null) {
        historyData = new HistoryData()
      } else {
        //Deserialize with Jackson
        historyData = mapper.readValue(historyDataJson, classOf[HistoryData])
      }
      //Empty fallback report; its applicationName is null, so the downstream filter discards it
      val emptyReport = new EvaluateReport()
      //Branch on the log record type
      if (EvaluateUtil.isLoginSuccess(value._2)) { //SUCCESS record: update historyData
        //Parse the record
        val loginSuccessData = EvaluateUtil.parseLoginSuccessData(value._2)
        val updaterChain = new UpdaterChain(updates)
        updaterChain.doChain(loginSuccessData, historyData)
        //Count this login towards the daily total
        numLoginCountState.add(1)
        //Serialize the updated history back into the state
        historyDataState.update(mapper.writeValueAsString(historyData))
      } else if (EvaluateUtil.isEvaluate(value._2)) { //EVALUATE record: run the evaluation chain
        val evaluateData = EvaluateUtil.parseEvaluateData(value._2)
        //Create a report
        val evaluateReport = new EvaluateReport(evaluateData.getApplicationName,
          evaluateData.getUserIdentify,
          evaluateData.getLoginSequence,
          evaluateData.getEvaluateTime,
          evaluateData.getCityName,
          evaluateData.getGeoPoint
        )
        //Build and run the evaluation chain
        val evaluateChain = new EvaluateChain(evaluates)
        evaluateChain.doChain(evaluateData, historyData, evaluateReport)
        //Store the report as JSON, keyed by login sequence (suggestion: use a date:loginSequence key and purge entries older than a week before each put)
        evaluateReportState.put(evaluateData.getLoginSequence, mapper.writeValueAsString(evaluateReport))
        return evaluateReport
      }
      emptyReport
    }
  }
}
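A side note on the sink: KeyedSerializationSchema is deprecated as of Flink 1.10. Below is a minimal sketch of the same topic-routing logic on the replacement KafkaSerializationSchema interface; the class name is mine and the snippet is untested against the evaluation model, so treat it as a reference rather than the post's implementation.

import java.lang.{Long => JLong}
import com.baizhi.enties.EvaluateReport
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema

class ReportKafkaSerializationSchema extends KafkaSerializationSchema[EvaluateReport] {
  override def serialize(t: EvaluateReport, timestamp: JLong): ProducerRecord[Array[Byte], Array[Byte]] = {
    //Same routing rule as getTargetTopic above
    val topic =
      if (t.getApplicationName.contains("QQ")) "topic_qq"
      else if (t.getApplicationName.contains("wx")) "topic_wx"
      else "topic_all"
    new ProducerRecord[Array[Byte], Array[Byte]](topic, t.toString.getBytes("UTF-8"))
  }
}

It plugs into the producer the same way: new FlinkKafkaProducer[EvaluateReport]("default_topic", new ReportKafkaSerializationSchema, props, Semantic.AT_LEAST_ONCE).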
Package the project and submit the fat jar through the Flink Web UI.
Note:
If the evaluation-model jar cannot be resolved, run install on the evaluation-model project;
this puts its jar into the local Maven repository so this project can reference it.
If the model project's code changes, re-run install so the jar in the repository stays current.
Appendix: querying the state
The stream job above maintains three pieces of state:
1. the history data
2. the number of logins per day
3. the evaluation reports
A business system can grade the risk of a user login by querying the evaluation report; this is the asynchronous approach.
Synchronous approach: the business system computes the login risk itself, which requires the compute layer to expose a "history data" query interface.
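A setup reminder that is easy to miss (my note, not part of the original walkthrough): queryable state must be enabled on the cluster before these clients can connect. For Flink 1.10 this means setting queryable-state.enable: true in flink-conf.yaml and copying the flink-queryable-state-runtime jar from the distribution's opt/ directory into lib/; the client programs below also need the flink-queryable-state-client-java artifact on their classpath.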
Note: the job ID to pass to the client is the ID of the running job, as shown in the Flink Web UI.
Querying the history data
import org.apache.flink.api.common.{ExecutionConfig, JobID}
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.queryablestate.client.QueryableStateClient
import org.apache.flink.api.scala._
object History {
  def main(args: Array[String]): Unit = {
    /**
     * Query the history state.
     */
    //Connect to the queryable-state proxy server
    val client = new QueryableStateClient("SparkTwo", 9069)
    val jobID = JobID.fromHexString("d7dea1883d218879af44bfe1e30f6194")
    val queryName = "queryHistoryData" //the queryable state name
    val queryKey = "张三:QQ" //the key to look up
    //Create a state descriptor
    val rsd = new ValueStateDescriptor[String]("queryHistoryData", TypeInformation.of(classOf[String]).createSerializer(new ExecutionConfig))
    //Issue the query
    val resultFuture = client.getKvState(jobID, queryName, queryKey, createTypeInformation[String], rsd)
    //Block until the result arrives
    val state: ValueState[String] = resultFuture.get()
    //Print it
    println("result: " + state.value())
    //Shut down the client
    client.shutdownAndWait()
  }
}
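Since getKvState returns a java.util.concurrent.CompletableFuture, the blocking get() above can be swapped for a callback when the caller should not stall. A minimal sketch (Scala 2.11 needs the explicit Consumer, and the client must stay alive until the callback fires before shutdownAndWait() is called):

import java.util.function.Consumer
import org.apache.flink.api.common.state.ValueState

resultFuture.thenAccept(new Consumer[ValueState[String]] {
  override def accept(state: ValueState[String]): Unit =
    println("async result: " + state.value())
})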
Querying the evaluation state
import org.apache.flink.api.common.state.{MapState, MapStateDescriptor}
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.{ExecutionConfig, JobID}
import org.apache.flink.api.scala.createTypeInformation
import org.apache.flink.queryablestate.client.QueryableStateClient
object ReadReportQueryableState {
  def main(args: Array[String]): Unit = {
    //Create a queryable-state client
    val client = new QueryableStateClient("pro1", 9069)
    //The job ID
    val id = JobID.fromHexString("d9751ed5b01767a4c83025351b58527e")
    //The queryable state name
    val name = "queryEvaluateReport"
    //Type information for the key
    val ty: TypeInformation[String] = createTypeInformation[String]
    //Create a state descriptor (the descriptor name is only a placeholder on the client side)
    val des = new MapStateDescriptor[String, String]("xxxxx",
      createTypeInformation[String].createSerializer(new ExecutionConfig),
      createTypeInformation[String].createSerializer(new ExecutionConfig))
    //Issue the query
    val result = client.getKvState(id, name, "张三:QQ", ty, des)
    //Block until the result arrives
    val state: MapState[String, String] = result.get()
    //Print the entries
    println(state.entries())
    //Shut down the client
    client.shutdownAndWait()
  }
}
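println(state.entries()) prints whatever the underlying collection's toString yields; for controlled output, the entries can be iterated explicitly, e.g. with the Java-to-Scala converters:

import scala.collection.JavaConverters._

for (e <- state.entries().asScala) {
  println(e.getKey + " -> " + e.getValue) //loginSequence -> evaluation report JSON
}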
Sending test data with the Kafka console producer
[root@SparkTwo kafka_2.11-2.2.0]# ./bin/kafka-console-producer.sh --broker-list SparkTwo:9092 --topic userlogin
>INFO 2020-04-03 10:12:01 QQ SUCCESS [张三] 6ebaf4ac780f40f486359f3ea6934620 "103158" jinan "116.4,39.5" [1250,14000,2000] "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
>INFO 2020-04-03 10:12:01 QQ SUCCESS [张三] 6ebaf4ac780f40f486359f3ea6934620 "103158" jinan "116.4,39.5" [1250,14000,2000] "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
>INFO 2020-04-03 10:12:00 WX SUCCESS [张三] 6ebaf4ac780f40f486359f3ea6934623 "456123" Zhengzhou "114.4,34.5" [1400,16000,2100] "Mozilla/8.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
>INFO 2020-04-02 10:50:00 QQ SUCCESS [张三] 6ebaf4ac780f40f486359f3ea6934622 "123456" Beijing "116.4,39.5" [1300,17000,2200] "Mozilla/7.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
>INFO 2020-04-02 10:52:00 QQ EVALUATE [张三] 6ebaf4ac780f40f486359f3ea6934622 "123456" Beijing "116.4,39.5" [1300,17000,2200] "Mozilla/7.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
>INFO 2020-04-03 10:54:01 WX EVALUATE [张三] 6ebaf4ac780f40f486359f3ea6934620 "123458" Zhengzhou "114.4,34.5" [1250,14000,2000] "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"