Flink Project: Real-Time E-Commerce Data Analysis (Part 3)
This article continues Part 2: https://blog.csdn.net/weixin_38255444/article/details/104820912
5. Flink Real-Time Business Development
- Import the Maven POM
```xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <parent>
    <artifactId>FlinkCase</artifactId>
    <groupId>com.ityouxin</groupId>
    <version>1.0-SNAPSHOT</version>
  </parent>
  <modelVersion>4.0.0</modelVersion>
  <artifactId>realProcess</artifactId>

  <properties>
    <scala.binary.version>2.11</scala.binary.version>
    <flink.version>1.6.0</flink.version>
    <hadoop.version>2.6.0</hadoop.version>
    <hbase.version>1.2.0</hbase.version>
    <cdh.version>cdh5.14.0</cdh.version>
  </properties>

  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-kafka-0.9_${scala.binary.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-shaded-jackson -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-shaded-jackson</artifactId>
      <version>2.7.9-2.0</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.47</version>
    </dependency>
    <dependency>
      <groupId>redis.clients</groupId>
      <artifactId>jedis</artifactId>
      <version>2.9.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}-${cdh.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>${hbase.version}-${cdh.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table_${scala.binary.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-scala_${scala.binary.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}-${cdh.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>${hadoop.version}-${cdh.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
      <exclusions>
        <exclusion>
          <groupId>com.google.protobuf</groupId>
          <artifactId>protobuf-java</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-hbase_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>
  </dependencies>

  <profiles>
    <!-- development environment (active by default) -->
    <profile>
      <id>dev</id>
      <activation>
        <activeByDefault>true</activeByDefault>
        <property>
          <name>env</name>
          <value>Dev</value>
        </property>
      </activation>
      <build>
        <resources>
          <resource>
            <directory>src/main/resources/dev</directory>
          </resource>
        </resources>
      </build>
    </profile>
    <!-- production environment -->
    <profile>
      <id>pro</id>
      <activation>
        <property>
          <name>env</name>
          <value>pro</value>
        </property>
      </activation>
      <build>
        <resources>
          <resource>
            <directory>src/main/resources/pro</directory>
          </resource>
        </resources>
      </build>
    </profile>
    <!-- test environment -->
    <profile>
      <id>test</id>
      <activation>
        <property>
          <name>env</name>
          <value>test</value>
        </property>
      </activation>
      <build>
        <resources>
          <resource>
            <directory>src/main/resources/test</directory>
          </resource>
        </resources>
      </build>
    </profile>
  </profiles>
</project>
```
- Add development, production, and test environments
One resources directory per environment:
- Development: dev
- Production: pro
- Test: test

The environment is selected through the Maven profiles above, e.g. `mvn clean package -Denv=pro`; the `dev` profile is active by default.
- Add the configuration file application.conf
```hocon
# Kafka
bootstrap.servers="hadoop01:9092,hadoop02:9092,hadoop03:9092"
zookeeper.connect="hadoop01:2181,hadoop02:2181,hadoop03:2181"
input.topic="test"
group.id="test"
enable.auto.commit="true"        # read by GlobalConfigUtils; example value
auto.commit.interval.ms="5000"   # read by GlobalConfigUtils; example value
auto.offset.reset="latest"       # read by GlobalConfigUtils; example value

# HBase
hbase.master="hadoop01:60000"
hbase.zookeeper.quorum="hadoop01:2181,hadoop02:2181,hadoop03:2181"
hbase.zookeeper.property.clientPort="2181"
hbase.rpc.timeout="60000"
hbase.client.operation.timeout="20000"
hbase.client.scanner.timeout.period="200000"

# application
maxDelayTime="2000"              # watermark delay in ms; example value
timeWindow="3"                   # window length in seconds; example value
```
- Develop a utility class for reading the configuration
```scala
package com.ityouxin.tools

import com.typesafe.config.ConfigFactory

object GlobalConfigUtils {
  // load application.conf from the classpath
  private val conf = ConfigFactory.load()

  // Kafka settings
  def bootstrapServers: String = conf.getString("bootstrap.servers")
  def zookeeperConnect: String = conf.getString("zookeeper.connect")
  def inputTopic: String = conf.getString("input.topic")
  def groupId: String = conf.getString("group.id")
  def autoCommit: String = conf.getString("enable.auto.commit")
  def autoCommitInterval: String = conf.getString("auto.commit.interval.ms")
  def autoOffsetReset: String = conf.getString("auto.offset.reset")

  // HBase settings
  def zookeeperQuorum: String = conf.getString("hbase.zookeeper.quorum")
  def hbaseMaster: String = conf.getString("hbase.master")
  def clientPort: String = conf.getString("hbase.zookeeper.property.clientPort")
  def rpcTimeout: String = conf.getString("hbase.rpc.timeout")
  def operatorTimeout: String = conf.getString("hbase.client.operation.timeout")
  def timeoutPeriod: String = conf.getString("hbase.client.scanner.timeout.period")

  // application settings
  def maxDelayTime: Long = conf.getString("maxDelayTime").toLong
  def timeWindow: Long = conf.getString("timeWindow").toLong
}
```
- Problems and analysis
1. Before writing any code, two questions must be answered:
   1) How do we keep data safe during processing, i.e. prevent data loss?
   2) What do we do about network delay? With delays, records that belong to batch 1 may end up being processed in batch N, so the results are no longer consistent in real time.
2. Solving the data-loss problem
In Flink this problem is straightforward to solve:
The checkpoint mechanism is the cornerstone of Flink's reliability. When an operator fails for some reason (e.g. an abnormal exit), it allows the state of the whole dataflow graph to be restored to some earlier point, keeping the application state consistent. Flink's checkpointing is based on the Chandy-Lamport algorithm.

For every application that enables checkpointing, Flink's JobManager creates a CheckpointCoordinator at startup, which is solely responsible for that application's snapshots:
1. The CheckpointCoordinator periodically sends a barrier to all source operators of the dataflow.
2. When a source operator receives a barrier, it pauses processing, snapshots its current state to the configured persistent storage, reports the result to the CheckpointCoordinator, broadcasts the barrier to all of its downstream operators, and resumes processing.
3. When a downstream operator receives the barrier, it likewise pauses, snapshots its state to persistent storage, reports to the CheckpointCoordinator, broadcasts the barrier to its own downstream operators, and resumes processing.
4. Every operator repeats step 3 until the barrier finally reaches the sinks and the snapshot is complete.
5. When the CheckpointCoordinator has received reports from all operators, the checkpoint for that period is considered successful; if the reports do not all arrive within the configured timeout, it is considered failed.
6. Three checkpoint state backends are currently available: MemoryStateBackend, FsStateBackend, and RocksDBStateBackend.
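As a minimal sketch (not part of the original project code), checkpointing could be enabled on the environment used by the App below; the interval, timeout, and HDFS path are assumptions:

```scala
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

object CheckpointSetup {
  def configure(senv: StreamExecutionEnvironment): Unit = {
    // snapshot the dataflow every 5 s with exactly-once guarantees
    senv.enableCheckpointing(5000)
    senv.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
    // keep at least 500 ms between checkpoints; abort checkpoints running longer than 60 s
    senv.getCheckpointConfig.setMinPauseBetweenCheckpoints(500)
    senv.getCheckpointConfig.setCheckpointTimeout(60000)
    // persist snapshots to HDFS (the path is an assumption)
    senv.setStateBackend(new FsStateBackend("hdfs://hadoop01:8020/flink/checkpoints"))
  }
}
```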
- Solving the network-delay problem
An unbounded stream cannot be aggregated directly: it has no end, so we can neither count how many records have passed through nor compute a maximum, minimum, average, or running total over "the whole stream". If we cut a fixed-size slice out of the stream, that slice can be aggregated; this is what windows do.

Flink distinguishes three notions of time, and each can anchor a window:
1. Event time: windows defined over EventTime form EventTimeWindows, which require every record to carry its own event timestamp.
2. Ingestion time: windows over IngestionTime form IngestionTimeWindows, based on the system time at the source.
3. Processing time: windows over ProcessingTime form ProcessingTimeWindows, based on the system time of the operator.

Flink was designed from the start with network delay and out-of-order data in mind, and introduces an abstraction to deal with them: the watermark. With a maximum allowed delay d, the watermark trails the largest event time seen so far by d, and an event-time window only fires once the watermark passes its end.
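The App below implements this pattern by hand with an AssignerWithPeriodicWatermarks. As a sketch of the same idea, Flink also ships a ready-made helper that emits `max event time - allowed delay` as the watermark (Message is the bean defined later in this post):

```scala
import com.ityouxin.process.bean.Message
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.windowing.time.Time

// watermark = largest seen event time - maxDelayMs
class MessageTimestampExtractor(maxDelayMs: Long)
    extends BoundedOutOfOrdernessTimestampExtractor[Message](Time.milliseconds(maxDelayMs)) {
  // each Message carries its event time
  override def extractTimestamp(element: Message): Long = element.timestamp
}
```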
- Developing the App driver class
```scala
package com.ityouxin.process

import java.util.Properties

import com.alibaba.fastjson.{JSON, JSONObject}
import com.ityouxin.process.bean.{Message, UserBrowse}
import com.ityouxin.process.task._
import com.ityouxin.tools.GlobalConfigUtils
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09
import org.apache.kafka.clients.consumer.ConsumerConfig

object App {
  def main(args: Array[String]): Unit = {
    // initialize the streaming environment
    val senv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // use event time so that late data can be handled through watermarks
    senv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    senv.setParallelism(1)

    // Kafka consumer configuration
    val props: Properties = new Properties()
    props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, GlobalConfigUtils.bootstrapServers)
    props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GlobalConfigUtils.groupId)
    // a consumer needs deserializer classes here
    props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringDeserializer")
    props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringDeserializer")
    props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, GlobalConfigUtils.autoCommit)
    props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, GlobalConfigUtils.autoCommitInterval)
    props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, GlobalConfigUtils.autoOffsetReset)

    // pull data from Kafka as a DataStream
    val kafkaSource = new FlinkKafkaConsumer09[String](GlobalConfigUtils.inputTopic, new SimpleStringSchema(), props)
    val logDataStream: DataStream[String] = senv.addSource(kafkaSource)

    // parse each JSON log line into a Message bean
    val messageDS: DataStream[Message] = logDataStream.map { log =>
      println(log) // debug output
      val jSONObject: JSONObject = JSON.parseObject(log)
      val count = jSONObject.get("count").toString.toInt
      val message: String = jSONObject.get("message").toString
      val timestamp: Long = jSONObject.get("timestamp").toString.toLong
      Message(UserBrowse.toBean(message), count, timestamp)
    }

    // assign timestamps and watermarks so out-of-order events are tolerated
    val watermarkDS: DataStream[Message] = messageDS.assignTimestampsAndWatermarks(
      new AssignerWithPeriodicWatermarks[Message] {
        var currentMaxTimestamp = 0L
        val maxDelayTime: Long = GlobalConfigUtils.maxDelayTime

        // the watermark trails the largest seen event time by maxDelayTime
        override def getCurrentWatermark: Watermark =
          new Watermark(currentMaxTimestamp - maxDelayTime)

        override def extractTimestamp(element: Message, previousElementTimestamp: Long): Long = {
          val timestamp: Long = element.timestamp
          // track the largest event time seen so far
          currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp)
          timestamp
        }
      })

    // business logic
    // 1. real-time channel hotness
    ChannelRealHotTask.process(watermarkDS)
    // the remaining tasks developed below are registered on the same watermarked stream
    ChannelPVUVTask.process(watermarkDS)
    ChannelUserFreshnessTask.process(watermarkDS)
    ChannelRegionTask.process(watermarkDS)
    ChannelNetWorkTask.process(watermarkDS)
    ChannelBrowserTask.process(watermarkDS)

    // nothing runs until the job graph is submitted
    senv.execute("realProcess")
  }
}
```
- Business 1: real-time channel hotness
Real-time channel hotness is simply the number of clicks each channel receives within the configured time window.
1. Task

```scala
package com.ityouxin.process.task

import com.ityouxin.process.bean.{ChannelRealHot, Message}
import com.ityouxin.process.inerface.DataProcess
import com.ityouxin.process.map.RealHostMap
import com.ityouxin.process.reduce.ChannelRealHotReduce
import com.ityouxin.process.sink.ChannelRealHotSink
import com.ityouxin.tools.GlobalConfigUtils
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

// business 1: real-time channel hotness
// every task is invoked through the DataProcess trait
object ChannelRealHotTask extends DataProcess {
  override def process(ds: DataStream[Message]): Unit = {
    // map each message to a (channelID, 1) bean
    val flatMapDS: DataStream[ChannelRealHot] = ds.flatMap(new RealHostMap)
    // key the stream by channel ID
    val keyByedDS: KeyedStream[ChannelRealHot, Long] = flatMapDS.keyBy(item => item.channelID)
    // tumbling event-time window
    val windowedDS: WindowedStream[ChannelRealHot, Long, TimeWindow] =
      keyByedDS.timeWindow(Time.seconds(GlobalConfigUtils.timeWindow))
    // count the clicks per channel inside each window
    val reduceDS: DataStream[ChannelRealHot] = windowedDS.reduce(new ChannelRealHotReduce)
    // write the counts to HBase
    reduceDS.addSink(new ChannelRealHotSink)
  }
}
```

2. Beans

```scala
package com.ityouxin.process.bean

case class ChannelRealHot(channelID: Long, count: Long)

case class Message(userBrowse: UserBrowse, count: Int, timestamp: Long)
```

3. Interface

```scala
package com.ityouxin.process.inerface

import com.ityouxin.process.bean.Message
import org.apache.flink.streaming.api.scala.DataStream

// common entry point implemented by every business task
trait DataProcess {
  def process(ds: DataStream[Message])
}
```

4. FlatMap

```scala
package com.ityouxin.process.map

import com.ityouxin.process.bean.{ChannelRealHot, Message}
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

class RealHostMap extends FlatMapFunction[Message, ChannelRealHot] {
  override def flatMap(value: Message, out: Collector[ChannelRealHot]): Unit = {
    // take the channel ID from the browse bean
    val channelId: Long = value.userBrowse.channelId
    // emit one click for this channel
    out.collect(ChannelRealHot(channelId, 1))
  }
}
```

5. Reduce

```scala
package com.ityouxin.process.reduce

import com.ityouxin.process.bean.ChannelRealHot
import org.apache.flink.api.common.functions.ReduceFunction

class ChannelRealHotReduce extends ReduceFunction[ChannelRealHot] {
  // sum the click counts per channel
  override def reduce(t: ChannelRealHot, t1: ChannelRealHot): ChannelRealHot =
    ChannelRealHot(t.channelID, t.count + t1.count)
}
```

6. Sink

```scala
package com.ityouxin.process.sink

import com.ityouxin.process.bean.ChannelRealHot
import com.ityouxin.tools.HbaseUtils
import org.apache.commons.lang.StringUtils
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

class ChannelRealHotSink extends SinkFunction[ChannelRealHot] {
  override def invoke(channelRealHot: ChannelRealHot, context: SinkFunction.Context[_]): Unit = {
    // channel ID and the click count of this window
    val channelID: Long = channelRealHot.channelID
    var count: Long = channelRealHot.count
    val tabChannel: TableName = TableName.valueOf("channel")
    val rowKey = channelID
    val columnFamily = "info"
    val column = "count"
    // read back the historical count
    val value: String = HbaseUtils.getTablec(tabChannel, rowKey.toString, columnFamily, column)
    // accumulate if history exists
    if (StringUtils.isNotEmpty(value)) {
      count += value.toLong
    }
    // fields to write
    val fieldsData = new mutable.HashMap[String, String]()
    fieldsData.put(column, count.toString)
    // write to HBase
    HbaseUtils.putData(tabChannel, rowKey.toString, columnFamily, fieldsData)
  }
}
```
- Business 2: channel PV and UV
PV (page views): one page load or refresh counts as one view.
UV (unique visitors): within one day (00:00-24:00) the same client is counted only once.

1. Task

```scala
package com.ityouxin.process.task

import com.ityouxin.process.bean.{ChannelPVUV, Message}
import com.ityouxin.process.inerface.DataProcess
import com.ityouxin.process.map.ChannelPVUVMap
import com.ityouxin.process.reduce.ChannelPVUVReduce
import com.ityouxin.process.sink.ChannelPVUVSink
import com.ityouxin.tools.GlobalConfigUtils
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

// business 2: channel PV / UV
object ChannelPVUVTask extends DataProcess {
  override def process(ds: DataStream[Message]): Unit = {
    // turn the message stream into the business bean
    val flatedMapDS: DataStream[ChannelPVUV] = ds.flatMap(new ChannelPVUVMap)
    // key by the composite of channel ID and time dimension (hour / day / month)
    val keyedS: KeyedStream[ChannelPVUV, String] = flatedMapDS.keyBy(item => item.channelID + item.dateField)
    // tumbling event-time window
    val window: WindowedStream[ChannelPVUV, String, TimeWindow] =
      keyedS.timeWindow(Time.seconds(GlobalConfigUtils.timeWindow))
    // aggregate per channel and time dimension
    val result: DataStream[ChannelPVUV] = window.reduce(new ChannelPVUVReduce)
    // write to HBase
    result.addSink(new ChannelPVUVSink)
  }
}
```

2. Bean

```scala
package com.ityouxin.process.bean

case class ChannelPVUV(
  channelID: Long,
  userID: Long,
  timestamp: Long,
  pv: Long,
  uv: Long,
  dateField: String)
```

3. Map. The original post repeated ChannelRegionMap (from business 4) here; the sketch below is a plausible ChannelPVUVMap that follows the same UserState pattern as the other map functions:

```scala
package com.ityouxin.process.map

import com.ityouxin.process.bean.{ChannelPVUV, Message, UserState}
import com.ityouxin.tools.TimeUtils
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

class ChannelPVUVMap extends FlatMapFunction[Message, ChannelPVUV] {
  override def flatMap(value: Message, out: Collector[ChannelPVUV]): Unit = {
    // time dimensions
    val timestamp = value.userBrowse.entryTime
    val hour = TimeUtils.getDate(timestamp, "yyyyMMddHH")
    val day = TimeUtils.getDate(timestamp, "yyyyMMdd")
    val month = TimeUtils.getDate(timestamp, "yyyyMM")
    val userId = value.userBrowse.userId
    val channelId = value.userBrowse.channelId

    // each browse is one page view; a user adds one UV per dimension only on the first visit
    val state: UserState = UserState.getUserState(userId.toString, timestamp)
    val pv = 1L
    val hourUV = if (state.isNew || state.isFirstHour) 1L else 0L
    val dayUV = if (state.isNew || state.isFirstDay) 1L else 0L
    val monthUV = if (state.isNew || state.isFirstMonth) 1L else 0L

    // emit one record per time dimension
    out.collect(ChannelPVUV(channelId, userId, timestamp, pv, hourUV, hour))
    out.collect(ChannelPVUV(channelId, userId, timestamp, pv, dayUV, day))
    out.collect(ChannelPVUV(channelId, userId, timestamp, pv, monthUV, month))
  }
}
```
4. Reduce

```scala
package com.ityouxin.process.reduce

import com.ityouxin.process.bean.ChannelPVUV
import org.apache.flink.api.common.functions.ReduceFunction

class ChannelPVUVReduce extends ReduceFunction[ChannelPVUV] {
  override def reduce(value1: ChannelPVUV, value2: ChannelPVUV): ChannelPVUV = {
    ChannelPVUV(
      value1.channelID,         // channel
      value1.userID,
      value1.timestamp,
      value1.pv + value2.pv,    // sum the PVs
      value1.uv + value2.uv,    // sum the UVs
      value1.dateField)         // hour | day | month dimension
  }
}
```

5. Sink. The original listing stopped after reading the historical values; the accumulate-and-write step below follows the same pattern as the other sinks in this post:

```scala
package com.ityouxin.process.sink

import com.ityouxin.process.bean.ChannelPVUV
import com.ityouxin.tools.HbaseUtils
import org.apache.commons.lang.StringUtils
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

class ChannelPVUVSink extends SinkFunction[ChannelPVUV] {
  override def invoke(value: ChannelPVUV, context: SinkFunction.Context[_]): Unit = {
    // table and column layout
    val tableName: TableName = TableName.valueOf("channelPVUV")
    val columnFamily = "info"
    // rowkey: zero-padded channel ID + time dimension
    val rowKey = HbaseUtils.lpad(value.channelID.toString, 3) + ":" + value.dateField
    // read back the historical PV / UV
    val pvData: String = HbaseUtils.getTablec(tableName, rowKey, columnFamily, "PV")
    val uvData: String = HbaseUtils.getTablec(tableName, rowKey, columnFamily, "UV")
    // accumulate with history and write back
    var pv = value.pv
    var uv = value.uv
    if (StringUtils.isNotBlank(pvData)) pv += pvData.toLong
    if (StringUtils.isNotBlank(uvData)) uv += uvData.toLong
    val map = new mutable.HashMap[String, String]()
    map.put("PV", pv.toString)
    map.put("UV", uv.toString)
    HbaseUtils.putData(tableName, rowKey, columnFamily, map)
  }
}
```
- Business 3: channel user freshness
This task maps the data into a freshness bean: counts of new versus returning users per channel and time dimension.
1. Task

```scala
package com.ityouxin.process.task

import com.ityouxin.process.bean.{ChannelUserFreshness, Message}
import com.ityouxin.process.inerface.DataProcess
import com.ityouxin.process.map.ChannelUserFreshnessMap
import com.ityouxin.process.reduce.ChannelUserFreshnessReduce
import com.ityouxin.process.sink.ChannelUserFreshnessSink
import com.ityouxin.tools.GlobalConfigUtils
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

// business 3: map the data into the freshness bean
object ChannelUserFreshnessTask extends DataProcess {
  override def process(ds: DataStream[Message]): Unit = {
    // turn Message into the business bean
    val flatMapedDS: DataStream[ChannelUserFreshness] = ds.flatMap(new ChannelUserFreshnessMap)
    // composite key of channel ID and time dimension (hour / day / month);
    // keying by channelID alone would mix the three dimensions emitted by the map
    val keyedS: KeyedStream[ChannelUserFreshness, String] =
      flatMapedDS.keyBy(item => item.channelID + item.dateField)
    // tumbling event-time window
    val window: WindowedStream[ChannelUserFreshness, String, TimeWindow] =
      keyedS.timeWindow(Time.seconds(GlobalConfigUtils.timeWindow))
    // aggregate inside the window
    val result: DataStream[ChannelUserFreshness] = window.reduce(new ChannelUserFreshnessReduce)
    // write to HBase
    result.addSink(new ChannelUserFreshnessSink)
  }
}
```

2. Bean

```scala
package com.ityouxin.process.bean

case class ChannelUserFreshness(
  channelID: String,
  timestamp: Long,
  userId: Long,
  newCount: Long = 0,
  oldCount: Long = 0,
  dateField: String)
```

3. Map

```scala
package com.ityouxin.process.map

import com.ityouxin.process.bean.{ChannelUserFreshness, Message, UserState}
import com.ityouxin.tools.TimeUtils
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

class ChannelUserFreshnessMap extends FlatMapFunction[Message, ChannelUserFreshness] {
  override def flatMap(value: Message, out: Collector[ChannelUserFreshness]): Unit = {
    // time dimensions
    val timestamp: Long = value.userBrowse.entryTime
    val hour = TimeUtils.getDate(timestamp, "yyyyMMddHH")
    val day = TimeUtils.getDate(timestamp, "yyyyMMdd")
    val month = TimeUtils.getDate(timestamp, "yyyyMM")
    val userId = value.userBrowse.userId
    val channelID = value.userBrowse.channelId

    // look up the user's first-visit state
    val state: UserState = UserState.getUserState(userId.toString, timestamp)
    val isNew = state.isNew
    val isFirstHour = state.isFirstHour
    val isFirstDay = state.isFirstDay
    val isFirstMonth = state.isFirstMonth

    // new vs. returning counters per time dimension
    var newCount = 0L
    var hourOldCount = 0L
    var dayOldCount = 0L
    var monthOldCount = 0L
    if (isNew) {
      newCount = 1L
    } else {
      if (isFirstHour) hourOldCount = 1L
      if (isFirstDay) dayOldCount = 1L
      if (isFirstMonth) monthOldCount = 1L
    }

    // emit one ChannelUserFreshness per time dimension
    out.collect(ChannelUserFreshness(channelID.toString, timestamp, userId, newCount, hourOldCount, hour))
    out.collect(ChannelUserFreshness(channelID.toString, timestamp, userId, newCount, dayOldCount, day))
    out.collect(ChannelUserFreshness(channelID.toString, timestamp, userId, newCount, monthOldCount, month))
  }
}
```

4. Reduce

```scala
package com.ityouxin.process.reduce

import com.ityouxin.process.bean.ChannelUserFreshness
import org.apache.flink.api.common.functions.ReduceFunction

class ChannelUserFreshnessReduce extends ReduceFunction[ChannelUserFreshness] {
  // sum the new / returning user counters per key
  override def reduce(value1: ChannelUserFreshness, value2: ChannelUserFreshness): ChannelUserFreshness = {
    ChannelUserFreshness(
      value1.channelID,
      value1.timestamp,
      value1.userId,
      value1.newCount + value2.newCount,
      value1.oldCount + value2.oldCount,
      value1.dateField)
  }
}
```
5. Sink

```scala
package com.ityouxin.process.sink

import com.ityouxin.process.bean.ChannelUserFreshness
import com.ityouxin.tools.HbaseUtils
import org.apache.commons.lang.StringUtils
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

class ChannelUserFreshnessSink extends SinkFunction[ChannelUserFreshness] {
  override def invoke(value: ChannelUserFreshness, context: SinkFunction.Context[_]): Unit = {
    // table layout
    val tableName = TableName.valueOf("channelPVUV")
    val columnFamily = "info"
    val newCount: String = "newCount"
    val oldCount: String = "oldCount"
    val rowKey = HbaseUtils.lpad(value.channelID, 3) + ":" + value.dateField

    // read back the historical values
    val newCountData: String = HbaseUtils.getTablec(tableName, rowKey, columnFamily, newCount)
    val oldCountData: String = HbaseUtils.getTablec(tableName, rowKey, columnFamily, oldCount)

    // accumulate with history
    var newCountV = value.newCount
    var oldCountV = value.oldCount
    if (StringUtils.isNotBlank(newCountData)) {
      newCountV += newCountData.toLong
    }
    if (StringUtils.isNotBlank(oldCountData)) {
      oldCountV += oldCountData.toLong
    }

    // write to HBase
    val map = new mutable.HashMap[String, String]()
    map.put(newCount, newCountV.toString)
    map.put(oldCount, oldCountV.toString)
    HbaseUtils.putData(tableName, rowKey, columnFamily, map)
  }
}
```
- Business 4: channel region metrics
1. Task

```scala
package com.ityouxin.process.task

import com.ityouxin.process.bean.{ChannelRegion, Message}
import com.ityouxin.process.inerface.DataProcess
import com.ityouxin.process.map.ChannelRegionMap
import com.ityouxin.process.reduce.ChannelRegionReduce
import com.ityouxin.process.sink.ChannelRegionSink
import com.ityouxin.tools.GlobalConfigUtils
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

// business 4: channel region metrics
object ChannelRegionTask extends DataProcess {
  override def process(ds: DataStream[Message]): Unit = {
    // turn Message into the business bean
    val flatedDS: DataStream[ChannelRegion] = ds.flatMap(new ChannelRegionMap)
    // composite key: channelID + dateField + area (country, province, city)
    val keyedStream: KeyedStream[ChannelRegion, String] = flatedDS.keyBy(
      item => item.channelID + item.dateField + item.country + item.province + item.city
    )
    // tumbling event-time window
    val window: WindowedStream[ChannelRegion, String, TimeWindow] =
      keyedStream.timeWindow(Time.seconds(GlobalConfigUtils.timeWindow))
    // aggregate inside the window
    val reduceDS: DataStream[ChannelRegion] = window.reduce(new ChannelRegionReduce)
    // write to HBase
    reduceDS.addSink(new ChannelRegionSink)
  }
}
```

2. Bean

```scala
package com.ityouxin.process.bean

case class ChannelRegion(
  channelID: Long,
  country: String,
  province: String,
  city: String,
  pv: Long = 0L,
  uv: Long = 0L,
  newCount: Long = 0L,
  oldCount: Long = 0L,
  timestamp: Long,
  userId: Long,
  dateField: String) {

  // composite area prefix used for the HBase column names
  def getArea(): String = country + "_" + province + "_" + city + "_"
}
```

3. Map

```scala
package com.ityouxin.process.map

import com.ityouxin.process.bean.{ChannelRegion, Message, UserState}
import com.ityouxin.tools.TimeUtils
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

class ChannelRegionMap extends FlatMapFunction[Message, ChannelRegion] {
  override def flatMap(value: Message, out: Collector[ChannelRegion]): Unit = {
    // time dimensions
    val timestamp = value.userBrowse.entryTime
    val hour = TimeUtils.getDate(timestamp, "yyyyMMddHH")
    val day = TimeUtils.getDate(timestamp, "yyyyMMdd")
    val month = TimeUtils.getDate(timestamp, "yyyyMM")
    val userId = value.userBrowse.userId
    val channelId = value.userBrowse.channelId
    // region fields
    val country = value.userBrowse.country
    val province = value.userBrowse.province
    val city = value.userBrowse.city

    // look up the user's first-visit state
    val state: UserState = UserState.getUserState(userId.toString, timestamp)
    val isNew: Boolean = state.isNew
    val isFirstHour: Boolean = state.isFirstHour
    val isFirstDay: Boolean = state.isFirstDay
    val isFirstMonth: Boolean = state.isFirstMonth

    // PV and per-dimension UV
    val pv: Long = 1L
    var hourUV: Long = 0L
    var dayUV: Long = 0L
    var monthUV: Long = 0L

    // new vs. returning user counters
    var newCount = 0L
    var hourOldCount = 0L  // hourly oldCount
    var dayOldCount = 0L   // daily oldCount
    var monthOldCount = 0L // monthly oldCount
    if (isNew) {
      newCount = 1L
    } else {
      if (isFirstHour) {
        hourOldCount = 1L
        hourUV = 1L
      }
      if (isFirstDay) {
        dayOldCount = 1L
        dayUV = 1L
      }
      if (isFirstMonth) {
        monthOldCount = 1L
        monthUV = 1L
      }
    }

    // emit one record per time dimension
    out.collect(ChannelRegion(channelId, country, province, city, pv, hourUV, newCount, hourOldCount, timestamp, userId, hour))
    out.collect(ChannelRegion(channelId, country, province, city, pv, dayUV, newCount, dayOldCount, timestamp, userId, day))
    out.collect(ChannelRegion(channelId, country, province, city, pv, monthUV, newCount, monthOldCount, timestamp, userId, month))
  }
}
```
4. Reduce

```scala
package com.ityouxin.process.reduce

import com.ityouxin.process.bean.ChannelRegion
import org.apache.flink.api.common.functions.ReduceFunction

class ChannelRegionReduce extends ReduceFunction[ChannelRegion] {
  // sum PV / UV and new / returning counters per key
  override def reduce(value1: ChannelRegion, value2: ChannelRegion): ChannelRegion = {
    ChannelRegion(
      value1.channelID,
      value1.country,
      value1.province,
      value1.city,
      value1.pv + value2.pv,
      value1.uv + value2.uv,
      value1.newCount + value2.newCount,
      value1.oldCount + value2.oldCount,
      value1.timestamp,
      value1.userId,
      value1.dateField)
  }
}
```

5. Sink

```scala
package com.ityouxin.process.sink

import com.ityouxin.process.bean.ChannelRegion
import com.ityouxin.tools.HbaseUtils
import org.apache.commons.lang.StringUtils
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

class ChannelRegionSink extends SinkFunction[ChannelRegion] {
  override def invoke(value: ChannelRegion, context: SinkFunction.Context[_]): Unit = {
    // table layout: one column per area and metric
    val tableName = TableName.valueOf("channelPVUV")
    val columnFamily = "info"
    val regionPV = value.getArea() + "PV"
    val regionUV = value.getArea() + "UV"
    val regionNewCount = value.getArea() + "newCount"
    val regionOldCount = value.getArea() + "oldCount"
    val rowKey = HbaseUtils.lpad(value.channelID.toString, 3) + ":" + value.dateField

    var regionPVV = value.pv
    var regionUVV = value.uv
    var regionNewCountV = value.newCount
    var regionOldCountV = value.oldCount

    // read back the historical values and accumulate
    val pvData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, regionPV)
    val uvData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, regionUV)
    val newCountData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, regionNewCount)
    val oldCountData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, regionOldCount)
    if (StringUtils.isNotBlank(pvData)) regionPVV += pvData.toLong
    if (StringUtils.isNotBlank(uvData)) regionUVV += uvData.toLong
    if (StringUtils.isNotBlank(newCountData)) regionNewCountV += newCountData.toLong
    if (StringUtils.isNotBlank(oldCountData)) regionOldCountV += oldCountData.toLong

    // fields to write back
    val map = new mutable.HashMap[String, String]()
    map.put(regionPV, regionPVV.toString)
    map.put(regionUV, regionUVV.toString)
    map.put(regionNewCount, regionNewCountV.toString)
    map.put(regionOldCount, regionOldCountV.toString)
    HbaseUtils.putData(tableName, rowKey, columnFamily, map)
  }
}
```
- Business 5: carrier (network) metrics
1. Task

```scala
package com.ityouxin.process.task

import com.ityouxin.process.bean.{ChannelNetWork, Message}
import com.ityouxin.process.inerface.DataProcess
import com.ityouxin.process.map.ChannelNetWorkMap
import com.ityouxin.process.reduce.ChannelNetWorkReduce
import com.ityouxin.process.sink.ChannelNetWorkSink
import com.ityouxin.tools.GlobalConfigUtils
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

// business 5: carrier (network) metrics
object ChannelNetWorkTask extends DataProcess {
  override def process(ds: DataStream[Message]): Unit = {
    // map to the business bean
    val flatedDS: DataStream[ChannelNetWork] = ds.flatMap(new ChannelNetWorkMap)
    // key by time dimension and network type
    val keyedS: KeyedStream[ChannelNetWork, String] = flatedDS.keyBy(item => item.dateField + item.network)
    // tumbling event-time window
    val window: WindowedStream[ChannelNetWork, String, TimeWindow] =
      keyedS.timeWindow(Time.seconds(GlobalConfigUtils.timeWindow))
    // aggregate
    val reduceedDS: DataStream[ChannelNetWork] = window.reduce(new ChannelNetWorkReduce)
    // write to HBase
    reduceedDS.addSink(new ChannelNetWorkSink)
  }
}
```

2. Bean

```scala
package com.ityouxin.process.bean

case class ChannelNetWork(
  network: String,
  count: Long,
  newCount: Long,
  oldCount: Long,
  timestamp: Long,
  dateField: String)
```

3. Map

```scala
package com.ityouxin.process.map

import com.ityouxin.process.bean.{ChannelNetWork, Message, UserState}
import com.ityouxin.tools.TimeUtils
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

class ChannelNetWorkMap extends FlatMapFunction[Message, ChannelNetWork] {
  override def flatMap(value: Message, out: Collector[ChannelNetWork]): Unit = {
    // time dimensions and user state
    val timestamp = value.userBrowse.entryTime
    val hour = TimeUtils.getDate(timestamp, "yyyyMMddHH")
    val day = TimeUtils.getDate(timestamp, "yyyyMMdd")
    val month = TimeUtils.getDate(timestamp, "yyyyMM")
    val userID = value.userBrowse.userId
    val netWork = value.userBrowse.network
    val state: UserState = UserState.getUserState(userID.toString, timestamp)
    val isNew = state.isNew
    val isFirstHour = state.isFirstHour
    val isFirstDay = state.isFirstDay
    val isFirstMonth = state.isFirstMonth

    val count: Long = 1L
    var newCount = 0L
    var hourOldCount = 0L
    var dayOldCount = 0L
    var monthOldCount = 0L
    if (isNew) {
      newCount = 1L
    } else {
      if (isFirstHour) hourOldCount = 1L
      if (isFirstDay) dayOldCount = 1L
      if (isFirstMonth) monthOldCount = 1L
    }

    // emit one record per time dimension
    out.collect(ChannelNetWork(netWork, count, newCount, hourOldCount, timestamp, hour))
    out.collect(ChannelNetWork(netWork, count, newCount, dayOldCount, timestamp, day))
    out.collect(ChannelNetWork(netWork, count, newCount, monthOldCount, timestamp, month))
  }
}
```

4. Reduce

```scala
package com.ityouxin.process.reduce

import com.ityouxin.process.bean.ChannelNetWork
import org.apache.flink.api.common.functions.ReduceFunction

class ChannelNetWorkReduce extends ReduceFunction[ChannelNetWork] {
  // sum the counters per network and time dimension
  override def reduce(value1: ChannelNetWork, value2: ChannelNetWork): ChannelNetWork = {
    ChannelNetWork(
      value1.network,
      value1.count + value2.count,
      value1.newCount + value2.newCount,
      value1.oldCount + value2.oldCount,
      value1.timestamp,
      value1.dateField)
  }
}
```
5. Sink

```scala
package com.ityouxin.process.sink

import com.ityouxin.process.bean.ChannelNetWork
import com.ityouxin.tools.HbaseUtils
import org.apache.commons.lang.StringUtils
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

class ChannelNetWorkSink extends SinkFunction[ChannelNetWork] {
  override def invoke(value: ChannelNetWork, context: SinkFunction.Context[_]): Unit = {
    // one column group per network type in the "device" table
    val network = value.network
    val tableName = TableName.valueOf("device")
    val columnFamily = "info"
    val nwCount = network + "_Count"
    val nwNewCount = network + "_NewCount"
    val nwOldCount = network + "_OldCount"
    val rowKey = value.dateField

    // read back the historical values
    val countData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, nwCount)
    val newCountData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, nwNewCount)
    val oldCountData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, nwOldCount)

    // accumulate with history
    var count = value.count
    var newCount = value.newCount
    var oldCount = value.oldCount
    if (StringUtils.isNotBlank(countData)) count += countData.toLong
    if (StringUtils.isNotBlank(newCountData)) newCount += newCountData.toLong
    if (StringUtils.isNotBlank(oldCountData)) oldCount += oldCountData.toLong

    // write to HBase
    val map = new mutable.HashMap[String, String]()
    map.put(nwCount, count.toString)
    map.put(nwNewCount, newCount.toString)
    map.put(nwOldCount, oldCount.toString)
    HbaseUtils.putData(tableName, rowKey, columnFamily, map)
  }
}
```
- Business 6: browser-type metrics
1. Task

```scala
package com.ityouxin.process.task

import com.ityouxin.process.bean.{ChannelBrowser, Message}
import com.ityouxin.process.inerface.DataProcess
import com.ityouxin.process.map.ChannelBrowserMap
import com.ityouxin.process.reduce.ChannelBrowserReduce
import com.ityouxin.process.sink.ChannelBrowserSink
import com.ityouxin.tools.GlobalConfigUtils
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

// business 6: browser-type metrics
object ChannelBrowserTask extends DataProcess {
  override def process(ds: DataStream[Message]): Unit = {
    val flatedDS: DataStream[ChannelBrowser] = ds.flatMap(new ChannelBrowserMap)
    // keyed by browser and time dimension and windowed like the other tasks
    // (the original listing reduced the keyed stream directly, which would emit running totals)
    val keyedS: KeyedStream[ChannelBrowser, String] = flatedDS.keyBy(item => item.browser + item.dateField)
    val window: WindowedStream[ChannelBrowser, String, TimeWindow] =
      keyedS.timeWindow(Time.seconds(GlobalConfigUtils.timeWindow))
    val result: DataStream[ChannelBrowser] = window.reduce(new ChannelBrowserReduce)
    result.addSink(new ChannelBrowserSink)
  }
}
```

2. Beans

```scala
package com.ityouxin.process.bean

case class ChannelBrowser(
  browser: String,
  count: Long,
  newCount: Long,
  oldCount: Long,
  timestamp: Long,
  dateField: String)
```

```scala
package com.ityouxin.process.bean

import com.alibaba.fastjson.{JSON, JSONObject}

case class UserBrowse(
  channelId: Long,
  categoryId: Long,
  productId: Long,
  country: String,
  province: String,
  city: String,
  network: String,
  sources: String,
  browserType: String,
  entryTime: Long,
  leaveTime: Long,
  userId: Long)

object UserBrowse {
  // parse the inner JSON message into the bean
  def toBean(message: String): UserBrowse = {
    val jSONObject: JSONObject = JSON.parseObject(message)
    val channelId: Long = jSONObject.get("channelId").toString.toLong
    val categoryId: Long = jSONObject.get("categoryId").toString.toLong
    val productId: Long = jSONObject.get("productId").toString.toLong
    val city: String = jSONObject.get("city").toString
    val country: String = jSONObject.get("country").toString
    val province: String = jSONObject.get("province").toString
    val network: String = jSONObject.get("network").toString
    val sources: String = jSONObject.get("sources").toString
    val browserType: String = jSONObject.get("browserType").toString
    val entryTime: Long = jSONObject.get("entryTime").toString.toLong
    val leaveTime: Long = jSONObject.get("leaveTime").toString.toLong
    val userId: Long = jSONObject.get("userId").toString.toLong
    UserBrowse(channelId, categoryId, productId, country, province, city,
      network, sources, browserType, entryTime, leaveTime, userId)
  }
}
```

3. Map

```scala
package com.ityouxin.process.map

import com.ityouxin.process.bean.{ChannelBrowser, Message, UserState}
import com.ityouxin.tools.TimeUtils
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

class ChannelBrowserMap extends FlatMapFunction[Message, ChannelBrowser] {
  override def flatMap(value: Message, out: Collector[ChannelBrowser]): Unit = {
    // time dimensions and user state
    val timestamp = value.userBrowse.entryTime
    val hour = TimeUtils.getDate(timestamp, "yyyyMMddHH")
    val day = TimeUtils.getDate(timestamp, "yyyyMMdd")
    val month = TimeUtils.getDate(timestamp, "yyyyMM")
    val userID = value.userBrowse.userId
    val browser = value.userBrowse.browserType
    val state: UserState = UserState.getUserState(userID.toString, timestamp)
    val isNew = state.isNew
    val isFirstHour = state.isFirstHour
    val isFirstDay = state.isFirstDay
    val isFirstMonth = state.isFirstMonth

    val count: Long = 1L
    var newCount = 0L
    var hourOldCount = 0L
    var dayOldCount = 0L
    var monthOldCount = 0L
    if (isNew) {
      newCount = 1L
    } else {
      if (isFirstHour) hourOldCount = 1L
      if (isFirstDay) dayOldCount = 1L
      if (isFirstMonth) monthOldCount = 1L
    }

    // emit one record per time dimension
    out.collect(ChannelBrowser(browser, count, newCount, hourOldCount, timestamp, hour))
    out.collect(ChannelBrowser(browser, count, newCount, dayOldCount, timestamp, day))
    out.collect(ChannelBrowser(browser, count, newCount, monthOldCount, timestamp, month))
  }
}
```

4. Reduce

```scala
package com.ityouxin.process.reduce

import com.ityouxin.process.bean.ChannelBrowser
import org.apache.flink.api.common.functions.ReduceFunction

class ChannelBrowserReduce extends ReduceFunction[ChannelBrowser] {
  // sum the counters per browser and time dimension
  override def reduce(value1: ChannelBrowser, value2: ChannelBrowser): ChannelBrowser = {
    ChannelBrowser(
      value1.browser,
      value1.count + value2.count,
      value1.newCount + value2.newCount,
      value1.oldCount + value2.oldCount,
      value1.timestamp,
      value1.dateField)
  }
}
```

5. Sink

```scala
package com.ityouxin.process.sink

import com.ityouxin.process.bean.ChannelBrowser
import com.ityouxin.tools.HbaseUtils
import org.apache.commons.lang.StringUtils
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

class ChannelBrowserSink extends SinkFunction[ChannelBrowser] {
  override def invoke(value: ChannelBrowser, context: SinkFunction.Context[_]): Unit = {
    // one column group per browser type in the "device" table
    val browser = value.browser
    val tableName = TableName.valueOf("device")
    val columnFamily = "info"
    val nwCount = browser + "_Count"
    val nwNewCount = browser + "_NewCount"
    val nwOldCount = browser + "_OldCount"
    val rowKey = value.dateField

    // read back the historical values
    val countData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, nwCount)
    val newCountData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, nwNewCount)
    val oldCountData = HbaseUtils.getTablec(tableName, rowKey, columnFamily, nwOldCount)

    // accumulate with history
    var count = value.count
    var newCount = value.newCount
    var oldCount = value.oldCount
    if (StringUtils.isNotBlank(countData)) count += countData.toLong
    if (StringUtils.isNotBlank(newCountData)) newCount += newCountData.toLong
    if (StringUtils.isNotBlank(oldCountData)) oldCount += oldCountData.toLong

    // write to HBase
    val map = new mutable.HashMap[String, String]()
    map.put(nwCount, count.toString)
    map.put(nwNewCount, newCount.toString)
    map.put(nwOldCount, oldCount.toString)
    HbaseUtils.putData(tableName, rowKey, columnFamily, map)
  }
}
```
- Utility classes
HbaseUtils
```scala
package com.ityouxin.tools

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Admin, Connection, ConnectionFactory, Get, Put, Result, Table}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

object HbaseUtils {
  // build the HBase configuration from application.conf
  private val conf: Configuration = HBaseConfiguration.create()
  conf.set("hbase.zookeeper.quorum", GlobalConfigUtils.zookeeperQuorum)
  conf.set("hbase.master", GlobalConfigUtils.hbaseMaster)
  conf.set("hbase.zookeeper.property.clientPort", GlobalConfigUtils.clientPort)
  conf.set("hbase.rpc.timeout", GlobalConfigUtils.rpcTimeout)
  conf.set("hbase.client.operation.timeout", GlobalConfigUtils.operatorTimeout)
  conf.set("hbase.client.scanner.timeout.period", GlobalConfigUtils.timeoutPeriod)

  // one shared connection and admin handle
  private val connection: Connection = ConnectionFactory.createConnection(conf)
  private val admin: Admin = connection.getAdmin

  // create the table if it does not exist, then return it
  def createTable(tableName: TableName, columnFamily: String): Table = {
    val tableDescriptor = new HTableDescriptor(tableName)
    val columnDescriptor = new HColumnDescriptor(columnFamily)
    tableDescriptor.addFamily(columnDescriptor)
    if (!admin.tableExists(tableName)) {
      admin.createTable(tableDescriptor)
    }
    connection.getTable(tableName)
  }

  // read a single cell; returns "" when the cell is absent
  def getTablec(tableName: TableName, rowKey: String, columnFamily: String, column: String): String = {
    var str: String = ""
    val table = createTable(tableName, columnFamily)
    try {
      // the rowkey must be converted to bytes
      val get: Get = new Get(Bytes.toBytes(rowKey))
      val result: Result = table.get(get)
      // fetch the cell for the given column family and column
      val bytes: Array[Byte] = result.getValue(Bytes.toBytes(columnFamily), Bytes.toBytes(column))
      if (bytes != null && bytes.length > 0) {
        str = Bytes.toString(bytes)
      }
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      table.close()
    }
    str
  }

  // write a map of column -> value into one row
  def putData(tableName: TableName, rowKey: String, columnFamily: String,
              fieldsData: mutable.HashMap[String, String]): Unit = {
    val table: Table = createTable(tableName, columnFamily)
    try {
      val put = new Put(Bytes.toBytes(rowKey))
      if (fieldsData != null && fieldsData.nonEmpty) {
        for ((k, v) <- fieldsData) {
          put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(k), Bytes.toBytes(v))
        }
        table.put(put)
      }
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      table.close()
    }
  }

  // left-pad a string with zeros up to len characters
  def lpad(source: String, len: Int): String = {
    var str = source
    if (str != null) {
      val strBuilder = new mutable.StringBuilder(str)
      while (str.length < len) {
        str = strBuilder.insert(0, "0").toString()
      }
    }
    str
  }
}
```
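A quick usage sketch of these helpers (the table name, rowkey, and values here are illustrative only):

```scala
import com.ityouxin.tools.HbaseUtils
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

object HbaseUtilsDemo {
  def main(args: Array[String]): Unit = {
    val table = TableName.valueOf("channel")
    // write channel/info:count = 42 for rowkey "001"
    val fields = new mutable.HashMap[String, String]()
    fields.put("count", "42")
    HbaseUtils.putData(table, "001", "info", fields)
    // read it back; getTablec returns "" when the cell is absent
    println(HbaseUtils.getTablec(table, "001", "info", "count"))
    // lpad zero-pads rowkey components: lpad("7", 3) == "007"
    println(HbaseUtils.lpad("7", 3))
  }
}
```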
TimeUtils
```scala
package com.ityouxin.tools

import java.util.Date

import org.apache.commons.lang.time.FastDateFormat

object TimeUtils {
  // format an epoch-millis timestamp with the given pattern, e.g. "yyyyMMddHH"
  def getDate(timestamp: Long, format: String): String = {
    val time = new Date(timestamp)
    val dateFormat: FastDateFormat = FastDateFormat.getInstance(format)
    dateFormat.format(time)
  }
}
```
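For example, the three dimension keys used throughout the map functions come straight out of this helper:

```scala
import com.ityouxin.tools.TimeUtils

object TimeUtilsDemo {
  def main(args: Array[String]): Unit = {
    val ts = System.currentTimeMillis()
    println(TimeUtils.getDate(ts, "yyyyMMddHH")) // hour dimension,  e.g. 2020031215
    println(TimeUtils.getDate(ts, "yyyyMMdd"))   // day dimension,   e.g. 20200312
    println(TimeUtils.getDate(ts, "yyyyMM"))     // month dimension, e.g. 202003
  }
}
```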
GlobalConfigUtils: identical to the listing shown earlier in the configuration section.
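One class referenced by every map function, UserState, is not listed in this post. A minimal sketch, assuming the first-visit flags are derived from a last-visit timestamp kept in an HBase "user" table (the table and column names here are assumptions):

```scala
package com.ityouxin.process.bean

import com.ityouxin.tools.{HbaseUtils, TimeUtils}
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.hbase.TableName

import scala.collection.mutable

// new vs. returning flags per time dimension
case class UserState(isNew: Boolean, isFirstHour: Boolean, isFirstDay: Boolean, isFirstMonth: Boolean)

object UserState {
  // derive the flags from the user's last visit stored in HBase ("user" table is an assumption)
  def getUserState(userId: String, timestamp: Long): UserState = {
    val table = TableName.valueOf("user")
    val lastVisit = HbaseUtils.getTablec(table, userId, "info", "lastVisitTime")
    val isNew = StringUtils.isBlank(lastVisit)
    // first visit within a dimension when the formatted last-visit time differs from now
    def firstIn(fmt: String): Boolean =
      isNew || TimeUtils.getDate(lastVisit.toLong, fmt) != TimeUtils.getDate(timestamp, fmt)
    val state = UserState(isNew, firstIn("yyyyMMddHH"), firstIn("yyyyMMdd"), firstIn("yyyyMM"))
    // record this visit for the next lookup
    val fields = new mutable.HashMap[String, String]()
    fields.put("lastVisitTime", timestamp.toString)
    HbaseUtils.putData(table, userId, "info", fields)
    state
  }
}
```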