package kafka

import java.io.IOException
import DAO.{ScalaHbase, ScalaConn}
import kafka.Service.sp2p_log
import org.apache.kafka.clients.producer.{ProducerRecord, KafkaProducer}
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import kafka.serializer.StringDecoder
import org.slf4j.LoggerFactory
import scala.collection.immutable.HashMap
import org.apache.log4j.Level
import org.apache.log4j.Logger
import org.apache.hadoop.hbase.client.{Table, Connection,ConnectionFactory}
import org.apache.hadoop.conf.Configuration
import java.sql.Timestamp
import java.util.{Properties, Date}
import org.apache.hadoop.hbase.{ZooKeeperConnectionException, MasterNotRunningException, TableName}
import org.apache.commons.lang3.time.DateFormatUtils
import org.apache.spark.serializer.KryoRegistrator

/**
 * Spark Streaming job that reads raw application log lines from the Kafka topic
 * `sp2plog`, extracts SMS-verification and user-registration events from them,
 * stores those events in the HBase tables `RT_SMS` and `RT_REG`, and republishes
 * every parsed record as a delimited string on the Kafka topic `SP2PLOG`.
 *
 * @author 谭志坚
 */
@SerialVersionUID(-1)
object KafkaData_Sp2p {

  def LOG = LoggerFactory.getLogger(getClass)

  /**
   * One event extracted from a log line. Fields that a given event does not
   * carry are left as the empty string.
   *
   * @param YMD     first 19 characters of the log line's timestamp prefix
   * @param MOBILE  mobile number / user name taken from the log line
   * @param CHANNEL value following `recommended1:` on registration lines
   * @param REMARK  SMS purpose keyword found in the line (SMS lines only)
   * @param STATUS  "注册成功" for registration lines, otherwise empty
   */
  case class SP2PLOG(YMD: String, MOBILE: String, CHANNEL: String, REMARK: String, STATUS: String)

  /** SMS purpose keywords, in the priority order in which they are tested. */
  private val SmsRemarks: Seq[String] = Seq("手机号注册", "重置密码", "微信账号绑定")

  /** HBase column family shared by both tables written by this job. */
  private val UsersFamily: String = "USERS"

  /**
   * Parses one raw log line into an [[SP2PLOG]].
   *
   * Two kinds of line are recognised:
   *  - SMS lines (contain `SMSUtil`, `【蜂投网】`, `验证码` and one of the purpose
   *    keywords): MOBILE is the text before `发送` in the part after `" - "`,
   *    REMARK is the matched purpose keyword.
   *  - Registration lines (contain `LoginAndRegisterAction`,
   *    `用户注册信息。userName` and `recommended1`): MOBILE and CHANNEL are the
   *    parts before/after `.recommended1:`, STATUS is "注册成功".
   *
   * A line that matches neither rule, or that matches the keywords but does not
   * have the expected layout (missing separator, timestamp prefix shorter than
   * 19 characters), yields a record whose fields are all empty instead of
   * throwing, so one malformed line cannot fail a whole streaming batch.
   *
   * @param line raw log line
   * @return the parsed record; all-empty when nothing could be extracted
   */
  def SP2PLOG_parse(line: String): SP2PLOG = {
    def has(token: String): Boolean = line.contains(token)

    // The timestamp is whatever precedes "INFO", cut to "yyyy-MM-dd HH:mm:ss" length.
    def timestampOf(): String = line.split("INFO")(0).trim.substring(0, 19)

    try {
      val smsRemark: Option[String] =
        if (has("SMSUtil") && has("【蜂投网】") && has("验证码")) SmsRemarks.find(has) else None

      smsRemark match {
        case Some(remark) =>
          val content = line.split(" - ")(1).trim // SMS content
          val ymd = timestampOf()                  // SMS send time
          val mobile = content.split("发送")(0).trim // mobile number
          SP2PLOG(ymd, mobile, "", remark, "")

        case None if has("LoginAndRegisterAction") && has("用户注册信息。userName") && has("recommended1") =>
          val content = line.split("用户注册信息。userName:")(1).trim
          val ymd = timestampOf()
          // String.split takes a regex, so the leading '.' matches any single
          // character in front of "recommended1:".
          val parts = content.split(".recommended1:")
          val mobile = parts(0)
          val channel = if (parts.length > 1) parts(1).trim else "" // channel
          SP2PLOG(ymd, mobile, channel, "", "注册成功")

        case None =>
          SP2PLOG("", "", "", "", "")
      }
    } catch {
      // Covers both ArrayIndexOutOfBounds (missing separator) and
      // StringIndexOutOfBounds (timestamp prefix too short).
      case e: IndexOutOfBoundsException =>
        println("SP2PLOG_parse Exception:" + e.toString)
        SP2PLOG("", "", "", "", "")
    }
  }

  /** Closes an HBase table handle, reporting (not propagating) a failure to close. */
  private def closeQuietly(table: Table, label: String): Unit =
    try table.close()
    catch {
      case e: Exception => println(label + " close Exception:" + e.toString)
    }

  /**
   * Stores an SMS event in the HBase table `RT_SMS` (family `USERS`, columns
   * `YMD`, `MOBILE`, `REMARK`). The row key is the event time formatted as
   * `yyyyMMddHHmmss` followed by the mobile number. Records with an empty YMD
   * or MOBILE are ignored. Failures are printed and swallowed.
   *
   * @param line      record to store
   * @param hbaseConf HBase configuration passed to `ScalaHbase.createTable`
   * @param hbaseconn open HBase connection; not closed by this method
   */
  def SP2PLOG_save(line: SP2PLOG, hbaseConf: Configuration, hbaseconn: Connection): Unit = {
    try {
      // `nonEmpty` compares content. The previous `ne("")` compared object
      // identity, which does not filter empty strings that are not the interned literal.
      if (line.YMD.nonEmpty && line.MOBILE.nonEmpty) {
        val ts: Timestamp = ScalaConn.GetStampByTime(line.YMD)
        ScalaHbase.createTable(hbaseConf, hbaseconn, "RT_SMS", Array[String](UsersFamily))
        val smsTable: Table = hbaseconn.getTable(TableName.valueOf("RT_SMS"))
        try {
          val rowKey: String = DateFormatUtils.format(ts, "yyyyMMddHHmmss") + line.MOBILE
          ScalaHbase.addRow(smsTable, rowKey, UsersFamily, "YMD", line.YMD)
          ScalaHbase.addRow(smsTable, rowKey, UsersFamily, "MOBILE", line.MOBILE)
          ScalaHbase.addRow(smsTable, rowKey, UsersFamily, "REMARK", line.REMARK)
        } catch {
          case e: Exception =>
            e.printStackTrace()
            println("RT_SMS Exception:" + e.toString)
        } finally {
          // Table handles are per-use resources and must be released.
          closeQuietly(smsTable, "RT_SMS")
        }
      }
    } catch {
      case e: Exception =>
        println("RT_SMS_REG Exception:" + e.toString)
        e.printStackTrace()
    }
  }

  /**
   * Stores a registration-related event in the HBase table `RT_REG` (family
   * `USERS`). The row key is the event date formatted as `yyyyMMdd` followed by
   * the mobile number; the `YMD` column holds that same formatted date.
   * `CHANNEL`, `REMARK` and `STATUS` are written only when non-empty.
   *
   * Errors from creating/opening the table propagate to the caller; errors
   * while writing are printed and swallowed.
   *
   * @param line      record to store
   * @param hbaseConf HBase configuration passed to `ScalaHbase.createTable`
   * @param hbaseconn open HBase connection; not closed by this method
   */
  def RT_REG_save(line: SP2PLOG, hbaseConf: Configuration, hbaseconn: Connection): Unit = {
    ScalaHbase.createTable(hbaseConf, hbaseconn, "RT_REG", Array[String](UsersFamily))
    val regTable: Table = hbaseconn.getTable(TableName.valueOf("RT_REG"))
    try {
      val ts: Timestamp = ScalaConn.GetStampByTime(line.YMD)
      val day: String = DateFormatUtils.format(ts, "yyyyMMdd")
      val rowKey: String = day + line.MOBILE
      ScalaHbase.addRow(regTable, rowKey, UsersFamily, "YMD", day)
      ScalaHbase.addRow(regTable, rowKey, UsersFamily, "MOBILE", line.MOBILE)
      // Content checks (`nonEmpty`), not identity checks (`ne("")`): skip empty cells.
      if (line.CHANNEL.nonEmpty) {
        ScalaHbase.addRow(regTable, rowKey, UsersFamily, "CHANNEL", line.CHANNEL)
      }
      if (line.REMARK.nonEmpty) {
        ScalaHbase.addRow(regTable, rowKey, UsersFamily, "REMARK", line.REMARK)
      }
      if (line.STATUS.nonEmpty) {
        ScalaHbase.addRow(regTable, rowKey, UsersFamily, "STATUS", line.STATUS)
      }
    } catch {
      case e: Exception =>
        e.printStackTrace()
        println("RT_REG Exception1:" + e.toString)
    } finally {
      closeQuietly(regTable, "RT_REG")
    }
  }

  /** True for log lines that `SP2PLOG_parse` may be able to extract an event from. */
  private def isRelevantLine(x: String): Boolean = {
    val isSms = x.contains("SMSUtil") && x.contains("【蜂投网】") && x.contains("验证码")
    val isRegistration =
      x.contains("LoginAndRegisterAction") && x.contains("用户注册信息。userName") && x.contains("recommended1")
    !x.isEmpty && (isSms || isRegistration)
  }

  /** Builds the configuration of the producer that republishes parsed records. */
  private def producerProps(brokers: String): Properties = {
    val props = new Properties()
    // NOTE(review): the next two keys belong to the legacy Scala producer and are
    // presumably ignored by KafkaProducer — kept unchanged, confirm before removing.
    props.put("metadata.broker.list", brokers)
    props.put("serializer.class", "org.apache.kafka.common.serialization.ByteArraySerializer")
    props.put("bootstrap.servers", brokers)
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props
  }

  /**
   * Entry point: wires the Kafka direct stream to the parse / store / republish
   * pipeline and blocks until the streaming context terminates.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    // Logger.getLogger("org.apache.spark").setLevel(Level.WARN);
    // Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.ERROR);
    val conf = new SparkConf().setAppName("kafkaData").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(600)) // one micro-batch every 600 seconds
    // ssc.checkpoint("D:\\BigData\\spark-warehouse")

    val myConf: Configuration = ScalaConn.getHbaseConf
    val hbaseconn: Connection = ConnectionFactory.createConnection(myConf)
    val spark = ScalaConn.spark

    val topics = Set("sp2plog")
    val brokers = "192.168.100.110:9092,192.168.100.111:9092,192.168.100.112:9092"
    val kafkaParams = Map[String, String](
      "metadata.broker.list" -> brokers,
      "serializer.class" -> "kafka.serializer.StringEncoder")

    try {
      // Create a direct stream
      val kafkaStream =
        KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
      val lines = kafkaStream.map(x => x._2)

      try {
        // Map each line to a record. RT_SMS receives the SMS events (mobile
        // registration, password reset, account binding); RT_REG receives the
        // registration events.
        lines.filter(x => isRelevantLine(x)).map(x => SP2PLOG_parse(x)).foreachRDD { rdd =>
          import spark.implicits._
          val dataFrame = rdd.toDF()
          dataFrame.createOrReplaceTempView("RT_REG")
          val RT_REG_sql = spark.sql("select YMD, MOBILE, CHANNEL,REMARK,STATUS from RT_REG")

          // Collect once: every collect() re-runs the query for the batch.
          val rows = RT_REG_sql.collect()
          if (rows.nonEmpty) {
            try {
              // One producer per batch, used to push records to the front end
              // for real-time display; always closed, even when a row fails.
              val producer: KafkaProducer[String, String] =
                new KafkaProducer[String, String](producerProps(brokers))
              try {
                rows.foreach { userRow =>
                  val YMD: String = String.valueOf(userRow.getString(0))
                  val MOBILE: String = String.valueOf(userRow.getString(1))
                  val CHANNEL: String = String.valueOf(userRow.getString(2))
                  val REMARK: String = String.valueOf(userRow.getString(3))
                  val STATUS: String = String.valueOf(userRow.getString(4))
                  val record = SP2PLOG(YMD, MOBILE, CHANNEL, REMARK, STATUS)

                  if (REMARK.contains("手机号注册") || STATUS.contains("注册成功")) {
                    RT_REG_save(record, myConf, hbaseconn)
                  }
                  if (!YMD.isEmpty && !REMARK.isEmpty) {
                    SP2PLOG_save(record, myConf, hbaseconn)
                  }

                  val RT_REG_ZX: String = s"$YMD , $MOBILE , $CHANNEL , $REMARK , $STATUS ,"
                  println("RT_REG_ZX = " + RT_REG_ZX)
                  producer.send(new ProducerRecord[String, String]("SP2PLOG", RT_REG_ZX))
                }
              } finally {
                producer.close()
              }
            } catch {
              case e: Exception =>
                println("完成数据保存到Hbase后:" + e.toString)
                e.printStackTrace()
            }
          }
        }
      } catch {
        case e: Exception =>
          println("完成数据分析和运行后:" + e.toString)
          e.printStackTrace()
      }

      ssc.start()
      ssc.awaitTermination()
    } finally {
      // Release the HBase connection once streaming has stopped or setup failed.
      hbaseconn.close()
    }
  }
}

// Earlier, disabled variant of the pipeline, kept for reference.
// Map each line to a record; RT_SMS captures mobile-registration requests,
// password resets and account binding.
//    lines.filter(x => (!x.isEmpty
//      && x.contains("SMSUtil")
//      && x.contains("【蜂投网】")
//      && x.contains("验证码") )).map(x => {
//      println("开始清洗数据================")
//      SP2PLOG_parse(x)
//    }).foreachRDD((s:RDD[KafkaDataTest.SP2PLOG]) => {
//      // iterate over the RDDs of the DStream
//      if (!s.isEmpty()) {
//        // iterate over the records of each RDD partition
//        s.foreachPartition {
//          records => {
//            if (!records.isEmpty) records.toSet.foreach {
//              r: SP2PLOG =>
//                SP2PLOG_save(r)
//            }
//          }
//        }
//      }
//    })
KafkaData_Sp2p.scala
最新推荐文章于 2021-11-30 10:24:56 发布