Big Data Project in Practice (8) | Reading Data from Kafka and Writing to Phoenix

Reading Data from Kafka

  • 1. Create the Kafka util class
import kafka.serializer.StringDecoder

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils

object MyKafkaUtil {
  // Kafka consumer parameters for the direct stream
  val params = Map[String, String](
    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop12:9092,hadoop13:9092,hadoop14:9092",
    ConsumerConfig.GROUP_ID_CONFIG -> "gmall1602"
  )

  // Read one or more topics from Kafka and return a DStream of the message values
  def getKafkaStream(ssc: StreamingContext, topic: String, otherTopic: String*): DStream[String] = {
    KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc,
      params,
      (otherTopic :+ topic).toSet
    ).map(_._2)
  }
}
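The util class uses the Kafka 0.8 direct-stream API (KafkaUtils.createDirectStream with StringDecoder), which lives in the spark-streaming-kafka-0-8 module. A minimal sbt sketch of the dependencies, assuming Spark 2.1.x and Scala 2.11 (the versions are assumptions; adjust them to your cluster):

// build.sbt (sketch; versions are assumptions)
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming"           % "2.1.1",
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % "2.1.1"
)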

  • 2. Code implementation
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import util.MyKafkaUtil

object OrderApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("OrderApp")
    val ssc = new StreamingContext(conf, Seconds(3))

    // read the order topic from Kafka (Constant.TOPIC_ORDER_INFO, i.e. "topic_order_info" in gmall-common)
    val sourceStream = MyKafkaUtil.getKafkaStream(ssc, Constant.TOPIC_ORDER_INFO)
    sourceStream.print(1000)

    
    ssc.start()
    ssc.awaitTermination()
  }

}
  • Check the result
    Start Canal on the virtual machine, run CanalClient in IDEA, start the mock data generator (MySQL), then start OrderApp; the order JSON strings read from Kafka should be printed to the console.
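If nothing shows up, it helps to confirm that records are actually reaching the topic before starting the Spark job. A minimal check with the Kafka console consumer, using the broker list from MyKafkaUtil (depending on your Kafka version the flag may be --zookeeper hadoop12:2181 instead of --bootstrap-server):

bin/kafka-console-consumer.sh \
  --bootstrap-server hadoop12:9092,hadoop13:9092,hadoop14:9092 \
  --topic topic_order_info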

Writing Data to Phoenix

  • Create the OrderInfo case class
case class OrderInfo(id: String,
                     province_id: String,
                     var consignee: String,
                     order_comment: String,
                     var consignee_tel: String,
                     order_status: String,
                     payment_way: String,
                     user_id: String,
                     img_url: String,
                     total_amount: Double,
                     expire_time: String,
                     delivery_address: String,
                     create_time: String,
                     operate_time: String,
                     tracking_no: String,
                     parent_order_id: String,
                     out_trade_no: String,
                     trade_body: String,
                     var create_date: String = null,
                     var create_hour: String = null){
  // 1. Derive the creation date and creation hour from create_time
  /*"payment_way":"1","delivery_address":"MkRBikAWjeWtberwgrqD","consignee":"YVIFYf","create_time":"2020-09-13 " +
    "08:28:53","order_comment":"SEtervbXJDPpdxdaPZEW","expire_time":"","order_status":"2","out_trade_no":"8524532922",
    "tracking_no":"","total_amount":"453.0","user_id":"7","img_url":"","province_id":"7","consignee_tel":"13559244734",
    "trade_body":"","id":"42","parent_order_id":"","operate_time":"2020-09-13 09:27:52"
   */
  create_date = create_time.substring(0,10)
  create_hour = create_time.substring(11,13)

  // 2. Mask the consignee name and phone number (data desensitization)
  consignee = consignee.substring(0,1) + "**"
  //13619008326   =>    136****8326
  //consignee_tel = consignee_tel.substring(0,3)  + "****" +consignee_tel.substring(7,11)
  consignee_tel = consignee_tel.replaceAll("(\\d{3})\\d{4}(\\d{4})","$1****$2")
}
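A quick way to sanity-check the derivation and masking logic above, using the values from the sample record in the comment (a throwaway scratch object, not part of the project):

object OrderInfoCheck extends App {
  val createTime = "2020-09-13 08:28:53"
  println(createTime.substring(0, 10))  // 2020-09-13  -> create_date
  println(createTime.substring(11, 13)) // 08          -> create_hour

  println("YVIFYf".substring(0, 1) + "**") // Y**  (masked consignee)
  // 11-digit phone number: keep the first 3 and the last 4 digits
  println("13559244734".replaceAll("(\\d{3})\\d{4}(\\d{4})", "$1****$2")) // 135****4734
}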

  • Test that it works
    Start Canal, start the mock data generator (MySQL), then start OrderApp.
import bean.OrderInfo
import com.alibaba.fastjson.JSON
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import util.MyKafkaUtil



object OrderApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("OrderApp")
    val ssc = new StreamingContext(conf, Seconds(3))
    val sourceStream = MyKafkaUtil.getKafkaStream(ssc, "topic_order_info")
    // 1. Parse each JSON string that was read and map it into the OrderInfo case class
    val orderInforStream = sourceStream.map(s => JSON.parseObject(s, classOf[OrderInfo]))
    orderInforStream.print(1000)
    // 2. Save to HBase (Phoenix), implemented below

   
    ssc.start()
    ssc.awaitTermination()
  }

}
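JSON.parseObject comes from Alibaba's fastjson. If it is not already pulled in through gmall-common, a dependency along these lines is needed (the version here is an assumption):

libraryDependencies += "com.alibaba" % "fastjson" % "1.2.62"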


  • Add a new constant to the Constant class in gmall-common
public final static String GMALL_ORDER_INFO = "gmall_order_info1602";
  • Create the table in Phoenix (using SQuirreL SQL Client)
create table gmall_order_info1602 (
           id varchar primary key,
           province_id varchar,
           consignee varchar,
           order_comment varchar,
           consignee_tel varchar,
           order_status varchar,
           payment_way varchar,
           user_id varchar,
           img_url varchar,
           total_amount decimal, 
           expire_time varchar,
           delivery_address varchar,
           create_time varchar,
           operate_time varchar,
           tracking_no varchar,
           parent_order_id varchar,
           out_trade_no varchar,
           trade_body varchar,
           create_date varchar,
           create_hour varchar)
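The saveToPhoenix call used in the next step is provided by the phoenix-spark module (imported in the next snippet as org.apache.phoenix.spark._). A sketch of the sbt dependency; the version below is only an assumption and must match your Phoenix/HBase installation:

libraryDependencies += "org.apache.phoenix" % "phoenix-spark" % "4.14.2-HBase-1.3"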
  • Save to Phoenix
import bean.OrderInfo
import com.alibaba.fastjson.JSON
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import util.MyKafkaUtil
import org.apache.phoenix.spark._



object OrderApp {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("OrderApp")
    val ssc = new StreamingContext(conf, Seconds(3))
    val sourceStream = MyKafkaUtil.getKafkaStream(ssc, "topic_order_info")
    // 1. Parse each JSON string that was read and map it into the OrderInfo case class
    val orderInforStream = sourceStream.map(s => JSON.parseObject(s, classOf[OrderInfo]))
    // 2. Save to HBase (Phoenix)
    orderInforStream.foreachRDD(rdd => {
      // the column names must match the Phoenix columns, which are upper-cased for unquoted identifiers
      rdd.saveToPhoenix(Constant.GMALL_ORDER_INFO,
        Seq("ID", "PROVINCE_ID", "CONSIGNEE", "ORDER_COMMENT", "CONSIGNEE_TEL", "ORDER_STATUS", "PAYMENT_WAY",
          "USER_ID", "IMG_URL", "TOTAL_AMOUNT", "EXPIRE_TIME", "DELIVERY_ADDRESS", "CREATE_TIME", "OPERATE_TIME",
          "TRACKING_NO", "PARENT_ORDER_ID", "OUT_TRADE_NO", "TRADE_BODY", "CREATE_DATE", "CREATE_HOUR"),
        zkUrl = Some("hadoop12,hadoop13,hadoop14:2181"))
    })
    ssc.start()
    ssc.awaitTermination()
  }

}
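After OrderApp has processed a few batches of mock data, a quick query in SQuirreL (or sqlline) against the table created earlier confirms that the masked rows are being written; the column names are those from the CREATE TABLE above:

SELECT ID, CONSIGNEE, CONSIGNEE_TEL, CREATE_DATE, CREATE_HOUR
FROM GMALL_ORDER_INFO1602
LIMIT 10;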

