Writing Data from Spark to HBase


What this program does:

Read the order table from Hive, compute each user's total order amount over the last 30 days, and write the result to HBase.

package pro3

import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{HBaseAdmin, Put}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.sql.{DataFrame, SparkSession}
/*
Write the aggregated results into HBase.
 */
object monthAmount {
  def main(args: Array[String]): Unit = {
    // One SparkSession is enough; it owns the underlying SparkContext.
    val spark = SparkSession.builder()
      .appName("HBaseTest")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "hdfs://mini1:9000/user/hive/warehouse")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql("use pro3_dw")
    // Total order amount per user for the 30 days before 2019-05-28;
    // limit 4 keeps this test run small.
    val df1: DataFrame = spark.sql(
      "select userid, sum(order_amount) as total_amount from dw_order2 " +
        "where ctime > date_sub('2019-05-28', 30) group by userid limit 4")

    // Turn each row into the (rowkey, Put) pair that TableOutputFormat expects:
    // rowkey = userid, column family cf1 holding a fixed tag and the amount as its weight.
    val rdd1 = df1.rdd
    val rdd2 = rdd1.map(x => {
      val uid: String = x.getString(0)
      val amount: Any = x.get(1)
      val put = new Put(Bytes.toBytes(uid))
      put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("tag"), Bytes.toBytes("B220U015_001"))
      put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("tagweight"), Bytes.toBytes(amount.toString))
      (new ImmutableBytesWritable, put)
    })

    // HBase client configuration: ZooKeeper quorum and the default filesystem.
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "mini1:2181,mini2:2181,mini3:2181")
    conf.set("fs.defaultFS", "hdfs://mini1:9000")

    val tableName = "userPhoto"

    // Create the target table with column family cf1 if it does not exist yet.
    val admin = new HBaseAdmin(conf)
    if (!admin.tableExists(TableName.valueOf(tableName))) {
      println(s"table $tableName does not exist, creating it")
      val desc = new HTableDescriptor(TableName.valueOf(tableName))
      val hcd = new HColumnDescriptor("cf1")
      desc.addFamily(hcd)
      admin.createTable(desc)
    }
    admin.close()

    // Initialize the JobConf. Note that TableOutputFormat must be the one from
    // the org.apache.hadoop.hbase.mapred package (the old MapReduce API)!
    val jobConf = new JobConf(conf)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)

    rdd2.saveAsHadoopDataset(jobConf)

    spark.stop()
  }
}
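
The same pairs can also be written through the newer MapReduce API, org.apache.hadoop.hbase.mapreduce.TableOutputFormat together with saveAsNewAPIHadoopDataset. This is a minimal sketch, not part of the original program; it assumes the rdd2, ZooKeeper quorum, and table name from above.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.mapreduce.Job

val newConf = HBaseConfiguration.create()
newConf.set("hbase.zookeeper.quorum", "mini1:2181,mini2:2181,mini3:2181")
newConf.set(TableOutputFormat.OUTPUT_TABLE, "userPhoto")

// The Job object only carries output-format metadata; no MapReduce job is launched.
val job = Job.getInstance(newConf)
job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
job.setOutputKeyClass(classOf[ImmutableBytesWritable])
job.setOutputValueClass(classOf[Put])

// rdd2 is the RDD[(ImmutableBytesWritable, Put)] built in the program above.
rdd2.saveAsNewAPIHadoopDataset(job.getConfiguration)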

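To verify the write, you can scan the table back into Spark with org.apache.hadoop.hbase.mapreduce.TableInputFormat. Again a minimal, self-contained sketch assuming the same cluster addresses; the object name readBackCheck is just for illustration.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.SparkSession

object readBackCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("readBackCheck").master("local[*]").getOrCreate()

    val readConf = HBaseConfiguration.create()
    readConf.set("hbase.zookeeper.quorum", "mini1:2181,mini2:2181,mini3:2181")
    readConf.set(TableInputFormat.INPUT_TABLE, "userPhoto")

    // Full scan of userPhoto as an RDD of (rowkey, Result) pairs.
    val hbaseRdd = spark.sparkContext.newAPIHadoopRDD(
      readConf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result])

    // Extract plain strings before collect(), since Result is not Java-serializable.
    val rows = hbaseRdd.map { case (_, result) =>
      val rowKey = Bytes.toString(result.getRow)
      val tag = Bytes.toString(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("tag")))
      val weight = Bytes.toString(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("tagweight")))
      (rowKey, tag, weight)
    }
    rows.collect().foreach(println)

    spark.stop()
  }
}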