Spark: reading and writing HBase


Test code:

package make.zhangsheniMain

import make.bean.CaseClass.user
import make.service.HbaseService
import make.tools.PropertiesTool
import org.apache.hadoop.hbase.client.{Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.SparkSession

/**
  * @Author: maketubu
  * @Date: 2019/11/1 10:37
  */
object hbase_test {

  def read4hbase(spark: SparkSession): Unit = {
    val conf = HbaseService.getHbaseConf()

    conf.set(TableInputFormat.INPUT_TABLE,PropertiesTool.getproperties("event_table","hbase.properties"))
    HbaseService.setScan(conf, null, null, Array[String]("info"),Array[String]("info:name","info:age"))

    //    conf.set("hbase.rootdir", "hdfs://master:8020/hbase")
    //    conf.set("hbase.zookeeper.quorum", "master:2181")

    // Read the whole table as an RDD of (ImmutableBytesWritable, Result)
    val resrdd = spark.sparkContext.newAPIHadoopRDD(conf, classOf[TableInputFormat]
      , classOf[ImmutableBytesWritable]
      , classOf[Result])
    import spark.implicits._
    // Convert each Result into a user case class, then into a DataFrame
    val resdf = resrdd.map(infos => {
      val key = Bytes.toString(infos._2.getRow)
      val name = Bytes.toString(infos._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val age = Bytes.toString(infos._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      user(key, name, age)
    }).toDF()

    resdf.show()
  }

  def write2hbase(spark: SparkSession): Unit ={
    val conf = HbaseService.getHbaseConf()
    conf.set(TableOutputFormat.OUTPUT_TABLE,PropertiesTool.getproperties("event_table","hbase.properties"))
//    HbaseService.setScan(conf, null, null, Array[String]("info"),Array[String]("info:name","info:age"))

    val job = Job.getInstance(conf)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    // The values written to HBase are Put mutations, not Result
    job.setOutputValueClass(classOf[Put])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

//    val jobconf = new JobConf(conf)
//
//    jobconf.setOutputFormat(classOf[TableOutputFormat])

    val indataRDD = spark.sparkContext.makeRDD(Array("8,jackson,15","9,jack,35"))

    val rdd = indataRDD.map(_.split(',')).map { arr =>
      val put = new Put(Bytes.toBytes(arr(0)))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(arr(2)))
      (new ImmutableBytesWritable, put)
    }

    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
    spark.stop()
  }



  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("HbaseTestApp")
      .config("spark.network.timeout","1200s")
      .getOrCreate()

//    read4hbase(spark)
    write2hbase(spark)
  }

}
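
To quickly check that write2hbase actually landed the two rows, the plain HBase client API can be used with the same configuration. This is a minimal sketch and not part of the original test code; the table name "user_table" is a placeholder for whatever event_table resolves to in hbase.properties.

import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.util.Bytes

object VerifyWrite {
  def main(args: Array[String]): Unit = {
    val conf = HbaseService.getHbaseConf()
    val connection = ConnectionFactory.createConnection(conf)
    // "user_table" is a placeholder; use the value of event_table from hbase.properties
    val table = connection.getTable(TableName.valueOf("user_table"))
    try {
      val result = table.get(new Get(Bytes.toBytes("8")))
      val name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      println(s"rowkey=8 name=$name age=$age")
    } finally {
      table.close()
      connection.close()
    }
  }
}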

HBase service class:

package make.service

/**
  * @Author: maketubu
  * @Date: 2019/11/1 10:34
  */

import make.tools.{PropertiesTool, StringUtils}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.{Base64, Bytes}

object HbaseService {
  def getHbaseConf():org.apache.hadoop.conf.Configuration={
    val conf = HBaseConfiguration.create()
    val hbase_zk_quorum = PropertiesTool.getproperties("hbase_zk_quorum","hbase.properties")
    val hbase_zk_port = PropertiesTool.getproperties("hbase_zk_port","hbase.properties")
    val hbase_zk_parent = PropertiesTool.getproperties("hbase_zk_parent","hbase.properties")

    println(hbase_zk_quorum, hbase_zk_port, hbase_zk_parent)

    conf.set("hbase.zookeeper.quorum",hbase_zk_quorum)
    conf.set("hbase.zookeeper.port",hbase_zk_port)
    conf.set("zookeeper.znode.parent",hbase_zk_parent)
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp")

    conf
  }

  def setScan(conf:org.apache.hadoop.conf.Configuration,startRowkey: String,endRowkey:String
              ,families:Array[String],columns:Array[String]):Unit={
    val scan = if (StringUtils.isEmpty(startRowkey) || StringUtils.isEmpty(endRowkey)) {
      new Scan()
    } else {
      new Scan(Bytes.toBytes(startRowkey), Bytes.toBytes(endRowkey))
    }
    if(families != null){
      for(family <- families){
        scan.addFamily(Bytes.toBytes(family))
      }
    }
    if(columns != null){
      for(column<-columns){
        val cols = column.split(":")
        scan.addColumn(Bytes.toBytes(cols(0)),Bytes.toBytes(cols(1)))
      }
    }
    val proto = ProtobufUtil.toScan(scan)
    val scan2String = Base64.encodeBytes(proto.toByteArray)
    conf.set(TableInputFormat.SCAN,scan2String)

  }
}
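
For reference, the code above expects an hbase.properties file with at least the following keys. The values shown here are placeholders, not the ones from the original environment:

# hbase.properties (example values only)
event_table=user_table
hbase_zk_quorum=master
hbase_zk_port=2181
hbase_zk_parent=/hbase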

It looks like the hbase-spark connector has now been open-sourced and should make reading and writing HBase more convenient, but I haven't tested it yet; I'll take a look when I have time. A rough sketch of what the read might look like follows.
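
The following is an untested sketch of the hbase-spark DataFrame API, not something verified in this post. The table name and column mapping are assumptions based on the schema used above, and an HBaseContext is created first so the connector can pick up the configuration.

import org.apache.hadoop.hbase.spark.HBaseContext

// Register an HBaseContext so the connector uses our HBase configuration
new HBaseContext(spark.sparkContext, HbaseService.getHbaseConf())

val df = spark.read
  .format("org.apache.hadoop.hbase.spark")
  .option("hbase.table", "user_table") // placeholder, same table as event_table
  .option("hbase.columns.mapping",
    "key STRING :key, name STRING info:name, age STRING info:age")
  .load()

df.show()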
