Test code:
package make.zhangsheniMain
import make.bean.CaseClass.user
import make.service.HbaseService
import make.tools.PropertiesTool
import org.apache.hadoop.hbase.client.{Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.SparkSession
/**
* @Author: maketubu
* @Date: 2019/11/1 10:37
*/
object hbase_test {

  def read4hbase(spark: SparkSession): Unit = {
    val conf = HbaseService.getHbaseConf()
    conf.set(TableInputFormat.INPUT_TABLE, PropertiesTool.getproperties("event_table", "hbase.properties"))
    HbaseService.setScan(conf, null, null, Array[String]("info"), Array[String]("info:name", "info:age"))
    // conf.set("hbase.rootdir", "hdfs://master:8020/hbase")
    // conf.set("hbase.zookeeper.quorum", "master:2181")
    // read the full contents of the table (restricted by the Scan set above)
    val resrdd = spark.sparkContext.newAPIHadoopRDD(conf, classOf[TableInputFormat]
      , classOf[ImmutableBytesWritable]
      , classOf[Result])
    import spark.implicits._
    val resdf = resrdd.map(infos => {
      val key = Bytes.toString(infos._2.getRow)
      val name = Bytes.toString(infos._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val age = Bytes.toString(infos._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      user(key, name, age)
    }).toDF()
    resdf.show()
  }
  def write2hbase(spark: SparkSession): Unit = {
    val conf = HbaseService.getHbaseConf()
    conf.set(TableOutputFormat.OUTPUT_TABLE, PropertiesTool.getproperties("event_table", "hbase.properties"))
    // HbaseService.setScan(conf, null, null, Array[String]("info"), Array[String]("info:name","info:age"))
    val job = Job.getInstance(conf)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    // the records written below are Puts, so the value class is Put (a Mutation), not Result
    job.setOutputValueClass(classOf[Put])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    // val jobconf = new JobConf(conf)
    // jobconf.setOutputFormat(classOf[TableOutputFormat])
    val indataRDD = spark.sparkContext.makeRDD(Array("8,jackson,15", "9,jack,35"))
    val rdd = indataRDD.map(_.split(',')).map { arr =>
      val put = new Put(Bytes.toBytes(arr(0)))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(arr(2)))
      (new ImmutableBytesWritable, put)
    }
    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
    spark.stop()
  }
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("HbaseTestApp")
      .config("spark.network.timeout", "1200s")
      .getOrCreate()
    // read4hbase(spark)
    write2hbase(spark)
  }
}
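The test code also depends on a `user` case class (`make.bean.CaseClass`) and an hbase.properties file that are not shown in the post. A minimal sketch of what they might look like follows; the field layout and property values are hypothetical, matching only the keys and columns used above.

package make.bean

object CaseClass {
  // hypothetical row model for the columns read above: rowkey, info:name, info:age
  case class user(key: String, name: String, age: String)
}

# hbase.properties (hypothetical values)
event_table=user_event
hbase_zk_quorum=master,slave1,slave2
hbase_zk_port=2181
hbase_zk_parent=/hbase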
HBase service class:
package make.service
/**
* @Author: maketubu
* @Date: 2019/11/1 10:34
*/
import make.tools.{PropertiesTool, StringUtils}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.{Base64, Bytes}
object HbaseService {

  def getHbaseConf(): org.apache.hadoop.conf.Configuration = {
    val conf = HBaseConfiguration.create()
    val hbase_zk_quorum = PropertiesTool.getproperties("hbase_zk_quorum", "hbase.properties")
    val hbase_zk_port = PropertiesTool.getproperties("hbase_zk_port", "hbase.properties")
    val hbase_zk_parent = PropertiesTool.getproperties("hbase_zk_parent", "hbase.properties")
    println(hbase_zk_quorum, hbase_zk_port, hbase_zk_parent)
    conf.set("hbase.zookeeper.quorum", hbase_zk_quorum)
    // the ZooKeeper client port key is hbase.zookeeper.property.clientPort
    conf.set("hbase.zookeeper.property.clientPort", hbase_zk_port)
    conf.set("zookeeper.znode.parent", hbase_zk_parent)
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp")
    conf
  }

  def setScan(conf: org.apache.hadoop.conf.Configuration, startRowkey: String, endRowkey: String
              , families: Array[String], columns: Array[String]): Unit = {
    var scan: Scan = null
    if (StringUtils.isEmpty(startRowkey) || StringUtils.isEmpty(endRowkey)) {
      scan = new Scan()
    } else {
      scan = new Scan(Bytes.toBytes(startRowkey), Bytes.toBytes(endRowkey))
    }
    if (families != null) {
      for (family <- families) {
        scan.addFamily(Bytes.toBytes(family))
      }
    }
    if (columns != null) {
      for (column <- columns) {
        val cols = column.split(":")
        scan.addColumn(Bytes.toBytes(cols(0)), Bytes.toBytes(cols(1)))
      }
    }
    // serialize the Scan and hand it to TableInputFormat through the configuration
    val proto = ProtobufUtil.toScan(scan)
    val scan2String = Base64.encodeBytes(proto.toByteArray)
    conf.set(TableInputFormat.SCAN, scan2String)
  }
}
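The PropertiesTool and StringUtils helpers imported above are likewise not shown. A minimal sketch, assuming the properties files sit on the classpath (helper names kept from the imports, bodies are an assumption):

package make.tools

import java.util.Properties

object PropertiesTool {
  // load `key` from a properties file on the classpath, e.g. hbase.properties
  def getproperties(key: String, file: String): String = {
    val in = Thread.currentThread().getContextClassLoader.getResourceAsStream(file)
    require(in != null, s"$file not found on classpath")
    val props = new Properties()
    try props.load(in) finally in.close()
    props.getProperty(key)
  }
}

object StringUtils {
  def isEmpty(s: String): Boolean = s == null || s.trim.isEmpty
}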
It seems an hbase-spark connector has now been open-sourced that makes reading and writing HBase data more convenient, but I have not tested it; worth a look when there is time.
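As an untested sketch, reading the same table through the hbase-spark DataFrame source might look roughly like the following. The format string, option names, column mapping syntax, and table name are assumptions based on the hbase-connectors module and should be verified before use:

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.sql.SparkSession

object HbaseSparkSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("hbase-spark-sketch").getOrCreate()
    // the data source reuses a shared HBaseContext built from the usual HBase configuration
    new HBaseContext(spark.sparkContext, HBaseConfiguration.create())
    val df = spark.read
      .format("org.apache.hadoop.hbase.spark")          // data source from hbase-connectors (assumed)
      .option("hbase.table", "user_event")              // hypothetical table name
      .option("hbase.columns.mapping",
        "key STRING :key, name STRING info:name, age STRING info:age")
      .load()
    df.show()
    spark.stop()
  }
}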