Reading HBase from Scala (Spark)

The snippets below (the numbered //0–//5 blocks, originally run step by step in spark-shell) load an HBase table through TableInputFormat into an RDD of (ImmutableBytesWritable, Result) pairs and then decode row keys and cell values in several different ways.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.{SparkConf, SparkContext}

object HbaseTest {
  def main(args: Array[String]) {
  // SparkContext (in spark-shell this is the predefined `sc`)
  val sc = new SparkContext(new SparkConf().setAppName("HbaseTest"))

  //0: point TableInputFormat at table "tmp" and load it as an RDD of (rowkey, Result) pairs
  val conf = HBaseConfiguration.create()
  conf.set(TableInputFormat.INPUT_TABLE, "tmp")
  var hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])
  hBaseRDD.count()

  import scala.collection.JavaConverters._

  // keep only the newest cell for cf:val, then return its row key and value as raw byte arrays
  hBaseRDD.map(tuple => tuple._2).map(result => result.getColumn("cf".getBytes(), "val".getBytes())).map { keyValues =>
    val latest = keyValues.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
    (latest.getRow, latest.getValue)
  }.take(10)
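  // Sketch: the tuples above are raw byte arrays; HBase's own Bytes helper (assumed to be on the
  // classpath alongside HBaseConfiguration) renders them as readable strings directly.
  import org.apache.hadoop.hbase.util.Bytes
  hBaseRDD.map(tuple => tuple._2).map(result => result.getColumn("cf".getBytes(), "val".getBytes())).map { keyValues =>
    val latest = keyValues.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
    (Bytes.toString(latest.getRow), Bytes.toString(latest.getValue))
  }.take(10)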


  //1: same lookup, but decode the row key and cell value into readable strings
  hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumn("cf".getBytes(), "val".getBytes()))).map { row =>
    val latest = row._2.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
    (row._1.map(_.toChar).mkString, latest.getValue.map(_.toChar).mkString)
  }.take(10)
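  // Note: Result.getColumn is deprecated; on HBase 0.96+ the equivalent lookup (assuming
  // getColumnCells and CellUtil exist in your HBase version) would look like this sketch:
  // import org.apache.hadoop.hbase.CellUtil
  // hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumnCells("cf".getBytes(), "val".getBytes()))).map { row =>
  //   val latest = row._2.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
  //   (row._1.map(_.toChar).mkString, CellUtil.cloneValue(latest).map(_.toChar).mkString)
  // }.take(10)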


  //2: switch to table "test1" and recreate the RDD so the new table name takes effect;
  //   rows may lack the lf:app1 column, so filter before reducing
  conf.set(TableInputFormat.INPUT_TABLE, "test1")
  hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])
  hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumn("lf".getBytes(), "app1".getBytes()))).filter(_._2.size > 0).map { row =>
    val latest = row._2.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
    // mapping each byte to Int only dumps the raw byte values; //3 decodes the value as a Long instead
    (row._1.map(_.toChar).mkString, latest.getValue.map(_.toInt).mkString)
  }.take(10)
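  // Sketch: Bytes.toStringBinary (Bytes was imported in the //0 sketch above) escapes non-printable
  // bytes, which is usually easier to eyeball than the per-byte Int dump used here.
  hBaseRDD.map(tuple => tuple._2).map(result => result.getColumn("lf".getBytes(), "app1".getBytes())).filter(_.size > 0).map { keyValues =>
    val latest = keyValues.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
    (Bytes.toStringBinary(latest.getRow), Bytes.toStringBinary(latest.getValue))
  }.take(10)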


  //3: same column, but decode the 8-byte cell value as a Long

  import java.nio.ByteBuffer

  hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumn("lf".getBytes(), "app1".getBytes()))).filter(_._2.size > 0).map { row =>
    val latest = row._2.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
    (row._1.map(_.toChar).mkString, ByteBuffer.wrap(latest.getValue).getLong)
  }.take(10)
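  // Equivalent decoding with HBase's Bytes helper (imported above), assuming the value was written
  // as a big-endian 8-byte long (e.g. via Bytes.toBytes(someLong)):
  hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumn("lf".getBytes(), "app1".getBytes()))).filter(_._2.size > 0).map { row =>
    val latest = row._2.asScala.reduceLeft((a, b) => if (a.getTimestamp > b.getTimestamp) a else b)
    (row._1.map(_.toChar).mkString, Bytes.toLong(latest.getValue))
  }.take(10)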


  //4: restrict the scan to the single column lf:app1 and recreate the RDD,
  //   so Result.value (the value of the first and only cell) can be used directly
  //conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "lf")
  conf.set(TableInputFormat.SCAN_COLUMNS, "lf:app1")
  hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])

  hBaseRDD.map(tuple => tuple._2).map { result =>
    (result.getRow.map(_.toChar).mkString, ByteBuffer.wrap(result.value).getLong)
  }.take(10)
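  // Sketch: instead of SCAN_COLUMNS, a full Scan object can be serialized into the configuration
  // (assumption: TableMapReduceUtil.convertScanToString is accessible in your HBase version):
  // import org.apache.hadoop.hbase.client.Scan
  // import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil
  // val scan = new Scan()
  // scan.addColumn("lf".getBytes(), "app1".getBytes())
  // scan.setCaching(500)  // hypothetical caching value; tune for your cluster
  // conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(scan))
  // hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])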


  //5: fresh configuration (without the //4 scan restriction); row keys look like "prefix|suffix",
  //   so split on "|" and group the suffixes by prefix
  val conf2 = HBaseConfiguration.create()
  conf2.set(TableInputFormat.INPUT_TABLE, "test1")
  hBaseRDD = sc.newAPIHadoopRDD(conf2, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])
  val rows = hBaseRDD.map(tuple => tuple._2).map(result => result.getRow.map(_.toChar).mkString)
  rows.map(row => row.split("\\|")).map(r => if (r.length > 1) (r(0), r(1)) else (r(0), "")).groupByKey.take(10)
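  // groupByKey yields an RDD of (prefix, values); a quick way to inspect it on the driver
  // (collect is only safe for small results):
  rows.map(row => row.split("\\|")).map(r => if (r.length > 1) (r(0), r(1)) else (r(0), "")).groupByKey
    .collect()
    .foreach { case (prefix, suffixes) => println(prefix + " -> " + suffixes.mkString(",")) }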
}
}
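For reference, a minimal build.sbt sketch for compiling the object above. The artifact versions are placeholders chosen for an HBase line that still ships the deprecated Result.getColumn API; match them to your Spark and HBase cluster:

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"   % "1.6.3" % "provided",  // placeholder Spark version
  "org.apache.hbase" %  "hbase-client" % "0.98.6-hadoop2",      // placeholder HBase version
  "org.apache.hbase" %  "hbase-server" % "0.98.6-hadoop2"       // TableInputFormat lives in hbase-server for 0.9x
)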





