newAPIHadoopRDD

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{ConnectionFactory, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

object HbaseManager {

  val hbaseConf = HBaseConfiguration.create()
  // The connection is created here but is not actually used by the TableInputFormat scan below.
  val hbaseConn = ConnectionFactory.createConnection(hbaseConf)

  /**
   * Builds an RDD over an HBase table via the new Hadoop API.
   *
   * @param sc the SparkContext used to create the RDD
   */
  def getAsNewHadoopApi(sc: SparkContext): RDD[(ImmutableBytesWritable, Result)] = {
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "apptmslogs:waybillInfo")             // table name, as namespace:tableName
    hbaseConf.set(TableInputFormat.SCAN_COLUMNS, "cf1:arrivecity cf1:carno")          // multiple columns are separated by spaces
    hbaseConf.set(TableInputFormat.SCAN_ROW_START, "20180517132443-Y01591805170032")  // start rowkey (inclusive)
    hbaseConf.set(TableInputFormat.SCAN_ROW_STOP, "20180615170225-Y01591806150061")   // stop rowkey (exclusive): the range is [start, stop)

    val hbaseRDD: RDD[(ImmutableBytesWritable, Result)] = sc.newAPIHadoopRDD(
      hbaseConf,
      classOf[TableInputFormat],        // input format for the HBase table
      classOf[ImmutableBytesWritable],  // key: the HBase rowkey
      classOf[Result]                   // value: the row's result set
    )
    hbaseRDD
  }

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName(this.getClass.getCanonicalName)
    val sc = new SparkContext(sparkConf)

    val rdd = getAsNewHadoopApi(sc)
    rdd.foreachPartition(fp => {
      fp.foreach { case (k, v) =>
        val rowkey = new String(k.get(), k.getOffset, k.getLength)
        for (cell <- v.rawCells()) {
          val rowid     = new String(cell.getRowArray, cell.getRowOffset, cell.getRowLength)
          val family    = new String(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
          val qualifier = new String(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
          val value     = new String(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          println((rowkey, rowid, family, qualifier, value))
        }
      }
    })

    sc.stop()
  }
}
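The listing above only prints raw cells. As a complement, here is a minimal sketch of turning the same (rowkey, Result) pairs into plain string tuples so they can be used in later transformations. The HbaseRddMapping object, the toColumnTuples name, and the empty-string defaults for missing cells are assumptions for illustration; the cf1:arrivecity and cf1:carno columns come from the SCAN_COLUMNS setting above.

import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD

object HbaseRddMapping {

  // Maps the scanned (rowkey, Result) pairs to (rowkey, arrivecity, carno) string tuples.
  // Missing cells fall back to an empty string; adjust family/qualifiers to your schema.
  def toColumnTuples(hbaseRDD: RDD[(ImmutableBytesWritable, Result)]): RDD[(String, String, String)] = {
    hbaseRDD.map { case (key, result) =>
      val rowkey = Bytes.toString(key.get(), key.getOffset, key.getLength)
      val arriveCity = Option(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("arrivecity")))
        .map(b => Bytes.toString(b)).getOrElse("")
      val carNo = Option(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("carno")))
        .map(b => Bytes.toString(b)).getOrElse("")
      (rowkey, arriveCity, carNo)
    }
  }
}

In main this would be called as, for example, HbaseRddMapping.toColumnTuples(rdd).take(10).foreach(println). Converting to strings inside the map is deliberate: the Writable/Result objects produced by newAPIHadoopRDD are reused by the record reader, so they should not be cached or collected directly.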