newAPIHadoopRDD

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{ConnectionFactory, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

object HbaseManager {

  val hbaseConf = HBaseConfiguration.create()
  // The connection is created here but is not actually used by the TableInputFormat scan below.
  val hbaseConn = ConnectionFactory.createConnection(hbaseConf)

  /**
   * Builds an RDD over an HBase table via the new Hadoop API.
   *
   * @param sc the SparkContext used to create the RDD
   */
  def getAsNewHadoopApi(sc: SparkContext): RDD[(ImmutableBytesWritable, Result)] = {
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "apptmslogs:waybillInfo")             // table name, as namespace:tableName
    hbaseConf.set(TableInputFormat.SCAN_COLUMNS, "cf1:arrivecity cf1:carno")          // multiple columns are separated by spaces
    hbaseConf.set(TableInputFormat.SCAN_ROW_START, "20180517132443-Y01591805170032")  // start rowkey (inclusive)
    hbaseConf.set(TableInputFormat.SCAN_ROW_STOP, "20180615170225-Y01591806150061")   // stop rowkey (exclusive): the range is [start, stop)

    val hbaseRDD: RDD[(ImmutableBytesWritable, Result)] = sc.newAPIHadoopRDD(
      hbaseConf,
      classOf[TableInputFormat],        // input format for the HBase table
      classOf[ImmutableBytesWritable],  // key: the HBase rowkey
      classOf[Result]                   // value: the row's result set
    )
    hbaseRDD
  }

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName(this.getClass.getCanonicalName)
    val sc = new SparkContext(sparkConf)

    val rdd = getAsNewHadoopApi(sc)
    rdd.foreachPartition(fp => {
      fp.foreach { case (k, v) =>
        val rowkey = new String(k.get(), k.getOffset, k.getLength)
        for (cell <- v.rawCells()) {
          val rowid     = new String(cell.getRowArray, cell.getRowOffset, cell.getRowLength)
          val family    = new String(cell.getFamilyArray, cell.getFamilyOffset, cell.getFamilyLength)
          val qualifier = new String(cell.getQualifierArray, cell.getQualifierOffset, cell.getQualifierLength)
          val value     = new String(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          println((rowkey, rowid, family, qualifier, value))
        }
      }
    })

    sc.stop()
  }
}
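The listing above only prints raw cells. As a complement, here is a minimal sketch of turning the same (rowkey, Result) pairs into plain string tuples so they can be used in later transformations. The HbaseRddMapping object, the toColumnTuples name, and the empty-string defaults for missing cells are assumptions for illustration; the cf1:arrivecity and cf1:carno columns come from the SCAN_COLUMNS setting above.

import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD

object HbaseRddMapping {

  // Maps the scanned (rowkey, Result) pairs to (rowkey, arrivecity, carno) string tuples.
  // Missing cells fall back to an empty string; adjust family/qualifiers to your schema.
  def toColumnTuples(hbaseRDD: RDD[(ImmutableBytesWritable, Result)]): RDD[(String, String, String)] = {
    hbaseRDD.map { case (key, result) =>
      val rowkey = Bytes.toString(key.get(), key.getOffset, key.getLength)
      val arriveCity = Option(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("arrivecity")))
        .map(b => Bytes.toString(b)).getOrElse("")
      val carNo = Option(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("carno")))
        .map(b => Bytes.toString(b)).getOrElse("")
      (rowkey, arriveCity, carNo)
    }
  }
}

In main this would be called as, for example, HbaseRddMapping.toColumnTuples(rdd).take(10).foreach(println). Converting to strings inside the map is deliberate: the Writable/Result objects produced by newAPIHadoopRDD are reused by the record reader, so they should not be cached or collected directly.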