import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.{SparkConf, SparkContext}

object HbaseTest {
  def main(args: Array[String]): Unit = {
    // Outside the spark-shell a SparkContext must be created explicitly; in the shell, `sc` is already provided.
    val sparkConf = new SparkConf().setAppName("HbaseTest")
    val sc = new SparkContext(sparkConf)
    //0
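    // Read the HBase table "tmp" through TableInputFormat as an RDD of (ImmutableBytesWritable, Result)
    // pairs, count its rows, then pull the newest (rowkey, value) bytes for the cf:val column of each row.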
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, "tmp")
    var hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])
    hBaseRDD.count()
    import scala.collection.JavaConverters._
    hBaseRDD.map(tuple => tuple._2).map(result => result.getColumn("cf".getBytes(), "val".getBytes())).map(keyValues => {
      // Pick the KeyValue with the newest timestamp, then return its row and value.
      val latest = keyValues.asScala.reduceLeft {
        (a, b) => if (a.getTimestamp > b.getTimestamp) a else b
      }
      (latest.getRow, latest.getValue)
    }).take(10)
    //1
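    // Same as //0, but decode the rowkey and the latest cf:val value as strings for readability.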
    hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumn("cf".getBytes(), "val".getBytes()))).map(row => {
      (
        row._1.map(_.toChar).mkString,
        row._2.asScala.reduceLeft {
          (a, b) => if (a.getTimestamp > b.getTimestamp) a else b
        }.getValue.map(_.toChar).mkString
      )
    }).take(10)
    //2
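    // Switch to the table "test1" and read the lf:app1 column, skipping rows that lack it.
    // Note that mapping the value bytes with toInt only concatenates the byte values; //3 decodes them properly.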
conf.set(TableInputFormat.INPUT_TABLE, "test1")
//var hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])
hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumn("lf".getBytes(), "app1".getBytes()))).map(row => if (row._2.size > 0) {
(
row._1.map(_.toChar).mkString,
row._2.asScala.reduceLeft {
(a, b) => if (a.getTimestamp > b.getTimestamp) a else b
}.getValue.map(_.toInt).mkString
)
}).take(10)
    //3
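    // Same query as //2, but decode the lf:app1 value bytes as a long via ByteBuffer.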
    import java.nio.ByteBuffer
    hBaseRDD.map(tuple => tuple._2).map(result => (result.getRow, result.getColumn("lf".getBytes(), "app1".getBytes()))).filter(row => row._2.size > 0).map(row => {
      (
        row._1.map(_.toChar).mkString,
        ByteBuffer.wrap(row._2.asScala.reduceLeft {
          (a, b) => if (a.getTimestamp > b.getTimestamp) a else b
        }.getValue).getLong
      )
    }).take(10)
    //4
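    // Restrict the scan to the lf:app1 column on the server side, so Result.value
    // (the first and now only cell) can be read directly.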
    //conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "lf")
    conf.set(TableInputFormat.SCAN_COLUMNS, "lf:app1")
    // Recreate the RDD again so the scan restriction actually takes effect.
    hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])
    hBaseRDD.map(tuple => tuple._2).map(result => {
      (result.getRow.map(_.toChar).mkString,
        ByteBuffer.wrap(result.value).getLong
      )
    }).take(10)
    //5
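    // Group the rowkeys of "test1" by the prefix before the '|' separator.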
    // A fresh configuration so the SCAN_COLUMNS restriction from //4 is not carried over.
    val conf2 = HBaseConfiguration.create()
    conf2.set(TableInputFormat.INPUT_TABLE, "test1")
    hBaseRDD = sc.newAPIHadoopRDD(conf2, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result])
    val rows = hBaseRDD.map(tuple => tuple._2).map(result => result.getRow.map(_.toChar).mkString)
    rows.map(row => row.split("\\|")).map(r => if (r.length > 1) (r(0), r(1)) else (r(0), "")).groupByKey().take(10)
  }
}