package hgs.spark.hbase
//https://blog.csdn.net/mlljava1111/article/details/52675901
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.FilterList
import org.apache.hadoop.hbase.filter.FilterList.Operator
import org.apache.hadoop.hbase.filter.RowFilter
import org.apache.hadoop.hbase.filter.RegexStringComparator
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.Base64
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.filter.LongComparator
/**
 * Example Spark driver that reads a row-key range from an HBase table through
 * `TableInputFormat` and prints the row key plus the `cf1:name` and `cf1:age`
 * cells of every row returned by the scan.
 */
object HbaseToSpark {

  /**
   * Builds the Spark and HBase configuration, runs the scan as a Hadoop RDD and
   * prints each row. The SparkContext is always stopped, even when the job fails.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): Kryo is presumably required because repartition() shuffles
    // ImmutableBytesWritable/Result pairs, which are not java.io.Serializable -- confirm.
    // The master is hard-coded to "local", so this job always runs in-process.
    val conf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setMaster("local")
      .setAppName("hbasedata")
    val context = new SparkContext(conf)

    try {
      // HBase connection settings. create() is the supported factory; the
      // public HBaseConfiguration constructor is deprecated.
      val hconf = HBaseConfiguration.create()
      hconf.set("hbase.zookeeper.quorum", "bigdata00:2181,bigdata01:2181,bigdata02:2181")
      hconf.set("hbase.zookeeper.property.clientPort", "2181")
      hconf.set(TableInputFormat.INPUT_TABLE, "test")

      // Start and stop row keys of the scan. Bytes.toBytes encodes as UTF-8
      // regardless of the JVM's default charset.
      val scan = new Scan
      scan.setStartRow(Bytes.toBytes("1991"))
      scan.setStopRow(Bytes.toBytes("3000"))

      // Alternative (disabled): restrict rows with row filters instead of start/stop keys.
      //val list = new FilterList(Operator.MUST_PASS_ALL)
      //val filter1 = new RowFilter(CompareOp.GREATER_OR_EQUAL,new LongComparator(1991))
      //val filter2 = new RowFilter(CompareOp.LESS_OR_EQUAL,new RegexStringComparator("3000*"))
      //list.addFilter(filter1)
      //list.addFilter(filter2)
      //scan.setFilter(list)

      // Hand the scan to TableInputFormat in its serialized string form.
      hconf.set(TableInputFormat.SCAN, convertScanToString(scan))

      val hrdd = context.newAPIHadoopRDD(
        hconf,
        classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])
      val resultrdd = hrdd.repartition(2)

      // Column coordinates are encoded once here rather than once per row.
      val family = Bytes.toBytes("cf1")
      val nameQualifier = Bytes.toBytes("name")
      val ageQualifier = Bytes.toBytes("age")

      // Print the result. A missing cell is printed as "null", as before.
      resultrdd.foreach { case (_, value) =>
        val key = Bytes.toString(value.getRow)
        val name = Bytes.toString(value.getValue(family, nameQualifier))
        val age = Bytes.toString(value.getValue(family, ageQualifier))
        println(s"rowkey:$key name:$name age:$age")
      }
    } finally {
      // Release the SparkContext even if configuration or the job throws.
      context.stop()
    }
  }

  /**
   * Serializes a scan into the string form stored under `TableInputFormat.SCAN`:
   * the scan's protobuf message, Base64-encoded.
   *
   * @param scan the scan to serialize
   * @return the Base64 text of the scan's protobuf bytes
   */
  def convertScanToString(scan: Scan): String = {
    val proto = ProtobufUtil.toScan(scan)
    Base64.encodeBytes(proto.toByteArray)
  }
}