Test code:
package make.zhangsheniMain
import make.bean.CaseClass.user
import make.service.HbaseService
import make.tools.PropertiesTool
import org.apache.hadoop.hbase.client.{Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.SparkSession
/**
* @Author: maketubu
* @Date: 2019/11/1 10:37
*/
object hbase_test {

  def read4hbase(spark: SparkSession): Unit = {
    val conf = HbaseService.getHbaseConf()
    conf.set(TableInputFormat.INPUT_TABLE, PropertiesTool.getproperties("event_table", "hbase.properties"))
    HbaseService.setScan(conf, null, null, Array[String]("info"), Array[String]("info:name", "info:age"))
    // conf.set("hbase.rootdir", "hdfs://master:8020/hbase")
    // conf.set("hbase.zookeeper.quorum", "master:2181")
    // read the full contents of the table (restricted by the Scan set above)
    val resrdd = spark.sparkContext.newAPIHadoopRDD(conf, classOf[TableInputFormat]
      , classOf[ImmutableBytesWritable]
      , classOf[Result])
    import spark.implicits._
    val resdf = resrdd.map(infos => {
      val key = Bytes.toString(infos._2.getRow)
      val name = Bytes.toString(infos._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val age = Bytes.toString(infos._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      user(key, name, age)
    }).toDF()
    resdf.show()
  }
  def write2hbase(spark: SparkSession): Unit = {
    val conf = HbaseService.getHbaseConf()
    conf.set(TableOutputFormat.OUTPUT_TABLE, PropertiesTool.getproperties("event_table", "hbase.properties"))
    // HbaseService.setScan(conf, null, null, Array[String]("info"), Array[String]("info:name","info:age"))
    val job = Job.getInstance(conf)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    // the records written below are Puts, so the value class is Put (a Mutation), not Result
    job.setOutputValueClass(classOf[Put])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    // val jobconf = new JobConf(conf)
    // jobconf.setOutputFormat(classOf[TableOutputFormat])
    val indataRDD = spark.sparkContext.makeRDD(Array("8,jackson,15", "9,jack,35"))
    val rdd = indataRDD.map(_.split(',')).map { arr =>
      val put = new Put(Bytes.toBytes(arr(0)))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(arr(2)))
      (new ImmutableBytesWritable, put)
    }
    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
    spark.stop()
  }
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("HbaseTestApp")
      .config("spark.network.timeout", "1200s")
      .getOrCreate()
    // read4hbase(spark)
    write2hbase(spark)
  }
}
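The test code also depends on a `user` case class (`make.bean.CaseClass`) and an hbase.properties file that are not shown in the post. A minimal sketch of what they might look like follows; the field layout and property values are hypothetical, matching only the keys and columns used above.

package make.bean

object CaseClass {
  // hypothetical row model for the columns read above: rowkey, info:name, info:age
  case class user(key: String, name: String, age: String)
}

# hbase.properties (hypothetical values)
event_table=user_event
hbase_zk_quorum=master,slave1,slave2
hbase_zk_port=2181
hbase_zk_parent=/hbase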
HBase service class:
package make.service
/**
* @Author: maketubu
* @Date: 2019/11/1 10:34
*/
import make.tools.{PropertiesTool, StringUtils}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.{Base64, Bytes}
object HbaseService {

  def getHbaseConf(): org.apache.hadoop.conf.Configuration = {
    val conf = HBaseConfiguration.create()
    val hbase_zk_quorum = PropertiesTool.getproperties("hbase_zk_quorum", "hbase.properties")
    val hbase_zk_port = PropertiesTool.getproperties("hbase_zk_port", "hbase.properties")
    val hbase_zk_parent = PropertiesTool.getproperties("hbase_zk_parent", "hbase.properties")
    println(hbase_zk_quorum, hbase_zk_port, hbase_zk_parent)
    conf.set("hbase.zookeeper.quorum", hbase_zk_quorum)
    // the ZooKeeper client port key is hbase.zookeeper.property.clientPort
    conf.set("hbase.zookeeper.property.clientPort", hbase_zk_port)
    conf.set("zookeeper.znode.parent", hbase_zk_parent)
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp")
    conf
  }

  def setScan(conf: org.apache.hadoop.conf.Configuration, startRowkey: String, endRowkey: String
              , families: Array[String], columns: Array[String]): Unit = {
    var scan: Scan = null
    if (StringUtils.isEmpty(startRowkey) || StringUtils.isEmpty(endRowkey)) {
      scan = new Scan()
    } else {
      scan = new Scan(Bytes.toBytes(startRowkey), Bytes.toBytes(endRowkey))
    }
    if (families != null) {
      for (family <- families) {
        scan.addFamily(Bytes.toBytes(family))
      }
    }
    if (columns != null) {
      for (column <- columns) {
        val cols = column.split(":")
        scan.addColumn(Bytes.toBytes(cols(0)), Bytes.toBytes(cols(1)))
      }
    }
    // serialize the Scan and hand it to TableInputFormat through the configuration
    val proto = ProtobufUtil.toScan(scan)
    val scan2String = Base64.encodeBytes(proto.toByteArray)
    conf.set(TableInputFormat.SCAN, scan2String)
  }
}
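The PropertiesTool and StringUtils helpers imported above are likewise not shown. A minimal sketch, assuming the properties files sit on the classpath (helper names kept from the imports, bodies are an assumption):

package make.tools

import java.util.Properties

object PropertiesTool {
  // load `key` from a properties file on the classpath, e.g. hbase.properties
  def getproperties(key: String, file: String): String = {
    val in = Thread.currentThread().getContextClassLoader.getResourceAsStream(file)
    require(in != null, s"$file not found on classpath")
    val props = new Properties()
    try props.load(in) finally in.close()
    props.getProperty(key)
  }
}

object StringUtils {
  def isEmpty(s: String): Boolean = s == null || s.trim.isEmpty
}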
It seems an hbase-spark connector has now been open-sourced that makes reading and writing HBase data more convenient, but I have not tested it; worth a look when there is time.
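As an untested sketch, reading the same table through the hbase-spark DataFrame source might look roughly like the following. The format string, option names, column mapping syntax, and table name are assumptions based on the hbase-connectors module and should be verified before use:

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.sql.SparkSession

object HbaseSparkSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("hbase-spark-sketch").getOrCreate()
    // the data source reuses a shared HBaseContext built from the usual HBase configuration
    new HBaseContext(spark.sparkContext, HBaseConfiguration.create())
    val df = spark.read
      .format("org.apache.hadoop.hbase.spark")          // data source from hbase-connectors (assumed)
      .option("hbase.table", "user_event")              // hypothetical table name
      .option("hbase.columns.mapping",
        "key STRING :key, name STRING info:name, age STRING info:age")
      .load()
    df.show()
    spark.stop()
  }
}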