package SparkSql
import org.apache.hadoop.hbase.client.{HBaseAdmin, Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object writeHbase {
  def main(args: Array[String]): Unit = {
    // TODO: write data to HBase
    val config = new SparkConf().setAppName("DataSourceTest").setMaster("local[*]")
    val sc = new SparkContext(config)
    sc.setLogLevel("WARN")
    // Connect to HBase
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "node01:2181,node02:2181,node03:2181")
    // Create the "fruit" table
    val fruitTable = TableName.valueOf("fruit")
    // Describe the table schema
    val tableDescr = new HTableDescriptor(fruitTable)
    // Add the column family "info"
    tableDescr.addFamily(new HColumnDescriptor("info".getBytes))
    // Obtain an HBase admin handle
    val admin = new HBaseAdmin(conf)
    // If the table already exists, drop it first
    if (admin.tableExists(fruitTable)) {
      // A table must be disabled before it can be deleted
      admin.disableTable(fruitTable)
      // Delete the table
      admin.deleteTable(fruitTable)
    }
    // Create the table
    admin.createTable(tableDescr)
    // Release the admin handle
    admin.close()
    // Convert a (rowkey, name, price) triple into the (key, Put) pair TableOutputFormat expects
    def convert(triple: (String, String, String)) = {
      // Create the Put with the row key
      val put = new Put(Bytes.toBytes(triple._1))
      // Add the name and price columns under the "info" family
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(triple._2))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("price"), Bytes.toBytes(triple._3))
      // The key is ignored by TableOutputFormat, so an empty ImmutableBytesWritable suffices
      (new ImmutableBytesWritable, put)
    }
    // Build the sample data and map it into (ImmutableBytesWritable, Put) pairs
    val dataRDD: RDD[(String, String, String)] = sc.parallelize(List(("1", "apple", "11"), ("2", "banana", "12"), ("3", "pear", "13")))
    val targetRDD: RDD[(ImmutableBytesWritable, Put)] = dataRDD.map(convert)
    // Configure the output job
    val jobConf = new JobConf(conf)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    // Name the output table
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, "fruit")
    // Write the data
    targetRDD.saveAsHadoopDataset(jobConf)
    println("Data written successfully")
    println("--------------------------------------------------------------------------------------")
    // TODO: read the table back from HBase
    // Point the input format at the "fruit" table
    conf.set(TableInputFormat.INPUT_TABLE, "fruit")
    val hbaseRDD: RDD[(ImmutableBytesWritable, Result)] = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    // Count the rows
    val count: Long = hbaseRDD.count()
    println("hBaseRDD RDD Count:" + count)
    // Iterate over the HBase rows
    hbaseRDD.foreach {
      case (_, result) =>
        val key = Bytes.toString(result.getRow)
        val name = Bytes.toString(result.getValue("info".getBytes, "name".getBytes))
        val price = Bytes.toString(result.getValue("info".getBytes, "price".getBytes))
        // Print the assembled row
        println("Row key:" + key + " Name:" + name + " Price:" + price)
    }
    sc.stop()
  }
}
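The listing writes through the old mapred API (JobConf plus saveAsHadoopDataset). The same write can also go through the new mapreduce API; a minimal sketch, assuming the same conf and targetRDD as in the listing above:

import org.apache.hadoop.hbase.mapreduce.{TableOutputFormat => NewTableOutputFormat}
import org.apache.hadoop.mapreduce.Job

// Sketch only: conf and targetRDD are the values built in the listing above.
conf.set(NewTableOutputFormat.OUTPUT_TABLE, "fruit")
val job = Job.getInstance(conf)
job.setOutputKeyClass(classOf[ImmutableBytesWritable])
job.setOutputValueClass(classOf[Put])
job.setOutputFormatClass(classOf[NewTableOutputFormat[ImmutableBytesWritable]])
targetRDD.saveAsNewAPIHadoopDataset(job.getConfiguration)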
Spark can access HBase through the Hadoop input and output formats: the listing above creates an HBase table, writes rows to it, and reads them back.
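To compile the example, the build needs Spark core plus the HBase client and the server-side MapReduce classes on the classpath. A hypothetical sbt sketch (the version numbers are placeholders, match them to your cluster; note that in HBase 2.x the Table{Input,Output}Format classes moved to the hbase-mapreduce artifact):

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"   % "2.4.8",
  "org.apache.hbase"  % "hbase-client" % "1.4.13",
  "org.apache.hbase"  % "hbase-server" % "1.4.13"  // TableInputFormat/TableOutputFormat in HBase 1.x
)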