package actions
import java.util
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{Cell, CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.json4s.jackson.Serialization
import scala.collection.mutable
object example {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Practice").setMaster("local[2]")
    val sc = new SparkContext(conf)
    // HBase connection configuration
    val hbaseConf: Configuration = HBaseConfiguration.create()
    // ZooKeeper quorum of the HBase cluster
    hbaseConf.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104")
    // HBase table to read
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "student")
    val rdd: RDD[(ImmutableBytesWritable, Result)] = sc.newAPIHadoopRDD(
      hbaseConf,
      classOf[TableInputFormat],       // input format class, analogous to TextInputFormat for text files
      classOf[ImmutableBytesWritable], // key type: wraps the rowkey
      classOf[Result]                  // value type: wraps the cells of one row
    )
    val rdd2 = rdd.map {
      case (iw, result) =>
        val map: mutable.Map[String, Any] = mutable.Map[String, Any]()
        // store the rowkey in the map
        map += "rowkey" -> Bytes.toString(iw.get())
        // then store every column of the row
        val cells: util.List[Cell] = result.listCells()
        import scala.collection.JavaConverters._
        for (cell <- cells.asScala) {
          // column qualifier -> column value
          val key: String = Bytes.toString(CellUtil.cloneQualifier(cell))
          val value: String = Bytes.toString(CellUtil.cloneValue(cell))
          map += key -> value
        }
        // we could return the map directly here;
        // instead, serialize it so each record becomes a JSON string
        implicit val formats = org.json4s.DefaultFormats
        Serialization.write(map)
    }
    rdd2.collect.foreach(println)
    // each record ends up as a JSON string, e.g.
    // {"rowkey":"1001","name":"lisi","age":"20"}
    sc.stop()
  }
}
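To compile and run this example, the project needs Spark, the HBase client libraries, and json4s on the classpath. The build definition below is a minimal sketch, not taken from the original post: the artifact versions, Scala version, and project name are assumptions and should be matched to your own Spark and HBase installation.

// build.sbt -- minimal sketch; versions below are assumptions, adjust to your cluster
name := "spark-hbase-example"
scalaVersion := "2.12.15"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"      % "3.1.3",
  // hbase-client and hbase-mapreduce provide Result, CellUtil and TableInputFormat (HBase 2.x)
  "org.apache.hbase" %  "hbase-client"    % "2.4.11",
  "org.apache.hbase" %  "hbase-mapreduce" % "2.4.11",
  // json4s-jackson for Serialization.write; Spark already bundles a json4s version,
  // so keep this compatible with the Spark release you use
  "org.json4s"       %% "json4s-jackson"  % "3.7.0-M11"
)

With those dependencies in place, the job can be run locally (master "local[2]" as in the code) as long as the ZooKeeper quorum hosts are reachable and the "student" table exists in HBase.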