Reading data from an HBase table with spark-shell
Start the HBase cluster
start-hbase.sh
Open the HBase shell client
hbase shell
Create a table with a column family named 'info'
create 'student','info'
Insert some sample rows
put 'student','1','info:id','001'
put 'student','1','info:name','Mary'
put 'student','1','info:age','18'
put 'student','1','info:sex','girl'
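To confirm the rows were written before moving on, they can be scanned from the same HBase shell (optional check):
scan 'student'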
Start spark-shell
Prerequisite: copy the HBase dependency jars into Spark's jars directory
cp ${HBASE_HOME}/lib/hbase*.jar ${SPARK_HOME}/jars
cp ${HBASE_HOME}/lib/htrace-core-3.1.0-incubating.jar ${SPARK_HOME}/jars
cp ${HBASE_HOME}/lib/protobuf-java-2.5.0.jar ${SPARK_HOME}/jars
cp ${HBASE_HOME}/lib/metrics-core-3.2.6.jar ${SPARK_HOME}/jars
Start spark-shell and enter the following code
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase._
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
val conf = HBaseConfiguration.create()
// Set the table to query (must match the table created above)
conf.set(TableInputFormat.INPUT_TABLE, "student")
val stuRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])
stuRDD.cache()   // cache the RDD so HBase is not scanned again by later actions
val count = stuRDD.count()
println("Students RDD Count: " + count)
// Iterate over the results and print each row (using the columns inserted above)
stuRDD.foreach { case (_, result) =>
  val key  = Bytes.toString(result.getRow)
  val id   = Bytes.toString(result.getValue("info".getBytes, "id".getBytes))
  val name = Bytes.toString(result.getValue("info".getBytes, "name".getBytes))
  val age  = Bytes.toString(result.getValue("info".getBytes, "age".getBytes))
  val sex  = Bytes.toString(result.getValue("info".getBytes, "sex".getBytes))
  println("Row key: " + key + " id: " + id + " name: " + name + " age: " + age + " sex: " + sex)
}