The implementation is straightforward. As a supplement to the section above, here is the test code:
import org.apache.hadoop.hbase.client.{Result, Scan}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{IdentityTableMapper, TableInputFormat, TableMapReduceUtil}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.rdd.NewHadoopRDD
import org.apache.spark.{SparkConf, SparkContext}

def main(args: Array[String]): Unit = {
  val tableName = "test_table"            // assumed name; the table set up earlier in the article
  val conf = HBaseConfiguration.create()  // assumed; the HBase configuration from the section above

  val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName).setMaster("local[*]")
  // ImmutableBytesWritable is not Java-serializable, so register it with Kryo.
  sparkConf.registerKryoClasses(Array(classOf[ImmutableBytesWritable]))
  val sc = new SparkContext(sparkConf)

  // Record the table name, scan, and input format on the MapReduce Job configuration.
  val job: Job = Job.getInstance(conf)
  val scan = new Scan()
  scan.setCaching(100)
  TableMapReduceUtil.initTableMapperJob(TableName.valueOf(tableName), scan,
    classOf[IdentityTableMapper], null, null, job)

  // Attach the current user's Hadoop credentials (needed on secure clusters).
  val jConf = new JobConf(job.getConfiguration)
  SparkHadoopUtil.get.addCredentials(jConf)

  // Build an RDD over the table's regions through the new Hadoop InputFormat API.
  val rdd = new NewHadoopRDD[ImmutableBytesWritable, Result](sc,
    classOf[TableInputFormat],
    classOf[ImmutableBytesWritable],
    classOf[Result], job.getConfiguration).map((r: (ImmutableBytesWritable, Result)) => r)

  rdd.foreach(v => println(Bytes.toString(v._1.get())))                  // print each row key
  println("Length: " + rdd.map(r => r._1.copyBytes()).collect().length)  // total row count
}
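For comparison, the same scan can also be expressed through SparkContext.newAPIHadoopRDD, the public entry point that wraps NewHadoopRDD internally (the NewHadoopRDD constructor is a developer API). A minimal sketch, assuming the same job prepared above:

// Same scan through the public API; reuses the `job` configured by
// TableMapReduceUtil.initTableMapperJob in the listing above.
val hbaseRDD = sc.newAPIHadoopRDD(
  job.getConfiguration,              // carries the table name, scan, and input format
  classOf[TableInputFormat],
  classOf[ImmutableBytesWritable],
  classOf[Result])
hbaseRDD.foreach { case (key, _) => println(Bytes.toString(key.get())) }  // row keys, as above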