Spark + HBase in Java: using Spark to read HBase data for distributed computation

package com.sdyc.ndspark.sys;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;

/**
 * spark hbase test
 *
 * Created with IntelliJ IDEA.
 * User: zhangdonghao
 * Date: 14-1-26
 * Time: 9:24 AM
 *
 * @author zhangdonghao
 */

public class HbaseTest implements Serializable {

    public Log log = LogFactory.getLog(HbaseTest.class);

    /**
     * Serializes a Scan into a Base64-encoded string. This method is copied from
     * org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.
     *
     * @param scan the scan to encode
     * @return the Base64-encoded scan
     * @throws IOException if the scan cannot be serialized
     */
    static String convertScanToString(Scan scan) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(out);
        scan.write(dos);
        return Base64.encodeBytes(out.toByteArray());
    }
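    // Note: the Writable-based serialization above matches hbase-0.94.x, which
    // this post targets. On HBase 0.96+ Scan no longer implements Writable; my
    // assumption (not covered by the original post) is that you would go through
    // protobuf instead, e.g.:
    //   return Base64.encodeBytes(ProtobufUtil.toScan(scan).toByteArray());
    // using org.apache.hadoop.hbase.protobuf.ProtobufUtil.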

    public void start() {
        // Initialize the JavaSparkContext. The HBase jar must be listed in the
        // jars parameter, otherwise the job fails with an
        // "unread block data" exception.
        JavaSparkContext sc = new JavaSparkContext("spark://nowledgedata-n3:7077", "hbaseTest",
                "/home/hadoop/software/spark-0.8.1",
                new String[]{"target/ndspark.jar", "target/dependency/hbase-0.94.6.jar"});
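        // Note (my assumption, not from the original post): this four-argument
        // constructor is the Spark 0.8.x API. On Spark 1.x+ you would configure
        // the context through a SparkConf instead, roughly:
        //   SparkConf sparkConf = new SparkConf()
        //           .setMaster("spark://nowledgedata-n3:7077")
        //           .setAppName("hbaseTest")
        //           .setJars(new String[]{"target/ndspark.jar",
        //                   "target/dependency/hbase-0.94.6.jar"});
        //   JavaSparkContext sc = new JavaSparkContext(sparkConf);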

        // HBaseConfiguration.create() builds the Configuration; the Hadoop and
        // HBase configuration files must be on the project classpath.
        Configuration conf = HBaseConfiguration.create();

        // Set up the scan; only the user's level column (info:levelCode) is returned.
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes("195861-1035177490"));
        scan.setStopRow(Bytes.toBytes("195861-1072173147"));
        scan.addFamily(Bytes.toBytes("info"));
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("levelCode"));
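        // Optional tuning (my addition, not in the original post): raising
        // scanner caching cuts down RPC round-trips when scanning many rows:
        //   scan.setCaching(500);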

        try {
            // Name of the HBase table to read.
            String tableName = "usertable";
            conf.set(TableInputFormat.INPUT_TABLE, tableName);
            conf.set(TableInputFormat.SCAN, convertScanToString(scan));

            // Read the scan results as (ImmutableBytesWritable, Result) pairs.
            JavaPairRDD<ImmutableBytesWritable, Result> hBaseRDD = sc.newAPIHadoopRDD(conf,
                    TableInputFormat.class, ImmutableBytesWritable.class,
                    Result.class);

            // Extract the user's level from each Result and emit (level, 1).
            JavaPairRDD<Integer, Integer> levels = hBaseRDD.map(
                    new PairFunction<Tuple2<ImmutableBytesWritable, Result>, Integer, Integer>() {
                        @Override
                        public Tuple2<Integer, Integer> call(
                                Tuple2<ImmutableBytesWritable, Result> row)
                                throws Exception {
                            byte[] o = row._2().getValue(
                                    Bytes.toBytes("info"), Bytes.toBytes("levelCode"));
                            // The scan only returns rows that have info:levelCode,
                            // but guard anyway: returning null from a PairFunction
                            // would throw a NullPointerException downstream in
                            // reduceByKey.
                            return new Tuple2<Integer, Integer>(
                                    o == null ? -1 : Bytes.toInt(o), 1);
                        }
                    });

            // Sum the counts per level.
            JavaPairRDD<Integer, Integer> counts = levels.reduceByKey(
                    new Function2<Integer, Integer, Integer>() {
                        public Integer call(Integer i1, Integer i2) {
                            return i1 + i2;
                        }
                    });
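            // reduceByKey combines values on each partition before shuffling, so
            // the merge function must be associative; plain addition, as here, is safe.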

            // Print the final result.
            List<Tuple2<Integer, Integer>> output = counts.collect();
            for (Tuple2<Integer, Integer> tuple : output) {
                System.out.println(tuple._1 + ": " + tuple._2);
            }
        } catch (Exception e) {
            log.warn(e);
        }
    }

    /**
     * If the Spark computation is not written inside main, the enclosing class
     * must implement Serializable, otherwise the job fails with
     * "Task not serializable: java.io.NotSerializableException".
     */
    public static void main(String[] args) throws InterruptedException {
        new HbaseTest().start();
        System.exit(0);
    }

}
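The listing above targets spark-0.8.1 and hbase-0.94.6. As a rough sketch of how the core pipeline would look on Spark 1.x+ and HBase 0.96+ (my assumption; the original post predates those releases), reusing conf, scan, and hBaseRDD from the listing above:

import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.spark.SparkConf;

// Context creation moves to SparkConf (master and jars can also be
// supplied by spark-submit instead).
SparkConf sparkConf = new SparkConf()
        .setMaster("spark://nowledgedata-n3:7077")
        .setAppName("hbaseTest");
JavaSparkContext sc = new JavaSparkContext(sparkConf);

// HBase 0.96+: the Scan is serialized via protobuf rather than Writable.
conf.set(TableInputFormat.SCAN,
        Base64.encodeBytes(ProtobufUtil.toScan(scan).toByteArray()));

// Spark 1.x renames the pair-producing map to mapToPair.
JavaPairRDD<Integer, Integer> levels = hBaseRDD.mapToPair(
        new PairFunction<Tuple2<ImmutableBytesWritable, Result>, Integer, Integer>() {
            @Override
            public Tuple2<Integer, Integer> call(
                    Tuple2<ImmutableBytesWritable, Result> row) {
                byte[] o = row._2().getValue(
                        Bytes.toBytes("info"), Bytes.toBytes("levelCode"));
                return new Tuple2<Integer, Integer>(
                        o == null ? -1 : Bytes.toInt(o), 1);
            }
        });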
