简介:HBase作为共享源,即HBase既是MapReduce作业的输入源,同时又是其输出源。通俗地讲,就是从HBase中读取数据,经MapReduce处理之后,再将处理结果写回HBase。
下面直接上代码:
package apache.org.myhbase.asshare;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
/**
 * MapReduce job that uses HBase as both its input source and its output sink.
 *
 * <p>The job scans the {@code info:url} column of the {@code access-log} table, groups the rows
 * by the part of the row key that precedes the first {@code '-'} (treated as the client IP), and
 * writes the number of rows seen for each IP to the {@code url:count} column of the
 * {@code total-access} table, using the IP as the output row key.
 *
 * @author 13277
 */
public class HBaseMR {
  static final String inTable = "access-log";
  static final String inFamily = "info";
  static final String inColumn = "url";
  static final String outTable = "total-access";
  static final String outFamily = "url";
  static final String outColumn = "count";

  // Encoded once with Bytes.toBytes (always UTF-8) instead of String.getBytes(), which depends
  // on the JVM's default charset and may therefore differ between cluster nodes.
  private static final byte[] IN_FAMILY_BYTES = Bytes.toBytes(inFamily);
  private static final byte[] IN_COLUMN_BYTES = Bytes.toBytes(inColumn);
  private static final byte[] OUT_FAMILY_BYTES = Bytes.toBytes(outFamily);
  private static final byte[] OUT_COLUMN_BYTES = Bytes.toBytes(outColumn);

  /**
   * Emits one {@code (ip, url)} pair for every input row that has an {@code info:url} cell.
   */
  public static class Mapper extends TableMapper<Text, Text> {
    // Reused across map() calls; context.write serializes the contents before returning.
    private final Text ipKey = new Text();
    private final Text urlValue = new Text();

    /**
     * Extracts the IP prefix of the row key and emits it together with the row's URL.
     *
     * @param row the row key as delivered by the table input format (unused; the key is read
     *     from {@code values})
     * @param values the scanned row
     * @param context sink for the {@code (ip, url)} pair
     * @throws IOException if the pair cannot be written
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(ImmutableBytesWritable row, Result values, Context context)
        throws IOException, InterruptedException {
      byte[] url = values.getValue(IN_FAMILY_BYTES, IN_COLUMN_BYTES);
      if (url == null) {
        // Rows without info:url do not count as an access.
        return;
      }

      String rowKey = Bytes.toString(values.getRow());
      // indexOf/substring instead of split("-")[0]: split drops trailing empty strings, so a
      // row key made only of dashes yields an empty array and would fail on index 0.
      int dash = rowKey.indexOf('-');
      String ip = dash < 0 ? rowKey : rowKey.substring(0, dash);

      ipKey.set(ip);
      urlValue.set(Bytes.toString(url));
      context.write(ipKey, urlValue);
    }
  }

  /**
   * Counts the values received for each IP and stores the total in the output table.
   */
  public static class Reducer extends TableReducer<Text, Text, Text> {
    /**
     * Writes a {@link Put} whose row key is the IP and whose {@code url:count} cell holds the
     * number of values received, encoded as a decimal string.
     *
     * @param key the IP produced by the mapper
     * @param values one entry per access-log row for this IP; only their number matters
     * @param context sink for the resulting {@link Put}
     * @throws IOException if the put cannot be written
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      // long rather than int so a very busy IP cannot overflow the counter; the decimal
      // string that is stored is identical for every count that fits in an int.
      long count = 0;
      for (Text ignored : values) {
        count++;
      }

      Put put = new Put(Bytes.toBytes(key.toString()));
      // NOTE(review): Put.add(byte[], byte[], byte[]) is deprecated in newer HBase clients in
      // favour of addColumn; kept so the class still compiles against older clients — confirm
      // the target HBase version before switching.
      put.add(OUT_FAMILY_BYTES, OUT_COLUMN_BYTES, Bytes.toBytes(String.valueOf(count)));
      context.write(key, put);
    }
  }

  /**
   * Configures and runs the job, then exits with status 0 on success and 1 on failure.
   *
   * @param args command-line arguments (unused)
   * @throws IOException if the job cannot be set up or submitted
   * @throws ClassNotFoundException if a job class cannot be resolved
   * @throws InterruptedException if waiting for the job is interrupted
   */
  public static void main(String[] args)
      throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("iamherestart");
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "HbaseMR");
    job.setJarByClass(HBaseMR.class);

    Scan scan = new Scan();
    // The mapper only reads info:url, so do not ship any other column to the tasks.
    scan.addColumn(IN_FAMILY_BYTES, IN_COLUMN_BYTES);
    // Fetch rows in larger batches per RPC for the full-table scan.
    scan.setCaching(500);
    // A one-off full scan should not evict hot data from the region servers' block cache.
    scan.setCacheBlocks(false);

    TableMapReduceUtil.initTableMapperJob(
        inTable, scan, Mapper.class, Text.class, Text.class, job);
    TableMapReduceUtil.initTableReducerJob(outTable, Reducer.class, job);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
PS:
该作业从access-log表中读取所有访问记录(行键中第一个“-”之前的部分被视为IP),统计出每个IP的访问次数,并将结果存储到total-access表的url:count列中。