1.在Hbase中创建EMPLOYEE表:create 'EMPLOYEE','cf1',并创造一批销售订单数据,包括但不限于产品id、销售员id、销售时间、销售额;
2.在Hbase中创建TotalSale表:create 'TotalSale','cf1',并创造一批销售数据,包括但不限于用户id、销售总额;
3.编写mapper、reducer和driver源代码;
4.编写testDriver程序输出员工id、销售单数和销售总额。
testMapper
package com.hbasepackage;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.io.*;
import org.apache.hadoop.hbase.client.Result;
//import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.util.Bytes;
import com.google.common.primitives.*;
//import com.yammer.metrics.core.HealthCheck.Result;
@SuppressWarnings("unused")
public class testMapper extends TableMapper<Text, IntWritable> {
public void map(ImmutableBytesWritable rowKey, Result columns, Context
context)
throws IOException, InterruptedException {
try {
// get rowKey and convert it to string
String inKey = new String(rowKey.get());
// set new key having only date
String oKey = inKey.split("#")[0];
// get sales column in byte format first and then convert it to
// string(as it is stored as string from hbase shell)
byte[] bSales = columns.getValue(Bytes.toBytes("cf1"), Bytes.
toBytes("sales"));
String sSales = new String(bSales);
Integer sales = new Integer(sSales);
// emit date and sales values
context.write(new Text(oKey), new IntWritable(sales));
} catch (RuntimeException e) {
e.printStackTrace();
}
}
}
testReducer
package com.hbasepackage;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.util.Bytes;
import com.google.common.primitives.*;
@SuppressWarnings("unused")
public class testReducer extends TableReducer<Text, IntWritable,
ImmutableBytesWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context
context)
throws IOException, InterruptedException {
try {
int sum = 0;
// loop through different sales vales and add it to sum
for (IntWritable sales : values) {
Integer intSales = new Integer(sales.toString());
sum += intSales;
}
String keyString = key.toString();
System.out.println("" + keyString + "\t" + sum);
// create hbase put with rowkey as date
Put insHBase = new Put(key.getBytes());
// insert sum value to hbase
insHBase.add(Bytes.toBytes("cf1"), Bytes.toBytes("Total sales:"),
Bytes.toBytes(sum));
// write data to Hbase table
context.write(null, insHBase);
} catch (Exception e) {
e.printStackTrace();
}
}
}
testDriver
package com.hbasepackage;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.hbase.util.Bytes;
import com.google.common.primitives.*;
@SuppressWarnings("unused")
public class testDriver {
/**
 * Configures and runs the MapReduce job that scans the EMPLOYEE table,
 * aggregates sales per employee id (testMapper / testReducer), and
 * writes the totals into the TotalSale table.
 *
 * Exits with status 0 on job success and 1 on failure, so shell
 * scripts can detect a failed run.
 */
public static void main(String[] args) throws Exception {
// NOTE(review): a plain Configuration relies on hbase-site.xml being
// on the classpath; HBaseConfiguration.create() is the usual choice —
// confirm the deployment before changing it.
Configuration conf = new Configuration();
// Scan only the column family the mapper actually reads.
Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("cf1"));
// Job.getInstance(conf) is preferred on Hadoop 2+; new Job(conf) is
// kept for compatibility with the Hadoop version this was built for.
Job job = new Job(conf);
job.setJarByClass(testDriver.class);
job.setMapperClass(testMapper.class);
job.setReducerClass(testReducer.class);
// Input: scan EMPLOYEE; mapper emits (Text employeeId, IntWritable sales).
TableMapReduceUtil.initTableMapperJob(
"EMPLOYEE",
scan,
testMapper.class,
Text.class,
IntWritable.class,
job);
// Output: reducer writes Puts into TotalSale.
TableMapReduceUtil.initTableReducerJob(
"TotalSale",
testReducer.class,
job);
// Propagate job success/failure as the process exit code instead of
// silently discarding the boolean result.
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}