1. Why integrate the two
HBase is a reliable, distributed database built on Hadoop, well suited to storing large volumes of structured data. It is a natural fit for the distributed computing framework MapReduce: an HBase table can serve as both the input source and the output sink of a MapReduce job.
2. Integration steps
1. Create a Maven project and add the dependencies
<!-- mapreduce + hbase -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-common</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.4</version>
</dependency>
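The test methods in the steps below use shared connection and admin fields whose initialization is not shown; a minimal JUnit 4 setup sketch for the test class (the quorum value is an assumption, mirroring the driver in step 7):

// assumed imports: org.apache.hadoop.conf.Configuration, org.apache.hadoop.hbase.HBaseConfiguration,
// org.apache.hadoop.hbase.HConstants, org.apache.hadoop.hbase.client.{Admin, Connection, ConnectionFactory},
// org.junit.{After, Before}
private Connection connection;
private Admin admin;

@Before
public void setUp() throws IOException {
    Configuration conf = HBaseConfiguration.create();
    // assumed ZooKeeper quorum; matches the driver in step 7
    conf.set(HConstants.ZOOKEEPER_QUORUM, "hadoop:2181");
    connection = ConnectionFactory.createConnection(conf);
    admin = connection.getAdmin();
}

@After
public void tearDown() throws IOException {
    admin.close();
    connection.close();
}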
2. Create the test table
@Test
public void testCreateOrderTable() throws IOException {
    boolean exists = admin.tableExists(TableName.valueOf("t_order"));
    if (exists) {
        // drop the old table so the test starts from a clean slate
        admin.disableTable(TableName.valueOf("t_order"));
        admin.deleteTable(TableName.valueOf("t_order"));
    }
    HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf("t_order"));
    HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
    hTableDescriptor.addFamily(cf1);
    admin.createTable(hTableDescriptor);
}
3. Insert test data
@Test
public void testInsert() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_order"));
    // rowkey convention: userId:orderTimestamp
    Put put1 = new Put(Bytes.toBytes("1:20181010153020100"));
    put1.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(2500.0D));
    put1.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("p20"));
    put1.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put2 = new Put(Bytes.toBytes("2:20180510121011233"));
    put2.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(199.0D));
    put2.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("连衣裙"));
    put2.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put3 = new Put(Bytes.toBytes("3:20180612111111111"));
    put3.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(999.9D));
    put3.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("小天鹅洗衣机"));
    put3.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put4 = new Put(Bytes.toBytes("1:20181212011011111"));
    put4.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(200.0D));
    put4.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("搓衣板"));
    put4.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put5 = new Put(Bytes.toBytes("1:20190206101010101"));
    put5.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(10D));
    put5.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("钢丝球"));
    put5.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put6 = new Put(Bytes.toBytes("2:20180306101010101"));
    put6.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(9.9D));
    put6.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("丝袜"));
    put6.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    ArrayList<Put> puts = new ArrayList<Put>();
    puts.add(put1);
    puts.add(put2);
    puts.add(put3);
    puts.add(put4);
    puts.add(put5);
    puts.add(put6);
    table.put(puts);
    table.close();
}
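The rowkey convention userId:orderTimestamp matters here: the driver's RowFilter in step 7 relies on it to select 2018 orders. To confirm the rows landed, a quick verification sketch (the method name is mine):

@Test
public void testScanOrders() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_order"));
    ResultScanner scanner = table.getScanner(new Scan());
    for (Result r : scanner) {
        String rowkey = Bytes.toString(r.getRow());
        String product = Bytes.toString(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("product")));
        double money = Bytes.toDouble(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("money")));
        System.out.println(rowkey + " " + product + " " + money);
    }
    scanner.close();
    table.close();
}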
4. Create the output table
// create the output table that will hold the MapReduce results
@Test
public void testCreateResultTable() throws IOException {
    boolean exists = admin.tableExists(TableName.valueOf("t_result"));
    if (exists) {
        admin.disableTable(TableName.valueOf("t_result"));
        admin.deleteTable(TableName.valueOf("t_result"));
    }
    HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf("t_result"));
    HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
    hTableDescriptor.addFamily(cf1);
    admin.createTable(hTableDescriptor);
}
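Steps 2 and 4 are identical apart from the table name; they could share a helper like the following sketch (the method name is hypothetical):

// hypothetical helper: drop and recreate a table with a single cf1 family
private void recreateTable(String name) throws IOException {
    TableName tableName = TableName.valueOf(name);
    if (admin.tableExists(tableName)) {
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
    }
    HTableDescriptor descriptor = new HTableDescriptor(tableName);
    descriptor.addFamily(new HColumnDescriptor("cf1"));
    admin.createTable(descriptor);
}

With it, each test body reduces to recreateTable("t_order") or recreateTable("t_result").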
5. Write the custom Mapper
package com.hw.hm;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * @author fql
 * @date 2019/8/22 12:59
 * Custom Mapper: emits one (userId, order amount) pair per row.
 */
public class OrderMapper extends TableMapper<Text, DoubleWritable> {
    /**
     * @param key     the rowkey
     * @param result  one row from HBase
     * @param context the MapReduce context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result result, Context context) throws IOException, InterruptedException {
        // read the rowkey
        String rowkey = Bytes.toString(key.get());
        // the rowkey has the form userId:orderTimestamp; the userId is the part before the colon
        String userId = rowkey.split(":")[0];
        // read the order amount
        double money = Bytes.toDouble(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("money")));
        context.write(new Text(userId), new DoubleWritable(money));
    }
}
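TableMapper fixes the input types to ImmutableBytesWritable (the rowkey) and Result (the row), so only the map output types are declared. Note that the mapper assumes every row carries a cf1:money cell; Bytes.toDouble would fail with a NullPointerException otherwise. A defensive variant of the value read, as a sketch:

byte[] raw = result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("money"));
if (raw == null) {
    return; // skip rows that have no money cell
}
double money = Bytes.toDouble(raw);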
6. Write the custom Reducer
package com.hw.hm;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * @author fql
 * @date 2019/8/22 13:07
 * Custom Reducer: sums the order amounts per user and writes the total back to HBase.
 */
public class OrderReducer extends TableReducer<Text, DoubleWritable, NullWritable> {
    /**
     * @param key     the userId
     * @param values  the individual order amounts for this user
     * @param context the MapReduce context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sumMoney = 0D;
        // accumulate the order amounts
        for (DoubleWritable value : values) {
            sumMoney += value.get();
        }
        // result rowkey: userId:2018
        Put put = new Put(Bytes.toBytes(key.toString() + ":2018"));
        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("total"), Bytes.toBytes(sumMoney));
        // TableOutputFormat only writes the Put, so the key can be NullWritable
        context.write(NullWritable.get(), put);
    }
}
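Given the test data from step 3 and the 2018 row filter configured in the driver (step 7 below), the totals written to t_result should work out as follows; user 1's 2019 order (10.0) is filtered out:
1:2018 -> 2500.0 + 200.0 = 2700.0
2:2018 -> 199.0 + 9.9 = 208.9
3:2018 -> 999.9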
7. Write the Application
package com.hw.hm;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import java.io.IOException;

/**
 * @author fql
 * @date 2019/8/22 13:14
 * Driver class: wires the Mapper and Reducer into a job.
 */
public class OrderApplication {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set(HConstants.ZOOKEEPER_QUORUM, "hadoop:2181");
        Job job = Job.getInstance(configuration, "order");
        job.setJarByClass(OrderApplication.class);
        // set up the map side: scan t_order, keeping only 2018 orders
        Scan scan = new Scan();
        // the rowkey is userId:orderTimestamp, so rows containing ":2018" are 2018 orders
        RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^.*:2018.*$"));
        scan.setFilter(filter);
        // these helpers also set TableInputFormat/TableOutputFormat, so no explicit
        // setInputFormatClass/setOutputFormatClass calls are needed
        TableMapReduceUtil.initTableMapperJob(TableName.valueOf("t_order"), scan, OrderMapper.class, Text.class, DoubleWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("t_result", OrderReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
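One note on the ZooKeeper address: embedding the port in the quorum string works in many setups, but the host and client port can also be set separately, which is the more common form; an equivalent sketch (the host name hadoop comes from the original):

configuration.set(HConstants.ZOOKEEPER_QUORUM, "hadoop");
configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");

When the job is submitted with hadoop jar, initTableMapperJob is expected to ship the HBase dependency jars with the job (its addDependencyJars flag defaults to true in this overload), so the HBase jars normally need no manual classpath handling.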
8. Write a method to view the result
@Test
public void testGetResult() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_result"));
    // point lookup of the result row for user 1 in 2018
    Result result = table.get(new Get(Bytes.toBytes("1:2018")));
    double total = Bytes.toDouble(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
    System.out.println("Total for user 1 in 2018: " + total);
}
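The Get above looks up a single row. To list every user's total, a full-scan sketch over t_result (the method name is mine):

@Test
public void testScanResults() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_result"));
    ResultScanner scanner = table.getScanner(new Scan());
    for (Result r : scanner) {
        String rowkey = Bytes.toString(r.getRow());
        double total = Bytes.toDouble(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
        System.out.println(rowkey + " total: " + total);
    }
    scanner.close();
    table.close();
}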