一、Hbase 环境搭建-单机
1.1 环境
- Hadoop
- ZK
1.2 下载安装
[root@HadoopNode00 ~]# mkdir /home/hbase
[root@HadoopNode00 ~]# tar -zxvf hbase-1.2.4-bin.tar.gz -C /home/hbase/
[root@HadoopNode00 ~]# vi .bashrc
export HBASE_HOME=/home/hbase/hbase-1.2.4
export HBASE_MANAGES_ZK=false
export PATH=$PATH:$HBASE_HOME/bin
[root@HadoopNode00 ~]# vi /home/hbase/hbase-1.2.4/conf/hbase-site.xml
<property>
<name>hbase.rootdir</name>
<value>hdfs://HadoopNode00:9000/hbase</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>HadoopNode00</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
[root@HadoopNode00 ~]# vi /home/hbase/hbase-1.2.4/conf/regionservers
HadoopNode00
1.3 启动
# 保证zk和hdfs启动成功
[root@HadoopNode00 zookeeper-3.4.6]# start-hbase.sh
starting master, logging to /home/hbase/hbase-1.2.4/logs/hbase-root-master-HadoopNode00.out
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option PermSize=128m; support was removed in 8.0
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=128m; support was removed in 8.0
HadoopNode00: starting regionserver, logging to /home/hbase/hbase-1.2.4/logs/hbase-root-regionserver-HadoopNode00.out
HadoopNode00: Java HotSpot(TM) 64-Bit Server VM warning: ignoring option PermSize=128m; support was removed in 8.0
HadoopNode00: Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=128m; support was removed in 8.0
[root@HadoopNode00 zookeeper-3.4.6]# jps
2257 QuorumPeerMain
2642 HRegionServer
1906 DataNode
1811 NameNode
2485 HMaster
2121 SecondaryNameNode
2938 Jps
1.4 连接
[root@HadoopNode00 zookeeper-3.4.6]# hbase shell
二、Shell操作
2.1 常见命令
hbase(main):001:0> status
1 active master, 0 backup masters, 1 servers, 0 dead, 2.0000 average load
hbase(main):003:0> version
1.2.4, rUnknown, Wed Feb 15 18:58:00 CST 2017
hbase(main):004:0> whoami
root (auth:SIMPLE)
groups: root
2.2 命名空间操作
namespace 相当于MySQL中的数据库
# 创建一个普通的命名空间
hbase(main):006:0> create_namespace "baizhi"
0 row(s) in 0.0870 seconds
# 创建带属性的命名空间
hbase(main):007:0> create_namespace "shahe",{'subway'=>'shahezhan'}
0 row(s) in 0.0130 seconds
#删除命名空间
hbase(main):001:0> drop_namespace 'baizhi'
0 row(s) in 0.1440 seconds
# 描述命名空间
hbase(main):001:0> describe_namespace 'shahe'
DESCRIPTION
{NAME => 'shahe', subway => 'shahezhan'}
1 row(s) in 0.1300 seconds
# 修改命名空间
hbase(main):003:0> alter_namespace 'shahe',{METHOD => 'set', 'subway'=>'gonghuacheng'}
0 row(s) in 0.0240 seconds
hbase(main):004:0> describe_namespace 'shahe'
DESCRIPTION
{NAME => 'shahe', subway => 'gonghuacheng'}
1 row(s) in 0.0060 seconds
# 列出所有命名空间
hbase(main):005:0> list_namespace
NAMESPACE
default
hbase
shahe
3 row(s) in 0.0160 seconds
2.3 表操作(DDL 数据定义语言)
# 创建命名空间为baizhi
hbase(main):007:0> create_namespace 'baizhi'
0 row(s) in 0.0230 seconds
# 创建baizhi namespace的下的表叫做 t_user,有两个列簇叫做 cf1 cf2
hbase(main):008:0> create 'baizhi:t_user','cf1','cf2'
0 row(s) in 2.2930 seconds
=> Hbase::Table - baizhi:t_user
hbase(main):001:0> describe 'baizhi:t_user'
Table baizhi:t_user is ENABLED
baizhi:t_user
COLUMN FAMILIES DESCRIPTION
{NAME => 'cf1', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BL
CODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE =>
', REPLICATION_SCOPE => '0'}
{NAME => 'cf2', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BL
CODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE =>
', REPLICATION_SCOPE => '0'}
2 row(s) in 0.1930 seconds
# 直接删除无法删除,需要将其先disable掉
hbase(main):002:0> drop 'baizhi:t_user'
ERROR: Table baizhi:t_user is enabled. Disable it first.
Here is some help for this command:
Drop the named table. Table must first be disabled:
hbase> drop 't1'
hbase> drop 'ns1:t1'
# disable 表 (禁用表)
hbase(main):003:0> disable
disable disable_all disable_peer disable_table_replication
hbase(main):003:0> disable 'baizhi:t_user'
0 row(s) in 2.2570 seconds
# 删除表
hbase(main):004:0> drop 'baizhi:t_user'
0 row(s) in 1.2370 seconds
hbase(main):001:0> create 'baizhi:t_user','cf1','cf2'
0 row(s) in 1.3470 seconds
=> Hbase::Table - baizhi:t_user
# 列出所有的表
hbase(main):002:0> list
TABLE
baizhi:t_user
1 row(s) in 0.0120 seconds
=> ["baizhi:t_user"]
# 列出某个命名空间下的表
hbase(main):003:0> list_namespace_tables 'baizhi'
TABLE
t_user
1 row(s) in 0.0130 seconds
2.4 数据操作(数据管理语言 DML)
2.4.1 put
# 最普通的插入语句的方式
hbase(main):005:0> put 'baizhi:t_user','1','cf1:name','zhangsan'
0 row(s) in 0.1010 seconds
# 将表对象赋值给变量t
hbase(main):006:0> t = get_table 'baizhi:t_user'
0 row(s) in 0.0010 seconds
=> Hbase::Table - baizhi:t_user
# 使用变量t 对表进行操作
hbase(main):007:0> t.put '1','cf1:age',18
0 row(s) in 0.0110 seconds
# 覆盖操作和put插入相同:对相同rowkey和列再次put新值即可覆盖(实际是写入时间戳更新的新版本)
hbase(main):008:0> t.put '1','cf1:age',19
0 row(s) in 0.0070 seconds
# 扫描 当前的表
hbase(main):009:0> t.scan
ROW COLUMN+CELL
1 column=cf1:age, timestamp=1569278257582, value=19
1 column=cf1:name, timestamp=1569277976093, value=zhangsan
1 row(s) in 0.0180 seconds
2.4.2 get
# 获取baizhi:t_user 下的rowkey为1 的所有的cell
hbase(main):013:0> get 'baizhi:t_user' ,'1'
COLUMN CELL
cf1:age timestamp=1569278257582, value=19
cf1:name timestamp=1569277976093, value=zhangsan
2 row(s) in 0.0120 seconds
hbase(main):014:0> t.get '1'
COLUMN CELL
cf1:age timestamp=1569278257582, value=19
cf1:name timestamp=1569277976093, value=zhangsan
2 row(s) in 0.0030 seconds
# 发现获取版本只有一个,是因为表目前不支持多版本
hbase(main):015:0> t.get '1',{COLUMN=>'cf1:name',VERSIONS=>3}
# 创建一个最多有三个版本的表
hbase(main):001:0> create 'baizhi:t_user',{NAME=>'cf1',VERSIONS=>3}
0 row(s) in 1.3510 seconds
=> Hbase::Table - baizhi:t_user
hbase(main):006:0> t.get '1',{COLUMN=>'cf1:name',VERSIONS=>3}
COLUMN CELL
cf1:name timestamp=1569279103046, value=zs
cf1:name timestamp=1569279081528, value=zhangsan
2 row(s) in 0.0090 seconds
2.4.3 delete /deleteall
hbase(main):009:0> t.delete '1','cf1:name'
0 row(s) in 0.0270 seconds
hbase(main):010:0> t.scan
ROW COLUMN+CELL
1 column=cf1:age, timestamp=1569279066158, value=18
1 row(s) in 0.0090 seconds
# 根据时间戳进行删除
hbase(main):012:0> t.get '1',{COLUMN=>'cf1:age',VERSIONS=>3}
COLUMN CELL
cf1:age timestamp=1569279425168, value=19
cf1:age timestamp=1569279066158, value=18
2 row(s) in 0.0080 seconds
hbase(main):013:0> t.delete '1','cf1:age',1569279066158
0 row(s) in 0.0060 seconds
hbase(main):014:0> t.get '1',{COLUMN=>'cf1:age',VERSIONS=>3}
COLUMN CELL
cf1:age timestamp=1569279425168, value=19
1 row(s) in 0.0030 seconds
hbase(main):002:0> deleteall 'baizhi:t_user','1'
0 row(s) in 0.1600 seconds
hbase(main):003:0> scan 'baizhi:t_user'
ROW COLUMN+CELL
0 row(s) in 0.0120 seconds
2.4.4 scan
# 查询列簇为cf1 从rowkey为'1'的行开始扫描
hbase(main):019:0> scan 'baizhi:t_user',{COLUMNS=>['cf1'],STARTROW=>'1'}
ROW COLUMN+CELL
1 column=cf1:age, timestamp=1569279798830, value=20
1 column=cf1:name, timestamp=1569279748288, value=zhangsan
2 column=cf1:age, timestamp=1569279809118, value=22
2 column=cf1:name, timestamp=1569279779869, value=wangwubaizhi
2 row(s) in 0.0100 seconds
# 查询列簇为cf1 从rowkey为'2'的行开始扫描
hbase(main):020:0> scan 'baizhi:t_user',{COLUMNS=>['cf1'],STARTROW=>'2'}
ROW COLUMN+CELL
2 column=cf1:age, timestamp=1569279809118, value=22
2 column=cf1:name, timestamp=1569279779869, value=wangwubaizhi
1 row(s) in 0.0080 seconds
# 查询列簇为cf1 从rowkey为'3'的行开始扫描(表中无此rowkey及之后的行,故结果为空)
hbase(main):021:0> scan 'baizhi:t_user',{COLUMNS=>['cf1'],STARTROW=>'3'}
ROW COLUMN+CELL
0 row(s) in 0.0040 seconds
# 查询列簇为cf1 从rowkey为'1'开始 只显示一行
hbase(main):022:0> scan 'baizhi:t_user',{COLUMNS=>['cf1'],STARTROW=>'1',LIMIT=>1}
ROW COLUMN+CELL
1 column=cf1:age, timestamp=1569279798830, value=20
1 column=cf1:name, timestamp=1569279748288, value=zhangsan
1 row(s) in 0.0070 seconds
# 查询列簇为cf1 从rowkey为'1'开始 只显示两行
hbase(main):023:0> scan 'baizhi:t_user',{COLUMNS=>['cf1'],STARTROW=>'1',LIMIT=>2}
ROW COLUMN+CELL
1 column=cf1:age, timestamp=1569279798830, value=20
1 column=cf1:name, timestamp=1569279748288, value=zhangsan
2 column=cf1:age, timestamp=1569279809118, value=22
2 column=cf1:name, timestamp=1569279779869, value=wangwubaizhi
2 row(s) in 0.0260 seconds
2.4.5 count
hbase(main):025:0> count 'baizhi:t_user'
2 row(s) in 0.0130 seconds
=> 2
2.4.6 append
hbase(main):026:0> append 'baizhi:t_user','1','cf1:name','110'
0 row(s) in 0.0070 seconds
hbase(main):027:0> scan 'baizhi:t_user'
ROW COLUMN+CELL
1 column=cf1:age, timestamp=1569279798830, value=20
1 column=cf1:name, timestamp=1569280127798, value=zhangsan110
2 column=cf1:age, timestamp=1569279809118, value=22
2 column=cf1:name, timestamp=1569279779869, value=wangwubaizhi
2 row(s) in 0.0090 seconds
2.4.7 truncate
清空数据
hbase(main):028:0> truncate 'baizhi:t_user'
Truncating 'baizhi:t_user' table (it may take a while):
- Disabling table...
- Truncating table...
0 row(s) in 3.4400 seconds
hbase(main):001:0> scan 'baizhi:t_user'
ROW COLUMN+CELL
0 row(s) in 0.1550 seconds
三、Java API
3.1 依赖
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.4</version>
</dependency>
3.2 核心操作
获取客户端对象和连接对象
// Runs before each test: builds the shared HBase connection and Admin client
// (fields `configuration`, `conn`, `admin` are declared elsewhere in the class).
@Before
public void getClient() throws Exception {
// NOTE(review): a plain Configuration works here only because the ZK quorum is
// set explicitly; HBaseConfiguration.create() is the documented way — confirm.
configuration = new Configuration();
configuration.set("hbase.zookeeper.quorum", "HadoopNode00");
configuration.set("hbase.zookeeper.property.clientPort", "2181");
conn = ConnectionFactory.createConnection(configuration);
admin = conn.getAdmin();
}
// Runs after each test: release the Admin client first, then the connection
// that created it.
@After
public void close() throws Exception {
admin.close();
conn.close();
}
3.3 namespace常规操作
/**
 * Creates namespace "baizhi123" carrying one custom property (admin=gjf).
 */
@Test
public void createNameSpace() throws Exception {
    // Build the descriptor step by step instead of one chained expression.
    NamespaceDescriptor.Builder builder = NamespaceDescriptor.create("baizhi123");
    builder.addConfiguration("admin", "gjf");
    admin.createNamespace(builder.build());
}
/**
 * Prints the name of every namespace known to the cluster, one per line.
 */
@Test
public void listNameSpace() throws Exception {
    for (NamespaceDescriptor descriptor : admin.listNamespaceDescriptors()) {
        System.out.println(descriptor.getName());
    }
}
/**
 * Rewrites namespace "baizhi123": adds property aa=bb and drops property "admin".
 */
@Test
public void modifyNameSpace() throws Exception {
    NamespaceDescriptor.Builder builder = NamespaceDescriptor.create("baizhi123");
    builder.addConfiguration("aa", "bb");
    builder.removeConfiguration("admin");
    admin.modifyNamespace(builder.build());
}
/**
 * Drops namespace "baizhi123" (must already be empty of tables).
 */
@Test
public void deleteNameSpace() throws Exception {
    String namespace = "baizhi123";
    admin.deleteNamespace(namespace);
}
3.4 表常规操作
@Test
public void createTables() throws Exception {
/*
创建表名的对象(封装表名字)
* */
TableName tableName = TableName.valueOf("baizhi:t_user1");
/*
* 封装 表 的相关信息
* */
HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);
/*
* 封装列簇的相关信息
* */
HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
cf1.setMaxVersions(3);
=
/*
*
* */
HColumnDescriptor cf2 = new HColumnDescriptor("cf2");
cf2.setMaxVersions(3);
/*
* 在hTableDescriptor 对象中添加列簇描述对象
* */
hTableDescriptor.addFamily(cf1);
hTableDescriptor.addFamily(cf2);
/*
* 创建 table
* */
admin.createTable(hTableDescriptor);
}
/**
 * Deletes table baizhi:t_user1. A table must be disabled before it can be dropped.
 */
@Test
public void dropTable() throws Exception {
    TableName target = TableName.valueOf("baizhi:t_user1");
    admin.disableTable(target);
    admin.deleteTable(target);
}
3.5 CRUD
3.5.1 put
更新单个记录
/**
 * Inserts a single row (rowkey "1") with three cells in family cf1.
 */
@Test
public void testPutOne() throws Exception {
    // Obtain the table handle from the shared connection.
    Table table = conn.getTable(TableName.valueOf("baizhi:t_user"));
    byte[] family = "cf1".getBytes();
    Put put = new Put("1".getBytes());
    put.addColumn(family, "name".getBytes(), "zhangsan".getBytes());
    put.addColumn(family, "age".getBytes(), "18".getBytes());
    put.addColumn(family, "sex".getBytes(), "false".getBytes());
    table.put(put);
    table.close();
}
插入多个记录
/**
 * Batch-inserts rows "4", "5" and "6" (identical cell values) through a
 * BufferedMutator, which buffers mutations client-side before flushing.
 */
@Test
public void testPutList() throws Exception {
    TableName tableName = TableName.valueOf("baizhi:t_user");
    BufferedMutator mutator = conn.getBufferedMutator(tableName);
    ArrayList<Put> batch = new ArrayList<Put>();
    for (String rowKey : new String[]{"4", "5", "6"}) {
        Put put = new Put(rowKey.getBytes());
        put.addColumn("cf1".getBytes(), "name".getBytes(), "zhangsan".getBytes());
        put.addColumn("cf1".getBytes(), "age".getBytes(), "18".getBytes());
        put.addColumn("cf1".getBytes(), "sex".getBytes(), "false".getBytes());
        batch.add(put);
    }
    mutator.mutate(batch);
    mutator.close();
}
3.5.2 delete
单个删除
/**
 * Deletes the entire row with rowkey "6".
 * NOTE(review): method name misspells "test" as "tets"; kept unchanged because
 * the name is the block's public interface.
 */
@Test
public void tetsDelete() throws Exception {
    Table table = conn.getTable(TableName.valueOf("baizhi:t_user"));
    table.delete(new Delete("6".getBytes()));
    table.close();
}
批量删除
/**
 * Batch-deletes rows "1", "2" and "3" through a BufferedMutator.
 */
@Test
public void testDeleteList() throws Exception {
    TableName tableName = TableName.valueOf("baizhi:t_user");
    BufferedMutator mutator = conn.getBufferedMutator(tableName);
    ArrayList<Delete> batch = new ArrayList<Delete>();
    for (String rowKey : new String[]{"1", "2", "3"}) {
        batch.add(new Delete(rowKey.getBytes()));
    }
    mutator.mutate(batch);
    mutator.close();
}
3.5.3 get
/**
 * Reads row "4" and prints its cf1:name, cf1:age and cf1:sex values.
 * NOTE(review): printing assumes all three cells exist — new String(null) would
 * throw; confirm the fixture row always carries them.
 */
@Test
public void testGet() throws Exception {
    TableName tableName = TableName.valueOf("baizhi:t_user");
    Table table = conn.getTable(tableName);
    try {
        Get get = new Get("4".getBytes());
        Result result = table.get(get);
        byte[] name = result.getValue("cf1".getBytes(), "name".getBytes());
        byte[] age = result.getValue("cf1".getBytes(), "age".getBytes());
        byte[] sex = result.getValue("cf1".getBytes(), "sex".getBytes());
        System.out.println(new String(name) + "-" + new String(age) + "-" + new String(sex));
    } finally {
        // FIX: the original never closed the Table, leaking client resources.
        table.close();
    }
}
@Test
public void testGet02() throws Exception {
/*
*
* hbase(main):012:0> t.get '4',{COLUMN=>'cf1:name',VERSIONS=>3}
COLUMN CELL
cf1:name timestamp=1569284691440, value=zs
cf1:name timestamp=1569283965094, value=zhangsan
* */
TableName tableName = TableName.valueOf("baizhi:t_user");
Table table = conn.getTable(tableName);
Get get = new Get("4".getBytes());
get.setMaxVersions(2);
get.addColumn("cf1".getBytes(), "name".getBytes());
Result result = table.get(get);
List<Cell> cellList = result.getColumnCells("cf1".getBytes(), "name".getBytes());
for (Cell cell : cellList) {
/*
* rowkey 列名 列值 时间戳
* */
byte[] rowkey = CellUtil.cloneRow(cell);
byte[] cf = CellUtil.cloneFamily(cell);
byte[] qualifier = CellUtil.cloneQualifier(cell);
byte[] value = CellUtil.cloneValue(cell);
long timestamp = cell.getTimestamp();
System.out.println(new String(rowkey) + "--" + new String(cf) + "--" + new String(qualifier) + "--" + new String(value) +"--" +timestamp);
}
3.5.4 scan
/**
 * Scans baizhi:t_user keeping rows whose key starts with "4" OR "5"
 * (MUST_PASS_ONE = logical OR of the two prefix filters) and prints
 * rowkey, name, age and sex for each row.
 */
@Test
public void testScan() throws Exception {
    TableName tableName = TableName.valueOf("baizhi:t_user");
    Table table = conn.getTable(tableName);
    try {
        Scan scan = new Scan();
        PrefixFilter prefixFilter1 = new PrefixFilter("4".getBytes());
        PrefixFilter prefixFilter2 = new PrefixFilter("5".getBytes());
        FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE, prefixFilter1, prefixFilter2);
        scan.setFilter(list);
        ResultScanner results = table.getScanner(scan);
        try {
            for (Result result : results) {
                byte[] row = result.getRow();
                byte[] name = result.getValue("cf1".getBytes(), "name".getBytes());
                byte[] age = result.getValue("cf1".getBytes(), "age".getBytes());
                byte[] sex = result.getValue("cf1".getBytes(), "sex".getBytes());
                System.out.println(new String(row) + "--" + new String(name) + "-" + new String(age) + "-" + new String(sex));
            }
        } finally {
            // FIX: a ResultScanner holds server-side resources; always release it.
            results.close();
        }
    } finally {
        // FIX: the original never closed the Table.
        table.close();
    }
}
四、MapReduce on Hbase
4.1 依赖
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.2.4</version>
</dependency>
4.2 模拟问题
HBase中有相关的数据
书写MR 程序
运行
baidu sina ali
rowkey salary name age
package com.baizhi.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import javax.swing.*;
/**
 * Submits a MapReduce job that reads baizhi:t_user1 from HBase, averages
 * salary per company (UserMapper/UserReducer) and writes the results into
 * table baizhi:t_result.
 */
public class JobRunner {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        // HBase / ZooKeeper connection settings.
        conf.set("hbase.zookeeper.quorum", "HadoopNode00");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        // Cluster-side Hadoop configuration files bundled with the project.
        conf.addResource("conf2/core-site.xml");
        conf.addResource("conf2/hdfs-site.xml");
        conf.addResource("conf2/mapred-site.xml");
        conf.addResource("conf2/yarn-site.xml");
        // Remote submission: path of the job jar plus cross-platform flag
        // (submitting from Windows to a Linux cluster).
        conf.set(MRJobConfig.JAR, "G:\\IDEA_WorkSpace\\BigData\\HBase_Test\\target\\HBase_Test-1.0-SNAPSHOT.jar");
        conf.set("mapreduce.app-submission.cross-platform", "true");

        Job job = Job.getInstance(conf);
        job.setJarByClass(JobRunner.class);

        // Read input rows from, and write output puts to, HBase tables.
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        // Mapper side: full scan of baizhi:t_user1, emitting (company, salary).
        TableMapReduceUtil.initTableMapperJob(
                "baizhi:t_user1",
                new Scan(),
                UserMapper.class,
                Text.class,
                DoubleWritable.class,
                job
        );
        // Reducer side: averaged salaries land in baizhi:t_result.
        TableMapReduceUtil.initTableReducerJob(
                "baizhi:t_result",
                UserReducer.class,
                job);

        job.waitForCompletion(true);
    }
}
package com.baizhi.mr;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
/**
 * Map side: for every HBase row, emits (company, salary).
 * Row keys are assumed to look like "&lt;company&gt;:&lt;suffix&gt;" — TODO confirm
 * against the data-loading code.
 */
public class UserMapper extends TableMapper<Text, DoubleWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // copyBytes() yields exactly the valid row-key bytes (get() may expose a
        // larger backing buffer with an offset).
        String rowkey = Bytes.toString(key.copyBytes());
        String company = rowkey.split(":")[0];
        byte[] salaryByte = value.getValue("cf1".getBytes(), "salary".getBytes());
        // FIX: skip rows without a well-formed cf1:salary cell — the original
        // would NPE in Bytes.toDouble on a missing cell and fail the task.
        if (salaryByte == null || salaryByte.length != Bytes.SIZEOF_DOUBLE) {
            return;
        }
        double salary = Bytes.toDouble(salaryByte);
        context.write(new Text(company), new DoubleWritable(salary));
    }
}
package com.baizhi.mr;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
/**
 * Reduce side: averages all salaries for one company and writes the result to
 * column cf1:avgSalary of the row named after the company.
 */
public class UserReducer extends TableReducer<Text, DoubleWritable, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double totalSalary = 0.0;
        int count = 0;
        for (DoubleWritable value : values) {
            totalSalary += value.get();
            count++;
        }
        // BUG FIX: Text.getBytes() returns the internal backing array, which can
        // be longer than getLength() and carry stale trailing bytes, corrupting
        // the row key. copyBytes() returns exactly the valid portion.
        Put put = new Put(key.copyBytes());
        put.addColumn("cf1".getBytes(), "avgSalary".getBytes(), (totalSalary / count + "").getBytes());
        context.write(NullWritable.get(), put);
    }
}