Making the configuration permanent:
- First make sure the HBase and Hadoop environment variables are set:
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-3.1.3
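For the exports to survive re-login they need to live in a shell startup file; a minimal sketch assuming /etc/profile (adjust to wherever your environment variables are actually kept):
# append to /etc/profile, then reload it into the current shell
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HBASE_HOME/bin
source /etc/profile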
- Configure MapReduce to pick up the HBase dependencies:
Hadoop 2.x
Add the line below to hadoop-env.sh (note: it must go after the for loop):
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
Hadoop 3.x
The line can go anywhere in hadoop-env.sh:
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
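As a quick sanity check, hadoop classpath prints the effective classpath (including anything added via HADOOP_CLASSPATH), so the HBase entry should show up:
hadoop classpath | grep hbase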
- Case 1: count how many rows a table has (tests read capability). Run from $HBASE_HOME, since the jar path below is relative:
yarn jar lib/hbase-server-1.3.1.jar rowcounter student
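The same count can be cross-checked from the HBase shell (count does a full scan from the client, so this is only practical for small tables):
echo "count 'student'" | hbase shell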
- Case 2: write text data into an HBase table (tests write capability)
Data:
- Create a local TSV file named fruit.tsv (columns separated by tabs):
1001 Apple Red
1002 Pear Yellow
1003 Pineapple Yellow
- Upload it to HDFS:
hadoop fs -put <local data path> <HDFS destination path>
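For example, to match the HDFS path the import command below reads from (assuming fruit.tsv sits in the current directory):
hadoop fs -put fruit.tsv /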
Then run the following command (from $HBASE_HOME, so the relative lib/ path resolves):
/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit hdfs://hadoop202:8020/fruit.tsv
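If the import fails because the fruit table does not exist (whether importtsv auto-creates it varies by version), create it first from the HBase shell; the info column family matches the -Dimporttsv.columns mapping above. Afterwards, scan to verify the rows arrived:
hbase shell
create 'fruit', 'info'
scan 'fruit'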
The main classes used (Java code):
Driver:
package com.dxy.mr2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
public class Fruit2Driver {
    /*
     * Reads data from one table, filters its columns, and writes
     * the result to another table.
     */
    public static void main(String[] args) {
        try {
            // HBaseConfiguration.create() loads hbase-site.xml from the classpath
            Configuration configuration = HBaseConfiguration.create();
            Job job = Job.getInstance(configuration);
            // Set the driver (jar) class
            job.setJarByClass(Fruit2Driver.class);
            // Set the Mapper class, the source table/scan, and the Mapper's output types
            TableMapReduceUtil.initTableMapperJob("fruit", new Scan(), Fruit2Mapper.class,
                    ImmutableBytesWritable.class,
                    Put.class,
                    job);
            // Set the Reducer class and the target table
            TableMapReduceUtil.initTableReducerJob("fruit2", Fruit2Reduce.class, job);
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Mapper:
package com.dxy.mr2;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // One Result per row: walk its cells and keep only the name column
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                put.add(cell);
            }
        }
        // Only emit rows that actually had a name cell
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
Reducer:
package com.dxy.mr2;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
public class Fruit2Reduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Pass each Put straight through; the TableReducer writes it to the output table
        for (Put value : values) {
            context.write(NullWritable.get(), value);
        }
    }
}
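To run these three classes on the cluster, package them into a jar and submit it with the driver as the main class. The target table must exist before the job runs (the job writes into it but does not create it); the jar name mr2.jar is a placeholder:
hbase shell
create 'fruit2', 'info'
exit
yarn jar mr2.jar com.dxy.mr2.Fruit2Driver
Verify that the filtered data landed in the target table:
echo "scan 'fruit2'" | hbase shell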