目标:在本地环境运行程序,读取 fruit 表中的 name 列,将其内容保存到 fruit2 表中
注意:需要将HBase的 hbase-site.xml 文件拷贝到本地 classpath路径下
Mapper类
package com.zch.hbase.mr2;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Author: zhaoHui
 * Date: 2022/01/14
 * Time: 18:15
 * Description: reads rows from the HBase "fruit" table (the job's input table).
 * Requirement:
 *   for every row, copy only the "name" column cells into the output table (fruit2).
 */
public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // 1. Build a Put keyed by the current row key.
        Put put = new Put(key.get());
        // 2. Walk every cell of the scanned row.
        for (Cell cell : value.rawCells()) {
            // 3. Keep only cells whose qualifier is "name".
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                // 4. Copy the matching cell into the Put unchanged.
                put.add(cell);
            }
        }
        // 5. Emit only when at least one "name" cell was found: writing an
        //    empty Put would later fail with "No columns to insert".
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
Reduce类
package com.zch.hbase.mr2;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
 * Author: zhaoHui
 * Date: 2022/01/14
 * Time: 18:16
 * Description: pass-through reducer — forwards every Put produced by the
 * mapper into the output HBase table configured on the job.
 */
public class Fruit2Reducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // The row key already lives inside each Put, so NullWritable is
        // emitted as the output key; each mutation is written unchanged.
        for (Put mutation : values) {
            context.write(NullWritable.get(), mutation);
        }
    }
}
Driver类
package com.zch.hbase.mr2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Author: zhaoHui
* Date: 2022/01/14
* Time: 18:16
* Description: 尝试本地运行程序,完成HBase获取表数据操作
* 需要将hbase的hbase-site.xml文件复制到classpath下
*/
public class Fruit2Driver implements Tool {
// 定义配置信息
Configuration configuration = null;
public int run(String[] args) throws Exception {
// 1、获取job对象
Job job = Job.getInstance(configuration);
// 2、设置主类路径
job.setJarByClass(Fruit2Driver.class);
// 3、设置Mapper&输出KV类型 arg[0] 输入的表名
TableMapReduceUtil.initTableMapperJob(
args[0],
new Scan(),
Fruit2Mapper.class,
ImmutableBytesWritable.class,
Put.class,
job
);
// 4、设置Reducer&输出的表 args[1]输出的表名
TableMapReduceUtil.initTableReducerJob(
args[1],
Fruit2Reducer.class,
job
);
// 5、提交任务
boolean b = job.waitForCompletion(true);
return b ? 0 : 1;
}
public void setConf(Configuration conf) {
configuration = conf;
}
public Configuration getConf() {
return configuration;
}
public static void main(String[] args) {
try {
Configuration configuration = new Configuration();
Fruit2Driver tool = new Fruit2Driver();
int run = ToolRunner.run(configuration, tool, args);
System.exit(run);
} catch (Exception e) {
e.printStackTrace();
}
}
}
报错:java.lang.NoSuchMethodError: org.apache.hadoop.yarn.api.records.LocalResource.setShouldBeUploadedToSharedCache(Z)V
报错原因:在导入maven依赖时导入的hadoop的版本和集群安装的hadoop版本不一致
解决办法:排除掉版本不一致的maven依赖,重新导入对应版本的hadoop依赖