1、自定义MapReduce
目标:编写 MapReduce 程序,读取集群上的 fruit.tsv 文件,并将数据最终保存到 HBase 的 fruit 表中
注意:需要提前在HBase上把表创建好
流程:
1、编写Mapper类,将读取到的内容传到Reducer中
package com.zch.hbase.mr1;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* Author: zhaoHui
* Date: 2022/01/14
* Time: 16:30
* Description: 读取hdfs数据传到HBase的fruit表中
*/
/**
 * Identity mapper: forwards each input line from HDFS unchanged.
 *
 * <p>The key is the byte offset of the line within the file and the value is
 * the raw line text; both are passed straight through to the reducer, which
 * performs all of the parsing and HBase Put construction.
 */
public class FruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // No transformation here — emit the record exactly as it was read.
        context.write(offset, line);
    }
}
2、编写Reducer类,将传入的数据进行处理,赋值给Put对象,将其写出
package com.zch.hbase.mr1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
/**
* Author: zhaoHui
* Date: 2022/01/14
* Time: 16:30
* Description:
*/
/**
 * Reducer that converts each input line into an HBase {@link Put} and writes
 * it to the output table configured by the driver.
 *
 * <p>Expected line format (tab-separated, matching the fruit.tsv input file):
 * {@code rowKey<TAB>name<TAB>color}. Each parsed line becomes one Put with two
 * cells in the {@code info} column family: {@code info:name} and
 * {@code info:color}.
 */
public class FruitReducer extends TableReducer<LongWritable, Text, NullWritable> {

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // 1. Iterate over all lines grouped under this key (file offset).
        for (Text value : values) {
            // 2. Parse the line. The input is a .tsv file, so fields are
            //    separated by tabs — splitting on a space would leave the
            //    whole line in split[0] and crash on split[1] below.
            String[] fields = value.toString().split("\t");
            // Skip blank or malformed lines instead of throwing
            // ArrayIndexOutOfBoundsException and killing the task.
            if (fields.length < 3) {
                continue;
            }
            String rowKey = fields[0];
            String name = fields[1];
            String color = fields[2];
            // 3. Build the Put object keyed by the row key.
            Put put = new Put(Bytes.toBytes(rowKey));
            // 4. Populate the info:name and info:color cells.
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(color));
            // 5. Emit the Put; the row key is carried inside the Put itself,
            //    so the output key can be NullWritable.
            context.write(NullWritable.get(), put);
        }
    }
}
3、编写Driver类,创建Job对象,设置驱动类,TableMapReduceUtil设置输出到的表名,FileInputFormat设置输入路径
package com.zch.hbase.mr1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Author: zhaoHui
* Date: 2022/01/14
* Time: 16:30
* Description: 打包mr程序到集群运行
* [root@zhaohui01 hbase-1.3.1]# yarn jar hbase-1.0-SNAPSHOT.jar com.zch.hbase.mr1.FruitDriver /Fruit/fruit.tsv fruit
*/
public class FruitDriver implements Tool {
private Configuration configuration = null;
public int run(String[] args) throws Exception {
// 1、获取Job对象
Job job = Job.getInstance(configuration);
// 2、设置驱动类
job.setJarByClass(FruitDriver.class);
// 3、设置Mapper&Mapper的输出类型
job.setMapperClass(FruitMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
// 4、设置Reducer类
TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job);
// 5、设置输入输出参数
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 6、提交任务
boolean b = job.waitForCompletion(true);
return b ? 0 : 1;
}
public void setConf(Configuration conf) {
configuration = conf;
}
public Configuration getConf() {
return configuration;
}
public static void main(String[] args) {
try {
Configuration configuration = new Configuration();
int run = ToolRunner.run(configuration, new FruitDriver(), args);
System.exit(run);
} catch (Exception e) {
e.printStackTrace();
}
}
}