This reuses the example from part 05, but with a hand-written MapReduce program.
![](https://i-blog.csdnimg.cn/blog_migrate/b1b47faaff81f7c24518e3726e4b1721.png)
Implementation steps
The idea is essentially the same as in part 06, and the overall flow is unchanged. The difference is that this time the Mapper reads its input from a file on HDFS instead of from an HBase table, so it can extend the plain Hadoop Mapper directly rather than TableMapper.
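The post doesn't reproduce the input file itself. Assuming the three tab-separated columns the Mapper below expects (row key, name, color), fruit.tsv would look something like this (the values are illustrative):

```
1001	Apple	Red
1002	Pear	Yellow
1003	Pineapple	Yellow
```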
Mapper
```java
package com.buba.mapper;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ReadFruitFromHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input value is one line read from the HDFS file
        String lineValue = value.toString();

        // Split the line on tabs: rowkey \t name \t color
        String[] values = lineValue.split("\t");
        if (values.length < 3) {
            return; // skip blank or malformed lines
        }

        String rowKey = values[0];
        String name = values[1];
        String color = values[2];

        // Wrap the row key as the map output key
        ImmutableBytesWritable rowKeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowKey));

        // Build the Put; arguments are: column family, qualifier, value
        // (on HBase 1.0+ use put.addColumn(...) instead of the deprecated put.add(...))
        Put put = new Put(Bytes.toBytes(rowKey));
        put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
        put.add(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(color));

        context.write(rowKeyWritable, put);
    }
}
```
Reducer
```java
package com.buba.reducer;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

public class WriteFruitMRFromTxtReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Pass each Put straight through; TableOutputFormat writes it to the fruit_hdfs table
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
```
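Because this reducer only forwards each Put unchanged, the reduce phase can be skipped entirely. A minimal sketch of the map-only variant (not what the post's driver does): passing null as the reducer class to initTableReducerJob still wires up TableOutputFormat, and with zero reduce tasks the mapper's (ImmutableBytesWritable, Put) output goes straight to the table.

```java
// Map-only variant: replaces the two reducer-related lines in the driver's run()
TableMapReduceUtil.initTableReducerJob("fruit_hdfs", null, job); // null reducer: just wires up TableOutputFormat
job.setNumReduceTasks(0); // mapper output is written directly to the table
```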
Driver
```java
package com.buba.driver;

import java.io.IOException;

import com.buba.mapper.ReadFruitFromHDFSMapper;
import com.buba.reducer.WriteFruitMRFromTxtReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HDFS2HBaseDriver extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "192.168.1.20,192.168.1.21,192.168.1.22"); // ZooKeeper quorum addresses
        configuration.set("hbase.zookeeper.property.clientPort", "2181"); // ZooKeeper client port

        HDFS2HBaseDriver readFruitJob = new HDFS2HBaseDriver();
        int run = ToolRunner.run(configuration, readFruitJob, args);
        System.exit(run);
    }

    @Override
    public int run(String[] args) throws Exception {
        // Get the Configuration passed in by ToolRunner
        Configuration conf = this.getConf();

        // Create the Job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(HDFS2HBaseDriver.class);

        // Input file on HDFS
        // Path inPath = new Path("/input/fruit.txt");
        Path inPath = new Path("hdfs://hadoop-senior01.buba.com:8020/input/fruit/fruit.tsv");
        FileInputFormat.addInputPath(job, inPath);

        // Set the Mapper
        job.setMapperClass(ReadFruitFromHDFSMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);

        // Set the Reducer and the target table (fruit_hdfs) it writes to
        TableMapReduceUtil.initTableReducerJob("fruit_hdfs", WriteFruitMRFromTxtReducer.class, job);

        // At least one reduce task
        job.setNumReduceTasks(1);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }
}
```
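The job assumes the target table fruit_hdfs already exists with an info column family. A minimal helper for creating it, sketched with the same pre-2.0-style client API the post already uses (the class name and package here are mine):

```java
package com.buba.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateFruitTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.20,192.168.1.21,192.168.1.22");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("fruit_hdfs")) {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("fruit_hdfs"));
            desc.addFamily(new HColumnDescriptor("info")); // column family used by the mapper
            admin.createTable(desc);
        }
        admin.close();
    }
}
```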
An alternative way to submit the job; pick either one.
```java
public static void main(String[] args) throws Exception {
    // Job configuration
    Configuration configuration = new Configuration();
    configuration.set("hbase.rootdir", "hdfs://master:8020/hbase");
    configuration.set("hbase.zookeeper.quorum", "master:2181");

    // The first argument is the table to write to
    configuration.set(TableOutputFormat.OUTPUT_TABLE, args[0]);

    // Create the Job
    Job job = Job.getInstance(configuration, "xiaoshuo");

    // Class that carries the job
    job.setJarByClass(HdfsToHbaseApp.class);

    // Important: TableMapReduceUtil ships the HBase dependency jars
    // and configuration with the job at submit time
    TableMapReduceUtil.addDependencyJars(job);

    // Mapper and Reducer classes
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Map output <key, value> types
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // The second argument is the input path
    FileInputFormat.addInputPaths(job, args[1]);

    // Write the output through TableOutputFormat
    job.setOutputFormatClass(TableOutputFormat.class);

    // Submit and wait
    job.waitForCompletion(true);
}
```
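The two styles end up in the same place: the initTableReducerJob call used in the first driver is a convenience wrapper that performs roughly these steps for you (a sketch of its effect, not its exact source):

```java
// Roughly what TableMapReduceUtil.initTableReducerJob("fruit_hdfs", reducerClass, job) does:
job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "fruit_hdfs"); // target table
job.setReducerClass(reducerClass);                 // skipped when reducerClass is null
job.setOutputFormatClass(TableOutputFormat.class); // write Puts to HBase
TableMapReduceUtil.addDependencyJars(job);         // ship HBase jars with the job
```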
![](https://i-blog.csdnimg.cn/blog_migrate/ed95333d376f8e6d27ecc080b2013408.png)
![](https://i-blog.csdnimg.cn/blog_migrate/a3405012596ae20e9a30d5d9195c84e6.png)