Import the HBase MapReduce dependency
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-mapreduce -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-mapreduce</artifactId>
    <version>${hbase.version}</version>
</dependency>
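The ${hbase.version} placeholder is assumed to be defined in the POM's <properties> block; if it is not, a property like the following can be added (the version value here is only an example and should match the cluster's HBase version):
<properties>
    <hbase.version>2.0.5</hbase.version>
</properties>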
Map side
package day7hbase.Hdfs2Hbase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* @date 2019/7/24
* @author Fantome
*/
public class ReadMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    /**
     * Read each line from HDFS, split it, wrap the fields in a Put,
     * and pass it on to the reduce side.
     * Because the input comes from HDFS files, a plain Mapper is used
     * here rather than TableMapper.
     * @param key
     * @param value
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key,
                       Text value,
                       Context context) throws IOException, InterruptedException {
        // Input format (tab-separated): 1 lisa 0 17
        String[] split = value.toString().split("\t");
        String rowkey = split[0];
        String name = split[1];
        String sex = split[2];
        String age = split[3];
        // Output key: the row key wrapped in an ImmutableBytesWritable
        ImmutableBytesWritable immutableBytesWritable = new ImmutableBytesWritable(Bytes.toBytes(rowkey));
        // Output value: a Put carrying the three columns of the "info" family
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("sex"), Bytes.toBytes(sex));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(age));
        context.write(immutableBytesWritable, put);
    }
}
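For reference, a minimal sample of the input file on HDFS that this mapper expects, with fields separated by a tab character (the rows below are illustrative only):
1	lisa	0	17
2	tom	1	18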
Reduce side
package day7hbase.Hdfs2Hbase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
* @date 2019/7/24
* @author Fantome
*/
public class ReadReduce extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    /**
     * Simply forwards each Put to the output without further processing.
     * Because the output goes into HBase, TableReducer is used here.
     * @param key
     * @param values
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(ImmutableBytesWritable key,
                          Iterable<Put> values,
                          Context context) throws IOException, InterruptedException {
        for (Put p : values) {
            context.write(NullWritable.get(), p);
        }
    }
}
Driver side
package day7hbase.Hdfs2Hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
public class ReadDrive extends Configured implements Tool {
    /**
     * The driver needs the HBase configuration, so it implements Tool and is
     * launched through run(); the HBase conf is passed in from main().
     * @param args
     * @return
     * @throws Exception
     */
    @Override
    public int run(String[] args) throws Exception {
        // Get the Configuration passed in by ToolRunner
        Configuration conf = this.getConf();
        // Create the Job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(ReadDrive.class);
        // Input data path
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        // Set the Mapper
        job.setMapperClass(ReadMap.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        // Set the Reducer
        job.setReducerClass(ReadReduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Put.class);
        // Specify the target HBase table to import into
        TableMapReduceUtil.initTableReducerJob(
                "fruit_mr",
                ReadReduce.class,
                job);
        // Set the number of reduce tasks, at least 1
        job.setNumReduceTasks(1);
        boolean isSuccess = job.waitForCompletion(true);
        if (!isSuccess) {
            throw new IOException("Job running with error");
        }
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Create the HBase conf
        Configuration conf = HBaseConfiguration.create();
        // Use ToolRunner to run this driver (new ReadDrive())
        int result = ToolRunner.run(conf, new ReadDrive(), args);
        System.exit(result);
    }
}
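Note that initTableReducerJob only configures the job; it does not create the target table, so fruit_mr with the info column family must already exist before the job is submitted. A minimal sketch of creating it with the HBase 2.x Admin API (which the hbase-mapreduce artifact implies; the class name CreateFruitMrTable is only illustrative):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;

public class CreateFruitMrTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("fruit_mr");
            // Only create the table if it does not exist yet
            if (!admin.tableExists(tableName)) {
                admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
                        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("info"))
                        .build());
            }
        }
    }
}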
Run command:
yarn jar /opt/test/sparkStudy-1.0-SNAPSHOT.jar day7hbase.Hdfs2Hbase.ReadDrive /Hdfs2HbaseData.txt
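After the job finishes successfully, the imported rows can be checked in the HBase shell, for example with:
scan 'fruit_mr'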