读取HDFS数据写入HBase中java模板代码
读取hdfs路径/hbase/input/user.txt,字段之间以制表符(\t)分隔,内容如下:
0007 zhangsan 18
0008 lisi 25
0009 wangwu 20
1 使用mapper读取HDFS上的数据 读取一行不做任何处理发送给reducer
public class HDFS2HBaseMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    /**
     * Forwards each non-blank input line unchanged to the reducer as the key.
     *
     * @param key     byte offset of the line within the input split; carries no meaning here
     * @param value   one full line of text read from HDFS
     * @param context Hadoop context used to emit the (line, NULL) pair
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Skip blank lines so the reducer never splits an empty record
        // (split("\t") on "" yields a 1-element array and would break field access).
        if (value.getLength() == 0 || value.toString().trim().isEmpty()) {
            return;
        }
        context.write(value, NullWritable.get());
    }
}
2 TableReducer
public class HDFS2HBaseReducer extends TableReducer<Text, NullWritable, Put> {
    /**
     * Parses one input line (rowkey \t name \t age) and writes it to HBase as a Put.
     *
     * @param key     a whole tab-separated input line forwarded by the mapper
     * @param values  ignored; the mapper emits only NullWritable
     * @param context Hadoop context; TableOutputFormat routes the Put to the target table
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Split the line on tab: [0]=rowkey, [1]=name, [2]=age.
        String[] fields = key.toString().split("\t");
        // Guard against malformed lines so a bad record cannot crash the whole job
        // with an ArrayIndexOutOfBoundsException.
        if (fields.length < 3) {
            return;
        }
        // The first field is the rowkey.
        Put put = new Put(Bytes.toBytes(fields[0]));
        // Use Bytes.toBytes (always UTF-8) rather than String.getBytes(),
        // which depends on the platform default charset.
        put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
        put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"), Bytes.toBytes(Integer.parseInt(fields[2])));
        // TableOutputFormat ignores the output key, so null is acceptable here.
        context.write(null, put);
    }
}
3 HDFS2HBaseClient
public class HDFS2HBaseClient {
    /**
     * Job driver: reads lines from HDFS and bulk-writes them into the HBase
     * table "myuser2" via a TableReducer.
     *
     * @param args unused
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        // HBaseConfiguration.create() layers hbase-site.xml on top of the plain
        // Hadoop configuration; a bare new Configuration() would not know how
        // to reach ZooKeeper/HBase.
        Configuration configuration = HBaseConfiguration.create();
        Job job = Job.getInstance(configuration);
        // Main class so the jar can be distributed to the cluster.
        job.setJarByClass(HDFS2HBaseClient.class);
        // Input path on HDFS and the mapper's output key/value types.
        FileInputFormat.addInputPath(job, new Path("hdfs://node-1:8020/hbase/input"));
        job.setMapperClass(HDFS2HBaseMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        // TableMapReduceUtil wires up the reducer, output format, target table
        // and dependency jars in one call.
        TableMapReduceUtil.initTableReducerJob("myuser2", HDFS2HBaseReducer.class, job);
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}