今天写了一个简单实例:通过 MapReduce 的 Mapper 读取 HDFS 上的文件,并把每一行数据插入到 HBase 表中,在这里记录一下,如下:
java代码(File2HBase1.java):
package com.lyq.study.example;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class File2HBase1 {
public static class MapperClass extends
Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
//列名
public static final String[] COLUMNS = { "card", "type",
"amount", "time", "many" };
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] cloumnVals = value.toString().split(",");
String rowkey = cloumnVals[0] + cloumnVals[3];
Put put = new Put(rowkey.getBytes());
for (int i = 0; i < cloumnVals.length; i++) {
put.add("info".getBytes(), COLUMNS[i].getBytes(),
cloumnVals[i].getBytes());
}
context.write(new ImmutableBytesWritable(rowkey.getBytes()), put);
}
}
@SuppressWarnings("deprecation")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://ubuntu:9000/");
conf.set("mapreduce.framework.name", "local");
conf.set("mapred.job.tracker", "ubuntu:9001");
conf.set("hbase.zookeeper.quorum", "ubuntu");
Job job = new Job(conf,"file2hbase");
job.setJarByClass(File2HBase1.class);
job.setMapperClass(MapperClass.class);
job.setNumReduceTasks(0);
job.setOutputFormatClass(TableOutputFormat.class);
job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "testtable1");
FileInputFormat.addInputPath(job, new Path("hdfs://ubuntu:9000/test/input/data.txt"));
System.out.println(job.waitForCompletion(true) ? 0 : 1);
}
}
hbase创建表语句是:create 'testtable1','info'
data.txt的内容如下:
6222020405006,typeA,100000,201408081225,2000
6222020405006,typeA,100000,201408112351,1000
6222020405006,typeA,100000,201408140739,4000
6222020405008,typeB,50000,201408150932,5000
6222020405009,typeC,30000,201408181212,10000