Environment Preparation
Building on the previous post, you also need the HBase jar packages on the classpath: locate the HBase installation directory and import the jars under its lib folder into the project (typically hbase-client, hbase-common, hbase-server, hbase-protocol and their dependencies such as zookeeper and guava, depending on your HBase version).
Writing the Code
package Hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import java.io.IOException;
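// Reads the tab-separated MapReduce result files from HDFS and writes
// each line into the HBase table "test" through TableOutputFormat.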
public class HBaseImport {
    // Name of the HBase table the reducer writes to
    private static String tableName = "test";
    // Initialize the HBase connection configuration
    static Configuration conf = null;
    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.rootdir", "hdfs://192.168.60.1:9000/hbase");
        // hbase.master takes host:port, not an hdfs:// URL
        conf.set("hbase.master", "192.168.60.1:60000");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.zookeeper.quorum", "master,slave1,slave2");
        conf.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    }
    // Identity mapper: forwards each input line unchanged,
    // keyed by its byte offset in the input file
    public static class BatchMapper extends
            Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, LongWritable, Text>.Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }
    // Reducer that turns each line into an HBase Put: the first
    // tab-separated field becomes the row key, and the whole line
    // is stored in the column info:raw
    public static class BatchReducer extends
            TableReducer<LongWritable, Text, NullWritable> {
        private String family = "info";

        @Override
        protected void reduce(LongWritable key, Iterable<Text> values,
                Reducer<LongWritable, Text, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            for (Text value : values) {
                String[] splited = value.toString().split("\t");
                String rowKey = splited[0];
                Put put = new Put(rowKey.getBytes());
                // put.add(...) is the pre-1.0 client API;
                // on HBase 1.0+ use put.addColumn(...) instead
                put.add(family.getBytes(), "raw".getBytes(),
                        value.toString().getBytes());
                context.write(NullWritable.get(), put);
            }
        }
    }
    public static void imputil(String inputPath) throws IOException,
            ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(conf, HBaseImport.class.getSimpleName());
        // Ship the HBase jars with the job so the cluster can find them
        TableMapReduceUtil.addDependencyJars(job);
        job.setJarByClass(HBaseImport.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(BatchMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(BatchReducer.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        job.waitForCompletion(true);
    }
    public static void main(String[] args) throws ClassNotFoundException,
            IOException, InterruptedException {
        String[] inputs = {
                "hdfs://192.168.60.1:9000/sogou/data/CountAll/part-r-00000",
                "hdfs://192.168.60.1:9000/sogou/data/CountNotNull/part-r-00000",
                "hdfs://192.168.60.1:9000/sogou/data/CountNotMoreUid/part-r-00000",
                "hdfs://192.168.60.1:9000/sogou/data/CountNotRepeat/part-r-00000",
                "hdfs://192.168.60.1:9000/sogou/data/CountQueriesGreater2/part-r-00000",
                "hdfs://192.168.60.1:9000/sogou/data/CountTop50/part-r-00000",
                "hdfs://192.168.60.1:9000/sogou/data/CountURL/part-r-00000",
                "hdfs://192.168.60.1:9000/sogou/data/CountUidGreater3/part-r-00000"
        };
        for (String input : inputs) {
            imputil(input);
        }
    }
}
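Besides the shell check below, you can also read the table back from Java to confirm the import. The following is a minimal sketch, not part of the original program, using the same pre-1.0 HTable client API as the job; the class name HBaseVerify and the 10-row limit are just illustrative choices.

package Hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseVerify {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master,slave1,slave2");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        // HTable is the pre-1.0 client API, matching the jars used above
        HTable table = new HTable(conf, "test");
        try {
            Scan scan = new Scan();
            scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("raw"));
            ResultScanner scanner = table.getScanner(scan);
            try {
                int shown = 0;
                for (Result r : scanner) {
                    System.out.println(Bytes.toString(r.getRow()) + " => "
                            + Bytes.toString(r.getValue(
                                    Bytes.toBytes("info"), Bytes.toBytes("raw"))));
                    if (++shown >= 10) break; // print only the first 10 rows
                }
            } finally {
                scanner.close();
            }
        } finally {
            table.close();
        }
    }
}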
Querying the Imported Results with the HBase Shell
In a terminal, run the command hbase shell to enter the HBase command line.
Then run scan 'test' to view the imported results.
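If the table is large, scan 'test' will print every row; you can limit the output with scan 'test', {LIMIT => 10}, or fetch a single row with get 'test', '<rowkey>' (substitute a real row key).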