package sitech;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
public class HdfsToHbase {
public static class HdfsToHbaseMapper extends Mapper<LongWritable, Text, Text, Put> {
private final String family = "info"; // target column family
private final String qualifier = "value"; // target column qualifier
private final String hdfs_split = ","; // field delimiter of the HDFS input lines
private final String hbase_split = "|"; // separator between the fields that form the row key
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// split the input line on the configured field delimiter
String[] fields = value.toString().split(this.hdfs_split);
// read the row-key field indexes from the job configuration
Configuration conf = context.getConfiguration();
String row_key = getKey(conf.get("hdfs_index"), fields);
String rowvalue = getValue(fields);
Put put = new Put(Bytes.toBytes(row_key));
put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(rowvalue));
context.write(new Text(row_key), put);
}
// build the row key: join the fields at the configured indexes with the HBase key separator
public String getKey(String index, String[] fields) {
String[] indexs = index.split(this.hdfs_split);
StringBuilder res_key = new StringBuilder();
for (int i = 0; i < indexs.length; i++) {
res_key.append(fields[Integer.parseInt(indexs[i])]).append(this.hbase_split);
}
// drop the trailing separator
return res_key.deleteCharAt(res_key.length() - 1).toString();
}
// build the cell value: join every field except the first with a comma
public String getValue(String[] fields) {
StringBuilder res_value = new StringBuilder();
for (int i = 1; i < fields.length; i++) {
res_value.append(fields[i]).append(",");
}
// drop the trailing comma
return res_value.deleteCharAt(res_value.length() - 1).toString();
}
}
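// Example (hypothetical data, assuming hdfs_index = "0,1"): the input line
// "20190101,13800000000,beijing,ok" yields row key "20190101|13800000000" and
// cell value "13800000000,beijing,ok" in column info:value.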
public static class HdfsToHbaseReducer extends TableReducer<Text, Put, NullWritable> {
@Override
protected void reduce(Text key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
// pass every Put read from the mapper through to the target HBase table
for (Put put : values) {
context.write(NullWritable.get(), put);
}
}
}
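// Note: the job is configured map-only (setNumReduceTasks(0) in createSubmittableJob),
// so TableOutputFormat writes the mapper's Put values directly and this pass-through
// reducer only runs if the number of reduce tasks is raised above zero.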
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
String hbase_table_name = args[0]; // target HBase table
String hdfs_inPath = args[1]; // HDFS input path
String hdfs_index = args[2]; // comma-separated indexes of the fields that form the row key
conf.set("hbase_table_name", hbase_table_name);
conf.set("hdfs_inPath", hdfs_inPath);
conf.set("hdfs_index", hdfs_index);
Job job = Job.getInstance(conf, HdfsToHbase.class.getSimpleName());
TableMapReduceUtil.initTableReducerJob(
hbase_table_name, // output table
HdfsToHbaseReducer.class, // reducer class
job);
// run map-only: the Put objects emitted by the mapper go straight to TableOutputFormat
job.setNumReduceTasks(0);
job.setJarByClass(HdfsToHbase.class);
job.setMapperClass(HdfsToHbaseMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Put.class);
FileInputFormat.addInputPath(job, new Path(hdfs_inPath));
job.setOutputFormatClass(TableOutputFormat.class); // also set by initTableReducerJob above
return job;
}
public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
Configuration conf = HBaseConfiguration.create();
// disable output compression for this job
conf.set("mapreduce.output.fileoutputformat.compress", "false");
// map task memory, in MB
conf.set("mapreduce.map.memory.mb", "2048");
// HBase / ZooKeeper connection settings
conf.set("hbase.zookeeper.property.clientPort", "9501");
conf.set("hbase.zookeeper.quorum", "xxxxxxxxxxxxxxxxxxxxx"); // replace with the actual ZooKeeper quorum hosts
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 3) {
System.out.println("Wrong number of arguments: " + otherArgs.length);
System.exit(-1);
}
Job job = createSubmittableJob(conf, otherArgs);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
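A possible way to launch the job (the jar name, table name, input path, and index list below are only placeholders): the three positional arguments are the target HBase table, the HDFS input path, and the comma-separated indexes of the fields that form the row key.

hadoop jar hdfs-to-hbase.jar sitech.HdfsToHbase user_info /data/user_info "0,1"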