-
Generating HFiles with a custom MapReduce job
Basic workflow for generating HFiles (a driver sketch follows this list):
1. Set the Mapper's output key/value types:
   K: ImmutableBytesWritable (the row key)
   V: KeyValue (a single cell)
2. Write the Mapper:
   read your raw data and process it as needed;
   emit the rowkey as K and one or more KeyValue objects (or a Put) as V
3. Configure the job:
   the ZooKeeper quorum address;
   set the OutputFormat to HFileOutputFormat2 and configure it
4. Submit the job
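A minimal driver sketch for these four steps, assuming HBase 2.x APIs; the table name "movie", the input/output paths, the quorum hosts, and HFileMapper (standing in for the Mapper from step 2) are placeholders, not code from this document:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GenHFileDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Step 3a: ZooKeeper quorum of the HBase cluster (placeholder hosts)
        conf.set("hbase.zookeeper.quorum", "linux01:2181,linux02:2181,linux03:2181");
        Job job = Job.getInstance(conf, "gen-hfile");
        job.setJarByClass(GenHFileDriver.class);
        // Steps 1-2: a Mapper emitting (ImmutableBytesWritable rowkey, KeyValue cell);
        // HFileMapper is hypothetical, standing in for the Mapper from step 2
        job.setMapperClass(HFileMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
        FileInputFormat.setInputPaths(job, new Path("/movie/input"));
        FileOutputFormat.setOutputPath(job, new Path("/movie/hfile_out"));
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("movie"));
             RegionLocator locator = conn.getRegionLocator(TableName.valueOf("movie"))) {
            // Step 3b: sets HFileOutputFormat2 as the OutputFormat and configures
            // total-order partitioning so each reducer writes one region's HFiles
            HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), locator);
            // Step 4: submit the job
            job.waitForCompletion(true);
        }
    }
}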
Workflow for importing the HFiles into the RegionServers (see the sketch after this list):
   build a table descriptor object
   build a region locator
   then use BulkLoadHFilesTool to perform the bulk load
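A sketch of the import step under the same assumptions; note that BulkLoadHFilesTool (org.apache.hadoop.hbase.tool, HBase 2.2+) exposes bulkLoad, while the older doBulkLoad name belongs to the deprecated LoadIncrementalHFiles. The table name and HFile directory are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;

public class BulkLoadDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "linux01:2181,linux02:2181,linux03:2181");
        // Hands the HFiles under the output directory over to the RegionServers;
        // the tool resolves table metadata and region boundaries itself
        BulkLoadHFilesTool tool = new BulkLoadHFilesTool(conf);
        tool.bulkLoad(TableName.valueOf("movie"), new Path("/movie/hfile_out"));
    }
}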
-
Data
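The sample data is not reproduced here. Judging from the fields the Mapper below parses with Gson, each input line is one JSON record; an illustrative (made-up) example:

{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}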
-
Code demonstration (note: this job writes Puts directly to the table through TableOutputFormat via TableMapReduceUtil.initTableReducerJob, rather than generating HFiles as in the flow above)
import com.google.gson.Gson;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
public class LoadData {

    // Mapper: parses each JSON line into a MovieBean and emits (rowkey, MovieBean).
    // The rowkey is the zero-padded movie id plus the timestamp, which keeps rows
    // for the same movie adjacent and makes each rowkey unique.
    static class LoadDataMapper extends Mapper<LongWritable, Text, Text, MovieBean> {
        Gson gs = new Gson();
        Text k = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) {
            try {
                String line = value.toString();
                MovieBean mb = gs.fromJson(line, MovieBean.class);
                // Left-pad the movie id to 5 digits so rowkeys sort numerically
                String s = StringUtils.leftPad(mb.getMovie(), 5, '0');
                String rk = s + "_" + mb.getTimeStamp();
                k.set(rk);
                context.write(k, mb);
            } catch (Exception e) {
                // Skip malformed lines instead of failing the task
                e.printStackTrace();
            }
        }
    }

    // Reducer: turns each MovieBean into a Put against column family "cf"
    static class LoadDataReducer extends TableReducer<Text, MovieBean, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<MovieBean> values, Context context)
                throws IOException, InterruptedException {
            String rk = key.toString();
            Put put = new Put(Bytes.toBytes(rk));
            // The rowkey (movieId_timestamp) is unique, so one bean per key
            MovieBean mb = values.iterator().next();
            put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("movie"), Bytes.toBytes(mb.getMovie()));
            put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("rate"), Bytes.toBytes(mb.getRate()));
            put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("timeStamp"), Bytes.toBytes(mb.getTimeStamp()));
            put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("Uid"), Bytes.toBytes(mb.getUid()));
            // TableOutputFormat ignores the output key, so null is acceptable here
            context.write(null, put);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // ZooKeeper quorum of the target HBase cluster
        conf.set("hbase.zookeeper.quorum", "linux01:2181,linux02:2181,linux03:2181");
        Job job = Job.getInstance(conf, "load");
        job.setJarByClass(LoadData.class); // needed when the job is submitted to a cluster
        job.setMapperClass(LoadDataMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MovieBean.class);
        FileInputFormat.setInputPaths(job, new Path("D:\\mrdata\\mrdata\\movie\\input"));
        // Wires in the reducer and TableOutputFormat targeting table "movie_22"
        TableMapReduceUtil.initTableReducerJob("movie_22", LoadDataReducer.class, job);
        job.waitForCompletion(true);
    }
}
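The MovieBean class is referenced above but not shown in this listing. A sketch of what it presumably looks like, inferred from the getters used in the code; since it serves as the map output value type it must implement Hadoop's Writable, and its field names must match the JSON keys for Gson to populate it:

import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class MovieBean implements Writable {
    // Field names match the JSON keys parsed by Gson in the Mapper
    private String movie;
    private String rate;
    private String timeStamp;
    private String uid;

    public String getMovie() { return movie; }
    public String getRate() { return rate; }
    public String getTimeStamp() { return timeStamp; }
    public String getUid() { return uid; }

    // Serialization used when the bean is shuffled between map and reduce
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(movie);
        out.writeUTF(rate);
        out.writeUTF(timeStamp);
        out.writeUTF(uid);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        movie = in.readUTF();
        rate = in.readUTF();
        timeStamp = in.readUTF();
        uid = in.readUTF();
    }
}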