import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * MapReduce job that builds secondary-index tables for an HBase table.
 *
 * <p>Usage: {@code HBaseIndexBuilder <table> <columnFamily> <field> [<field> ...]}.
 * For example, to index the {@code name} and {@code email} columns of family
 * {@code info} in table {@code heroes}, run with the arguments
 * {@code heroes info name email}.
 *
 * <p>Every indexed field is written to its own index table named
 * {@code <table>-<field>} (e.g. {@code heroes-name}). In an index table the row key
 * is the indexed column's value and the single column {@code INDEX:ROW} holds the
 * row key of the source row. This class does not create the index tables;
 * presumably they must already exist before the job runs (TODO confirm).
 */
public class HBaseIndexBuilder extends Configured implements Tool {

    /**
     * Configuration created by {@link #main(String[])}. Kept for backward
     * compatibility; {@link #run(String[])} prefers the configuration injected
     * through {@code setConf} and only falls back to this field.
     */
    static Configuration conf;

    /** Column family of the only column in every index table ({@code INDEX:ROW}). */
    public static final byte[] INDEX_COLUMN = Bytes.toBytes("INDEX");

    /** Qualifier of the only column in every index table ({@code INDEX:ROW}). */
    public static final byte[] INDEX_QUALIFIER = Bytes.toBytes("ROW");

    /**
     * Map-only task: for each source row, emits one {@link Put} per indexed column
     * that has a value, addressed to that column's index table.
     */
    public static class MyMapper
            extends Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Writable> {

        /** Column family the indexed columns are read from. */
        private byte[] family;

        /**
         * Maps an indexed column qualifier to the name of its index table
         * ({@code <table>-<qualifier>}). The qualifier is used to read the cell
         * value (which becomes the index row key); the value is the output table.
         * The map is only ever iterated, never looked up by key, so the identity
         * based hashing of {@code byte[]} keys is harmless here.
         */
        private Map<byte[], ImmutableBytesWritable> indexes;

        /**
         * Reads the table name, column family and field list that
         * {@code configureJob} stored in the job configuration.
         *
         * @throws IOException if any of {@code index.tablename},
         *         {@code index.familyname} or {@code index.fields} is missing
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            Configuration configuration = context.getConfiguration();
            String tableName = configuration.get("index.tablename");
            String familyName = configuration.get("index.familyname");
            // Names of the columns that must be indexed.
            String[] fields = configuration.getStrings("index.fields");
            if (tableName == null || familyName == null || fields == null) {
                // Fail with a readable message instead of a bare NullPointerException.
                throw new IOException("Missing job configuration: index.tablename="
                        + tableName + ", index.familyname=" + familyName
                        + ", index.fields=" + (fields == null ? "null" : "set"));
            }

            family = Bytes.toBytes(familyName);
            indexes = new HashMap<byte[], ImmutableBytesWritable>();
            for (String field : fields) {
                // e.g. name -> heroes-name, email -> heroes-email
                indexes.put(Bytes.toBytes(field),
                        new ImmutableBytesWritable(Bytes.toBytes(tableName + "-" + field)));
            }
        }

        /**
         * Emits, for every indexed column present in {@code result}, a Put whose row
         * key is the column value and whose {@code INDEX:ROW} cell is the source row key.
         */
        @Override
        protected void map(ImmutableBytesWritable rowkey, Result result, Context context)
                throws IOException, InterruptedException {
            for (Map.Entry<byte[], ImmutableBytesWritable> index : indexes.entrySet()) {
                byte[] qualifier = index.getKey();                   // e.g. name
                ImmutableBytesWritable tableName = index.getValue(); // e.g. heroes-name
                // Value of <family>:<qualifier> in the source row, e.g. info:name.
                byte[] value = result.getValue(family, qualifier);
                if (value != null) {
                    // The column value becomes the row key of the index row.
                    Put put = new Put(value);
                    // NOTE(review): Put.add(byte[], byte[], byte[]) appears to be
                    // deprecated in newer HBase clients in favour of addColumn —
                    // confirm against the HBase version this project builds with.
                    put.add(INDEX_COLUMN, INDEX_QUALIFIER, rowkey.get());
                    // The output key selects the destination table; the value written
                    // to MultiTableOutputFormat must be a Put or a Delete.
                    context.write(tableName, put);
                }
            }
        }
    }

    /**
     * Configures and runs the index-building job.
     *
     * @param args {@code <table> <columnFamily> <field> [<field> ...]} with generic
     *        Hadoop options already stripped (as done by {@link ToolRunner})
     * @return 0 if the job succeeded, 1 if it failed, -1 if too few arguments were given
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 3) {
            // Message text: "too few arguments".
            System.err.println("参数过少");
            return -1;
        }
        // Honour the Tool contract: use the configuration ToolRunner injected.
        // Fall back to the legacy static field, then to a fresh HBase configuration,
        // so that a direct call to run() no longer dereferences null.
        Configuration jobConf = getConf();
        if (jobConf == null) {
            jobConf = (conf != null) ? conf : HBaseConfiguration.create();
        }
        Job job = configureJob(jobConf, args);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Stores the job parameters in {@code conf} and builds a map-only job that reads
     * the source table through {@link TableInputFormat} and writes through
     * {@link MultiTableOutputFormat}.
     *
     * @param conf configuration to populate and build the job from
     * @param otherArgs {@code <table> <columnFamily> <field> [<field> ...]};
     *        must contain at least three elements
     * @return the configured, not yet submitted job
     */
    private Job configureJob(Configuration conf, String[] otherArgs) throws IOException {
        String tableName = otherArgs[0];    // e.g. heroes
        String columnFamily = otherArgs[1]; // e.g. info

        // No explicit Scan is serialized into the configuration here.
        conf.set(TableInputFormat.INPUT_TABLE, tableName); // "hbase.mapreduce.inputtable"
        // These keys are read back by MyMapper.setup().
        conf.set("index.tablename", tableName);
        // The family comes from the command line; it must not be overridden with a
        // hard-coded value, otherwise the mapper reads the wrong column family.
        conf.set("index.familyname", columnFamily);

        // Everything after <table> <columnFamily> is a column to index.
        String[] fields = new String[otherArgs.length - 2];
        System.arraycopy(otherArgs, 2, fields, 0, fields.length);
        conf.setStrings("index.fields", fields);

        Job job = Job.getInstance(conf, this.getClass().getName());
        job.setJarByClass(HBaseIndexBuilder.class);
        job.setMapperClass(MyMapper.class);
        job.setNumReduceTasks(0); // map-only: Puts go straight to the output format
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(MultiTableOutputFormat.class);
        return job;
    }

    /**
     * Command-line entry point. Generic Hadoop options are parsed once by
     * {@link ToolRunner} into the HBase configuration created here; the remaining
     * arguments are validated and consumed by {@link #run(String[])}.
     */
    public static void main(String[] args) throws Exception {
        conf = HBaseConfiguration.create();
        int exitCode = ToolRunner.run(conf, new HBaseIndexBuilder(), args);
        System.exit(exitCode);
    }
}
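// Source article title: "为HBASE构建辅助索引" (Building a secondary index for HBase).
// Page footer: "最新推荐文章于 2023-07-14 15:22:44 发布" (latest recommended article published 2023-07-14 15:22:44).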