// Example: writing to multiple HBase tables from one MapReduce job using the MultiTableOutputFormat class.
package test;
import java.io.IOException;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class MultiTableImport {
/**
 * Filters raw GPS CSV lines and forwards the accepted ones to the reducer.
 *
 * <p>A line is accepted when all of the following hold:
 * <ul>
 *   <li>it splits on ',' into exactly 13 fields;</li>
 *   <li>the plate number (field index 2) starts with "京";</li>
 *   <li>the GPS time (field index 3) is non-empty and, once '-', ':' and ' ' are removed,
 *       starts with the value of the "daytime" configuration key.</li>
 * </ul>
 * Each accepted line is emitted as the key {@code <uuid>,<original line>} with the constant
 * value 1; the uuid is a random UUID with its dashes removed.
 */
public static class MultiTableImportMapper extends Mapper<LongWritable, Text, Text, LongWritable>{
    /** Number of comma-separated fields a valid input line must have. */
    private static final int EXPECTED_FIELD_COUNT = 13;

    private final LongWritable count = new LongWritable(1);
    private final Text text = new Text();
    /** Required prefix of the normalized GPS time; an empty prefix accepts every time. */
    private String daytime = "";

    /**
     * Reads the "daytime" prefix from the job configuration.
     *
     * <p>Falls back to the empty string when the key is not set, so that the prefix test in
     * {@code map} does not throw a NullPointerException.
     */
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        daytime = context.getConfiguration().get("daytime", "");
        System.out.println("daytime="+daytime);
    }

    /**
     * Validates one CSV line and, if it passes every filter, writes it out prefixed with a
     * freshly generated uuid.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   one line of the CSV input
     * @param context used to emit the accepted record
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // String.split(",") drops trailing empty strings, so a line whose last fields are
        // empty yields fewer than 13 elements and is rejected here.
        String[] lineSplits = line.split(",");
        if (lineSplits.length != EXPECTED_FIELD_COUNT) {
            return;
        }

        // Keep only plates starting with "京"; this also rejects records without a plate
        // number. (An earlier, commented-out variant excluded plates containing
        // "冀", "玉", "使" or "密" instead.)
        String plateNumber = lineSplits[2];
        if (!plateNumber.startsWith("京")) {
            return;
        }

        // Drop records with a missing GPS time or one outside the requested day prefix.
        String gpsTime = lineSplits[3];
        if (gpsTime.isEmpty()) {
            return;
        }
        String normalizedTime = gpsTime.replace("-", "").replace(":", "").replace(" ", "");
        if (!normalizedTime.startsWith(daytime)) {
            return;
        }

        // The random uuid makes every map output key unique.
        String uuid = UUID.randomUUID().toString().replace("-", "");
        text.set(uuid + "," + line);
        context.write(text, count);
    }
}
/**
 * Converts each map output key of the form {@code <uuid>,<13 CSV fields>} into two HBase
 * Puts: one for the GPS data table and one for the plate-number index table.
 *
 * <p>The values of each key are ignored; only the key text is used.
 */
public static class MultiTableImportReducer extends Reducer<Text, LongWritable, ImmutableBytesWritable,Put> {
    /** Column family used for every cell written to the GPS data table. */
    private static final byte[] GPS_FAMILY = Bytes.toBytes("F1");
    /** Position of GPS_TIME within the qualifier list below. */
    private static final int GPS_TIME_COLUMN = 3;
    /** Number of comma-separated parts expected in a key: the uuid plus 13 data fields. */
    private static final int EXPECTED_PART_COUNT = 14;

    /** Qualifiers of the GPS data table, in the same order as the CSV fields. */
    private String[] gpsColumns = {
        "DATA_TYPE", "TERMINAL_NUMBER", "PLATE_NUMBER", "GPS_TIME", "LON", "LAT", "SPEED",
        "VEHICLE_STATUS", "LOCATION_STATUS", "AZIMUTHS", "VEHICLE_TYPE", "ALARM_STATUS",
        "FIRE_STATUS"
    };

    /** Output key naming the GPS data table. */
    ImmutableBytesWritable write1 = new ImmutableBytesWritable(Bytes.toBytes("BJ_GPS_DATA"));
    /**
     * Output key naming the index table. A commented-out earlier version used the table
     * name "GPS_INDEXPLATTIME_DATA" here.
     */
    ImmutableBytesWritable write2 = new ImmutableBytesWritable(Bytes.toBytes("GPS_INDEXPLATNUMBER_DATA"));

    /**
     * Writes one row to the GPS data table and one row to the index table for the given key.
     * Keys that do not split on ',' into exactly 14 parts are skipped silently.
     *
     * @param key     {@code <uuid>,<original CSV line>} as produced by the mapper
     * @param values  counts emitted by the mapper (not used)
     * @param context used to emit the (table name, Put) pairs
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        String[] parts = key.toString().split(",");
        if (parts.length != EXPECTED_PART_COUNT) {
            return;
        }

        String rowId = parts[0];
        // GPS time with '-', ':' and ' ' removed; parts[4] is CSV field 3 shifted by the uuid.
        String compactTime = parts[4].replace("-", "").replace(":", "").replace(" ", "");

        // Row for the GPS data table: keyed by uuid, one cell per CSV field. The GPS_TIME
        // cell stores the compacted time instead of the raw field.
        Put gpsPut = new Put(Bytes.toBytes(rowId));
        for (int col = 0; col < gpsColumns.length; col++) {
            String cell = (col == GPS_TIME_COLUMN) ? compactTime : parts[col + 1];
            gpsPut.add(GPS_FAMILY, Bytes.toBytes(gpsColumns[col]), Bytes.toBytes(cell));
        }
        context.write(write1, gpsPut);

        // Row for the plate/time index table: key is "<plate>|<compact time>" with a single
        // empty-valued cell.
        // NOTE(review): a commented-out earlier version built the row key from a region
        // prefix, the plate number, the time and the uuid. The current key omits the uuid,
        // so records sharing plate and time land on the same index row - confirm intended.
        String indexRow = parts[3] + "|" + compactTime;
        Put indexPut = new Put(Bytes.toBytes(indexRow));
        indexPut.add(Bytes.toBytes(Constants.FAMILY), Bytes.toBytes("C1"), Bytes.toBytes(""));
        context.write(write2, indexPut);
    }
}
/**
 * Configures and runs the import job.
 *
 * <p>Expects, after any generic Hadoop options, exactly one argument: the day prefix. It is
 * stored under the "daytime" configuration key (read by the mapper) and also used as the
 * last component of the input path. Prints the job result and exits with status 1 when the
 * job fails, or status 2 when the arguments are invalid.
 *
 * @param args generic Hadoop options followed by the day prefix
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();

    // Parse generic options and validate the arguments before anything reads them; the
    // original indexed args[0] first and failed with ArrayIndexOutOfBoundsException when
    // no argument was given.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 1) {
        System.err.println("Usage: MultiTableImport <daytime>");
        System.exit(2);
    }
    String daytime = otherArgs[0];

    // Default job jar for this development setup; a value supplied through the generic
    // options (e.g. -D mapred.jar=...) takes precedence.
    if (conf.get("mapred.jar") == null) {
        conf.set("mapred.jar", "E:\\MultiTableImport.jar");
    }
    conf.set("daytime", daytime);

    // The Job takes its own copy of the configuration, so every setting above must be in
    // place before it is constructed.
    Job job = new Job(conf, "MultiTableImport");
    job.setJarByClass(MultiTableImport.class);
    job.setMapperClass(MultiTableImportMapper.class);
    job.setReducerClass(MultiTableImportReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(61);

    // NOTE(review): the month directory "201404" is hard-coded. A sample path left in the
    // original comment, /test/datafile/GpsData/20140514/JKSX_20140514.CSV, suggests a
    // different layout - confirm which one is current.
    FileInputFormat.setInputPaths(job, new Path("/test/datafile/GpsData/201404/" + daytime));

    // The reducer emits Puts for two different tables, keyed by table name, so the job
    // uses MultiTableOutputFormat (an earlier, commented-out setup used
    // TableMapReduceUtil.initTableReducerJob with the single table "BJ_GPS").
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);

    boolean result = job.waitForCompletion(true);
    System.out.println(result);
    if (!result) {
        // Report failure to the calling shell or scheduler.
        System.exit(1);
    }
}
}