package hbase;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;
import com.hadoop.mapreduce.LzoTextInputFormat;
public class FeatureMiningWithHFile {
static Charset charset = Charset.forName("utf8");
static final String BLACK_NUM = "BLACK_NUM";
static final String MISSING_RATE = "MISSING_RATE";
static final String ERROR_RATE = "ERROR_RATE";
/****
*
* This process really comes down to two things. First, HFile: all three jobs
* use HFileOutputFormat as their outputFormatClass, so the output files are in
* HFile format. Second, bulk load: the HFile-format files are imported into the
* HBase tables.
* "Kill-all" rule, a key is judged black when: B > N and W = 0 and T = 0. The
* map input is the combined, compressed summary file, one record per line:
* key\tw\tb\tt. The map output goes directly into the BlackFeature_strHour table.
*/
public static class BlackFeatureMapper extends
Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
public static enum Counters {
BLACK_ROWS
}
byte[] family = Bytes.toBytes("F");
byte[] w = Bytes.toBytes("w");
byte[] b = Bytes.toBytes("b");
byte[] t = Bytes.toBytes("t");
int black_num;
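// One input line per key: key\tw\tb\tt; keep only keys with no white or third-party hits and more than black_num black hits.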
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] strValue = value.toString().split("\t");
long wcount = Long.parseLong(strValue[1]);
long bcount = Long.parseLong(strValue[2]);
long tcount = Long.parseLong(strValue[3]);
if (wcount == 0 && tcount == 0 && bcount > black_num) {
context.getCounter(Counters.BLACK_ROWS).increment(1);
byte[] row = Bytes.toBytes(strValue[0]);
ImmutableBytesWritable k = new ImmutableBytesWritable(row);
Put put = new Put(row);
put.add(family, w, Bytes.toBytes(strValue[1]));
put.add(family, b, Bytes.toBytes(strValue[2]));
put.add(family, t, Bytes.toBytes(strValue[3]));
context.write(k, put);
}
}
protected void setup(Context context) throws IOException,
InterruptedException {
Configuration conf = context.getConfiguration();
black_num = Integer.parseInt(conf.get(BLACK_NUM).trim());
}
}
/****
* False-positive (wrongly-blocked) check, condition: (w+t) != 0 and
* b/(w+t) > missing_rate. The map input is the combined, compressed summary
* file, one record per line: key\tw\tb\tt. The map output goes directly into
* the MissingFeature_strHour table.
*/
public static class MissingFeatureMapper extends
Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
public static enum Counters {
MISSING_ROWS
}
byte[] family = Bytes.toBytes("F");
byte[] w = Bytes.toBytes("w");
byte[] b = Bytes.toBytes("b");
byte[] t = Bytes.toBytes("t");
int missing_rate;
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] strValue = value.toString().split("\t");
long wcount = Long.parseLong(strValue[1]);
long bcount = Long.parseLong(strValue[2]);
long tcount = Long.parseLong(strValue[3]);
// Use floating-point division so the black/(white+third) ratio is not truncated before the threshold comparison.
if ((wcount + tcount) != 0
&& (double) bcount / (wcount + tcount) > missing_rate) {
context.getCounter(Counters.MISSING_ROWS).increment(1);
byte[] row = Bytes.toBytes(strValue[0]);
ImmutableBytesWritable k = new ImmutableBytesWritable(row);
Put put = new Put(row);
put.add(family, w, Bytes.toBytes(strValue[1]));
put.add(family, b, Bytes.toBytes(strValue[2]));
put.add(family, t, Bytes.toBytes(strValue[3]));
context.write(k, put);
}
}
protected void setup(Context context) throws IOException,
InterruptedException {
Configuration conf = context.getConfiguration();
missing_rate = Integer.parseInt(conf.get(MISSING_RATE).trim());
}
}
/****
* Missed-detection check, condition: b != 0 and (w+t)/b > error_rate. The map
* input is the combined, compressed summary file, one record per line:
* key\tw\tb\tt. The map output goes directly into the ErrorFeature_strHour
* table.
*/
public static class ErrorFeatureMapper extends
Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
public static enum Counters {
Error_ROWS
}
byte[] family = Bytes.toBytes("F");
byte[] w = Bytes.toBytes("w");
byte[] b = Bytes.toBytes("b");
byte[] t = Bytes.toBytes("t");
int error_rate;
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] strValue = value.toString().split("\t");
long wcount = Long.parseLong(strValue[1]);
long bcount = Long.parseLong(strValue[2]);
long tcount = Long.parseLong(strValue[3]);
// Floating-point division, so the (white+third)/black ratio is not truncated before the threshold comparison.
if (bcount != 0 && (double) (wcount + tcount) / bcount > error_rate) {
context.getCounter(Counters.Error_ROWS).increment(1);
byte[] row = Bytes.toBytes(strValue[0]);
ImmutableBytesWritable k = new ImmutableBytesWritable(row);
Put put = new Put(row);
put.add(family, w, Bytes.toBytes(strValue[1]));
put.add(family, b, Bytes.toBytes(strValue[2]));
put.add(family, t, Bytes.toBytes(strValue[3]));
context.write(k, put);
}
}
protected void setup(Context context) throws IOException,
InterruptedException {
Configuration conf = context.getConfiguration();
error_rate = Integer.parseInt(conf.get(ERROR_RATE).trim());
}
}
private static Logger logger = Logger
.getLogger(FeatureMiningWithHFile.class);
private static DateFormat df = new SimpleDateFormat("yyyyMMddHH");
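// Example invocation (the jar name below is illustrative, not taken from the source):
// hadoop jar feature-mining.jar hbase.FeatureMiningWithHFile true 2013071010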
/**
* @param args args[0]: "true" to call System.exit(0) after mining completes;
*             args[1] (optional): the hour to mine, formatted yyyyMMddHH,
*             defaulting to the current hour.
*/
public static void main(String[] args) {
if (args.length < 1) {
logger.error("please input args <ifexit> <yyyyMMddHH>");
return;
}
Configuration conf = HBaseConfiguration.create();
Properties p = loadProperties();
conf.set(BLACK_NUM, p.getProperty(BLACK_NUM, "20"));
conf.set(MISSING_RATE, p.getProperty(MISSING_RATE, "20"));
conf.set(ERROR_RATE, p.getProperty(ERROR_RATE, "20"));
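// Very long lease and RPC timeouts (3,000,000 ms), presumably so the long-running scans and bulk loads do not time out.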
conf.set("hbase.regionserver.lease.period", "3000000");
conf.set("hbase.rpc.timeout", "3000000");
p.clear();
String strHour = df.format(new Date());
if (args.length >= 2) {
strHour = args[1];
}
String strDate = strHour.substring(0, 8);
FileSystem fs = null;
try {
fs = FileSystem.get(conf);
Path input = getPath(fs, strHour);
Path parent = input.getParent();
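// LoadIncrementalHFiles performs the bulk-load step: it moves completed HFiles into the regions of the target table.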
LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
createTable(conf, strDate);
// Job 1: mine the pure-black list
Job blackFeatureJob = new Job(conf, "black feature " + strDate);
blackFeatureJob.setJarByClass(FeatureMiningWithHFile.class);
blackFeatureJob.setInputFormatClass(LzoTextInputFormat.class);
FileInputFormat.addInputPath(blackFeatureJob, input);
blackFeatureJob.setMapOutputKeyClass(ImmutableBytesWritable.class);
blackFeatureJob.setMapOutputValueClass(Put.class);
blackFeatureJob.setOutputKeyClass(ImmutableBytesWritable.class);
blackFeatureJob.setOutputValueClass(KeyValue.class);
blackFeatureJob.setMapperClass(BlackFeatureMapper.class);
blackFeatureJob.setOutputFormatClass(HFileOutputFormat.class);
HTable blackFeatureTable = new HTable(conf, "BlackFeature_"
+ strDate);
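// configureIncrementalLoad wires up the rest of the HFile pipeline: TotalOrderPartitioner over the table's current region boundaries, PutSortReducer (the map output value is Put), and one reducer per region.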
HFileOutputFormat.configureIncrementalLoad(blackFeatureJob,
blackFeatureTable);
Path blackFeatureOutput = new Path(parent, "blackfeature");
if (fs.exists(blackFeatureOutput)) {
fs.delete(blackFeatureOutput, true);
}
FileOutputFormat.setOutputPath(blackFeatureJob, blackFeatureOutput);
blackFeatureJob.waitForCompletion(true);
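// Move the HFiles generated under blackFeatureOutput into the BlackFeature_<date> table.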
loader.doBulkLoad(blackFeatureOutput, blackFeatureTable);
// Job 2: false-positive check
Job missingFeatureJob = new Job(conf, "missing feature " + strDate);
missingFeatureJob.setJarByClass(FeatureMiningWithHFile.class);
missingFeatureJob.setInputFormatClass(LzoTextInputFormat.class);
missingFeatureJob
.setMapOutputKeyClass(ImmutableBytesWritable.class);
missingFeatureJob.setMapOutputValueClass(Put.class);
missingFeatureJob.setOutputKeyClass(ImmutableBytesWritable.class);
missingFeatureJob.setOutputValueClass(KeyValue.class);
missingFeatureJob.setMapperClass(MissingFeatureMapper.class);
missingFeatureJob.setOutputFormatClass(HFileOutputFormat.class);
HTable missingFeatureTable = new HTable(conf, "MissingFeature_"
+ strDate);
HFileOutputFormat.configureIncrementalLoad(missingFeatureJob,
missingFeatureTable);
FileInputFormat.addInputPath(missingFeatureJob, input);
Path missingFeatureOutput = new Path(parent, "missingfeature");
if (fs.exists(missingFeatureOutput)) {
fs.delete(missingFeatureOutput, true);
}
FileOutputFormat.setOutputPath(missingFeatureJob,
missingFeatureOutput);
missingFeatureJob.waitForCompletion(true);
loader.doBulkLoad(missingFeatureOutput, missingFeatureTable);
// Job 3: missed-detection check
Job errorFeatureJob = new Job(conf, "error feature " + strDate);
errorFeatureJob.setJarByClass(FeatureMiningWithHFile.class);
errorFeatureJob.setInputFormatClass(LzoTextInputFormat.class);
errorFeatureJob.setMapOutputKeyClass(ImmutableBytesWritable.class);
errorFeatureJob.setMapOutputValueClass(Put.class);
errorFeatureJob.setOutputKeyClass(ImmutableBytesWritable.class);
errorFeatureJob.setOutputValueClass(KeyValue.class);
errorFeatureJob.setMapperClass(ErrorFeatureMapper.class);
errorFeatureJob.setOutputFormatClass(HFileOutputFormat.class);
HTable errorFeatureTable = new HTable(conf, "ErrorFeature_"
+ strDate);
HFileOutputFormat.configureIncrementalLoad(errorFeatureJob,
errorFeatureTable);
FileInputFormat.addInputPath(errorFeatureJob, input);
Path errorFeatureOutput = new Path(parent, "errorfeature");
if (fs.exists(errorFeatureOutput)) {
fs.delete(errorFeatureOutput, true);
}
FileOutputFormat.setOutputPath(errorFeatureJob, errorFeatureOutput);
errorFeatureJob.waitForCompletion(true);
loader.doBulkLoad(errorFeatureOutput, errorFeatureTable);
// Mining finished; update the flag table
String flagTablename = "FeatureMiningFlag";
HBaseAdmin admin = new HBaseAdmin(conf);
try {
if (!admin.tableExists(flagTablename)) {
HTableDescriptor htd = new HTableDescriptor(flagTablename);
HColumnDescriptor hcd = new HColumnDescriptor("f");
hcd.setMaxVersions(1);
htd.addFamily(hcd);
admin.createTable(htd);
}
// Insert the flag row
addData(conf, flagTablename, strDate);
} catch (IOException e) {
logger.error("drop table occur IOException", e);
}
if (args[0].equals("true")) {
System.exit(0);
}
} catch (Exception e) {
logger.error("feature mining failed", e);
} finally {
if (fs != null) {
try {
fs.close();
} catch (IOException e) {
logger.error("closing FileSystem failed", e);
}
}
}
}
/**
* Insert the completion-flag row for strDate into the flag table.
*/
public static void addData(Configuration conf, String tableName,
String strDate) throws Exception {
HTable tb = new HTable(conf, tableName);
DateFormat df = new SimpleDateFormat("yyyyMMdd HH:mm:ss");
Put putRow = new Put(strDate.getBytes());
putRow.add("f".getBytes(), ("time").getBytes(),
(df.format(new Date())).getBytes());
tb.put(putRow);
}
private static void createTable(Configuration conf, String strDate)
throws IOException {
createTable(conf, "BlackFeature_" + strDate, "F");
createTable(conf, "MissingFeature_" + strDate, "F");
createTable(conf, "ErrorFeature_" + strDate, "F");
}
private static void createTable(Configuration conf, String tablename,
String family) throws IOException {
HBaseAdmin admin = new HBaseAdmin(conf);
if (admin.tableExists(tablename)) {
admin.disableTable(tablename);
admin.deleteTable(tablename);
}
HTableDescriptor htd = new HTableDescriptor(tablename);
HColumnDescriptor hcd = new HColumnDescriptor(family);
hcd.setMaxVersions(1);
htd.addFamily(hcd);
admin.createTable(htd);
}
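// Walk back one hour at a time from strHour until an existing .../combine input directory is found (never terminates if no such directory exists).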
private static Path getPath(FileSystem fs, String strHour)
throws ParseException, IOException {
Calendar cal = Calendar.getInstance();
cal.setTime(df.parse(strHour));
Path path = new Path("hbase/MRSample/" + strHour + "/combine");
if (fs.exists(path)) {
logger.info("find path,[" + path.toString() + "]");
return path;
} else {
cal.add(Calendar.HOUR_OF_DAY, -1);
String tmpHour = df.format(cal.getTime());
return getPath(fs, tmpHour);
}
}
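// Read the thresholds from tool/mining.properties; when the file is missing, the defaults given in main() are used.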
public static Properties loadProperties() {
Properties p = new Properties();
FileInputStream fis = null;
try {
File file = new File("tool/mining.properties");
if (file.exists()) {
fis = new FileInputStream(file);
p.load(fis);
} else {
logger.error("properties file 'tool/mining.properties' not exists,can't load properties");
}
} catch (IOException e) {
logger.error("loading tool/mining.properties failed", e);
} finally {
if (fis != null) {
try {
fis.close();
} catch (IOException e) {
logger.error("closing properties file failed", e);
}
}
}
return p;
}
}