hadoop做的一个简单grep程序,可从文档中提取包含某些字符串的行
/*
* 一个简单grep程序,可从文档中提取包含某些字符串的行
*/
public class grep extends Configured implements Tool{
public static class grepMap extends Mapper{ public void map(LongWritable line,Text value,Context context) throws IOException, InterruptedException{ //通过Configuration获取参数 String str = context.getConfiguration().get("grep"); if(value.toString().contains(str)){ context.write(value, NullWritable.get()); } } } @Override public int run(String[] args) throws Exception { if(args.length!=3){ System.out.println("ERROR"); System.exit(1); } Configuration configuration = getConf(); //传递参数 configuration.set("grep", args[2]); Job job = new Job(configuration,"grep"); job.setJarByClass(grep.class); job.setMapperClass(grepMap.class); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); Path in = new Path(args[0]); Path out = new Path(args[1]); FileSystem fileSystem = out.getFileSystem(configuration); if(fileSystem.exists(out)) fileSystem.delete(out, true); FileInputFormat.addInputPath(job, in); FileOutputFormat.setOutputPath(job, out); System.exit(job.waitForCompletion(true)?0:1); return 0; }