1. 这是 dataguru 第五周视频中的代码:
import java.io.IOException;
import java.io.InterruptedIOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.lang.ArrayIndexOutOfBoundsException;
public class Test_2 extends Configured implements Tool {
enum Counter {
LINESKIP, // error line
}
public static class Map extends
Mapper<LongWritable, Text, NullWritable, Text> {
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
try {
String[] lineSplit = line.split(" ");
String month = lineSplit[0];
String time = lineSplit[1];
String mac = lineSplit[6];
Text out = new Text(month + "" + time + "" + mac);
context.write(NullWritable.get(), out);
} catch (ArrayIndexOutOfBoundsException e) {
context.getCounter(Counter.LINESKIP).increment(1);
return;
}
}
}// end map
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = new Job(conf, "Test_2");
job.setJarByClass(Test_2.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(Map.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
job.waitForCompletion(true);
return job.isSuccessful() ? 0 : 1;
}// end run
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new Test_2(), args);
System.exit(res);
}// end main
}// end Test_2 class
注意事项:
如果运行时出现安全模式错误 "Name node is in safe mode",请先执行 `hadoop dfsadmin -safemode leave` 退出安全模式,然后重新运行作业。