/**
 * Driver that configures and submits the "sort" MapReduce job.
 *
 * <p>The job reads text input from {@link #INPUT_PATH}, runs {@code MyMapper} and
 * {@code MyReducer}, and writes text output to {@link #OUTPUT_PATH}. If the output
 * directory is left over from a previous run it is deleted before submission, so the
 * job can be re-run without manual cleanup.
 */
public class Sort {
    /** Directory the job reads its input from. */
    private static final String INPUT_PATH = "hdfs://localhost:9000/user/hduser/sort/input";

    /** Directory the job writes its results to; removed first if it already exists. */
    private static final String OUTPUT_PATH = "hdfs://localhost:9000/user/hduser/sort/output";

    /**
     * Configures the job, clears any stale output directory, runs the job and exits
     * the JVM with status 0 on success or 1 on failure.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the filesystem cannot be reached or the job cannot be submitted
     * @throws ClassNotFoundException if a class required by the job cannot be found
     * @throws InterruptedException if the thread is interrupted while waiting for the job
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(Sort.class);

        job.setOutputKeyClass(OutputKey.class);
        job.setOutputValueClass(OutputValue.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Optional: run four reduce tasks in parallel with a custom partitioner.
        // job.setNumReduceTasks(4);
        // job.setPartitionerClass(MyPartitioner.class);

        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));

        Path outputDir = new Path(OUTPUT_PATH);
        // Resolve the filesystem from the path's own URI rather than from fs.defaultFS:
        // FileSystem.get(conf) may hand back a different filesystem (e.g. the local one),
        // which rejects an hdfs:// path with "Wrong FS".
        FileSystem fs = outputDir.getFileSystem(conf);
        if (fs.exists(outputDir)) {
            // Output directory already exists: delete it recursively so the job can run again.
            fs.delete(outputDir, true);
        }
        FileOutputFormat.setOutputPath(job, outputDir);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
以上贴出部分代码为例。以前每次运行完作业后,如果想再次运行,都需要先手动删除 output 文件夹;所以这次直接在程序里对 HDFS 上的文件夹进行操作:如果运行时 output 文件夹已经存在,程序会先自动将其删除。
相关代码如下:
// Register the input directory (path[0]) with the job.
FileInputFormat.setInputPaths(job, new Path(path[0]));
// NOTE(review): FileSystem.get(conf) returns the filesystem selected by the configuration
// (fs.defaultFS), which is not necessarily the one path[1] points at — confirm they match,
// or obtain it from the path itself via p.getFileSystem(conf).
FileSystem fs = FileSystem.get(conf);
// path[1] is the job's output directory.
Path p = new Path(path[1]);
if(fs.exists(p)) {
// Output directory already exists: delete it recursively so the job can be run again.
fs.delete(p, true);
}