/**
 * Driver for a MapReduce word-count job.
 *
 * <p>Usage: {@code WordCountYARNApp <input path> <output path>}. Every input line is split on
 * commas and the number of occurrences of each non-empty token is written to the output path.
 */
public class WordCountYARNApp {

    /**
     * Configures the job, submits it, waits for completion and terminates the JVM.
     *
     * <p>Exit status: 0 when the job succeeds, 1 when it fails, 2 when the required
     * arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if cleaning the output path or running the job fails
     */
    public static void main(String[] args) throws Exception {
        // Report a usage error instead of failing with ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            System.err.println("Usage: WordCountYARNApp <input path> <output path>");
            System.exit(2);
            return;
        }
        String input = args[0];
        String output = args[1];

        // 1. Create the job.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // Recursively remove a pre-existing output directory (custom FileUtils helper),
        // so the job can be re-run against the same output path.
        FileUtils.daleteTarget(output, configuration);

        // 2. Tell the framework which jar contains the job classes.
        job.setJarByClass(WordCountYARNApp.class);

        // 3. Register the custom Mapper and Reducer.
        job.setMapperClass(MyMapper.class);
        // Summing is associative and commutative and the reducer's input and output types
        // match, so it can also pre-aggregate counts on the map side.
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);

        // 4. Output types of the map phase.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 5. Output types of the reduce phase.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 6. Input and output paths.
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // 7. Submit the job and block until it finishes.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }

    /**
     * Emits {@code (token, 1)} for every non-empty comma-separated token of an input line.
     *
     * <ul>
     *   <li>KEYIN: offset of the line within the input (normally unused)</li>
     *   <li>VALUEIN: the text of the line</li>
     *   <li>KEYOUT: a single word</li>
     *   <li>VALUEOUT: the constant count 1</li>
     * </ul>
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        /** Constant count emitted for every token; never modified. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key, so no new Text is allocated per token. */
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split(",")) {
                // Blank lines and consecutive commas produce empty tokens; they are not words.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Sums all counts received for a word and emits {@code (word, total)}.
     *
     * <p>NOTE(review): the total is an {@code int}, so it wraps silently if a single word
     * occurs more than {@link Integer#MAX_VALUE} times.
     */
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /** Reused output value, so no new IntWritable is allocated per key. */
        private final IntWritable total = new IntWritable();

        @Override
        protected void reduce(Text word, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int cnt = 0;
            for (IntWritable value : values) {
                cnt += value.get();
            }
            total.set(cnt);
            context.write(word, total);
        }
    }
}
// Custom utility class for deleting the output directory:
/**
 * Helper for cleaning up a job's output location before the job is submitted.
 */
public class FileUtils {

    /**
     * Recursively deletes {@code output} if it exists; does nothing when it does not exist.
     *
     * <p>The misspelled method name is kept unchanged because existing callers reference it.
     *
     * @param output path of the directory (or file) to remove
     * @param configuration Hadoop configuration used to resolve the file system
     * @throws IOException if the file system cannot be accessed, or if the path still exists
     *     after the delete attempt
     */
    public static void daleteTarget(String output, Configuration configuration) throws IOException {
        Path outPath = new Path(output);
        // Resolve the file system from the path itself rather than from the configured default,
        // so a fully qualified path on another scheme does not fail with "Wrong FS".
        // The instance is deliberately not closed: Hadoop caches and shares FileSystem objects
        // by default, and closing it here could break later users of the same instance.
        FileSystem fileSystem = outPath.getFileSystem(configuration);
        if (!fileSystem.exists(outPath)) {
            return;
        }
        // Recursive delete. A false result is only an error if the path is still there
        // (it may have been removed concurrently by someone else).
        if (!fileSystem.delete(outPath, true) && fileSystem.exists(outPath)) {
            throw new IOException("Failed to delete output path: " + outPath);
        }
    }
}