1.具体代码
package mr.study;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.IOException;
import java.util.StringTokenizer;
/**
 * Word-count MapReduce job that splits its results across several named outputs.
 *
 * <p>Each whitespace-separated token of the input is counted, and the
 * {@code token<TAB>count} record is routed to one of four named outputs
 * according to the token's first character: {@code az} (ASCII lower-case
 * letter), {@code AZ} (ASCII upper-case letter), {@code 09} (ASCII digit) or
 * {@code other} (anything else).
 *
 * <p>Usage: {@code MutiplyFile <input path> <output path>}. An existing output
 * path is deleted recursively before the job is submitted.
 *
 * Created by Administrator on 2019/5/9.
 */
public class MutiplyFile {

    /** Named output for tokens starting with an ASCII lower-case letter. */
    private static final String OUT_LOWER = "az";
    /** Named output for tokens starting with an ASCII upper-case letter. */
    private static final String OUT_UPPER = "AZ";
    /** Named output for tokens starting with an ASCII digit. */
    private static final String OUT_DIGIT = "09";
    /** Named output for every other token. */
    private static final String OUT_OTHER = "other";

    /**
     * Picks the named output for a token based on its first character.
     *
     * @param word the token being counted
     * @return one of the four named-output identifiers; an empty token maps to
     *         {@code other} instead of raising an index error
     */
    private static String namedOutputFor(String word) {
        if (word.isEmpty()) {
            return OUT_OTHER;
        }
        // Plain range checks keep the ASCII-only semantics of the classes
        // [a-z], [A-Z] and [0-9] without compiling a regex for every key.
        char first = word.charAt(0);
        if (first >= 'a' && first <= 'z') {
            return OUT_LOWER;
        }
        if (first >= 'A' && first <= 'Z') {
            return OUT_UPPER;
        }
        if (first >= '0' && first <= '9') {
            return OUT_DIGIT;
        }
        return OUT_OTHER;
    }

    /** Emits {@code (token, "1")} for every whitespace-separated token of a line. */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        /** Constant count value shared by all emitted records. */
        private static final Text ONE = new Text("1");
        /** Reused output key; avoids allocating a new Text per token. */
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer st = new StringTokenizer(value.toString());
            while (st.hasMoreTokens()) {
                word.set(st.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /** Sums the counts of each token and writes the total to a named output. */
    public static class MyReduce extends Reducer<Text, Text, Text, Text> {
        MultipleOutputs<Text, Text> mos = null;
        /** Reused output value holding the decimal total for the current key. */
        private final Text total = new Text();

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Obtain the writer that fans records out to the named outputs.
            mos = new MultipleOutputs<Text, Text>(context);
        }

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Accumulate in a long so very frequent tokens do not overflow an int.
            long counter = 0;
            for (Text t : values) {
                counter += Long.parseLong(t.toString());
            }
            total.set(Long.toString(counter));
            mos.write(namedOutputFor(key.toString()), key, total);
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Closing flushes the named outputs.
            mos.close();
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException if the filesystem or job submission fails
     * @throws ClassNotFoundException propagated from job submission
     * @throws InterruptedException if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: MutiplyFile <input path> <output path>");
            System.exit(2);
            return;
        }
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "MutiplyFile job");
        job.setJarByClass(MutiplyFile.class);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, inputPath);

        MultipleOutputs.addNamedOutput(job, OUT_LOWER, TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.addNamedOutput(job, OUT_UPPER, TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.addNamedOutput(job, OUT_DIGIT, TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.addNamedOutput(job, OUT_OTHER, TextOutputFormat.class, Text.class, Text.class);

        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Delete the output path if it already exists. Resolve the filesystem
        // from the path itself so a non-default scheme in args[1] is honoured.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        // Submit the job and wait for it to finish.
        int isok = job.waitForCompletion(true) ? 0 : 1;
        // Exit with the job status.
        System.exit(isok);
    }
}
2.处理的文件
test.txt
文件内容:
hello world
hi java
Hi xiaoze
Hello MASTER
163.com
1603
@qq.com
123
321
(888)