环境:Maven,IDEA,hadoop-3.0.0
准备工作可参照:https://blog.csdn.net/m0_63716485/article/details/127597324?spm=1001.2014.3001.5502
员工数据:
链接:https://pan.baidu.com/s/1RmmLH15E4SXJuon3oNQsCg?pwd=yyds
Mapper阶段:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class mapersalary extends Mapper<LongWritable,Text,IntWritable,IntWritable>{
    // Reused across map() calls to avoid a per-record allocation (standard Hadoop idiom).
    private final IntWritable deptNo = new IntWritable();
    private final IntWritable salary = new IntWritable();

    /**
     * Emits one (deptno, salary) pair per employee record.
     *
     * <p>Expected input line layout (comma-separated):
     * {@code empno,ename,job,mgr,hiredate,sal,comm,deptno}
     * e.g. {@code 7369,SMITH,CLERK,7902,1980/12/17,800,,20}
     * — field[5] is the salary, field[7] the department number.
     *
     * <p>Blank, truncated, or non-numeric records are skipped instead of
     * failing the whole job with an unchecked exception.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        if (fields.length < 8) {
            return; // blank line, header, or truncated record — skip it
        }
        try {
            deptNo.set(Integer.parseInt(fields[7].trim()));
            salary.set(Integer.parseInt(fields[5].trim()));
        } catch (NumberFormatException ignored) {
            return; // non-numeric sal/deptno — skip the bad record
        }
        context.write(deptNo, salary);
    }
}
Reduce阶段:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class reducesalary extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable> {
    // Output holder reused across reduce() calls.
    private final IntWritable total = new IntWritable();

    /**
     * Sums every salary value grouped under one department key and emits
     * a single (deptno, totalSalary) pair.
     */
    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable salary : values) {
            sum += salary.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}
程序主入口:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class mainz {
    /**
     * Driver for the per-department salary-summation job.
     *
     * @param args args[0] = input path, args[1] = output path
     *             (the output directory must not already exist)
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        if (args.length < 2) {
            System.err.println("Usage: mainz <input path> <output path>");
            System.exit(2);
        }
        // 创建一个job和任务入口; setJarByClass tells Hadoop which jar to ship to the cluster.
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(mainz.class);

        // 指定job的mapper和输出的类型
        job.setMapperClass(mapersalary.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 指定job的reduce和输出的类型
        job.setReducerClass(reducesalary.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Summation is associative and commutative, so the reducer doubles as a
        // combiner to cut shuffle traffic between map and reduce.
        job.setCombinerClass(reducesalary.class);

        // 指定job的输入和输出
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 执行任务 — propagate the job's success/failure as the process exit code
        // (the original discarded waitForCompletion's boolean, so failed jobs exited 0).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
导出jar包:
mvn clean package
将jar包上传并运行命令:
hadoop jar salary.jar first.mainz /input/salary.txt /output/wc2
（注意:这里的 first 是主类所在的包名;若你的代码没有 package first; 声明,主类名应直接写 mainz,否则会报 ClassNotFoundException。）
运行成功,结果展示: