数据集展示
7369 | SMITH | CLERK | 7902 | 1980/12/17 | 800 | | 20 |
7499 | ALLEN | SALESMAN | 7698 | 1981/2/20 | 1600 | 300 | 30 |
7521 | WARD | SALESMAN | 7698 | 1981/2/22 | 1250 | 500 | 30 |
7566 | JONES | MANAGER | 7839 | 1981/4/2 | 2975 | | 20 |
7654 | MARTIN | SALESMAN | 7698 | 1981/9/28 | 1250 | 1400 | 30 |
7698 | BLAKE | MANAGER | 7839 | 1981/5/1 | 2850 | | 30 |
7782 | CLARK | MANAGER | 7839 | 1981/6/9 | 2450 | | 10 |
7788 | SCOTT | ANALYST | 7566 | 1987/4/19 | 3000 | | 20 |
7839 | KING | PRESIDENT | | 1981/11/17 | 5000 | | 10 |
7844 | TURNER | SALESMAN | 7698 | 1981/9/8 | 1500 | 0 | 30 |
7876 | ADAMS | CLERK | 7788 | 1987/5/23 | 1100 | | 20 |
7900 | JAMES | CLERK | 7698 | 1981/12/3 | 950 | | 30 |
7902 | FORD | ANALYST | 7566 | 1981/12/3 | 3000 | | 20 |
7934 | MILLER | CLERK | 7782 | 1982/1/23 | 1300 | | 10 |
建立三个 Java 类
SalaryTotalMain(作业驱动类)、SalaryTotalMapper(Map 阶段)、SalaryTotalReducer(Reduce 阶段)
对应代码如下:
SalaryTotalMain
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that configures and submits the salary MapReduce job.
 *
 * <p>Registers {@code SalaryTotalMapper} and {@code SalaryTotalReducer} and declares
 * {@code LongWritable} as the key and value type for both the map output and the
 * final job output.
 */
public class SalaryTotalMain {
    /**
     * Configures the job, runs it, and waits for it to finish.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: SalaryTotalMain <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SalaryTotalMain.class);

        job.setMapperClass(SalaryTotalMapper.class);
        job.setReducerClass(SalaryTotalReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion returns false when the job fails; surface that as a
        // non-zero exit status so shell scripts and schedulers can detect it.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}
SalaryTotalMapper
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
/**
 * Mapper that turns one comma-separated input line into a (key, value) pair of longs.
 *
 * <p>The key is taken from column index 7 and the value from column index 5 of the
 * record (presumably department number and salary; confirm against the input file).
 * Lines that do not have enough columns, or whose two columns are not integers, are
 * skipped and counted instead of failing the whole task.
 */
public class SalaryTotalMapper extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
    /** Zero-based index of the column emitted as the map output value. */
    private static final int VALUE_COLUMN = 5;
    /** Zero-based index of the column emitted as the map output key. */
    private static final int KEY_COLUMN = 7;

    /**
     * Parses one input line and emits (column 7, column 5) as longs.
     *
     * @param key1 input key supplied by the input format; not used
     * @param value1 one line of comma-separated text
     * @param context task context used to emit the pair and to count skipped lines
     * @throws IOException if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        String[] words = value1.toString().split(",");

        // Blank lines and truncated rows do not reach the key column.
        if (words.length <= KEY_COLUMN) {
            context.getCounter("SalaryTotal", "MALFORMED_RECORDS").increment(1);
            return;
        }

        long outKey;
        long outValue;
        try {
            // Parse as long to match the LongWritable output types; trim tolerates
            // stray spaces around the separators.
            outKey = Long.parseLong(words[KEY_COLUMN].trim());
            outValue = Long.parseLong(words[VALUE_COLUMN].trim());
        } catch (NumberFormatException e) {
            // Header rows or non-numeric cells: skip the line but keep it visible
            // in the job counters.
            context.getCounter("SalaryTotal", "MALFORMED_RECORDS").increment(1);
            return;
        }

        context.write(new LongWritable(outKey), new LongWritable(outValue));
    }
}
SalaryTotalReducer
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
/**
 * Reducer that emits, for each key, the largest value seen for that key.
 *
 * <p>Despite the "Total" in the class name, the values are not summed: only the
 * maximum is written.
 */
public class SalaryTotalReducer
        extends Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {
    /**
     * Writes (key, maximum value) for one group.
     *
     * @param k3 the group key
     * @param v3 all values emitted by the mappers for this key
     * @param context task context used to emit the result
     * @throws IOException if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(LongWritable k3, Iterable<LongWritable> v3, Context context)
            throws IOException, InterruptedException {
        // Start from the smallest long rather than 0 so that a group containing only
        // negative values still reports its true maximum.
        long max = Long.MIN_VALUE;
        boolean seen = false;
        for (LongWritable v : v3) {
            max = Math.max(max, v.get());
            seen = true;
        }

        // Never emit the sentinel: write only when the group had at least one value.
        if (seen) {
            context.write(k3, new LongWritable(max));
        }
    }
}
命令
hadoop jar 2.jar ch02.SalaryTotalMain /user/data/input/emp.csv /user/data/output/ch2
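命令格式:hadoop jar <jar包名> <主类全限定名> <输入路径> <输出路径>(上例中主类写作 ch02.SalaryTotalMain,要求三个类的源文件开头都声明 package ch02;若未声明包,则主类直接写 SalaryTotalMain)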
对应结果
学习链接
在Ubuntu上用mapreduce进行词频统计(伪分布式)_mapreduce怎么统计txt文件词频终端-CSDN博客
利用mapreduce统计部门的最高工资_使用mapreduce查询某个部门中薪资最高的员工姓名,如果输出结果的格式为“薪资 员-CSDN博客