- Code implementation
pom.xml configuration
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.3</version>
</dependency>
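Note: hadoop-client already depends on hadoop-common and hadoop-hdfs at the same version, so the first two dependencies are redundant in practice; they are kept here to make the pieces explicit.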
Mapper implementation:
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper: each map task tracks the largest value it has seen.
 * @author zhongyulin
 */
public class MyMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
    private long max = Long.MIN_VALUE;

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return; // skip blank lines so Long.parseLong does not throw
        }
        long number = Long.parseLong(line);
        if (number > max) {
            max = number;
        }
    }

    /**
     * Runs once after all map() calls for this task have finished:
     * emit only the task-local maximum, so each map task contributes
     * a single record to the shuffle.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        context.write(new LongWritable(max), NullWritable.get());
    }
}
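The cleanup() pattern above is a form of in-mapper aggregation: each map task emits exactly one record (its task-local maximum), so almost nothing crosses the shuffle. For contrast, here is a minimal sketch of the conventional variant, which emits every parsed value and relies on a combiner to prune them before the shuffle; EmitAllMapper is a hypothetical name, and because taking a maximum is associative, MyReducer itself can serve as the combiner (job.setCombinerClass(MyReducer.class) in the driver):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical alternative to MyMapper: emit every value and let the
// combiner collapse each map task's output down to its local maximum.
public class EmitAllMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (!line.isEmpty()) {
            context.write(new LongWritable(Long.parseLong(line)), NullWritable.get());
        }
    }
}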
Reducer class implementation:
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer: compares the per-task maxima emitted by the mappers.
 */
public class MyReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
    private long max = Long.MIN_VALUE;

    @Override
    protected void reduce(LongWritable key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        long temp = key.get();
        if (temp > max) {
            max = temp;
        }
    }

    /**
     * Runs once after all reduce() calls have finished: write the global maximum.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        context.write(new LongWritable(max), NullWritable.get());
    }
}
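Note that a global maximum requires every per-task maximum to pass through a single reduce task; the default reducer count is already 1, and the driver below also pins it explicitly with job.setNumReduceTasks(1).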
Job driver class implementation:
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Job entry point.
 * @author zhongyulin
 */
public class MyJobMain {
    // Input file path
    public static String INPUT_PATH = "hdfs://172.80.2.207:9000/test/testmax/test100w.txt";
    // Output directory path (must not exist before the job runs)
    public static String OUTPUT_PATH = "hdfs://172.80.2.207:9000/test/testmax/output";

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Compress map output (mapreduce.map.output.compress is the Hadoop 2.x
        // property name; the old mapred.compress.map.output is deprecated)
        conf.setBoolean("mapreduce.map.output.compress", true);
        // Create the job
        Job job = Job.getInstance(conf);
        job.setJarByClass(MyJobMain.class);
        // Set the Mapper and Reducer classes
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // A single reducer so all per-task maxima meet in one place
        job.setNumReduceTasks(1);
        // Set the map output and final output key/value types
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // Set the input and output paths
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        // Exit with a non-zero status if the job fails
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
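To try the job end to end, the input is assumed to be a plain text file with one integer per line (which is what the Mapper's Long.parseLong expects). Below is a minimal sketch for generating such a file locally; the file name and record count are assumptions, and the file still has to be copied into HDFS (for example with hdfs dfs -put test100w.txt /test/testmax/) before the job runs:

import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.Random;

// Hypothetical helper: writes one non-negative random long per line,
// matching the input format MyMapper expects.
public class GenerateTestInput {
    public static void main(String[] args) throws FileNotFoundException {
        Random random = new Random();
        try (PrintWriter out = new PrintWriter("test100w.txt")) {
            for (int i = 0; i < 1_000_000; i++) {
                out.println(Math.abs(random.nextLong() % 1_000_000_000L));
            }
        }
    }
}

After a successful run, the result is a single line in the output directory, readable with hdfs dfs -cat /test/testmax/output/part-r-00000.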