Mapper阶段
import java.io.IOException;
/**
 * Map stage of the word count: splits each input line on single spaces and
 * emits {@code (word, 1)} for every non-empty word.
 */
public class NLineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Output key/value holders, reused for every record written by this mapper.
    Text k = new Text();
    IntWritable v = new IntWritable(1);

    /**
     * Emits one {@code (word, 1)} pair per word found in the given line.
     *
     * @param key     input key supplied by the input format (not used here)
     * @param value   the text of one input line
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Read the line as a Java string.
        String line = value.toString();

        // Split on single spaces, as before.
        for (String word : line.split(" ")) {
            // split(" ") returns "" for a blank line and for leading or
            // repeated spaces; those are not words and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            k.set(word);
            context.write(k, v);
        }
    }
}
Reducer阶段
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reduce stage of the word count: adds up all partial counts received for a
 * key and writes a single {@code (key, total)} pair.
 */
public class NlineReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Output value holder, overwritten on every reduce() call.
    IntWritable v = new IntWritable();

    /**
     * Sums the values grouped under {@code key} and emits the total.
     *
     * @param key     the key whose values are being combined
     * @param values  the partial counts associated with {@code key}
     * @param context sink for the emitted {@code (key, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        v.set(sumOf(values));
        context.write(key, v);
    }

    /** Returns the int sum of every element in {@code counts}. */
    private static int sumOf(Iterable<IntWritable> counts) {
        int total = 0;
        for (IntWritable partial : counts) {
            total = total + partial.get();
        }
        return total;
    }
}
Driver调度
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.net.URI;
/**
 * Driver for the word-count job: clears the output directory, configures the
 * job to read its input through {@link NLineInputFormat}, submits it, and
 * exits with status 0 on success or 1 on failure.
 */
public class NilneDriver {
    /** HDFS endpoint used for both the default file system and the paths below. */
    private static final String HDFS_URI = "hdfs://hadoop01:9000";
    /** User name under which HDFS is accessed. */
    private static final String HDFS_USER = "root";
    /** Input file of the job. */
    private static final String INPUT_PATH = HDFS_URI + "/input/words.txt";
    /** Output directory of the job; removed before each run if it exists. */
    private static final String OUT_PATH = HDFS_URI + "/out";
    /** Value passed to {@code NLineInputFormat.setNumLinesPerSplit}. */
    private static final int LINES_PER_SPLIT = 2;

    /**
     * Configures and runs the job, then terminates the JVM with the job status.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the file system cannot be reached or the job
     *                   cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Operate on HDFS as the root user.
        System.setProperty("HADOOP_USER_NAME", HDFS_USER);

        // Point the configuration at the target cluster.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);

        Path inputPath = new Path(INPUT_PATH);
        Path outputPath = new Path(OUT_PATH);

        boolean result;
        // The file system handle is held until the job has finished and is
        // then closed, instead of being left open as before.
        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), configuration, HDFS_USER)) {
            // Remove a pre-existing output directory (recursively).
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }

            // Build the job.
            Job job = Job.getInstance(configuration);

            // Input format and the number of lines per split.
            job.setInputFormatClass(NLineInputFormat.class);
            NLineInputFormat.setNumLinesPerSplit(job, LINES_PER_SPLIT);

            // Class used to locate the job jar.
            job.setJarByClass(NilneDriver.class);

            // Mapper and reducer implementations.
            job.setMapperClass(NLineMapper.class);
            job.setReducerClass(NlineReduce.class);

            // Key/value types emitted by the map stage.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // Key/value types emitted by the reduce stage.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // Input and output locations.
            NLineInputFormat.setInputPaths(job, inputPath);
            FileOutputFormat.setOutputPath(job, outputPath);

            // Submit the job and wait for it to finish.
            result = job.waitForCompletion(true);
        }
        System.exit(result ? 0 : 1);
    }
}