When writing a MapReduce program, the usual workflow for an example is as follows.
First, create the input directory on HDFS and upload the source files, as shown below.
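A minimal sketch of that upload step, assuming the input directory /park/aa used by the driver below and a hypothetical local file words.txt:

hadoop fs -mkdir -p /park/aa
hadoop fs -put words.txt /park/aa/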
The code itself is written in three steps:
1. Write the Mapper class
   extend Mapper
   specify the generic types
   override the map method
2. Write the Reducer class
   extend Reducer
   specify the generic types
   override the reduce method
3. Write the Driver class (MyDriver)
   set the main class
   set the map and reduce classes
   set the map and reduce output types
   set the input path of the source files
   set the output path
The Mapper class:
package com.gufy.MR;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Input key/value: the byte offset of the line, the line contents.
// Output key/value: a word, the count 1.
public class Map extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line into tokens and emit (word, 1) for each one.
        StringTokenizer stringTokenizer = new StringTokenizer(value.toString());
        while (stringTokenizer.hasMoreTokens()) {
            context.write(new Text(stringTokenizer.nextToken()), new IntWritable(1));
        }
        // Equivalent alternative using String.split:
        // String vString = value.toString();
        // String[] valueArray = vString.split(" ");
        // for (String word : valueArray) {
        //     context.write(new Text(word), new IntWritable(1));
        // }
    }
}
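For example, given an input line "hello world hello", this mapper emits (hello, 1), (world, 1), (hello, 1).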
The Reducer class:
package com.gufy.MR;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Input key/value: a word, the list of counts emitted for it.
// Output key/value: the word, its total count.
public class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum all the counts for this word.
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
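After the shuffle phase groups the mapper output by key, the reducer for the line above receives (hello, [1, 1]) and (world, [1]) and emits (hello, 2) and (world, 1).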
The Driver class:
package com.gufy.MR;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MRApp {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        System.out.println("Job starting");
        Job job = Job.getInstance(configuration, "my app"); // create the job
        job.setJarByClass(MRApp.class);     // set the jar search path
        job.setMapperClass(Map.class);      // set the mapper class
        job.setCombinerClass(Reduce.class); // set the combiner class; the reducer works here because summing is associative and commutative
        job.setReducerClass(Reduce.class);  // set the reducer class
        job.setOutputKeyClass(Text.class);  // set the output key type
        job.setOutputValueClass(IntWritable.class); // set the output value type
        FileInputFormat.addInputPath(job, new Path("hdfs://gufy:9000/park/aa/")); // set the input path
        // Set the output path. It must not exist yet: Hadoop creates the
        // directory itself, so only specify the desired name.
        FileOutputFormat.setOutputPath(job, new Path("hdfs://gufy:9000/park/result/aa"));
        System.exit(job.waitForCompletion(true) ? 0 : 1); // wait for the job to finish
    }
}
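The paths above are hardcoded, so the jar only works against that one cluster and directory. A common variant, sketched here as an assumption rather than part of the original example, reads the paths from the command line instead:

FileInputFormat.addInputPath(job, new Path(args[0]));   // input path from the first argument
FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path from the second argument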
Finally, export the project as a jar and run it on the Hadoop cluster to perform the computation:

hadoop jar xxx.jar
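Once the job finishes, the output can be checked with hadoop fs -cat. Assuming the single default reducer, the result lands in the standard part-r-00000 file under the output directory:

hadoop fs -cat /park/result/aa/part-r-00000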