Reposted from: https://www.cnblogs.com/liuwei6/p/6708116.html
A simple WordCount MapReduce example I wrote myself.
It consists of three classes.
The custom Mapper class
It extends org.apache.hadoop.mapreduce.Mapper and must supply four generic type parameters, representing the data types of key-in, value-in, key-out, and value-out. The key-in is the byte offset of the current line within the file, so it is a long. All four types come from Hadoop's serialization framework:
Long -> LongWritable
String -> Text
Int -> IntWritable
Null -> NullWritable
The custom tokenization rule is defined inside the map method:
package com.zjxt.demo.map;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Author: heiheihaxi
 * @Date: 2019/9/23 17:29
 */
public class MyWordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The input value is one line of text; the words on it are separated by "-".
        String[] split = value.toString().split("-");
        // Emit (word, 1) for every token on the line.
        for (String val : split) {
            context.write(new Text(val), new IntWritable(1));
        }
    }
}
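To see what the mapper emits, a quick unit test helps. This is a minimal sketch using the MRUnit library (an assumption on my part: MRUnit is not part of the original post and would have to be added as a test dependency); given the hyphen-separated line hello-world-hello it expects three (word, 1) pairs:

import com.zjxt.demo.map.MyWordCountMapper;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class MyWordCountMapperTest {

    @Test
    public void emitsOnePerToken() throws Exception {
        // Hypothetical test, not from the original post: feeds one line to the
        // mapper and asserts the exact (key, value) pairs it writes out.
        MapDriver.newMapDriver(new MyWordCountMapper())
                .withInput(new LongWritable(0), new Text("hello-world-hello"))
                .withOutput(new Text("hello"), new IntWritable(1))
                .withOutput(new Text("world"), new IntWritable(1))
                .withOutput(new Text("hello"), new IntWritable(1))
                .runTest();
    }
}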
The Reducer class
package com.zjxt.demo.reduce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Author: heiheihaxi
 * @Date: 2019/9/23 17:39
 */
public class MyWordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // The framework has already grouped the map output by key, so values
        // holds all the 1s emitted for this word; summing them gives the count.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
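The same MRUnit approach works for the reducer (again an assumption, not in the original post). The shuffle phase hands reduce a key together with all of its 1s, here ("hello", [1, 1]), and the test expects the summed pair ("hello", 2):

import com.zjxt.demo.reduce.MyWordCountReduce;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;

import java.util.Arrays;

public class MyWordCountReduceTest {

    @Test
    public void sumsTheOnes() throws Exception {
        // Hypothetical test: one key with two 1s should reduce to a count of 2.
        ReduceDriver.newReduceDriver(new MyWordCountReduce())
                .withInput(new Text("hello"), Arrays.asList(new IntWritable(1), new IntWritable(1)))
                .withOutput(new Text("hello"), new IntWritable(2))
                .runTest();
    }
}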
The Job (driver) class
package com.zjxt.demo.job;

import com.zjxt.demo.map.MyWordCountMapper;
import com.zjxt.demo.reduce.MyWordCountReduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @Author: heiheihaxi
 * @Date: 2019/9/23 17:42
 */
public class MyWordCountJob {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            // Tells Hadoop which jar to ship to the cluster.
            job.setJarByClass(MyWordCountJob.class);
            job.setMapperClass(MyWordCountMapper.class);
            job.setReducerClass(MyWordCountReduce.class);
            // Map output types and final (reduce) output types are set separately.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Note: the output directory must not already exist, or the job fails.
            Path input = new Path("hdfs://namenodecluster/tmp/test/input/test.txt");
            Path output = new Path("hdfs://namenodecluster/tmp/test/output");
            FileInputFormat.setInputPaths(job, input);
            FileOutputFormat.setOutputPath(job, output);
            // Submit the job and block until it finishes.
            boolean b = job.waitForCompletion(true);
            System.out.println(b ? "success" : "failed");
            System.exit(b ? 0 : 1);
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
Finally, I could only run this by packaging it into a jar and submitting it from an HDFS client machine (e.g. with hadoop jar); it would not run locally from IDEA. A local run did not report any error, but no result file was ever produced.
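One likely cause (my guess, not verified in the original post): when launched from the IDE, the job runs with Hadoop's default local configuration, so it may resolve paths against the local filesystem instead of the hdfs://namenodecluster paths above. A minimal sketch of one common workaround, assuming namenodecluster is the HDFS nameservice used in those paths, is to point the Configuration at the cluster explicitly before creating the job:

Configuration conf = new Configuration();
// Assumption: namenodecluster is the nameservice from the input/output paths above.
// Setting fs.defaultFS lets a run launched from the IDE resolve those paths on HDFS.
conf.set("fs.defaultFS", "hdfs://namenodecluster");
// Alternatively, copy core-site.xml and hdfs-site.xml from the cluster onto the
// classpath (e.g. src/main/resources) so these defaults are picked up automatically.
Job job = Job.getInstance(conf);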