1.统计单词个数
(1)统计样本:word.txt(hadoop fs -put word.txt /word)
number linux hadoop word count
linux number hadoop word count
linux hadoop eclipse word count
linux hadoop word java count
eclipse
java map reduce
(2)WordMap.java
package com.test.org;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
//Mapper: tokenizes each input line and emits (word, 1) for every token.
//Example input line: "number linux hadoop word count"
public class WordMap extends Mapper<Object, Text, Text, IntWritable>{
    //Reusable output objects — the standard Hadoop idiom: avoids allocating
    //a new Writable for every token, reducing GC pressure.
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();
    /**
     * Called once per input line; key is the line's byte offset (unused),
     * value is the line text.
     */
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        //Split the line on single spaces, e.g. {number,linux,hadoop,word,count}
        String[] tokens = value.toString().split(" ");
        for (String token : tokens) {
            word.set(token);
            //Emit (word, 1), e.g. (number,1) (linux,1) (hadoop,1) (word,1) (count,1)
            context.write(word, ONE);
        }
    }
}
(3)WordReduce.java
package com.test.org;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//Reducer: sums the 1s emitted for each word, e.g. <linux,{1,1,1,1}> -> <linux,4>
public class WordReduce extends Reducer<Text, IntWritable, Text,IntWritable>{
    //Reused output value — avoids a new IntWritable allocation per key.
    private final IntWritable result = new IntWritable();
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        int sum = 0; //running count for this word
        for (IntWritable count : values) {
            sum += count.get();
        }
        result.set(sum);
        context.write(key, result); //e.g. <linux,4>
    }
}
(4)WordCount.java
package com.test.org;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Driver: configures and submits the word-count job.
//Usage: WordCount <input path> <output path>
public class WordCount {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        //Check args for null BEFORE reading args.length; the original order
        //(length first) would throw NullPointerException on a null array.
        if(args == null || args.length != 2){
            System.out.println("input error");
            System.exit(0);
        }
        Configuration conf = new Configuration();
        //Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "WordCount");
        //Main class and the mapper/reducer implementations.
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordMap.class);
        job.setReducerClass(WordReduce.class);
        //Input and output paths from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //Output key/value types (map and reduce share them here).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        //Exit 0 on success, 1 on failure so shell scripts can detect errors.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
2.以“序号 单词”输出
package com.test.word;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Outputs each distinct word of the input prefixed with a sequence number,
//one "number word" pair per line.
//NOTE: the original had NumReduce and main accidentally nested inside NumMap
//(misplaced closing brace); they are now direct members of SingleNum.
public class SingleNum {
    //Mapper: emits (word, empty IntWritable). The value is never read — the
    //shuffle phase groups identical words, which deduplicates them.
    public static class NumMap extends Mapper<Object, Text, Text, IntWritable>{
        private static final IntWritable EMPTY = new IntWritable();
        private final Text word = new Text();
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                word.set(token);
                context.write(word, EMPTY);
            }
        }
    }
    //Reducer: receives each distinct word exactly once and writes <number, word>.
    //A correct global numbering requires a single reducer (the default).
    public static class NumReduce extends Reducer<Text, IntWritable, IntWritable, Text>{
        private int number = 1; //next sequence number to assign
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            context.write(new IntWritable(number), key);
            number++;
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        //null check must come before args.length to avoid NullPointerException
        if (args == null || args.length != 2) {
            System.out.println("error!");
            System.exit(0);
        }
        Configuration configuration = new Configuration();
        //Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(configuration, "SingleNum");
        job.setJarByClass(SingleNum.class);
        job.setMapperClass(NumMap.class);
        job.setReducerClass(NumReduce.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //Map output types differ from the final output types, so both must be set.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
3.计算平均分
(1)样本:grade.txt
小红 89
小明 78
小明 90
小红 79
小明 98
小明 84
灵儿 86
灵儿 78
灵儿 96
(2)AvgGrade.java
package com.test.word;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Computes each student's average score from input lines of "name score".
public class AvgGrade {
    //Mapper: parses one "name score" line and emits (name, score).
    public static class AvgMap extends Mapper<Object, Text, Text, IntWritable>{
        private final Text name = new Text();
        private final IntWritable grade = new IntWritable();
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            //The sample grade.txt is space-separated, but the original code
            //split on "\t" only, which would make lines[1] fail with
            //ArrayIndexOutOfBoundsException; split on any whitespace run
            //so both tab- and space-separated input are accepted.
            String[] fields = value.toString().trim().split("\\s+");
            if (fields.length < 2) {
                return; //skip blank or malformed lines instead of crashing
            }
            //fields[0] is the student name, fields[1] is one score
            grade.set(Integer.parseInt(fields[1]));
            name.set(fields[0]);
            context.write(name, grade);
        }
    }
    //Reducer: averages all scores seen for one student; output value is a double.
    public static class AvgReduce extends Reducer<Text, IntWritable, Text, DoubleWritable>{
        private final DoubleWritable average = new DoubleWritable();
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            double sum = 0; //total of all scores for this student
            int count = 0;  //number of scores seen
            for (IntWritable score : values) {
                sum += score.get();
                count++;
            }
            average.set(sum / count);
            context.write(key, average);
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        //null check must come before args.length to avoid NullPointerException
        if(args == null || args.length != 2){
            System.out.println("error");
            System.exit(0);
        }
        Configuration configuration = new Configuration();
        //Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(configuration, "AvgGrade");
        job.setJarByClass(AvgGrade.class);
        job.setMapperClass(AvgMap.class);
        job.setReducerClass(AvgReduce.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //Map output types differ from the final output types, so both must be set.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}