package com.wc.hadoop;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class wcount {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(wcount.class);
        // configure the Mapper class and its output key/value types
        job.setMapperClass(myMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // configure the Reducer class and the job's final output key/value types
        job.setReducerClass(myReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // set the input and output paths from the command-line arguments
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // the reducer could also serve as a combiner, since summing counts is associative:
        // job.setCombinerClass(myReducer.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1); // exit with the job's success status
    }
    public static class myMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String strValues = value.toString();      // the current line of input
            String[] words = strValues.split("\\s+"); // split the line on whitespace
            for (String word : words) {               // emit (word, 1) for every word
                context.write(new Text(word), new LongWritable(1));
            }
        }
    }
    // Reducer generics: key-in, value-in, key-out, value-out
    public static class myReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long count = 0;
            for (LongWritable value : values) {
                count += value.get(); // sum all the 1s emitted for this word
            }
            context.write(key, new LongWritable(count));
        }
    }
}
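// Usage sketch (assumptions: the class is packaged as wcount.jar and the input path already
// exists on HDFS; the jar name and paths below are placeholders, not from the source):
//   hadoop jar wcount.jar com.wc.hadoop.wcount /input/words.txt /output/wc
// FileOutputFormat requires that the output directory not exist beforehand; the job fails if it does.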