Hadoop -- MapReduce Examples
Case 1: WordCount
WordCount reads a text file, splits each line into words, and counts how many times each word occurs; it is the canonical first MapReduce program.
Bean class (a plain POJO modeling one user's movie rating)
public class UserRateBean {
    // One user's rating of one movie
    private String movie;
    private Integer rate;
    private String timeStamp;
    private String uid;

    public String getMovie() {
        return movie;
    }

    public void setMovie(String movie) {
        this.movie = movie;
    }

    public Integer getRate() {
        return rate;
    }

    public void setRate(Integer rate) {
        this.rate = rate;
    }

    public String getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(String timeStamp) {
        this.timeStamp = timeStamp;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    @Override
    public String toString() {
        return "UserRateBean{" +
                "movie='" + movie + '\'' +
                ", rate=" + rate +
                ", timeStamp='" + timeStamp + '\'' +
                ", uid='" + uid + '\'' +
                '}';
    }
}
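Note that UserRateBean is a plain Java bean. If it were emitted as a map or reduce key/value, Hadoop would additionally require it to implement the Writable interface (WritableComparable for keys). A minimal sketch of what that would add, assuming the same four fields (the class name UserRateWritable is hypothetical):

package wordcount;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Sketch only: the serialization hooks Hadoop would need if this bean
// were used as a map/reduce value. Field order must match in both methods.
public class UserRateWritable implements Writable {
    private String movie;
    private int rate;
    private String timeStamp;
    private String uid;

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(movie);
        out.writeInt(rate);
        out.writeUTF(timeStamp);
        out.writeUTF(uid);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        movie = in.readUTF();
        rate = in.readInt();
        timeStamp = in.readUTF();
        uid = in.readUTF();
    }
}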
Mapper class
package wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // The key is the byte offset of the line; the value is the line itself.
        String line = value.toString();
        String[] words = line.split("\\s+");
        for (String w : words) {
            context.write(new Text(w), new IntWritable(1));
        }
    }
}
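For an input line such as "hello world hello", this mapper emits (hello,1), (world,1), (hello,1); the framework then groups the pairs by key before they reach the reducer. Allocating a new Text and IntWritable per word works, but a common optimization is to reuse the Writable objects across calls. A minimal sketch of the same logic with reused objects (the class name WordCountMapReusing is hypothetical):

package wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

// Sketch only: same output as WordCountMap, but the key/value objects
// are allocated once and reused, avoiding per-record garbage on large inputs.
public class WordCountMapReusing extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final Text outKey = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String w : value.toString().split("\\s+")) {
            outKey.set(w);
            context.write(outKey, ONE); // the framework serializes immediately, so reuse is safe
        }
    }
}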
Reducer class
package wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts for this word. Adding v.get() (rather than just
        // incrementing once per value) stays correct even when a combiner
        // has already pre-aggregated some counts.
        int count = 0;
        for (IntWritable v : values) {
            count += v.get();
        }
        context.write(key, new IntWritable(count));
    }
}
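For the key "hello" with the grouped values [1, 1], this reducer emits (hello, 2). Because integer addition is associative and commutative, the same class can also be registered as a combiner in the driver (job.setCombinerClass(WordCountReduce.class)) to pre-aggregate counts on the map side and cut shuffle traffic.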
Driver class
package wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Uncomment when the job should run on the cluster rather than locally
        // conf.set("yarn.resourcemanager.hostname", "node01");
        // conf.set("fs.defaultFS", "hdfs://node01:9000/");
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDriver.class);
        // Set the mapper and reducer this job uses
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        // Set the map and reduce output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Specify the component that reads the source data; the source is a
        // plain text file (in HDFS or locally)
        job.setInputFormatClass(TextInputFormat.class);
        // Specify the component that writes the output; use TextOutputFormat
        // to write plain text to HDFS
        job.setOutputFormatClass(TextOutputFormat.class);
        // Set the input and output paths (the output directory must not exist yet)
        Path path = new Path("E:\\wc\\input\\worldcount.txt");
        FileInputFormat.setInputPaths(job, path);
        FileOutputFormat.setOutputPath(job, new Path("E:\\wc\\output"));
        // Core call: submits the jar to YARN and blocks; the client stays
        // alive, receives and prints MapReduce progress (that is what the
        // `true` argument enables), and waits for the result. result is true
        // if the job succeeded, false if it failed. waitForCompletion submits
        // the job itself, so a separate job.submit() call is not needed.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
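Hard-coded paths are fine for a demo, but drivers are commonly written against Hadoop's Tool/ToolRunner so that paths and -D options come from the command line. A minimal sketch, assuming the mapper and reducer above (the class name WordCountTool is hypothetical):

package wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Sketch only: a ToolRunner-based driver. ToolRunner strips generic
// options (-D, -fs, ...) before run() sees the remaining args.
public class WordCountTool extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "wordcount");
        job.setJarByClass(WordCountTool.class);
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new WordCountTool(), args));
    }
}

Packaged into a jar, such a driver would then be launched with, e.g., hadoop jar wordcount.jar wordcount.WordCountTool /input /output (the jar name and paths here are placeholders).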