- JobWordCount
```java
package com.wordcount;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class JobWordCount {
    public static void main(String[] args) throws IOException {
        // 1. Configuration: how to reach the cluster
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://node01:8020"); // the active NameNode
        configuration.set("yarn.resourcemanager.hostname", "node03"); // ResourceManager host (hostname only; 8088 is the web UI port, not part of this value)

        // 2. Job setup
        Job job = Job.getInstance(configuration); // load the configuration into the job
        job.setJarByClass(JobWordCount.class); // lets Hadoop locate the jar containing this class
        job.setJobName("wc"); // a human-readable name for the job
        job.setMapperClass(Mapwc.class); // map phase: split lines into words
        job.setReducerClass(Reducewc.class); // reduce phase: sum the counts per word
        job.setMapOutputKeyClass(Text.class); // map output key type
        job.setMapOutputValueClass(IntWritable.class); // map output value type

        // 3. Input: the file on HDFS to process
        FileInputFormat.addInputPath(job, new Path("/WC/input/word.txt"));

        // 4. Output: where the results go; the directory must not already exist
        Path path = new Path("/WC/output/");
        FileSystem fs = FileSystem.get(configuration);
        if (fs.exists(path)) {
            fs.delete(path, true); // delete any leftover output from a previous run
        }
        FileOutputFormat.setOutputPath(job, path);

        // 5. Submit the job and wait for it to finish
        try {
            boolean f = job.waitForCompletion(true); // true: print progress to the console
            if (f) {
                System.out.println("job success ~");
            } else {
                System.out.println("------error-------");
            }
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
```
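Two settings this driver leaves out are worth noting: a combiner and the final (reducer) output types. Because summing is associative and commutative, Reducewc can double as a combiner to pre-aggregate counts on the map side. A minimal sketch of the optional lines (not in the original driver):

```java
// Optional driver additions (assumptions, not in the original code):
job.setCombinerClass(Reducewc.class);       // pre-aggregate <word, 1> pairs map-side to shrink the shuffle
job.setOutputKeyClass(Text.class);          // final (reducer) output key type
job.setOutputValueClass(IntWritable.class); // final (reducer) output value type
```

Packaged into a jar, the job would typically be launched with something like `hadoop jar wc.jar com.wordcount.JobWordCount`, and the counts end up under `/WC/output/` in files named like `part-r-00000`.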
- Mapwc
```java
package com.wordcount;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
public class Mapwc extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused across map() calls; context.write serializes immediately, so reuse is safe
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // key is the byte offset of the line in the input split; value is the line itself
        String line = value.toString();
        StringTokenizer words = new StringTokenizer(line); // splits on whitespace
        while (words.hasMoreTokens()) {
            word.set(words.nextToken());
            context.write(word, ONE); // emit <word, 1> for every token
        }
        // Alternative: split on single spaces with Hadoop's StringUtils
        // String[] words = StringUtils.split(line, ' ');
        // for (String w : words) {
        //     context.write(new Text(w), new IntWritable(1));
        // }
    }
}
```
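To sanity-check the mapper without a cluster, a driver-style test can feed it one line and assert the emitted pairs. A minimal sketch, assuming the (now retired) Apache MRUnit library is on the classpath; the test class name and input line are illustrative, not part of the original:

```java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;

public class MapwcTest { // hypothetical test class, not in the original project
    public static void main(String[] args) throws Exception {
        // One input line should yield one <word, 1> pair per token, in order
        MapDriver.newMapDriver(new Mapwc())
                .withInput(new LongWritable(0), new Text("hello world hello"))
                .withOutput(new Text("hello"), new IntWritable(1))
                .withOutput(new Text("world"), new IntWritable(1))
                .withOutput(new Text("hello"), new IntWritable(1))
                .runTest();
    }
}
```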
- Reducewc
```java
package com.wordcount;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
// Collapses the individual <word, 1> pairs into <word, count>
public class Reducewc extends Reducer<Text, IntWritable, Text, IntWritable> {

    // For each key (a word), values iterates over that word's 1s: {1, 1, 1, 1, 1}
    @Override
    protected void reduce(Text words, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable s : values) {
            sum += s.get(); // each value contributes a 1
        }
        context.write(words, new IntWritable(sum)); // emit the aggregated count
    }
}
```
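The reducer can be exercised the same way: hand it one key with a list of 1s and expect the sum. Again a sketch assuming MRUnit; the class name and values are illustrative:

```java
import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;

public class ReducewcTest { // hypothetical test class, not in the original project
    public static void main(String[] args) throws Exception {
        // <"hello", [1, 1, 1]> should reduce to <"hello", 3>
        ReduceDriver.newReduceDriver(new Reducewc())
                .withInput(new Text("hello"),
                        Arrays.asList(new IntWritable(1), new IntWritable(1), new IntWritable(1)))
                .withOutput(new Text("hello"), new IntWritable(3))
                .runTest();
    }
}
```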