word_count: a simple word-count implementation
First, create the project: right-click in the Project Explorer -> New -> Other -> select Map/Reduce -> Finish.
Create the Mapper class: right-click the project -> New -> Other -> select Mapper.
package com.hpe.wc;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Generic parameters: mapper input key type, mapper input value type,
// mapper output key type, mapper output value type
public class mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // An IntWritable wrapping the constant value 1
    private final static IntWritable one = new IntWritable(1);
    // Reusable Text object holding the current word
    private Text word = new Text();

    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line that was read from the file
        String[] st = value.toString().split(" ");
        // Emit a (word, 1) pair for each word in the line
        for (int i = 0; i < st.length; i++) {
            // Set the current word and write it to the output stream
            word.set(st[i]);
            context.write(word, one);
            System.out.println(word);
        }
    }
}
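As a quick illustration (the input line here is hypothetical, not part of the project): if the mapper receives the line "hello world hello", it splits it on spaces and emits one (word, 1) pair per token:

(hello, 1)
(world, 1)
(hello, 1)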
Create the Reducer class: right-click the project -> New -> Other -> select Reducer.
package com.hpe.wc;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Generic parameters: reduce input key type, reduce input value type,
// reduce output key type, reduce output value type
public class reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterable<IntWritable> value, Context context) throws IOException, InterruptedException {
        // Initialize the running total
        int sum = 0;
        // Loop over all values grouped under this key
        for (IntWritable i : value) {
            // i.get() unwraps the value 1 stored in the IntWritable; accumulate it
            sum += i.get();
        }
        // Write the (word, total count) pair to the output stream
        context.write(key, new IntWritable(sum));
    }
}
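Continuing the hypothetical example above, the framework groups the map output by key before calling reduce, so the reducer receives (hello, [1, 1]) and (world, [1]) and writes:

hello	2
world	1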
Create the Job class and run it.
package com.hpe.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Job1 {
    public static void main(String[] args) throws Exception {
        // User name used when connecting to the Hadoop cluster
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create the configuration object
        Configuration conf = new Configuration();
        // Set the HDFS connection address
        conf.set("fs.defaultFS", "hdfs://192.168.230.131:9000");
        // Create the job object with this configuration
        Job job = Job.getInstance(conf);
        // Set the job name
        job.setJobName("hahaha");
        // The class used to locate the job jar
        job.setJarByClass(Job1.class);
        // Specify the mapper class and reducer class for this MapReduce job
        job.setMapperClass(mapper.class);
        job.setReducerClass(reduce.class);
        // Specify the mapper output key and value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Input file to count and output directory for the results
        FileInputFormat.setInputPaths(job, new Path("/aaa/aab/haha.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/abc/heihei"));
        // Check whether the job completed successfully
        // System.exit(job.waitForCompletion(true) ? 0 : 1);
        boolean zt = job.waitForCompletion(true);
        if (zt) {
            System.out.println("success");
        } else {
            System.out.println("error");
        }
    }
}
Then run the main method of the Job class. The two lines

FileInputFormat.setInputPaths(job, new Path("/aaa/aab/haha.txt"));
FileOutputFormat.setOutputPath(job, new Path("/abc/heihei"));

set the input file to count and the output directory for the results: the job counts the words in /aaa/aab/haha.txt and writes the result to the /abc/heihei directory, which ends up containing two files (a _SUCCESS marker and a part-r-00000 result file).
Opening the second file, part-r-00000, shows the word counts.
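If the hdfs command-line client on your machine points at the same cluster (an assumption about your environment), you can also view the result directly with hdfs dfs -cat /abc/heihei/part-r-00000.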
The key and value types written to context must be Writable wrapper classes; otherwise they cannot be serialized and transferred correctly.
Commonly used Writable wrapper classes include IntWritable, LongWritable, DoubleWritable, BooleanWritable, and Text, as shown in the figure below.
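As a minimal illustrative sketch of how these wrappers are constructed and unwrapped (WritableDemo is a hypothetical class name used only for this example):

import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class WritableDemo {
    public static void main(String[] args) {
        // Each basic Java type has a corresponding Writable wrapper
        IntWritable count = new IntWritable(1);           // wraps int
        LongWritable offset = new LongWritable(0L);       // wraps long
        DoubleWritable ratio = new DoubleWritable(0.5);   // wraps double
        BooleanWritable flag = new BooleanWritable(true); // wraps boolean
        Text word = new Text("hello");                    // wraps a String (UTF-8)

        // get() / toString() unwraps the original value
        System.out.println(count.get());     // 1
        System.out.println(word.toString()); // hello
    }
}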