我的数据
banzhang ni hao
xihuan hadoop banzhang dc
banzhang ni hao
xihuan hadoop banzhang dc
Map阶段
package com.KeyValueTextInput;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class KeyValueTextInputMapper extends Mapper<Text,Text,Text, LongWritable> {

    // Reused output value: avoids allocating a new LongWritable per record
    // (map() runs once per input pair, so per-call allocation adds GC pressure).
    private final LongWritable one = new LongWritable(1);

    /**
     * Emits (key, 1) for every key/value pair delivered by
     * KeyValueTextInputFormat (the line text before the separator becomes the
     * key, the remainder becomes the value).
     *
     * @param key     text before the configured separator on the input line
     * @param value   text after the separator
     * @param context MapReduce context used to emit the (key, 1) pair
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        // Debug output showing what the input format produced as key and value.
        System.out.println(key.toString()+"+++++++++++++++++map阶段的输入的key+++++++++++++++++++++++++++");
        System.out.println(value+"---------------map阶段的输入的value--------------------");
        context.write(key, one);
    }
}
Reduce阶段
package com.KeyValueTextInput;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class KeyValueTextInputReducer extends Reducer<Text, LongWritable,Text, LongWritable> {

    // Reused output value. (In the original this field was declared but never
    // used; a new LongWritable was allocated per reduce() call instead.)
    LongWritable v = new LongWritable();

    /**
     * Sums the counts emitted by the mapper for a single key and writes
     * (key, total) to the output.
     *
     * @param key     the grouping key produced by the map phase
     * @param values  all counts emitted for this key
     * @param context MapReduce context used to emit the aggregated result
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        System.out.println(key.toString()+"+++++++++++++++++reduce阶段的输入的key+++++++++++++++++++++++++++");
        // long, not int: LongWritable.get() returns long, and an int
        // accumulator could overflow on large datasets.
        long sum = 0;
        // Accumulate all counts for this key.
        for (LongWritable s : values) {
            sum += s.get();
        }
        v.set(sum);
        context.write(key, v);
    }
}
Driver阶段
package com.KeyValueTextInput;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.net.URI;
public class KeyValueTextInputDriver {

    /**
     * Configures and submits the KeyValueTextInputFormat word-count job:
     * reads /banzhang.txt from HDFS, counts occurrences of each key (the text
     * before the separator on each line), and writes results to /out.
     *
     * @param args unused command-line arguments
     * @throws Exception on any HDFS or job-submission failure
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME","root"); // run HDFS operations as root

        // Cluster the job will be submitted to.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS","hdfs://hadoop01:9000");

        // Separator splitting each line into key/value,
        // e.g. "banzhang\tni hao" -> key:"banzhang", value:"ni hao".
        // BUG FIX: this must be set BEFORE Job.getInstance(configuration) —
        // Job copies the Configuration, so mutating `configuration` after job
        // creation has no effect on the job. (The original only worked because
        // "\t" happens to be the default separator.)
        configuration.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR,"\t");
        System.out.println("+++++++++++++++++driver阶段设置的分割符+++++++++++++++++++++++++++");

        // File system handle used to clean up a stale output directory.
        FileSystem fs = FileSystem.get(new URI("hdfs://hadoop01:9000"), configuration, "root");

        // Input and output locations on HDFS.
        String INPUT_PATH = "hdfs://hadoop01:9000/banzhang.txt";
        String OUT_PATH = "hdfs://hadoop01:9000/out";

        // MapReduce refuses to run if the output directory exists; delete it.
        if (fs.exists(new Path(OUT_PATH))) {
            fs.delete(new Path(OUT_PATH), true);
        }

        // Build the job from the fully-configured Configuration.
        Job job = Job.getInstance(configuration);

        // Jar, mapper, and reducer classes.
        job.setJarByClass(KeyValueTextInputDriver.class);
        job.setMapperClass(KeyValueTextInputMapper.class);
        job.setReducerClass(KeyValueTextInputReducer.class);

        // Map output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reduce (final) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input format and input/output paths.
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        KeyValueTextInputFormat.setInputPaths(job, new Path(INPUT_PATH));
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));

        // Submit and block until completion; exit 0 on success, 1 on failure.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}