package exp1.hadoop;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Answer2Exp2 {
public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable>{
private static final byte userIndex = 2;
private Text mapKey = new Text();
public void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
String tmp;
String line = value.toString();//将文本转成字符串类型
String [] lineSplit = line.split("\t");//按tap键进行分片并存储起来,相当于linesplit数组里每一个代表一列
tmp = lineSplit[userIndex];//得到目标列
mapKey.set(tmp);//转成text格式
context.write(mapKey, new IntWritable(1)); //写入context
}
}
public static class ReduceClass extends Reducer<Text, Iterable<IntWritable>, Text, IntWritable>{
private IntWritable totalCnt=new IntWritable();
public void reduce(Text mapKey, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
// TODO Auto-generated method stub
int sum=0;
for(IntWritable value : values){
sum+=value.get();//对相同key进行累加计数
}
totalCnt.set(sum);
context.write(mapKey, totalCnt);
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();//定义一个默认的配置
Job job = Job.getInstance(conf);//初始化一个job
job.setJarByClass(Answer2Exp2.class);//设置jar类
job.setNumReduceTasks(1);//设置reduce任务的数量
job.setJobName("User Statistical");//设置job名称
// job.setMapperClass(MapClass.class);//设置map类
// job.setReducerClass(ReduceClass.class);//设置reduce类
job.setMapOutputKeyClass(Text.class);//设置map输出key类,记得与<>里的格式类型一致,此时为text。
job.setMapOutputValueClass(IntWritable.class);//设置map输出value类,格式!
job.setOutputKeyClass(Text.class);//最终输出的Key类型类,如上
job.setOutputValueClass(IntWritable.class);//最终输出的value类,如上
FileInputFormat.addInputPath(job, new Path("F:/data/input/userurl_20150911"));//输入在本地
FileOutputFormat.setOutputPath(job, new Path("F:/data/output/o3"));//输出在本地
//可以用paths输出多个路径,用逗号隔开
// FileInputFormat.addInputPath(job, new Path (args[0]));
// FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true)?0:1);//判断job是否完成,完成为1,未完成为0.
}
}