- 需求:统计以下访问记录中每个 IP 出现的次数
IP 应用 网站
192.168.0.1 QQ com.baidu.com
192.168.0.4 QQ com.baidu.com
192.168.0.2 QQ com.baidu.com
192.168.0.3 QQ com.baidu.com
192.168.0.1 QQ com.baidu.com
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the IP access-count job: emits {@code (ip, 1)} for every input line.
 *
 * <p>Each input line is split on single spaces and the first column is used as the
 * IP address, e.g. {@code 192.168.0.1 QQ com.baidu.com} yields {@code (192.168.0.1, 1)}.
 */
public class AccessMapper extends Mapper<LongWritable,Text, Text, IntWritable> {
    /** Count of one emitted for every record; shared to avoid a per-record allocation. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reusable holder for the output key. */
    private final Text ipKey = new Text();

    /**
     * Extracts the IP column from one line of input and emits it with a count of one.
     *
     * @param key     input key supplied by the input format (not used here)
     * @param value   one line of input text
     * @param context job context used to emit the {@code (ip, 1)} pair
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Blank lines carry no record; skipping them avoids emitting an empty key.
            return;
        }
        String[] fields = line.split(" ");
        // The IP is the FIRST column (index 0); index 1 is the application name.
        // NOTE(review): if the input contains a header row ("IP ..."), it is counted
        // under the key "IP" — confirm whether the real input includes that row.
        ipKey.set(fields[0]);
        context.write(ipKey, ONE);
    }
}
/**
 * Reducer for the IP access-count job: adds up the counts grouped under one key.
 */
public class AccessReducer extends Reducer<Text, IntWritable, Text,IntWritable> {
    /**
     * Sums every count received for {@code key} and writes one {@code (key, total)} pair.
     *
     * @param key     the grouped key produced by the map phase
     * @param values  the counts emitted for that key
     * @param context job context used to write the total
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        context.write(key, new IntWritable(sumCounts(values)));
    }

    /** Returns the arithmetic sum of all counts in {@code counts}. */
    private static int sumCounts(Iterable<IntWritable> counts) {
        int sum = 0;
        for (IntWritable count : counts) {
            sum = sum + count.get();
        }
        return sum;
    }
}
public class CustomJobSubmitter extends Configured implements Tool {
public int run(String[] strings) throws Exception {
//1.封装job
Configuration conf=getConf();
Job job= Job.getInstance(conf);
job.setJarByClass(CustomJobSubmitter.class);
//2.设置分析|输出数据格式类型
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
//3.设置数据读入和写出路径
Path src = new Path("/demo/access");
TextInputFormat.addInputPath(job,src);
Path dst = new Path("/demo/res");//必须为null(目录必须不存在)
TextOutputFormat.setOutputPath(job,dst);
//4.设置Mapper和Reducer逻辑
job.setMapperClass(AccessMapper.class);
job.setReducerClass(AccessReducer.class);
//5.设置Mapper和Reducer的输出k/v类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//6.提交任务
job.waitForCompletion(true);//表示看执行的结果
return 0;
}
public static void main(String[] args) throws Exception {
ToolRunner.run(new CustomJobSubmitter(),args);
}
}
5. 打包(package)
6. 将打好的 jar 包上传到 Linux 系统,运行即可