执行的一个简单案例:
用MapReduce实现一下需求
1、搜狗日志摘取“年月日时分”
2011 12 30 00 00 05
2、统计搜索过“爱奇艺”关键字的UID和搜索记录
uid keyword
3、统计上午7-9点之间,搜索过“赶集网”的用户(uid)
uid
4.统计12月30号8点20-8点22分,所有用户id和浏览信息
1、搜狗日志摘取“年月日时分”
Mapper端
package hadoop.MapReduce.cuttime;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Map-only extractor: pulls the "yyyyMMddHHmmss" timestamp (first tab-separated
 * field of each Sogou log line) apart into year/month/day/hour/minute/second
 * and emits them tab-joined as the key with an empty value.
 */
public class TimeMapper extends Mapper<LongWritable, Text,Text,Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        // Guard: skip malformed lines instead of letting substring()/indexing
        // throw and kill the whole map task.
        if (fields.length == 0 || fields[0].length() < 14) {
            return;
        }
        String time = fields[0];
        // Fixed-width slices of the yyyyMMddHHmmss timestamp.
        String year = time.substring(0, 4);
        String month = time.substring(4, 6);
        String day = time.substring(6, 8);
        String hour = time.substring(8, 10);
        String minute = time.substring(10, 12);
        String second = time.substring(12, 14);
        context.write(new Text(year+"\t"+month+"\t"+day+"\t"+hour+"\t"+minute+"\t"+second), new Text(""));
    }
}
Driver
package hadoop.MapReduce.cuttime;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the map-only timestamp-extraction job (requirement 1).
 * Reads the Sogou log from D:\a\sougo.txt and writes to D:\a\b2.
 */
public class TimeDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
        Job job = Job.getInstance(conf);
        // Needed when the job runs from a jar; also consistent with the other drivers.
        job.setJarByClass(TimeDriver.class);
        job.setMapperClass(TimeMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Map-only job: no reduce phase.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path("D:\\a\\sougo.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\a\\b2"));
        // Propagate job success/failure to the caller instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
2、统计搜索过“爱奇艺”关键字的UID和搜索记录
Mapper端
package hadoop.MapReduce.aiqiyi;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Emits (uid, query) for every log record whose query text contains the
 * keyword "爱奇艺" (requirement 2). Expected line layout (tab-separated):
 * field 0 = timestamp, field 1 = uid, field 2 = search keyword.
 */
public class AQYMapper extends Mapper<LongWritable, Text,Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        // Guard: skip lines with fewer than 3 fields rather than crash the task.
        if (fields.length < 3) {
            return;
        }
        String uid = fields[1];
        String keyword = fields[2];
        // contains() is the idiomatic form of indexOf(...) >= 0.
        if (keyword.contains("爱奇艺")) {
            context.write(new Text(uid), new Text(keyword));
        }
    }
}
Reduce端
package hadoop.MapReduce.aiqiyi;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Identity reducer: forwards every (uid, query) pair unchanged.
 */
public class AQYReduce extends Reducer<Text, Text,Text,Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text value : values) {
            // Write the incoming objects directly — copying them into fresh
            // Text instances per record is pure allocation overhead.
            context.write(key, value);
        }
    }
}
Driver
package hadoop.MapReduce.aiqiyi;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the "爱奇艺" keyword-search job (requirement 2).
 * Reads the Sogou log from D:\a\sougo.txt and writes to D:\a\b3.
 */
public class AQYDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
        Job job = Job.getInstance(conf);
        job.setJarByClass(AQYDriver.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(AQYMapper.class);
        job.setReducerClass(AQYReduce.class);
        FileInputFormat.addInputPath(job, new Path("D:\\a\\sougo.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\a\\b3"));
        // Propagate job success/failure to the caller instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
3、统计上午7-9点之间,搜索过“赶集网”的用户(uid)
Mapper端
package hadoop.MapReduce.ganji;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Emits the uid of every record whose query contains "赶集网" and whose hour
 * field is 07 or 08, i.e. searches made between 07:00:00 and 08:59:59
 * (requirement 3: "上午7-9点之间").
 */
public class GJMapper extends Mapper<LongWritable, Text,Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        // Guard: skip malformed/short lines instead of crashing the task.
        if (fields.length < 3 || fields[0].length() < 10) {
            return;
        }
        String hour = fields[0].substring(8, 10);   // HH of yyyyMMddHHmmss
        String uid = fields[1];
        String keyword = fields[2];
        // contains() is the idiomatic form of indexOf(...) >= 0.
        if ((hour.equals("07") || hour.equals("08")) && keyword.contains("赶集网")) {
            context.write(new Text(uid), NullWritable.get());
        }
    }
}
Reduce端
package hadoop.MapReduce.ganji;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Deduplicating reducer: emits each uid exactly once.
 *
 * Bug fix: the input value type must be NullWritable, matching what GJMapper
 * emits and what the driver declares via setMapOutputValueClass(NullWritable.class).
 * The original Reducer&lt;Text, Text, ...&gt; declaration would fail at runtime
 * with a type mismatch when deserializing the map output values.
 */
public class GJReduce extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // One output line per distinct uid; the values carry no information.
        context.write(key, NullWritable.get());
    }
}
Driver
package hadoop.MapReduce.ganji;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the "赶集网" 7–9am user-listing job (requirement 3).
 * Reads the Sogou log from D:\a\sougo.txt and writes to D:\a\b4.
 */
public class GJDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
        Job job = Job.getInstance(conf);
        job.setJarByClass(GJDriver.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setMapperClass(GJMapper.class);
        job.setReducerClass(GJReduce.class);
        FileInputFormat.addInputPath(job, new Path("D:\\a\\sougo.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\a\\b4"));
        // Propagate job success/failure to the caller instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
4.统计12月30号8点20-8点22分,所有用户id和浏览信息
Mapper端
package hadoop.MapReduce.tongji;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Emits (uid, full log line) for records timestamped Dec 30, 08:20 or 08:21,
 * i.e. the half-open window [08:20:00, 08:22:00) (requirement 4:
 * "12月30号8点20-8点22分").
 * NOTE(review): minute "22" itself is excluded; if the requirement means an
 * inclusive end at 8:22, add minute.equals("22") — confirm intent.
 */
public class TGMapper extends Mapper<LongWritable, Text,Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        // Guard: skip malformed/short lines instead of crashing the task.
        if (fields.length < 2 || fields[0].length() < 12) {
            return;
        }
        String time = fields[0];                       // yyyyMMddHHmmss
        String month = time.substring(4, 6);
        String day = time.substring(6, 8);
        String hour = time.substring(8, 10);
        String minute = time.substring(10, 12);
        String uid = fields[1];
        if (month.equals("12") && day.equals("30") && hour.equals("08") && (minute.equals("20") || minute.equals("21"))) {
            context.write(new Text(uid), new Text(value));
        }
    }
}
Reduce端
package hadoop.MapReduce.tongji;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Identity reducer: forwards every (uid, log line) pair unchanged.
 */
public class TGReduce extends Reducer<Text, Text,Text,Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text value : values) {
            // Write the incoming objects directly — copying them into fresh
            // Text instances per record is pure allocation overhead.
            context.write(key, value);
        }
    }
}
Driver
package hadoop.MapReduce.tongji;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the Dec 30 8:20–8:22 browse-record job (requirement 4).
 * Reads the Sogou log from D:\a\sougo.txt and writes to D:\a\b5.
 */
public class TGDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Job.getInstance(conf) replaces the deprecated new Job(conf) constructor.
        Job job = Job.getInstance(conf);
        job.setJarByClass(TGDriver.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(TGMapper.class);
        job.setReducerClass(TGReduce.class);
        FileInputFormat.addInputPath(job, new Path("D:\\a\\sougo.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\a\\b5"));
        // Propagate job success/failure to the caller instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}