- 导入 Hadoop MapReduce 相关的 jar 包
- 写一个job类
job类的部分代码:
/**
 * Configures and submits the "Pv" MapReduce job, then blocks until it finishes.
 *
 * <p>Input is read as text from {@code /user/input/pv} and results are written as text to
 * {@code /user/output/pv}. The mapper emits {@code (Text, IntWritable)} pairs and the reducer
 * emits {@code (Text, NullWritable)} pairs.
 *
 * <p>If the job does not complete successfully the JVM exits with status 1, so that calling
 * scripts and schedulers can detect the failure. On success the method returns normally.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the job cannot be set up or submitted
 * @throws ClassNotFoundException if a class required by the job cannot be found
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Pv");
    job.setJarByClass(PvUvJob.class);

    // Where the job reads from and writes to.
    Path in = new Path("/user/input/pv");
    Path out = new Path("/user/output/pv");
    FileInputFormat.addInputPath(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(PvUvMap.class);
    job.setReducerClass(PvUvReduce.class);

    // Intermediate (map output) types differ from the final (reduce output) types,
    // so both pairs have to be declared explicitly.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // waitForCompletion reports failure through its return value rather than an exception;
    // ignoring it would make a failed job look like a success to the caller.
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        System.exit(1);
    }
}
一个map类:
/** Zero-based position of the URL field inside a tab-separated input record. */
private static final int URL_COLUMN = 28;

/** Constant count of 1 emitted for every record. */
IntWritable one = new IntWritable(1);

/** Reusable output key; avoids allocating a new Text for every input record. */
private final Text urlKey = new Text();

/**
 * Emits {@code (url, 1)} for one input line.
 *
 * <p>The line is split on tab characters and the field at index {@link #URL_COLUMN} is used
 * as the output key. Lines with too few fields are skipped and counted in the
 * {@code PvUvMap / MALFORMED_RECORDS} counter instead of failing the task.
 *
 * @param key input key supplied by the input format (unused)
 * @param value one line of input text
 * @param context task context used to emit output and update counters
 * @throws IOException if writing the output pair fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // The separator must be the tab escape "\t"; "/t" would split on the literal
    // two-character sequence slash + 't'.
    String[] fields = value.toString().split("\t");
    if (fields.length <= URL_COLUMN) {
        // Short or malformed record: skip it rather than abort the task with an
        // ArrayIndexOutOfBoundsException.
        context.getCounter("PvUvMap", "MALFORMED_RECORDS").increment(1);
        return;
    }
    urlKey.set(fields[URL_COLUMN]);
    context.write(urlKey, one);
}
一个reduce类:
/**
 * Sums all counts received for one key and writes a single line of the form
 * {@code <key>====><sum>} as the output key, with a {@link NullWritable} value.
 *
 * @param key the key the values were grouped under
 * @param values the counts emitted for {@code key}
 * @param context task context used to emit the result
 * @throws IOException if writing the output pair fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    // Accumulate in a long: a very frequent key could exceed Integer.MAX_VALUE
    // and silently wrap around with an int accumulator.
    long sum = 0L;
    for (IntWritable value : values) {
        sum += value.get();
    }
    context.write(new Text(key + "====>" + sum), NullWritable.get());
}