数据参考之前的博客MapReduce流量统计
Mapper和Reducer还有WritableObject与之前的一样
My_Partitioner类
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes each map-output record to a reducer based on the phone-number
 * prefix of the key: 136 -> partition 0, 137 -> 1, 139 -> 2, everything
 * else -> 3. Must be used with exactly 4 reduce tasks.
 */
public class My_Partitioner extends Partitioner<Text, WritableObject> {

    public My_Partitioner() {
        super();
    }

    /**
     * Decides which reducer handles the given (key, value) pair.
     *
     * @param key           phone number emitted by the mapper
     * @param value         flow record (not used for partitioning)
     * @param numPartitions number of reduce tasks (expected to be 4)
     * @return partition index in [0, 3]
     */
    @Override
    public int getPartition(Text key, WritableObject value, int numPartitions) {
        // Hoist toString() once; startsWith() is safe for keys shorter than
        // 3 characters, unlike the former substring(0, 3) which threw
        // StringIndexOutOfBoundsException on short/empty keys.
        String phone = key.toString();
        if (phone.startsWith("136")) {
            return 0;
        } else if (phone.startsWith("137")) {
            return 1;
        } else if (phone.startsWith("139")) {
            return 2;
        }
        // All other prefixes (and short keys) fall into the last partition.
        return 3;
    }
}
Driver类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Driver for the phone-flow statistics job: wires up the mapper, reducer
 * and the custom {@link My_Partitioner}, then submits the job to the
 * cluster and exits with the job's success status.
 */
public class Phone_Driver {

    /** NameNode address; input and output paths are resolved against it. */
    private static final String HDFS_URI = "hdfs://hadoop01:9000";

    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException, ClassNotFoundException {
        // Run as root so the driver may delete/create paths on HDFS.
        System.setProperty("HADOOP_USER_NAME", "root");
        // Tell the client which cluster to submit the job to.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);

        // Input file and output directory on HDFS.
        String inputPath = HDFS_URI + "/china_move.txt";
        String outPath = HDFS_URI + "/out";

        // try-with-resources closes the FileSystem handle, which the
        // original code leaked. Scoped before job submission; the job
        // obtains its own handle internally.
        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), configuration, "root")) {
            // MapReduce refuses to run if the output directory exists,
            // so remove it (recursively) up front.
            if (fs.exists(new Path(outPath))) {
                fs.delete(new Path(outPath), true);
            }
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(Phone_Driver.class);

        // Custom partitioner: one reducer per phone prefix (136/137/139/other).
        job.setPartitionerClass(My_Partitioner.class);
        // Must match the number of distinct partitions My_Partitioner returns.
        job.setNumReduceTasks(4);

        // Map and reduce implementations.
        job.setMapperClass(Phone_Mapper.class);
        job.setReducerClass(Phone_Reducer.class);

        // Map output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(WritableObject.class);
        // Reduce (final) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(WritableObject.class);

        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outPath));

        // Submit, wait for completion, and report success via exit code.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
分区结果
按照自定义的分了4个区