用MapReduce处理一组流量数据，并按总流量排序

最新推荐文章于 2024-08-27 06:39:08 发布

尘埃落定55

最新推荐文章于 2024-08-27 06:39:08 发布

阅读量2.2k

点赞数

分类专栏： Hadoop 文章标签： hadoop 流量统计 mapreduce 数据排序

本文链接：https://blog.csdn.net/jiang0426/article/details/51395236

版权

Hadoop 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

本文介绍如何运用MapReduce技术处理一组包含电话号码、上传流量、下载流量和总流量的数据，并最终实现按总流量进行排序。

摘要由CSDN通过智能技术生成

用MapReduce处理一组流量数据，并按总流量排序

1、待处理的数据（每行以制表符分隔，各列依次为：时间戳、手机号、MAC地址、IP地址、访问域名、网站类别、上行数据包数、下行数据包数、上行流量、下行流量、状态码；其中访问域名和网站类别可能为空）：

1363157985066 	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com		24	27	2481	24681	200
1363157995052 	13826544101	5C-0E-8B-C7-F1-E0:CMCC	120.197.40.4			4	0	264	0	200
1363157991076 	13926435656	20-10-7A-28-CC-0A:CMCC	120.196.100.99			2	4	132	1512	200
1363154400022 	13926251106	5C-0E-8B-8B-B1-50:CMCC	120.197.40.4			4	0	240	0	200
1363157993044 	18211575961	94-71-AC-CD-E6-18:CMCC-EASY	120.196.100.99	iface.qiyi.com	视频网站	15	12	1527	2106	200
1363157995074 	84138413	5C-0E-8B-8C-E8-20:7DaysInn	120.197.40.4	122.72.52.12		20	16	4116	1432	200
1363157993055 	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			18	15	1116	954	200
1363157995033 	15920133257	5C-0E-8B-C7-BA-20:CMCC	120.197.40.4	sug.so.360.cn	信息安全	20	20	3156	2936	200
1363157983019 	13719199419	68-A1-B7-03-07-B1:CMCC-EASY	120.196.100.82			4	0	240	0	200
1363157984041 	13660577991	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	站点统计	24	9	6960	690	200
1363157973098 	15013685858	5C-0E-8B-C7-F7-90:CMCC	120.197.40.4	rank.ie.sogou.com	搜索引擎	28	27	3659	3538	200
1363157986029 	15989002119	E8-99-C4-4E-93-E0:CMCC-EASY	120.196.100.99	www.umeng.com	站点统计	3	3	1938	180	200
1363157992093 	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			15	9	918	4938	200
1363157986041 	13480253104	5C-0E-8B-C7-FC-80:CMCC-EASY	120.197.40.4			3	3	180	180	200
1363157984040 	13602846565	5C-0E-8B-8B-B6-00:CMCC	120.197.40.4	2052.flash2-http.qq.com	综合门户	15	12	1938	2910	200
1363157995093 	13922314466	00-FD-07-A2-EC-BA:CMCC	120.196.100.82	img.qfc.cn		12	12	3008	3720	200
1363157982040 	13502468823	5C-0A-5B-6A-0B-D4:CMCC-EASY	120.196.100.99	y0.ifengimg.com	综合门户	57	102	7335	110349	200
1363157986072 	18320173382	84-25-DB-4F-10-1A:CMCC-EASY	120.196.100.99	input.shouji.sogou.com	搜索引擎	21	18	9531	2412	200
1363157990043 	13925057413	00-1F-64-E1-E6-9A:CMCC	120.196.100.55	t3.baidu.com	搜索引擎	69	63	11058	48243	200
1363157988072 	13760778710	00-FD-07-A4-7B-08:CMCC	120.196.100.82			2	2	120	120	200
1363157985066 	13726238888	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com		24	27	2481	24681	200
1363157993055 	13560436666	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			18	15	1116	954	200

2、新建一个FlowBean类，里面放置数据中需要的属性：

package cn.nanda.wordCount;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {
	private String phoneNB;
	private long up_flow;
	private long d_flow;
	private long s_flow;

	public FlowBean() {
	}

	public FlowBean(String phoneNB, long up_flow, long d_flow) {
		this.phoneNB = phoneNB;
		this.up_flow = up_flow;
		this.d_flow = d_flow;
		this.s_flow = up_flow + d_flow;
	}

	public String getPhoneNB() {
		return phoneNB;
	}

	public void setPhoneNB(String phoneNB) {
		this.phoneNB = phoneNB;
	}

	public long getUp_flow() {
		return up_flow;
	}

	public void setUp_flow(long up_flow) {
		this.up_flow = up_flow;
	}

	public long getD_flow() {
		return d_flow;
	}

	public void setD_flow(long d_flow) {
		this.d_flow = d_flow;
	}

	public long getS_flow() {
		return s_flow;
	}

	public void setS_flow(long s_flow) {
		this.s_flow = s_flow;
	}

	public int compareTo(FlowBean o) {
		// TODO Auto-generated method stub
		return s_flow > o.getS_flow() ? -1 : 1;
	}

	public void write(DataOutput out) throws IOException {
		out.writeUTF(phoneNB);
		out.writeLong(up_flow);
		out.writeLong(d_flow);
		out.writeLong(s_flow);

	}

	public void readFields(DataInput in) throws IOException {
		phoneNB = in.readUTF();
		up_flow = in.readLong();
		d_flow = in.readLong();
		s_flow = in.readLong();
	}

	@Override
	public String toString() {

		return "" + up_flow + "\t" + d_flow + "\t" + s_flow;
	}

}

3、书写Mapper获取需要的信息：

package cn.nanda.wordCount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import com.google.common.io.Files;

/**
 * Extracts the phone number and the upstream/downstream traffic from one
 * tab-separated log line and emits them keyed by phone number.
 */
public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
	/**
	 * Fewest tokens a usable line can have: the phone number must sit at
	 * index 1 and the last three tokens are upstream traffic, downstream
	 * traffic and status code.
	 */
	private static final int MIN_FIELDS = 5;

	/**
	 * Parses one input line and writes {@code (phone, FlowBean)}.
	 *
	 * <p>{@code StringUtils.split} treats adjacent separators as one, so lines
	 * whose optional host/category columns are empty produce fewer tokens.
	 * Fixed indices counted from the start of the line therefore hit the
	 * wrong columns for those lines. The traffic columns are read relative to
	 * the END of the line instead, where the layout is stable.
	 *
	 * @param key     byte offset of the line (unused)
	 * @param value   one line of the input file
	 * @param context task context used to emit the record
	 * @throws NumberFormatException if a traffic column is not a valid long
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Take one line of input and split it into its non-empty columns.
		String line = value.toString();
		String[] fields = StringUtils.split(line, "\t");

		// Blank or truncated lines cannot be indexed safely: count and skip them.
		if (fields == null || fields.length < MIN_FIELDS) {
			context.getCounter("FlowSum", "MALFORMED_LINES").increment(1);
			return;
		}

		// Pick the columns we need; traffic is located from the end of the row.
		String phoneNB = fields[1];
		long up_flow = Long.parseLong(fields[fields.length - 3]);
		long d_flow = Long.parseLong(fields[fields.length - 2]);

		// Wrap the values as a key/value pair and emit it.
		context.write(new Text(phoneNB), new FlowBean(phoneNB, up_flow, d_flow));
	}

}

4、书写Reducer，对Mapper传来的数据进行分类统计：

package cn.nanda.wordCount;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Adds up the upstream and downstream traffic of all records that share a
 * key and emits a single summary record for that key.
 */
public class FlowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
	/**
	 * Sums the per-record counters for one key and writes the result.
	 *
	 * @param key     the grouping key; its string form is stored in the summary bean
	 * @param values  the records grouped under {@code key}
	 * @param context task context used to emit the summary
	 */
	@Override
	protected void reduce(Text key, Iterable<FlowBean> values, Context context)
			throws IOException, InterruptedException {
		long upTotal = 0L;
		long downTotal = 0L;

		// Read the counters inside the loop and keep only the running sums.
		for (final FlowBean record : values) {
			upTotal = upTotal + record.getUp_flow();
			downTotal = downTotal + record.getD_flow();
		}

		final String phone = key.toString();
		final FlowBean summary = new FlowBean(phone, upTotal, downTotal);
		context.write(key, summary);
	}

}

5、申请一个job，并执行：

package cn.nanda.wordCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class FlowSumRunner extends Configured implements Tool {

	public int run(String[] args) throws Exception {

		Configuration conf = new Configuration();
		// 如果需要在hdfs云端运行MapReduce，需要加上下面的set，相应的路径填写hdfs上的路径
		// conf.set("fs.defaultFS","hdfs://localhost:9000/");
		Job job = Job.getInstance(conf);

		job.setJarByClass(FlowSumRunner.class);

		job.setMapperClass(FlowSumMapper.class);
		job.setReducerClass(FlowSumReducer.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		return job.waitForCompletion(true) ? 0 : 1;
	}

	public static void main(String[] args) throws Exception {
		int run = ToolRunner
				.run(new Configuration(), new FlowSumRunner(), args);
		System.exit(run);
	}

}

6、运行过程：

2016-05-13 14:54:27,248 WARN  util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(62)) - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2016-05-13 14:54:27,466 INFO  Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(1173)) - session.id is deprecated. Instead, use dfs.metrics.session-id
2016-05-13 14:54:27,468 INFO  jvm.JvmMetrics (JvmMetrics.java:init(76)) - Initializing JVM Metrics with processName=JobTracker, sessionId=
2016-05-13 14:54:27,752 WARN  mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(64)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2016-05-13 14:54:27,757 WARN  mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
2016-05-13 14:54:27,772 INFO  input.FileInputFormat (FileInputFormat.java:listStatus(283)) - Total input paths to process : 1
2016-05-13 14:54:27,831 INFO  mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(198)) - number of splits:1
2016-05-13 14:54:27,998 INFO  mapreduce.JobSubmitter (JobSubmitter.java:printTokens(287)) - Submitting tokens for job: job_local416636006_0001
2016-05-13 14:54:28,288 INFO  mapreduce.Job (Job.java:submit(1294)) - The url to track the job: http://localhost:8080/
2016-05-13 14:54:28,289 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1339)) - Running job: job_local416636006_0001
2016-05-13 14:54:28,297 INFO  mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2016-05-13 14:54:28,304 INFO  output.FileOutputCommitter (FileOutputCommitter.java:<init>(100)) - File Output Committer Algorithm version is 1
2016-05-13 14:54:28,308 INFO  mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2016-05-13 14:54:28,406 INFO  mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2016-05-13 14:54:28,407 INFO  mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local416636006_0001_m_000000_0
2016-05-13 14:54:28,446 INFO  output.FileOutputCommitter (FileOutputCommitter.java:<init>(100)) - File Output Committer Algorithm version is 1
2016-05-13 14:54:28,460 INFO  mapred.Task (Task.java:initialize(612)) -  Using ResourceCalculatorProcessTree : [ ]
2016-05-13 14:54:28,465 INFO  mapred.MapTask (MapTask.java:runNewMapper(756)) - Processing split: file:/home/kun/soft/hadoop-2.7.1/input/HTTP_20130313143750.dat:0+2229
2016-05-13 14:54:28,590 INFO  mapred.MapTask (MapTask.java:setEquator(1205)) - (EQUATOR) 0 kvi 26214396(104857584)
2016-05-13 14:54:28,590 INFO  mapred.MapTask (MapTask.java:init(998)) - mapreduce.task.io.sort.mb: 100
2016-05-13 14:54:28,590 INFO  mapred.MapTask (MapTask.java:init(999)) - soft limit at 83886080
2016-05-13 14:54:28,590 INFO  mapred.MapTask (MapTask.java:init(1000)) - bufstart = 0; bufvoid = 104857600
2016-05-13 14:54:28,591 INFO  mapred.MapTask (MapTask.java:init(1001)) - kvstart = 26214396; length = 6553600
2016-05-13 14:54:28,594 INFO  mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2016-05-13 14:54:28,598 INFO  input.LineRecordReader (LineRecordReader.java:skipUtfByteOrderMark(156)) - Found UTF-8 BOM and skipped it
2016-05-13 14:54:28,602 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 
2016-05-13 14:54:28,603 INFO  mapred.MapTask (MapTask.java:flush(1460)) - Starting flush of map output
2016-05-13 14:54:28,603 INFO  mapred.MapTask (MapTask.java:flush(1482)) - Spilling map output
2016-05-13 14:54:28,603 INFO  mapred.MapTask (MapTask.java:flush(1483)) - bufstart = 0; bufend = 1072; bufvoid = 104857600
2016-05-13 14:54:28,603 INFO  mapred.MapTask (MapTask.java:flush(1485)) - kvstart = 26214396(104857584); kvend = 26214312(104857248); length = 85/6553600
2016-05-13 14:54:28,613 INFO  mapred.MapTask (MapTask.java:sortAndSpill(1667)) - Finished spill 0
2016-05-13 14:54:28,620 INFO  mapred.Task (Task.java:done(1038)) - Task:attempt_local416636006_0001_m_000000_0 is done. And is in the process of committing
2016-05-13 14:54:28,634 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2016-05-13 14:54:28,634 INFO  mapred.Task (Task.java:sendDone(1158)) - Task 'attempt_local416636006_0001_m_000000_0' done.
2016-05-13 14:54:28,634 INFO  mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local416636006_0001_m_000000_0
2016-05-13 14:54:28,635 INFO  mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2016-05-13 14:54:28,637 INFO  mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2016-05-13 14:54:28,637 INFO  mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local416636006_0001_r_000000_0
2016-05-13 14:54:28,648 INFO  output.FileOutputCommitter (FileOutputCommitter.java:<init>(100)) - File Output Committer Algorithm version is 1
2016-05-13 14:54:28,649 INFO  mapred.Task (Task.java:initialize(612)) -  Using ResourceCalculatorProcessTree : [ ]
2016-05-13 14:54:28,651 INFO  mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@43161b5e
2016-05-13 14:54:28,663 INFO  reduce.MergeManagerImpl (MergeManagerImpl.java:<init>(196)) - MergerManager: memoryLimit=1284138624, maxSingleShuffleLimit=321034656, mergeThreshold=847531520, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2016-05-13 14:54:28,666 INFO  reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local416636006_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2016-05-13 14:54:28,720 INFO  reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(144)) - localfetcher#1 about to shuffle output of map attempt_local416636006_0001_m_000000_0 decomp: 1118 len: 1122 to MEMORY
2016-05-13 14:54:28,726 INFO  reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 1118 bytes from map-output for attempt_local416636006_0001_m_000000_0
2016-05-13 14:54:28,728 INFO  reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(314)) - closeInMemoryFile -> map-output of size: 1118, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->1118
2016-05-13 14:54:28,730 INFO  reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2016-05-13 14:54:28,731 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2016-05-13 14:54:28,732 INFO  reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(674)) - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
2016-05-13 14:54:28,771 INFO  mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2016-05-13 14:54:28,771 INFO  mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 1104 bytes
2016-05-13 14:54:28,776 INFO  reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(751)) - Merged 1 segments, 1118 bytes to disk to satisfy reduce memory limit
2016-05-13 14:54:28,777 INFO  reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(781)) - Merging 1 files, 1122 bytes from disk
2016-05-13 14:54:28,778 INFO  reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(796)) - Merging 0 segments, 0 bytes from memory into reduce
2016-05-13 14:54:28,778 INFO  mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2016-05-13 14:54:28,780 INFO  mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 1104 bytes
2016-05-13 14:54:28,781 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2016-05-13 14:54:28,807 INFO  Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(1173)) - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2016-05-13 14:54:28,824 INFO  mapred.Task (Task.java:done(1038)) - Task:attempt_local416636006_0001_r_000000_0 is done. And is in the process of committing
2016-05-13 14:54:28,837 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 1 / 1 copied.
2016-05-13 14:54:28,837 INFO  mapred.Task (Task.java:commit(1199)) - Task attempt_local416636006_0001_r_000000_0 is allowed to commit now
2016-05-13 14:54:28,839 INFO  output.FileOutputCommitter (FileOutputCommitter.java:commitTask(482)) - Saved output of task 'attempt_local416636006_0001_r_000000_0' to file:/home/kun/soft/hadoop-2.7.1/output/4/_temporary/0/task_local416636006_0001_r_000000
2016-05-13 14:54:28,841 INFO  mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2016-05-13 14:54:28,841 INFO  mapred.Task (Task.java:sendDone(1158)) - Task 'attempt_local416636006_0001_r_000000_0' done.
2016-05-13 14:54:28,842 INFO  mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local416636006_0001_r_000000_0
2016-05-13 14:54:28,842 INFO  mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2016-05-13 14:54:29,294 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local416636006_0001 running in uber mode : false
2016-05-13 14:54:29,295 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1367)) -  map 100% reduce 100%
2016-05-13 14:54:29,296 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1378)) - Job job_local416636006_0001 completed successfully
2016-05-13 14:54:29,308 INFO  mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 30
	File System Counters
		FILE: Number of bytes read=7100
		FILE: Number of bytes written=578006
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
	Map-Reduce Framework
		Map input records=22
		Map output records=22
		Map output bytes=1072
		Map output materialized bytes=1122
		Input split bytes=127
		Combine input records=0
		Combine output records=0
		Reduce input groups=21
		Reduce shuffle bytes=1122
		Reduce input records=22
		Reduce output records=21
		Spilled Records=44
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=0
		Total committed heap usage (bytes)=525336576
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters 
		Bytes Read=2229
	File Output Format Counters 
		Bytes Written=542

7、运行结果：

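（第一列为电话号码，第二列为上传流量，第三列为下载流量，第四列为总流量）

注意：下面是原文当时的运行输出。StringUtils.split 会把相邻的制表符合并，缺少域名/类别的行切分后字段数变少，按固定下标 7、8 取值就会取到错误的列（例如把状态码 200 当作下载流量）。因此这些数值并不都是真实的上传/下载流量；应从行尾倒数第 3、第 2 个字段读取上行、下行流量。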

13480253104	180	200	380
13502468823	102	7335	7437
13560436666	954	200	1154
13560439658	5892	400	6292
13602846565	12	1938	1950
13660577991	9	6960	6969
13719199419	0	200	200
13726230503	2481	24681	27162
13726238888	2481	24681	27162
13760778710	120	200	320
13826544101	0	200	200
13922314466	3008	3720	6728
13925057413	63	11058	11121
13926251106	0	200	200
13926435656	1512	200	1712
15013685858	27	3659	3686
15920133257	20	3156	3176
15989002119	3	1938	1941
18211575961	12	1527	1539
18320173382	18	9531	9549
84138413	4116	1432	5548

8、将此输出数据作为排序的输入数据，通过MapReduce进行排序：

package cn.nanda.sort;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import cn.nanda.wordCount.FlowBean;

/**
 * Second-stage job: reads the {@code phone<TAB>up<TAB>down<TAB>total} lines
 * produced by the aggregation job and writes them back out ordered by the
 * natural ordering of {@link FlowBean}, which is used as the shuffle key.
 */
public class SortMR {
	/**
	 * Turns each input line into a {@link FlowBean} and emits it as the key,
	 * so that the shuffle phase sorts the records.
	 */
	public static class SortMapper extends
			Mapper<LongWritable, Text, FlowBean, NullWritable> {
		/** A usable line needs at least phone, upstream and downstream columns. */
		private static final int MIN_FIELDS = 3;

		/**
		 * Splits one line into its columns, wraps them in a bean and emits
		 * the bean as the key with an empty value.
		 *
		 * @throws NumberFormatException if a traffic column is not a valid long
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String line = value.toString();
			String[] fields = StringUtils.split(line, "\t");

			// Blank or truncated lines cannot be indexed safely: count and skip them.
			if (fields == null || fields.length < MIN_FIELDS) {
				context.getCounter("SortMR", "MALFORMED_LINES").increment(1);
				return;
			}

			String phoneNB = fields[0];
			long up_flow = Long.parseLong(fields[1]);
			long d_flow = Long.parseLong(fields[2]);

			// The total is recomputed by the FlowBean constructor, so the
			// fourth input column is not read.
			context.write(new FlowBean(phoneNB, up_flow, d_flow), NullWritable.get());
		}
	}

	/**
	 * Writes every sorted bean back out as {@code (phone, bean)}.
	 */
	public static class SortReducer extends
			Reducer<FlowBean, NullWritable, Text, FlowBean> {
		/**
		 * Emits one output record per grouped value rather than one per
		 * group, so records whose keys compare as equal are not lost.
		 */
		@Override
		protected void reduce(FlowBean key, Iterable<NullWritable> values,
				Context context) throws IOException, InterruptedException {
			for (NullWritable ignored : values) {
				context.write(new Text(key.getPhoneNB()), key);
			}
		}
	}

	/**
	 * Configures and submits the sort job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 */
	public static void main(String[] args) throws Exception {
		if (args == null || args.length < 2) {
			System.err.println("Usage: SortMR <input path> <output path>");
			System.exit(2);
		}

		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);

		job.setJarByClass(SortMR.class);

		job.setMapperClass(SortMapper.class);
		job.setReducerClass(SortReducer.class);

		job.setMapOutputKeyClass(FlowBean.class);
		job.setMapOutputValueClass(NullWritable.class);

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		// A single reducer keeps the output in one globally ordered file;
		// with several reducers each part file would only be sorted locally.
		job.setNumReduceTasks(1);

		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

9、排序后的结果为：

13726238888	2481	24681	27162
13726230503	2481	24681	27162
13925057413	63	11058	11121
18320173382	18	9531	9549
13502468823	102	7335	7437
13660577991	9	6960	6969
13922314466	3008	3720	6728
13560439658	5892	400	6292
84138413	4116	1432	5548
15013685858	27	3659	3686
15920133257	20	3156	3176
13602846565	12	1938	1950
15989002119	3	1938	1941
13926435656	1512	200	1712
18211575961	12	1527	1539
13560436666	954	200	1154
13480253104	180	200	380
13760778710	120	200	320
13826544101	0	200	200
13926251106	0	200	200
13719199419	0	200	200