Original post: http://blog.csdn.net/jdplus/article/details/23960127/
Assistance.java — helper class; see the inline comments for details.
package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

import java.io.IOException;
import java.util.*;

public class Assistance {
    // Read the cluster-center file: each line holds a center ID followed by the center's coordinates.
    public static List<ArrayList<Float>> getCenters(String inputpath){
        List<ArrayList<Float>> result = new ArrayList<ArrayList<Float>>();
        Configuration conf = new Configuration();
        try {
            FileSystem hdfs = FileSystem.get(conf);
            Path in = new Path(inputpath);
            FSDataInputStream fsIn = hdfs.open(in);
            LineReader lineIn = new LineReader(fsIn, conf);
            Text line = new Text();
            while (lineIn.readLine(line) > 0){
                String record = line.toString();
                /*
                Hadoop inserts a tab between key and value when writing output,
                so replace it with a space before splitting.
                */
                String[] fields = record.replace("\t", " ").split(" ");
                List<Float> tmplist = new ArrayList<Float>();
                for (int i = 0; i < fields.length; ++i){
                    tmplist.add(Float.parseFloat(fields[i]));
                }
                result.add((ArrayList<Float>) tmplist);
            }
            lineIn.close();
            fsIn.close();
        } catch (IOException e){
            e.printStackTrace();
        }
        return result;
    }

    // Delete the output of the previous MapReduce job.
    public static void deleteLastResult(String path){
        Configuration conf = new Configuration();
        try {
            FileSystem hdfs = FileSystem.get(conf);
            Path path1 = new Path(path);
            hdfs.delete(path1, true);
        } catch (IOException e){
            e.printStackTrace();
        }
    }
    // Compute the distance between the cluster centers of two consecutive
    // iterations and decide whether the termination condition is met.
    public static boolean isFinished(String oldpath, String newpath, int k, float threshold)
    throws IOException{
        List<ArrayList<Float>> oldcenters = Assistance.getCenters(oldpath);
        List<ArrayList<Float>> newcenters = Assistance.getCenters(newpath);
        float distance = 0;
        for (int i = 0; i < k; ++i){
            // index 0 of each center holds its ID, so start from index 1
            for (int j = 1; j < oldcenters.get(i).size(); ++j){
                float tmp = Math.abs(oldcenters.get(i).get(j) - newcenters.get(i).get(j));
                distance += Math.pow(tmp, 2);
            }
        }
        System.out.println("Distance = " + distance + " Threshold = " + threshold);
        if (distance < threshold)
            return true;
        /*
        The termination condition is not met, so replace the old centers
        with the centers produced by this iteration.
        */
        Assistance.deleteLastResult(oldpath);
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        hdfs.copyToLocalFile(new Path(newpath), new Path("/home/hadoop/class/oldcenter.data"));
        hdfs.delete(new Path(oldpath), true);
        hdfs.moveFromLocalFile(new Path("/home/hadoop/class/oldcenter.data"), new Path(oldpath));
        return false;
    }
}
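For reference, getCenters expects each line of the centers file to look like "0<TAB>5.0 3.4 1.5 0.2": a cluster ID, a tab (or space), then the coordinates. Below is a minimal standalone sketch of the same parsing and convergence check, runnable without HDFS; the sample center lines are invented for illustration:

package KMeans;

import java.util.ArrayList;
import java.util.List;

// Standalone sketch of the parsing and convergence logic in Assistance;
// the two sample center lines are hypothetical, not from the original post.
public class AssistanceSketch {
    static List<Float> parseCenter(String record) {
        // element 0 is the cluster ID, the rest are coordinates
        String[] fields = record.replace("\t", " ").split(" ");
        List<Float> center = new ArrayList<Float>();
        for (String f : fields) {
            center.add(Float.parseFloat(f));
        }
        return center;
    }

    public static void main(String[] args) {
        List<Float> oldCenter = parseCenter("0\t5.0 3.4 1.5 0.2");
        List<Float> newCenter = parseCenter("0\t5.1 3.5 1.4 0.2");
        float distance = 0;
        for (int j = 1; j < oldCenter.size(); ++j) { // skip the ID at index 0
            float tmp = Math.abs(oldCenter.get(j) - newCenter.get(j));
            distance += Math.pow(tmp, 2);
        }
        // prints roughly 0.03; iteration stops once this falls below the threshold
        System.out.println("Distance = " + distance);
    }
}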
KMeansDriver.java — the job driver class.
package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class KMeansDriver{
    public static void main(String[] args) throws Exception{
        int repeated = 0;

        /*
        Keep submitting MapReduce jobs until the distance between the
        cluster centers of two consecutive iterations falls below the
        threshold, or the iteration limit is reached.
        */
        do {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 6){
                System.err.println("Usage: <in> <out> <oldcenters> <newcenters> <k> <threshold>");
                System.exit(2);
            }
            conf.set("centerpath", otherArgs[2]);
            conf.set("kpath", otherArgs[4]);
            Job job = new Job(conf, "KMeansCluster");// create the MapReduce job
            job.setJarByClass(KMeansDriver.class);// set the job's main class

            Path in = new Path(otherArgs[0]);
            Path out = new Path(otherArgs[1]);
            FileInputFormat.addInputPath(job, in);// set the input path
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(out)){// delete the output path if it already exists
                fs.delete(out, true);
            }
            FileOutputFormat.setOutputPath(job, out);// set the output path

            job.setMapperClass(KMeansMapper.class);// set the Mapper class
            job.setReducerClass(KMeansReducer.class);// set the Reducer class

            job.setOutputKeyClass(IntWritable.class);// set the output key class
            job.setOutputValueClass(Text.class);// set the output value class

            job.waitForCompletion(true);// run the job

            ++repeated;
            System.out.println("We have repeated " + repeated + " times.");
        } while (repeated < 10 && (Assistance.isFinished(args[2], args[3], Integer.parseInt(args[4]), Float.parseFloat(args[5])) == false));
        // cluster the data set using the final cluster centers
        Cluster(args);
    }
    public static void Cluster(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException{
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 6){
            System.err.println("Usage: <in> <out> <oldcenters> <newcenters> <k> <threshold>");
            System.exit(2);
        }
        conf.set("centerpath", otherArgs[2]);
        conf.set("kpath", otherArgs[4]);
        Job job = new Job(conf, "KMeansCluster");
        job.setJarByClass(KMeansDriver.class);

        Path in = new Path(otherArgs[0]);
        Path out = new Path(otherArgs[1]);
        FileInputFormat.addInputPath(job, in);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out)){
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);

        // This pass only assigns samples to clusters, so no custom Reducer is
        // set; the default identity reducer passes the mapper output through.
        // (Calling job.setNumReduceTasks(0) would skip the reduce phase entirely.)
        job.setMapperClass(KMeansMapper.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);
    }
}
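Note that the driver assumes the <oldcenters> file (here input/oldcenter.data) already exists on HDFS before the first iteration. A hedged sketch of one way to seed it, taking the first k records of the data set as the initial centers — the class name and argument layout are illustrative, not part of the original post:

package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

// Illustrative helper: writes the first k input records to the initial
// centers file in the "id<TAB>coordinates" format that getCenters expects.
public class SeedCenters {
    public static void main(String[] args) throws Exception {
        String input = args[0];      // e.g. input/iris.data
        String centerPath = args[1]; // e.g. input/oldcenter.data
        int k = Integer.parseInt(args[2]);

        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        FSDataInputStream fsIn = hdfs.open(new Path(input));
        LineReader lineIn = new LineReader(fsIn, conf);
        FSDataOutputStream fsOut = hdfs.create(new Path(centerPath), true);

        Text line = new Text();
        for (int i = 0; i < k && lineIn.readLine(line) > 0; ++i) {
            fsOut.writeBytes(i + "\t" + line.toString() + "\n");
        }
        lineIn.close();
        fsOut.close();
    }
}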
KMeansMapper.java
package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class KMeansMapper extends Mapper<Object, Text, IntWritable, Text> {
    private List<ArrayList<Float>> centers;
    private int k;

    // Load the current cluster centers once per task instead of once per
    // record; re-reading them inside map() would reopen the HDFS file for
    // every input line.
    protected void setup(Context context)
    throws IOException, InterruptedException{
        centers = Assistance.getCenters(context.getConfiguration().get("centerpath"));
        k = Integer.parseInt(context.getConfiguration().get("kpath"));
    }

    public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException{
        String line = value.toString();
        String[] fields = line.split(" ");
        float minDist = Float.MAX_VALUE;
        int centerIndex = k;
        // Compute the distance from the sample to each center and assign
        // the sample to the nearest one.
        for (int i = 0; i < k; ++i){
            float currentDist = 0;
            for (int j = 0; j < fields.length; ++j){
                // index 0 of a center is its ID, hence the j + 1 offset
                float tmp = Math.abs(centers.get(i).get(j + 1) - Float.parseFloat(fields[j]));
                currentDist += Math.pow(tmp, 2);
            }
            if (minDist > currentDist){
                minDist = currentDist;
                centerIndex = i;
            }
        }
        context.write(new IntWritable(centerIndex), new Text(value));
    }
}
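To see the assignment step in isolation, here is a minimal standalone sketch of the same nearest-center search with two hypothetical 2-D centers; all values are invented for illustration:

package KMeans;

// Standalone sketch of the nearest-center assignment performed in map();
// the centers and the sample point are hypothetical 2-D values.
public class AssignSketch {
    public static void main(String[] args) {
        float[][] centers = { {0f, 1.0f, 1.0f}, {1f, 5.0f, 5.0f} }; // {id, x, y}
        float[] point = { 1.2f, 0.8f };
        float minDist = Float.MAX_VALUE;
        int centerIndex = -1;
        for (int i = 0; i < centers.length; ++i) {
            float currentDist = 0;
            for (int j = 0; j < point.length; ++j) {
                float tmp = Math.abs(centers[i][j + 1] - point[j]); // skip the ID at index 0
                currentDist += Math.pow(tmp, 2);
            }
            if (minDist > currentDist) {
                minDist = currentDist;
                centerIndex = i;
            }
        }
        System.out.println("Assigned to center " + centerIndex); // prints 0
    }
}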
KMeansReducer.java
package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class KMeansReducer extends Reducer<IntWritable, Text, IntWritable, Text> {
    public void reduce(IntWritable key, Iterable<Text> value, Context context)
    throws IOException, InterruptedException{
        List<ArrayList<Float>> assistList = new ArrayList<ArrayList<Float>>();
        String tmpResult = "";
        for (Text val : value){
            String line = val.toString();
            String[] fields = line.split(" ");
            List<Float> tmpList = new ArrayList<Float>();
            for (int i = 0; i < fields.length; ++i){
                tmpList.add(Float.parseFloat(fields[i]));
            }
            assistList.add((ArrayList<Float>) tmpList);
        }
        // Compute the new cluster center as the component-wise mean of all
        // samples assigned to this cluster.
        for (int i = 0; i < assistList.get(0).size(); ++i){
            float sum = 0;
            for (int j = 0; j < assistList.size(); ++j){
                sum += assistList.get(j).get(i);
            }
            float tmp = sum / assistList.size();
            if (i == 0){
                tmpResult += tmp;
            }
            else{
                tmpResult += " " + tmp;
            }
        }
        Text result = new Text(tmpResult);
        context.write(key, result);
    }
}
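The new center emitted by the reducer is just the component-wise mean of the points assigned to the cluster; for instance, averaging (1.0, 2.0) and (3.0, 4.0) yields (2.0, 3.0). A toy sketch of that computation, with values invented for illustration:

package KMeans;

// Standalone sketch of the component-wise mean computed in reduce();
// the two sample points are hypothetical.
public class MeanSketch {
    public static void main(String[] args) {
        float[][] cluster = { {1.0f, 2.0f}, {3.0f, 4.0f} };
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < cluster[0].length; ++i) {
            float sum = 0;
            for (int j = 0; j < cluster.length; ++j) {
                sum += cluster[j][i];
            }
            float mean = sum / cluster.length;
            result.append(i == 0 ? "" : " ").append(mean);
        }
        System.out.println(result); // prints "2.0 3.0" — the new center
    }
}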
Job run output:
hadoop@shaobo-ThinkPad-E420:~/class$ hadoop jar KMeans.jar KMeans.KMeansDriver input/iris.data output input/oldcenter.data output/part-r-00000 3 0.0001
14/04/17 16:15:50 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:15:51 INFO mapred.JobClient: Running job: job_201404171511_0012
14/04/17 16:15:52 INFO mapred.JobClient: map 0% reduce 0%
14/04/17 16:16:07 INFO mapred.JobClient: map 100% reduce 0%
14/04/17 16:16:19 INFO mapred.JobClient: map 100% reduce 100%
14/04/17 16:16:24 INFO mapred.JobClient: Job complete: job_201404171511_0012
14/04/17 16:16:24 INFO mapred.JobClient: Counters: 25
14/04/17 16:16:24 INFO mapred.JobClient: Job Counters
14/04/17 16:16:24 INFO mapred.JobClient: Launched reduce tasks=1
14/04/17 16:16:24 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=12041
14/04/17 16:16:24 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:16:24 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:16:24 INFO mapred.JobClient: Launched map tasks=1
14/04/17 16:16:24 INFO mapred.JobClient: Data-local map tasks=1
14/04/17 16:16:24 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10030
14/04/17 16:16:24 INFO mapred.JobClient: File Output Format Counters
14/04/17 16:16:24 INFO mapred.JobClient: Bytes Written=125
14/04/17 16:16:24 INFO mapred.JobClient: FileSystemCounters
14/04/17 16:16:24 INFO mapred.JobClient: FILE_BYTES_READ=3306
14/04/17 16:16:24 INFO mapred.JobClient: HDFS_BYTES_READ=11214
14/04/17 16:16:24 INFO mapred.JobClient: FILE_BYTES_WRITTEN=48901
14/04/17 16:16:24 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=125
14/04/17 16:16:24 INFO mapred.JobClient: File Input Format Counters
14/04/17 16:16:24 INFO mapred.JobClient: Bytes Read=2550
14/04/17 16:16:24 INFO mapred.JobClient: Map-Reduce Framework
14/04/17 16:16:24 INFO mapred.JobClient: Reduce input groups=3
14/04/17 16:16:24 INFO mapred.JobClient: Map output materialized bytes=3306
14/04/17 16:16:24 INFO mapred.JobClient: Combine output records=0
14/04/17 16:16:24 INFO mapred.JobClient: Map input records=150
14/04/17 16:16:24 INFO mapred.JobClient: Reduce shuffle bytes=0
14/04/17 16:16:24 INFO mapred.JobClient: Reduce output records=3
14/04/17 16:16:24 INFO mapred.JobClient: Spilled Records=300
14/04/17 16:16:24 INFO mapred.JobClient: Map output bytes=3000
14/04/17 16:16:24 INFO mapred.JobClient: Combine input records=0
14/04/17 16:16:24 INFO mapred.JobClient: Map output records=150
14/04/17 16:16:24 INFO mapred.JobClient: SPLIT_RAW_BYTES=114
14/04/17 16:16:24 INFO mapred.JobClient: Reduce input records=150
We have repeated 1 times.
Distance = 0.35025704 Threshold = 1.0E-4
14/04/17 16:16:24 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:16:25 INFO mapred.JobClient: Running job: job_201404171511_0013
14/04/17 16:16:26 INFO mapred.JobClient: map 0% reduce 0%
14/04/17 16:16:40 INFO mapred.JobClient: map 100% reduce 0%
14/04/17 16:16:52 INFO mapred.JobClient: map 100% reduce 100%
14/04/17 16:16:57 INFO mapred.JobClient: Job complete: job_201404171511_0013
14/04/17 16:16:57 INFO mapred.JobClient: Counters: 25
14/04/17 16:16:57 INFO mapred.JobClient: Job Counters
14/04/17 16:16:57 INFO mapred.JobClient: Launched reduce tasks=1
14/04/17 16:16:57 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=12077
14/04/17 16:16:57 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:16:57 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:16:57 INFO mapred.JobClient: Launched map tasks=1
14/04/17 16:16:57 INFO mapred.JobClient: Data-local map tasks=1
14/04/17 16:16:57 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10048
14/04/17 16:16:57 INFO mapred.JobClient: File Output Format Counters
14/04/17 16:16:57 INFO mapred.JobClient: Bytes Written=116
14/04/17 16:16:57 INFO mapred.JobClient: FileSystemCounters
14/04/17 16:16:57 INFO mapred.JobClient: FILE_BYTES_READ=3306
14/04/17 16:16:57 INFO mapred.JobClient: HDFS_BYTES_READ=21414
14/04/17 16:16:57 INFO mapred.JobClient: FILE_BYTES_WRITTEN=48901
14/04/17 16:16:57 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=116
14/04/17 16:16:57 INFO mapred.JobClient: File Input Format Counters
14/04/17 16:16:57 INFO mapred.JobClient: Bytes Read=2550
14/04/17 16:16:57 INFO mapred.JobClient: Map-Reduce Framework
14/04/17 16:16:57 INFO mapred.JobClient: Reduce input groups=3
14/04/17 16:16:57 INFO mapred.JobClient: Map output materialized bytes=3306
14/04/17 16:16:57 INFO mapred.JobClient: Combine output records=0
14/04/17 16:16:57 INFO mapred.JobClient: Map input records=150
14/04/17 16:16:57 INFO mapred.JobClient: Reduce shuffle bytes=3306
14/04/17 16:16:57 INFO mapred.JobClient: Reduce output records=3
14/04/17 16:16:57 INFO mapred.JobClient: Spilled Records=300
14/04/17 16:16:57 INFO mapred.JobClient: Map output bytes=3000
14/04/17 16:16:57 INFO mapred.JobClient: Combine input records=0
14/04/17 16:16:57 INFO mapred.JobClient: Map output records=150
14/04/17 16:16:57 INFO mapred.JobClient: SPLIT_RAW_BYTES=114
14/04/17 16:16:57 INFO mapred.JobClient: Reduce input records=150
We have repeated 2 times.
Distance = 0.006297064 Threshold = 1.0E-4
14/04/17 16:16:57 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:16:58 INFO mapred.JobClient: Running job: job_201404171511_0014
14/04/17 16:16:59 INFO mapred.JobClient: map 0% reduce 0%
14/04/17 16:17:14 INFO mapred.JobClient: map 100% reduce 0%
14/04/17 16:17:25 INFO mapred.JobClient: map 100% reduce 100%
14/04/17 16:17:30 INFO mapred.JobClient: Job complete: job_201404171511_0014
14/04/17 16:17:30 INFO mapred.JobClient: Counters: 25
14/04/17 16:17:30 INFO mapred.JobClient: Job Counters
14/04/17 16:17:30 INFO mapred.JobClient: Launched reduce tasks=1
14/04/17 16:17:30 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=12046
14/04/17 16:17:30 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:17:30 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:17:30 INFO mapred.JobClient: Launched map tasks=1
14/04/17 16:17:30 INFO mapred.JobClient: Data-local map tasks=1
14/04/17 16:17:30 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10051
14/04/17 16:17:30 INFO mapred.JobClient: File Output Format Counters
14/04/17 16:17:30 INFO mapred.JobClient: Bytes Written=116
14/04/17 16:17:30 INFO mapred.JobClient: FileSystemCounters
14/04/17 16:17:30 INFO mapred.JobClient: FILE_BYTES_READ=3306
14/04/17 16:17:30 INFO mapred.JobClient: HDFS_BYTES_READ=20064
14/04/17 16:17:30 INFO mapred.JobClient: FILE_BYTES_WRITTEN=48901
14/04/17 16:17:30 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=116
14/04/17 16:17:30 INFO mapred.JobClient: File Input Format Counters
14/04/17 16:17:30 INFO mapred.JobClient: Bytes Read=2550
14/04/17 16:17:30 INFO mapred.JobClient: Map-Reduce Framework
14/04/17 16:17:30 INFO mapred.JobClient: Reduce input groups=3
14/04/17 16:17:30 INFO mapred.JobClient: Map output materialized bytes=3306
14/04/17 16:17:30 INFO mapred.JobClient: Combine output records=0
14/04/17 16:17:30 INFO mapred.JobClient: Map input records=150
14/04/17 16:17:30 INFO mapred.JobClient: Reduce shuffle bytes=0
14/04/17 16:17:30 INFO mapred.JobClient: Reduce output records=3
14/04/17 16:17:30 INFO mapred.JobClient: Spilled Records=300
14/04/17 16:17:30 INFO mapred.JobClient: Map output bytes=3000
14/04/17 16:17:30 INFO mapred.JobClient: Combine input records=0
14/04/17 16:17:30 INFO mapred.JobClient: Map output records=150
14/04/17 16:17:30 INFO mapred.JobClient: SPLIT_RAW_BYTES=114
14/04/17 16:17:30 INFO mapred.JobClient: Reduce input records=150
We have repeated 3 times.
Distance = 0.0 Threshold = 1.0E-4
14/04/17 16:17:30 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:17:30 INFO mapred.JobClient: Running job: job_201404171511_0015
14/04/17 16:17:31 INFO mapred.JobClient: map 0% reduce 0%
14/04/17 16:17:47 INFO mapred.JobClient: map 100% reduce 0%
14/04/17 16:17:59 INFO mapred.JobClient: map 100% reduce 100%
14/04/17 16:18:04 INFO mapred.JobClient: Job complete: job_201404171511_0015
14/04/17 16:18:04 INFO mapred.JobClient: Counters: 25
14/04/17 16:18:04 INFO mapred.JobClient: Job Counters
14/04/17 16:18:04 INFO mapred.JobClient: Launched reduce tasks=1
14/04/17 16:18:04 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=12036
14/04/17 16:18:04 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:18:04 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:18:04 INFO mapred.JobClient: Launched map tasks=1
14/04/17 16:18:04 INFO mapred.JobClient: Data-local map tasks=1
14/04/17 16:18:04 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10050
14/04/17 16:18:04 INFO mapred.JobClient: File Output Format Counters
14/04/17 16:18:04 INFO mapred.JobClient: Bytes Written=2700
14/04/17 16:18:04 INFO mapred.JobClient: FileSystemCounters
14/04/17 16:18:04 INFO mapred.JobClient: FILE_BYTES_READ=3306
14/04/17 16:18:04 INFO mapred.JobClient: HDFS_BYTES_READ=20064
14/04/17 16:18:04 INFO mapred.JobClient: FILE_BYTES_WRITTEN=48717
14/04/17 16:18:04 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=2700
14/04/17 16:18:04 INFO mapred.JobClient: File Input Format Counters
14/04/17 16:18:04 INFO mapred.JobClient: Bytes Read=2550
14/04/17 16:18:04 INFO mapred.JobClient: Map-Reduce Framework
14/04/17 16:18:04 INFO mapred.JobClient: Reduce input groups=3
14/04/17 16:18:04 INFO mapred.JobClient: Map output materialized bytes=3306
14/04/17 16:18:04 INFO mapred.JobClient: Combine output records=0
14/04/17 16:18:04 INFO mapred.JobClient: Map input records=150
14/04/17 16:18:04 INFO mapred.JobClient: Reduce shuffle bytes=0
14/04/17 16:18:04 INFO mapred.JobClient: Reduce output records=150
14/04/17 16:18:04 INFO mapred.JobClient: Spilled Records=300
14/04/17 16:18:04 INFO mapred.JobClient: Map output bytes=3000
14/04/17 16:18:04 INFO mapred.JobClient: Combine input records=0
14/04/17 16:18:04 INFO mapred.JobClient: Map output records=150
14/04/17 16:18:04 INFO mapred.JobClient: SPLIT_RAW_BYTES=114
14/04/17 16:18:04 INFO mapred.JobClient: Reduce input records=150