KMeans Clustering Algorithm: A Hadoop Implementation

Original article: http://blog.csdn.net/jdplus/article/details/23960127/

Assistance.java - helper class; each method's purpose is explained in the in-code comments.

package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

import java.io.IOException;
import java.util.*;

public class Assistance {
    // Read the cluster center information: cluster center ID followed by the center coordinates
    public static List<ArrayList<Float>> getCenters(String inputpath){
        List<ArrayList<Float>> result = new ArrayList<ArrayList<Float>>();
        Configuration conf = new Configuration();
        try {
            FileSystem hdfs = FileSystem.get(conf);
            Path in = new Path(inputpath);
            FSDataInputStream fsIn = hdfs.open(in);
            LineReader lineIn = new LineReader(fsIn, conf);
            Text line = new Text();
            while (lineIn.readLine(line) > 0){
                String record = line.toString();
                /*
                Hadoop writes a tab between the key and the value of each
                output record, so replace it with a space before splitting.
                */
                String[] fields = record.replace("\t", " ").split(" ");
                ArrayList<Float> tmplist = new ArrayList<Float>();
                for (int i = 0; i < fields.length; ++i){
                    tmplist.add(Float.parseFloat(fields[i]));
                }
                result.add(tmplist);
            }
            fsIn.close();
        } catch (IOException e){
            e.printStackTrace();
        }
        return result;
    }

    // Delete the output of the previous MapReduce job
    public static void deleteLastResult(String path){
        Configuration conf = new Configuration();
        try {
            FileSystem hdfs = FileSystem.get(conf);
            Path path1 = new Path(path);
            hdfs.delete(path1, true);
        } catch (IOException e){
            e.printStackTrace();
        }
    }

    // Compute the distance between the cluster centers of two consecutive
    // iterations and decide whether the termination condition is met
    public static boolean isFinished(String oldpath, String newpath, int k, float threshold)
    throws IOException{
        List<ArrayList<Float>> oldcenters = Assistance.getCenters(oldpath);
        List<ArrayList<Float>> newcenters = Assistance.getCenters(newpath);
        float distance = 0;
        for (int i = 0; i < k; ++i){
            // start at j = 1: field 0 of a center record is the cluster ID, not a coordinate
            for (int j = 1; j < oldcenters.get(i).size(); ++j){
                float tmp = Math.abs(oldcenters.get(i).get(j) - newcenters.get(i).get(j));
                distance += Math.pow(tmp, 2);
            }
        }
        System.out.println("Distance = " + distance + " Threshold = " + threshold);
        if (distance < threshold)
            return true;
        /*
        If the termination condition is not met, replace the old centers
        with the centers produced by this iteration.
        */
        Assistance.deleteLastResult(oldpath);
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        hdfs.copyToLocalFile(new Path(newpath), new Path("/home/hadoop/class/oldcenter.data"));
        hdfs.delete(new Path(oldpath), true);
        hdfs.moveFromLocalFile(new Path("/home/hadoop/class/oldcenter.data"), new Path(oldpath));
        return false;
    }
}
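For reference, each line that getCenters reads has the same layout as a reducer output record: the cluster ID, a tab, then the space-separated center coordinates. The following standalone sketch (the sample record is invented for illustration, not taken from the article's data) shows the same parsing step outside of Hadoop. Field 0 is the cluster ID, which is why the mapper later reads coordinates starting at index 1.

import java.util.ArrayList;
import java.util.List;

// Standalone illustration of the record format parsed by getCenters.
// The sample record below is invented for illustration only.
public class CenterRecordDemo {
    public static void main(String[] args) {
        String record = "0\t5.0 3.4 1.5 0.2";          // "<clusterId>\t<coordinates>"
        String[] fields = record.replace("\t", " ").split(" ");
        List<Float> center = new ArrayList<Float>();
        for (String f : fields) {
            center.add(Float.parseFloat(f));           // index 0 is the cluster ID
        }
        System.out.println("cluster " + center.get(0).intValue()
                + " has " + (center.size() - 1) + " coordinates");
    }
}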




KMeansDriver.java - job driver class




package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class KMeansDriver{
    public static void main(String[] args) throws Exception{
        int repeated = 0;

        /*
        Keep submitting MapReduce jobs until the distance between the cluster
        centers of two consecutive iterations falls below the threshold or the
        configured maximum number of iterations is reached.
        */
        do {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 6){
                System.err.println("Usage: <in> <out> <oldcenters> <newcenters> <k> <threshold>");
                System.exit(2);
            }
            conf.set("centerpath", otherArgs[2]);
            conf.set("kpath", otherArgs[4]);
            Job job = new Job(conf, "KMeansCluster");// create the MapReduce job
            job.setJarByClass(KMeansDriver.class);// set the job's main class

            Path in = new Path(otherArgs[0]);
            Path out = new Path(otherArgs[1]);
            FileInputFormat.addInputPath(job, in);// set the input path
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(out)){// delete the output path if it already exists
                fs.delete(out, true);
            }
            FileOutputFormat.setOutputPath(job, out);// set the output path

            job.setMapperClass(KMeansMapper.class);// set the Mapper class
            job.setReducerClass(KMeansReducer.class);// set the Reducer class

            job.setOutputKeyClass(IntWritable.class);// set the output key class
            job.setOutputValueClass(Text.class);// set the output value class

            job.waitForCompletion(true);// submit the job and wait for completion

            ++repeated;
            System.out.println("We have repeated " + repeated + " times.");
        } while (repeated < 10 && (Assistance.isFinished(args[2], args[3], Integer.parseInt(args[4]), Float.parseFloat(args[5])) == false));
        // cluster the data set using the final cluster centers
        Cluster(args);
    }

    public static void Cluster(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException{
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 6){
            System.err.println("Usage: <in> <out> <oldcenters> <newcenters> <k> <threshold>");
            System.exit(2);
        }
        conf.set("centerpath", otherArgs[2]);
        conf.set("kpath", otherArgs[4]);
        Job job = new Job(conf, "KMeansCluster");
        job.setJarByClass(KMeansDriver.class);

        Path in = new Path(otherArgs[0]);
        Path out = new Path(otherArgs[1]);
        FileInputFormat.addInputPath(job, in);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out)){
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);

        // This pass only assigns each sample to a cluster, so no reduce logic
        // is needed and no Reducer class is set.
        job.setMapperClass(KMeansMapper.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);
    }
}
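One practical note: the driver expects the initial centers file (the <oldcenters> argument, input/oldcenter.data in the run below) to already exist in HDFS in the same format the reducer writes. A minimal sketch of one way to create it, assuming a local copy of the data file with space-separated features (as the mapper expects) and simply taking the first k samples as initial centers; the class name and file names are illustrative, not part of the original code:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;

// Hypothetical helper: build an initial centers file from the first k samples.
// Assumes iris.data is available locally and has at least k lines.
public class InitialCenters {
    public static void main(String[] args) throws IOException {
        int k = 3;
        BufferedReader in = new BufferedReader(new FileReader("iris.data"));
        PrintWriter out = new PrintWriter(new FileWriter("oldcenter.data"));
        for (int i = 0; i < k; ++i) {
            // Same layout as the reducer output: "<clusterId>\t<space-separated coords>"
            out.println(i + "\t" + in.readLine());
        }
        out.close();
        in.close();
    }
}

The resulting file can then be uploaded before the first iteration, for example with hadoop fs -put oldcenter.data input/.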

 KMeansMapper.java 





package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class KMeansMapper extends Mapper<Object, Text, IntWritable, Text> {
    public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException{
        String line = value.toString();
        String[] fields = line.split(" ");
        List<ArrayList<Float>> centers = Assistance.getCenters(context.getConfiguration().get("centerpath"));
        int k = Integer.parseInt(context.getConfiguration().get("kpath"));
        float minDist = Float.MAX_VALUE;
        int centerIndex = k;
        // Compute the distance from the sample to every center and assign the
        // sample to the cluster whose center is closest.
        for (int i = 0; i < k; ++i){
            float currentDist = 0;
            for (int j = 0; j < fields.length; ++j){
                // get(j + 1): field 0 of a center record is the cluster ID
                float tmp = Math.abs(centers.get(i).get(j + 1) - Float.parseFloat(fields[j]));
                currentDist += Math.pow(tmp, 2);
            }
            if (minDist > currentDist){
                minDist = currentDist;
                centerIndex = i;
            }
        }
        context.write(new IntWritable(centerIndex), new Text(value));
    }
}
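As written, map() calls Assistance.getCenters for every input record, so the centers file is re-read from HDFS once per sample. A common refinement, shown here only as a sketch of an alternative (the class name KMeansCachedMapper is made up), is to load the centers once per task in setup():

package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Sketch of an alternative mapper that loads the centers once per task in
// setup() instead of once per input record; the assignment logic is the same.
public class KMeansCachedMapper extends Mapper<Object, Text, IntWritable, Text> {
    private List<ArrayList<Float>> centers;
    private int k;

    @Override
    protected void setup(Context context) {
        centers = Assistance.getCenters(context.getConfiguration().get("centerpath"));
        k = Integer.parseInt(context.getConfiguration().get("kpath"));
    }

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(" ");
        float minDist = Float.MAX_VALUE;
        int centerIndex = 0;
        for (int i = 0; i < k; ++i) {
            float currentDist = 0;
            for (int j = 0; j < fields.length; ++j) {
                // field 0 of a center record is the cluster ID, so coordinates start at 1
                float diff = centers.get(i).get(j + 1) - Float.parseFloat(fields[j]);
                currentDist += diff * diff;
            }
            if (currentDist < minDist) {
                minDist = currentDist;
                centerIndex = i;
            }
        }
        context.write(new IntWritable(centerIndex), new Text(value));
    }
}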

 KMeansReducer.java





package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class KMeansReducer extends Reducer<IntWritable, Text, IntWritable, Text> {
    public void reduce(IntWritable key, Iterable<Text> value, Context context)
    throws IOException, InterruptedException{
        List<ArrayList<Float>> assistList = new ArrayList<ArrayList<Float>>();
        String tmpResult = "";
        for (Text val : value){
            String line = val.toString();
            String[] fields = line.split(" ");
            ArrayList<Float> tmpList = new ArrayList<Float>();
            for (int i = 0; i < fields.length; ++i){
                tmpList.add(Float.parseFloat(fields[i]));
            }
            assistList.add(tmpList);
        }
        // Compute the new cluster center as the per-dimension mean of the samples in the cluster
        for (int i = 0; i < assistList.get(0).size(); ++i){
            float sum = 0;
            for (int j = 0; j < assistList.size(); ++j){
                sum += assistList.get(j).get(i);
            }
            float tmp = sum / assistList.size();
            if (i == 0){
                tmpResult += tmp;
            }
            else{
                tmpResult += " " + tmp;
            }
        }
        Text result = new Text(tmpResult);
        context.write(key, result);
    }
}
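As a quick sanity check of the averaging step, this small standalone sketch (with invented toy values) computes a centroid the same way the reducer does, one dimension at a time:

// Standalone illustration of the reducer's centroid computation (toy data).
public class CentroidDemo {
    public static void main(String[] args) {
        float[][] samples = { {1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f} };
        StringBuilder result = new StringBuilder();
        for (int dim = 0; dim < samples[0].length; ++dim) {
            float sum = 0;
            for (float[] sample : samples) {
                sum += sample[dim];
            }
            float mean = sum / samples.length;           // per-dimension mean
            result.append(dim == 0 ? "" : " ").append(mean);
        }
        System.out.println(result);                      // prints "3.0 4.0"
    }
}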

Job run output:





hadoop@shaobo-ThinkPad-E420:~/class$ hadoop jar KMeans.jar KMeans.KMeansDriver input/iris.data output input/oldcenter.data output/part-r-00000 3 0.0001
14/04/17 16:15:50 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:15:51 INFO mapred.JobClient: Running job: job_201404171511_0012
14/04/17 16:15:52 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:16:07 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:16:19 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:16:24 INFO mapred.JobClient: Job complete: job_201404171511_0012
14/04/17 16:16:24 INFO mapred.JobClient: Counters: 25
14/04/17 16:16:24 INFO mapred.JobClient:   Job Counters
14/04/17 16:16:24 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:16:24 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12041
14/04/17 16:16:24 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:16:24 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:16:24 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:16:24 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:16:24 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10030
14/04/17 16:16:24 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:16:24 INFO mapred.JobClient:     Bytes Written=125
14/04/17 16:16:24 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:16:24 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:16:24 INFO mapred.JobClient:     HDFS_BYTES_READ=11214
14/04/17 16:16:24 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48901
14/04/17 16:16:24 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=125
14/04/17 16:16:24 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:16:24 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:16:24 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:16:24 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:16:24 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:16:24 INFO mapred.JobClient:     Map input records=150
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce output records=3
14/04/17 16:16:24 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:16:24 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:16:24 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:16:24 INFO mapred.JobClient:     Map output records=150
14/04/17 16:16:24 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce input records=150
We have repeated 1 times.
Distance = 0.35025704 Threshold = 1.0E-4
14/04/17 16:16:24 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:16:25 INFO mapred.JobClient: Running job: job_201404171511_0013
14/04/17 16:16:26 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:16:40 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:16:52 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:16:57 INFO mapred.JobClient: Job complete: job_201404171511_0013
14/04/17 16:16:57 INFO mapred.JobClient: Counters: 25
14/04/17 16:16:57 INFO mapred.JobClient:   Job Counters
14/04/17 16:16:57 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:16:57 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12077
14/04/17 16:16:57 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:16:57 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:16:57 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:16:57 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:16:57 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10048
14/04/17 16:16:57 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:16:57 INFO mapred.JobClient:     Bytes Written=116
14/04/17 16:16:57 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:16:57 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:16:57 INFO mapred.JobClient:     HDFS_BYTES_READ=21414
14/04/17 16:16:57 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48901
14/04/17 16:16:57 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=116
14/04/17 16:16:57 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:16:57 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:16:57 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:16:57 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:16:57 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:16:57 INFO mapred.JobClient:     Map input records=150
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce shuffle bytes=3306
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce output records=3
14/04/17 16:16:57 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:16:57 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:16:57 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:16:57 INFO mapred.JobClient:     Map output records=150
14/04/17 16:16:57 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce input records=150
We have repeated 2 times.
Distance = 0.006297064 Threshold = 1.0E-4
14/04/17 16:16:57 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:16:58 INFO mapred.JobClient: Running job: job_201404171511_0014
14/04/17 16:16:59 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:17:14 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:17:25 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:17:30 INFO mapred.JobClient: Job complete: job_201404171511_0014
14/04/17 16:17:30 INFO mapred.JobClient: Counters: 25
14/04/17 16:17:30 INFO mapred.JobClient:   Job Counters
14/04/17 16:17:30 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:17:30 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12046
14/04/17 16:17:30 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:17:30 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:17:30 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:17:30 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:17:30 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10051
14/04/17 16:17:30 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:17:30 INFO mapred.JobClient:     Bytes Written=116
14/04/17 16:17:30 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:17:30 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:17:30 INFO mapred.JobClient:     HDFS_BYTES_READ=20064
14/04/17 16:17:30 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48901
14/04/17 16:17:30 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=116
14/04/17 16:17:30 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:17:30 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:17:30 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:17:30 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:17:30 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:17:30 INFO mapred.JobClient:     Map input records=150
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce output records=3
14/04/17 16:17:30 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:17:30 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:17:30 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:17:30 INFO mapred.JobClient:     Map output records=150
14/04/17 16:17:30 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce input records=150
We have repeated 3 times.
Distance = 0.0 Threshold = 1.0E-4
14/04/17 16:17:30 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:17:30 INFO mapred.JobClient: Running job: job_201404171511_0015
14/04/17 16:17:31 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:17:47 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:17:59 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:18:04 INFO mapred.JobClient: Job complete: job_201404171511_0015
14/04/17 16:18:04 INFO mapred.JobClient: Counters: 25
14/04/17 16:18:04 INFO mapred.JobClient:   Job Counters
14/04/17 16:18:04 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:18:04 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12036
14/04/17 16:18:04 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:18:04 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:18:04 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:18:04 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:18:04 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10050
14/04/17 16:18:04 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:18:04 INFO mapred.JobClient:     Bytes Written=2700
14/04/17 16:18:04 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:18:04 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:18:04 INFO mapred.JobClient:     HDFS_BYTES_READ=20064
14/04/17 16:18:04 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48717
14/04/17 16:18:04 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=2700
14/04/17 16:18:04 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:18:04 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:18:04 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:18:04 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:18:04 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:18:04 INFO mapred.JobClient:     Map input records=150
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce output records=150
14/04/17 16:18:04 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:18:04 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:18:04 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:18:04 INFO mapred.JobClient:     Map output records=150
14/04/17 16:18:04 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce input records=150
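After the final pass, the cluster assignment for each sample (one line per sample, keyed by cluster ID) ends up in the job's output directory and can be inspected directly from HDFS, for example (the exact part file name may vary):

hadoop fs -cat output/part-r-00000 | head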







