xt value, Context context)
076 | throws IOException, InterruptedException { |
078 | kv = value.toString().split( "\t" ); |
080 | if (userMap.containsKey(kv[ 0 ]) && sexMap.containsKey(kv[ 1 ])) { |
081 | oKey.set(userMap.get(kv[ 0 ]) + "\t" + sexMap.get(kv[ 1 ])); |
083 | context.write(oKey, oValue); |
089 | public static class Reduce extends Reducer<Text, Text, Text, Text> { |
091 | private Text oValue = new Text(); |
093 | public void reduce(Text key, Iterable<Text> values, Context context) |
094 | throws IOException, InterruptedException { |
097 | for (Text val : values) { |
098 | sumCount += Integer.parseInt(val.toString()); |
100 | oValue.set(String.valueOf(sumCount)); |
101 | context.write(key, oValue); |
106 | public int run(String[] args) throws Exception { |
107 | Job job = new Job(getConf(), "MultiTableJoin" ); |
109 | job.setJobName( "MultiTableJoin" ); |
110 | job.setJarByClass(MultiTableJoin. class ); |
111 | job.setMapperClass(MapClass. class ); |
112 | job.setReducerClass(Reduce. class ); |
114 | job.setInputFormatClass(TextInputFormat. class ); |
115 | job.setOutputFormatClass(TextOutputFormat. class ); |
117 | job.setOutputKeyClass(Text. class ); |
118 | job.setOutputValueClass(Text. class ); |
120 | String[] otherArgs = new GenericOptionsParser(job.getConfiguration(), |
121 | args).getRemainingArgs(); |
124 | DistributedCache.addCacheFile( new Path(otherArgs[ 1 ]).toUri(), job |
125 | .getConfiguration()); |
126 | DistributedCache.addCacheFile( new Path(otherArgs[ 2 ]).toUri(), job |
127 | .getConfiguration()); |
129 | FileInputFormat.addInputPath(job, new Path(otherArgs[ 3 ])); |
130 | FileOutputFormat.setOutputPath(job, new Path(otherArgs[ 4 ])); |
132 | return job.waitForCompletion( true ) ? 0 : 1 ; |
135 | public static void main(String[] args) throws Exception { |
136 | int res = ToolRunner.run( new Configuration(), new MultiTableJoin(), |
运行命令:
hadoop jar MultiTableJoin.jar MultiTableJoin /test/decli/sex /test/decli/user /test/decli/login /test/decli/output
4、结果:
运行结果:
root@master 192.168.120.236 02:47:18 ~/test/table >
hadoop fs -cat /test/decli/output/*|column -t
cat: File does not exist: /test/decli/output/_logs
张三 男 4
李四 男 2
王五 女 2
赵六 女 2
root@master 192.168.120.236 02:47:26 ~/test/table >
TIPS: 转自 http://my.oschina.net/leejun2005/blog/111963