Map/Reduce API样例二

环境:
CentOS 6.5, Eclipse 4.4.2, Hadoop 1.1.2


任务目标:从数据源的每条记录中提取IP,按IP的第一段数字(第一个"."之前的部分)分组,输出每组对应的IP列表


一、数据源准备

在hdfs://vm1:9000/user/hadoop/in目录中上传了两个数据文件,test1.txt和test2.txt

内容如下:

test1.txt

MAY 12:10:12 192.158.202 calvin
THR 11:22:23 192.168.22.3 james
THR 22:33:22 192.155.23.22 john
FRI 23:22:12 158.129.234.23 kate
LL 
DDI 23:11:33 192.168.11.10 frame

test2.txt

EIG 12:10:12 192.158.202 calvin
OCT 11:22:23 192.168.22.3 james
NUM 22:33:22 192.155.23.22 john
SEC 23:22:12 158.129.234.23 kate


二、程序编写

import java.io.IOException;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class Test_2 extends Configured implements Tool {


<span style="white-space:pre">	</span>enum Counter {
<span style="white-space:pre">		</span>LINE_SKIP
<span style="white-space:pre">	</span>}
<span style="white-space:pre">	</span>
<span style="white-space:pre">	</span>
<span style="white-space:pre">	</span>public static class Map_2 extends Mapper<LongWritable, Text, Text, Text> {


<span style="white-space:pre">		</span>@Override
<span style="white-space:pre">		</span>protected void map(LongWritable key, Text value, Context context)
<span style="white-space:pre">				</span>throws IOException, InterruptedException {
<span style="white-space:pre">			</span>
<span style="white-space:pre">			</span>String line = value.toString(); 
<span style="white-space:pre">			</span>
<span style="white-space:pre">			</span>try {
<span style="white-space:pre">				</span>String[] lineSplits = line.split(" ");
<span style="white-space:pre">				</span>String ip = lineSplits[2];
<span style="white-space:pre">				</span>
<span style="white-space:pre">				</span>String k = ip.split("\\.")[0];
<span style="white-space:pre">				</span>
<span style="white-space:pre">				</span>
<span style="white-space:pre">				</span>context.write(new Text(k), new Text(ip));
<span style="white-space:pre">				</span>
<span style="white-space:pre">			</span>} catch (Exception e) {
<span style="white-space:pre">				</span>e.printStackTrace();
<span style="white-space:pre">				</span>
<span style="white-space:pre">				</span>context.getCounter(Counter.LINE_SKIP).increment(1);
<span style="white-space:pre">			</span>}
<span style="white-space:pre">		</span>}
<span style="white-space:pre">		</span>
<span style="white-space:pre">	</span>}
<span style="white-space:pre">	</span>
<span style="white-space:pre">	</span>public static class Reducer_2 extends Reducer<Text, Text, Text, Text> {


<span style="white-space:pre">		</span>@Override
<span style="white-space:pre">		</span>protected void reduce(Text key, Iterable<Text> values, Context context)
<span style="white-space:pre">				</span>throws IOException, InterruptedException {
<span style="white-space:pre">			</span>
<span style="white-space:pre">			</span>String out = "";
<span style="white-space:pre">			</span>
<span style="white-space:pre">			</span>for (Text value : values) {
<span style="white-space:pre">				</span>out += value.toString() + "|";
<span style="white-space:pre">			</span>}
<span style="white-space:pre">			</span>
<span style="white-space:pre">			</span>context.write(key, new Text(out));
<span style="white-space:pre">		</span>}
<span style="white-space:pre">		</span>
<span style="white-space:pre">	</span>}
<span style="white-space:pre">	</span>
<span style="white-space:pre">	</span>@Override
<span style="white-space:pre">	</span>public int run(String[] args) throws Exception {
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>Configuration conf = this.getConf();
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>Job job = new Job(conf, "Test_2");
<span style="white-space:pre">		</span>job.setJarByClass(Test_2.class);
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>FileInputFormat.addInputPath(job, new Path(args[0]));
<span style="white-space:pre">		</span>FileOutputFormat.setOutputPath(job, new Path(args[1]));
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>job.setMapperClass(Map_2.class);
<span style="white-space:pre">		</span>job.setReducerClass(Reducer_2.class);
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>job.setOutputFormatClass(TextOutputFormat.class);
<span style="white-space:pre">		</span>job.setOutputKeyClass(Text.class);
<span style="white-space:pre">		</span>job.setOutputValueClass(Text.class);
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>job.waitForCompletion(true);
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>return job.isSuccessful() ? 1 : 0;
<span style="white-space:pre">	</span>}
<span style="white-space:pre">	</span>
<span style="white-space:pre">	</span>public static void main(String[] args) throws Exception {
<span style="white-space:pre">		</span>
<span style="white-space:pre">		</span>int res = ToolRunner.run(new Configuration(), new Test_2(), args);
<span style="white-space:pre">		</span>System.exit(res);
<span style="white-space:pre">		</span>
<span style="white-space:pre">	</span>}


}

三、在Eclipse中运行程序 

Run As -> Run Configurations... -> Arguments选项卡 -> 在Program Arguments中填入 hdfs://vm1:9000/user/hadoop/in hdfs://vm1:9000/user/hadoop/out -> Run(注意:输出目录out在运行前不能已存在,否则作业会报错)


四、运行结果

在hdfs://vm1:9000/user/hadoop/out/part-r-00000文件中查看结果

158	158.129.234.23|158.129.234.23|
192	192.158.202|192.168.22.3|192.155.23.22|192.168.11.10|192.158.202|192.168.22.3|192.155.23.22|


五、打包运行

1. 将工程打成jar包(hadoop-test.jar),并指定Main Class为Test_2

2. 执行命令 hadoop jar hadoop-test.jar hdfs://vm1:9000/user/hadoop/in hdfs://vm1:9000/user/hadoop/out 运行程序(如果输出目录已存在,需先执行 hadoop fs -rmr hdfs://vm1:9000/user/hadoop/out 将其删除)


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值