环境:
CentOS 6.5, Eclipse 4.4.2, Hadoop 1.1.2
任务目标:按 IP 地址的第一段数字对数据源中的记录分组,输出每个分组对应的 IP 列表
一、数据源准备
在hdfs://vm1:9000/user/hadoop/in目录中上传了两个数据文件,test1.txt和test2.txt
内容如下:
test1.txt
MAY 12:10:12 192.158.202 calvin
THR 11:22:23 192.168.22.3 james
THR 22:33:22 192.155.23.22 john
FRI 23:22:12 158.129.234.23 kate
LL
DDI 23:11:33 192.168.11.10 frame
test2.txt
EIG 12:10:12 192.158.202 calvin
OCT 11:22:23 192.168.22.3 james
NUM 22:33:22 192.155.23.22 john
SEC 23:22:12 158.129.234.23 kate
二、程序编写
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Test_2 extends Configured implements Tool {
<span style="white-space:pre"> </span>enum Counter {
<span style="white-space:pre"> </span>LINE_SKIP
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>public static class Map_2 extends Mapper<LongWritable, Text, Text, Text> {
<span style="white-space:pre"> </span>@Override
<span style="white-space:pre"> </span>protected void map(LongWritable key, Text value, Context context)
<span style="white-space:pre"> </span>throws IOException, InterruptedException {
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>String line = value.toString();
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>try {
<span style="white-space:pre"> </span>String[] lineSplits = line.split(" ");
<span style="white-space:pre"> </span>String ip = lineSplits[2];
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>String k = ip.split("\\.")[0];
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>context.write(new Text(k), new Text(ip));
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>} catch (Exception e) {
<span style="white-space:pre"> </span>e.printStackTrace();
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>context.getCounter(Counter.LINE_SKIP).increment(1);
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>public static class Reducer_2 extends Reducer<Text, Text, Text, Text> {
<span style="white-space:pre"> </span>@Override
<span style="white-space:pre"> </span>protected void reduce(Text key, Iterable<Text> values, Context context)
<span style="white-space:pre"> </span>throws IOException, InterruptedException {
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>String out = "";
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>for (Text value : values) {
<span style="white-space:pre"> </span>out += value.toString() + "|";
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>context.write(key, new Text(out));
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>@Override
<span style="white-space:pre"> </span>public int run(String[] args) throws Exception {
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>Configuration conf = this.getConf();
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>Job job = new Job(conf, "Test_2");
<span style="white-space:pre"> </span>job.setJarByClass(Test_2.class);
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>FileInputFormat.addInputPath(job, new Path(args[0]));
<span style="white-space:pre"> </span>FileOutputFormat.setOutputPath(job, new Path(args[1]));
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>job.setMapperClass(Map_2.class);
<span style="white-space:pre"> </span>job.setReducerClass(Reducer_2.class);
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>job.setOutputFormatClass(TextOutputFormat.class);
<span style="white-space:pre"> </span>job.setOutputKeyClass(Text.class);
<span style="white-space:pre"> </span>job.setOutputValueClass(Text.class);
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>job.waitForCompletion(true);
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>return job.isSuccessful() ? 1 : 0;
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>public static void main(String[] args) throws Exception {
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>int res = ToolRunner.run(new Configuration(), new Test_2(), args);
<span style="white-space:pre"> </span>System.exit(res);
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>}
}
三、在Eclipse中运行程序
Run As -> Run Configurations -> Arguments 选项卡 -> 在 Program arguments 中填入 hdfs://vm1:9000/user/hadoop/in hdfs://vm1:9000/user/hadoop/out -> Run
四、运行结果
在hdfs://vm1:9000/user/hadoop/out/part-r-00000文件中查看结果
158	158.129.234.23|158.129.234.23|
192	192.158.202|192.168.22.3|192.155.23.22|192.168.11.10|192.158.202|192.168.22.3|192.155.23.22|
五、打包运行
1. 将工程打成jar包(hadoop-test.jar),并指定Main Class为Test_2
2. 执行命令 hadoop jar hadoop-test.jar hdfs://vm1:9000/user/hadoop/in hdfs://vm1:9000/user/hadoop/out 运行程序