MapReduce单表关联

由于书中给出的代码可读性比较差,我自己稍微修改了一下。

此处是child-parent表,要求输出grandchild-grandparent表

input file:

child parent
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma

package me.river.study.hadoop.mr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Single-table self-join MapReduce job.
 *
 * <p>The input is a whitespace-separated "child parent" table (optionally with a
 * header line whose first token is {@code child}). The job joins the table with
 * itself on the shared middle person and writes "grandchild grandparent" pairs.
 */
public class STjoin {
	/** Tag for a mapper value that carries a child of the key (the "left table"). */
	private static final String CHILD_TAG = "1";
	/** Tag for a mapper value that carries a parent of the key (the "right table"). */
	private static final String PARENT_TAG = "2";

	/**
	 * Emits every child-parent record twice: once keyed by the parent (value is
	 * the tagged child) and once keyed by the child (value is the tagged parent),
	 * so that a person's children and parents end up under the same reduce key.
	 */
	public static class STMapper extends Mapper<LongWritable, Text, Text, Text> {
		private final Text outKey = new Text();
		private final Text outValue = new Text();

		/**
		 * Processes one input line.
		 *
		 * @param key     input key supplied by the input format (unused)
		 * @param value   one line of the child-parent table
		 * @param context sink for the two tagged records
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Trim and split on runs of whitespace so that leading/trailing blanks
			// and repeated separators do not produce empty names.
			String[] fields = value.toString().trim().split("\\s+");

			// Skip blank or incomplete lines (previously an ArrayIndexOutOfBoundsException)
			// as well as the "child parent" header line.
			if (fields.length < 2 || "child".equals(fields[0])) {
				return;
			}
			String childname = fields[0];
			String parentname = fields[1];

			// Left table: under the parent's key, record one of its children.
			outKey.set(parentname);
			outValue.set(CHILD_TAG + "\t" + childname);
			context.write(outKey, outValue);

			// Right table: under the child's key, record one of its parents.
			outKey.set(childname);
			outValue.set(PARENT_TAG + "\t" + parentname);
			context.write(outKey, outValue);
		}
	}

	/**
	 * For each person, collects that person's children and parents and writes the
	 * cross product of the two lists as grandchild-grandparent pairs.
	 */
	public static class STReducer extends Reducer<Text, Text, Text, Text> {
		private final Text outKey = new Text();
		private final Text outValue = new Text();

		// Instance state rather than a static field on the outer class: a static flag
		// would be shared (without synchronization) by every reducer instance living
		// in the same JVM, so only the first of them would ever write the header.
		private boolean headerWritten = false;

		/**
		 * Joins the children and parents of {@code key}.
		 *
		 * @param key     the person linking the two generations
		 * @param values  tagged names produced by {@link STMapper}
		 * @param context sink for the header and the grandchild-grandparent pairs
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			// Header row, written before the first key this reducer instance handles.
			if (!headerWritten) {
				outKey.set("grandchild");
				outValue.set("grandparent");
				context.write(outKey, outValue);
				headerWritten = true;
			}

			// Children of the key are grandchildren of the key's parents.
			List<String> grandchildren = new ArrayList<String>();
			List<String> grandparents = new ArrayList<String>();
			for (Text value : values) {
				// Limit of 2 keeps an empty trailing name instead of dropping it.
				String[] parts = value.toString().split("\t", 2);
				if (parts.length < 2) {
					continue; // not a "tag<TAB>name" record
				}
				if (CHILD_TAG.equals(parts[0])) {
					grandchildren.add(parts[1]);
				} else {
					grandparents.add(parts[1]);
				}
			}

			// Cross product; naturally emits nothing when either list is empty.
			for (String grandchild : grandchildren) {
				for (String grandparent : grandparents) {
					outKey.set(grandchild);
					outValue.set(grandparent);
					context.write(outKey, outValue);
				}
			}
		}
	}

	/**
	 * Configures and runs the job.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @throws Exception if the job cannot be set up or fails while running
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 2) {
			System.err.println("Usage: STjoin <input path> <output path>");
			System.exit(2);
		}
		Job job = Job.getInstance(new Configuration(), "single table join");
		job.setJarByClass(STjoin.class);
		job.setMapperClass(STMapper.class);
		job.setReducerClass(STReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

输出结果:

grandchild      grandparent
Tom     Alice
Tom     Jesse
Jone    Alice
Jone    Jesse
Tom     Ben
Tom     Mary
Jone    Ben
Jone    Mary
Philip  Alice
Philip  Jesse
Mark    Alice
Mark    Jesse

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值