1.Bean
package com.oracle.join;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
public class InfoBean implements Writable {
    private int order_id;      // order id (order file)
    private String dateString; // order date (order file)
    private String p_id;       // product id: the join key shared by both files
    private int amout;         // quantity ordered (order file)
    private String pname;      // product name (product file)
    private int category_id;   // product category (product file)
    private float price;       // unit price (product file)
    private String flag;       // record tag: "0" = order record, "1" = product record
    // Deserialization: fields must be read in exactly the order write() emits them
    @Override
    public void readFields(DataInput in) throws IOException {
        this.order_id = in.readInt();
        this.dateString = in.readUTF();
        this.p_id = in.readUTF();
        this.amout = in.readInt();
        this.pname = in.readUTF();
        this.category_id = in.readInt();
        this.price = in.readFloat();
        this.flag = in.readUTF();
    }
    // Serialization: the field order here defines the wire format
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(order_id);
        out.writeUTF(dateString);
        out.writeUTF(p_id);
        out.writeInt(amout);
        out.writeUTF(pname);
        out.writeInt(category_id);
        out.writeFloat(price);
        out.writeUTF(flag);
    }
    // Constructors, getters/setters, and toString() omitted; see the sketch below
}
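The rest of the code only compiles with the members elided above: the mappers call a full-argument constructor, Hadoop needs a public no-arg constructor to instantiate the Writable before calling readFields(), the reducer relies on getters/setters (BeanUtils.copyProperties requires both for every property it copies), and the final output goes through toString(). A minimal sketch of those members; the toString() column layout is an assumption:

public InfoBean() {
    // Required by Hadoop: Writables are created reflectively, then readFields() is called
}

public InfoBean(int order_id, String dateString, String p_id, int amout,
        String pname, int category_id, float price, String flag) {
    this.order_id = order_id;
    this.dateString = dateString;
    this.p_id = p_id;
    this.amout = amout;
    this.pname = pname;
    this.category_id = category_id;
    this.price = price;
    this.flag = flag;
}

public String getFlag() { return flag; }
public String getPname() { return pname; }
public void setPname(String pname) { this.pname = pname; }
public int getCategory_id() { return category_id; }
public void setCategory_id(int category_id) { this.category_id = category_id; }
public float getPrice() { return price; }
public void setPrice(float price) { this.price = price; }
// ...getters/setters for order_id, dateString, p_id, and amout follow the same pattern

@Override
public String toString() {
    // Column order is an assumption; adjust to the desired output layout
    return order_id + "\t" + dateString + "\t" + amout + "\t" + pname
            + "\t" + category_id + "\t" + price;
}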
2.Map
Map1:
package com.oracle.join;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class JoinMap1 extends Mapper<LongWritable, Text, Text, InfoBean> {
    private Text keys;
    private InfoBean values;
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Order line: order_id date p_id amount, space-separated
        String[] line = value.toString().split(" ");
        // Join key is the product id; flag "0" tags this as an order record,
        // and the product-side fields are filled with placeholders
        keys = new Text(line[2]);
        values = new InfoBean(Integer.parseInt(line[0]), line[1], line[2],
                Integer.parseInt(line[3]), "", 0, 0f, "0");
        context.write(keys, values);
    }
}
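Given the indices used above, an order line is expected to carry four space-separated fields; a sample line (values are hypothetical):

1001 2017-08-04 p0001 2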
Map2:
package com.oracle.join;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class JoinMap2 extends Mapper<LongWritable, Text, Text, InfoBean> {
    private Text keys;
    private InfoBean values;
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Product line: p_id pname category_id price, space-separated
        String[] line = value.toString().split(" ");
        // Same join key (product id); flag "1" tags this as a product record,
        // and the order-side fields are filled with placeholders
        keys = new Text(line[0]);
        values = new InfoBean(0, "", line[0], 0, line[1],
                Integer.parseInt(line[2]), Float.parseFloat(line[3]), "1");
        context.write(keys, values);
    }
}
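And a matching product line (again with hypothetical values):

p0001 xiaomi 1000 1999.9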
3.Reduce
package com.oracle.join;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class JoinReduce extends Reducer<Text, InfoBean, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<InfoBean> values, Context context)
            throws IOException, InterruptedException {
        InfoBean infoProduct = new InfoBean();
        List<InfoBean> list = new ArrayList<InfoBean>();
        // Hadoop reuses the same InfoBean instance while iterating values,
        // so every record we want to keep must be copied into a fresh object
        for (InfoBean value : values) {
            if (value.getFlag().equals("1")) {
                // Product record: at most one per key
                try {
                    BeanUtils.copyProperties(infoProduct, value);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            } else {
                // Order record: collect all of them for this product id
                try {
                    InfoBean infoOrder = new InfoBean();
                    BeanUtils.copyProperties(infoOrder, value);
                    list.add(infoOrder);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        // Stamp the product fields onto every order and emit the joined rows
        for (InfoBean val : list) {
            val.setPname(infoProduct.getPname());
            val.setCategory_id(infoProduct.getCategory_id());
            val.setPrice(infoProduct.getPrice());
            context.write(key, new Text(val.toString()));
        }
    }
}
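The copy step is what makes this reducer correct: because Hadoop hands back the same InfoBean instance on each iteration, storing the reference directly would fill the list with copies of the last record. If you would rather not depend on commons-beanutils, Hadoop's own WritableUtils.clone performs the same deep copy by round-tripping the Writable through its own serialization; a sketch of the loop rewritten that way:

import org.apache.hadoop.io.WritableUtils;

for (InfoBean value : values) {
    if (value.getFlag().equals("1")) {
        // clone() serializes and deserializes the bean, yielding an independent copy
        infoProduct = WritableUtils.clone(value, context.getConfiguration());
    } else {
        list.add(WritableUtils.clone(value, context.getConfiguration()));
    }
}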
4.main
package com.oracle.join;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class JoinMain implements Tool {
    private Configuration conf;
    @Override
    public Configuration getConf() {
        // Lazily create the Configuration instead of building a new one on every call
        if (conf == null) {
            conf = new Configuration();
        }
        return conf;
    }
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }
    @Override
    public int run(String[] args) throws Exception {
        // Use the Configuration held by this Tool rather than a fresh instance
        Job job = Job.getInstance(getConf());
        job.setJarByClass(JoinMain.class);
        job.setJobName("JoinMain");
        // No single Mapper class is set here; MultipleInputs binds one mapper per input path
        /*job.setMapperClass(JoinMap.class);*/
        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, JoinMap1.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, JoinMap2.class);
        job.setReducerClass(JoinReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(InfoBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        /*FileInputFormat.addInputPath(job, new Path(args[0]));*/
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        int rel = ToolRunner.run(new JoinMain(), args);
        System.exit(rel);
    }
}
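A common simplification of this driver is to extend Configured, which supplies getConf()/setConf() and lets ToolRunner inject generic options (-D key=value, -files, ...) without any hand-written plumbing; a sketch, with the job setup unchanged:

package com.oracle.join;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class JoinMain extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // getConf() is inherited from Configured
        Job job = Job.getInstance(getConf());
        // ...same job setup as above...
        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new JoinMain(), args));
    }
}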
5.Run command
hadoop jar *.jar /data/maps/input1 /data/maps/input2 /data/maps/output
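The three arguments map to args[0] (the order input), args[1] (the product input), and args[2] (the output directory, which must not already exist or the job fails at startup). If the jar's manifest does not declare a Main-Class, pass the driver class explicitly (the jar name here is hypothetical):

hadoop jar join.jar com.oracle.join.JoinMain /data/maps/input1 /data/maps/input2 /data/maps/output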