package com.jym.hadoop.mr.rjoin;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RJoin {

    static class RJoinMapper extends Mapper<LongWritable, Text, Text, InfoBean> {
        InfoBean bean = new InfoBean();
        Text k = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            FileSplit inputSplit = (FileSplit) context.getInputSplit();
            String name = inputSplit.getPath().getName();
            String pid;

            // Use the file name to tell which dataset the record comes from,
            // then split the line accordingly.
            if (name.startsWith("order")) {
                // Order record: id, date, pid, amount; flag "0" marks orders.
                String[] fields = line.split(",");
                pid = fields[2];
                bean.set(Integer.parseInt(fields[0]), fields[1], fields[2],
                        Integer.parseInt(fields[3]), "", 0, 0, "0");
            } else {
                // Product record: pid, pname, category_id, price; flag "1" marks products.
                String[] fields = line.split(",");
                pid = fields[0];
                bean.set(0, "", pid, 0, fields[1],
                        Integer.parseInt(fields[2]), Float.parseFloat(fields[3]), "1");
            }
            // Emit the product id as the key so that the order records and the
            // matching product record meet in the same reduce() call.
            k.set(pid);
            context.write(k, bean);
        }
    }

    static class RJoinReducer extends Reducer<Text, InfoBean, InfoBean, NullWritable> {
        InfoBean pdBean = new InfoBean();
        ArrayList<InfoBean> orderBeans = new ArrayList<>();

        @Override
        protected void reduce(Text pid, Iterable<InfoBean> beans, Context context)
                throws IOException, InterruptedException {
            // The reducer instance is reused across keys, so clear the cached
            // orders here; otherwise orders from the previous pid leak into
            // this one. (This job assumes every pid has exactly one product record.)
            orderBeans.clear();

            for (InfoBean bean : beans) {
                if ("1".equals(bean.getFlag())) {
                    // Product record: copy it out, because Hadoop reuses the
                    // same bean object across iterations.
                    try {
                        BeanUtils.copyProperties(pdBean, bean);
                    } catch (IllegalAccessException | InvocationTargetException e) {
                        e.printStackTrace();
                    }
                } else {
                    // Order record: copy into a fresh bean before caching it.
                    InfoBean odBean = new InfoBean();
                    try {
                        BeanUtils.copyProperties(odBean, bean);
                        orderBeans.add(odBean);
                    } catch (IllegalAccessException | InvocationTargetException e) {
                        e.printStackTrace();
                    }
                }
            }

            // Join the two datasets: enrich each order with the product fields.
            for (InfoBean bean : orderBeans) {
                bean.setPname(pdBean.getPname());
                bean.setCategory_id(pdBean.getCategory_id());
                bean.setPrice(pdBean.getPrice());
                context.write(bean, NullWritable.get());
            }
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Point the job at the jar containing this class.
        job.setJarByClass(RJoin.class);

        // Mapper and reducer classes for this job.
        job.setMapperClass(RJoinMapper.class);
        job.setReducerClass(RJoinReducer.class);

        // Key/value types emitted by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(InfoBean.class);

        // Key/value types of the final output.
        job.setOutputKeyClass(InfoBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Input directory (uncomment the args[0] line to pass it on the command line).
        // FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileInputFormat.setInputPaths(job, new Path("G:/wordcount/mapreduce/input"));

        // Output directory (uncomment the args[1] line to pass it on the command line).
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path("G:/wordcount/mapreduce/output"));

        // Submit the job configuration and jar to the cluster (e.g. YARN) and wait.
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
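
// ---------------------------------------------------------------------------
// Hedged sketch of InfoBean. The original InfoBean class is not shown in this
// file; the package-private version below is reconstructed purely from the
// calls made in RJoin (the eight-argument set(), the getters/setters used by
// the reducer, and the "0"/"1" flag convention), so the field names and the
// serialization order are assumptions, not the author's original code. The
// bean must implement Hadoop's Writable so it can be shuffled between map and
// reduce, write()/readFields() must handle the fields in the same order, and
// toString() determines what the reducer ultimately writes to the output files.
// ---------------------------------------------------------------------------
class InfoBean implements org.apache.hadoop.io.Writable {
    private int order_id;       // order id (from "order" files)
    private String dateString;  // order date
    private String p_id;        // product id: the join key
    private int amount;         // order quantity
    private String pname;       // product name (from product files)
    private int category_id;    // product category
    private float price;        // product price
    private String flag;        // "0" = order record, "1" = product record

    // Hadoop instantiates Writables reflectively, so a no-arg constructor is required.
    public InfoBean() {}

    public void set(int order_id, String dateString, String p_id, int amount,
                    String pname, int category_id, float price, String flag) {
        this.order_id = order_id;
        this.dateString = dateString;
        this.p_id = p_id;
        this.amount = amount;
        this.pname = pname;
        this.category_id = category_id;
        this.price = price;
        this.flag = flag;
    }

    // Full getter/setter pairs so BeanUtils.copyProperties can copy every field.
    public int getOrder_id() { return order_id; }
    public void setOrder_id(int order_id) { this.order_id = order_id; }
    public String getDateString() { return dateString; }
    public void setDateString(String dateString) { this.dateString = dateString; }
    public String getP_id() { return p_id; }
    public void setP_id(String p_id) { this.p_id = p_id; }
    public int getAmount() { return amount; }
    public void setAmount(int amount) { this.amount = amount; }
    public String getPname() { return pname; }
    public void setPname(String pname) { this.pname = pname; }
    public int getCategory_id() { return category_id; }
    public void setCategory_id(int category_id) { this.category_id = category_id; }
    public float getPrice() { return price; }
    public void setPrice(float price) { this.price = price; }
    public String getFlag() { return flag; }
    public void setFlag(String flag) { this.flag = flag; }

    @Override
    public void write(java.io.DataOutput out) throws java.io.IOException {
        out.writeInt(order_id);
        out.writeUTF(dateString);
        out.writeUTF(p_id);
        out.writeInt(amount);
        out.writeUTF(pname);
        out.writeInt(category_id);
        out.writeFloat(price);
        out.writeUTF(flag);
    }

    @Override
    public void readFields(java.io.DataInput in) throws java.io.IOException {
        order_id = in.readInt();
        dateString = in.readUTF();
        p_id = in.readUTF();
        amount = in.readInt();
        pname = in.readUTF();
        category_id = in.readInt();
        price = in.readFloat();
        flag = in.readUTF();
    }

    @Override
    public String toString() {
        // Tab-separated joined record: order fields followed by product fields.
        return order_id + "\t" + dateString + "\t" + p_id + "\t" + amount
                + "\t" + pname + "\t" + category_id + "\t" + price;
    }
}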