一句sql就能解决的事,
package com.zz.bd; import org.apache.commons.beanutils.BeanUtils; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; public class joinsele { // 偏移量 每一行的文本 商品id 多个项封装成自定义Bean static class joinMapper extends Mapper<LongWritable, Text, Text, InfoBean> { InfoBean bean = new InfoBean(); Text text = new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); //两张表,不确定是那张表,所以根据分布获得路径获得name进行判断 FileSplit inputSplit = (FileSplit) context.getInputSplit(); String name = inputSplit.getPath().getName(); String pid = ""; if (name.startsWith("order")) { String[] fields = line.split(","); //implement writerable 序列化 反射 null 空惨调用报错 //所以加个默认值 pid = fields[2]; bean.set(Integer.parseInt(fields[0]), fields[1], pid, Integer.parseInt(fields[3]), "", 0, 0, "0"); } else { String[] fields = line.split(","); pid = fields[0]; bean.set(0, "", pid, 0, fields[1], Integer.parseInt(fields[2]), Float.parseFloat(fields[3]), "1"); } text.set(pid); context.write(text, bean); } } static class joinReduce extends Reducer<Text, InfoBean, InfoBean, NullWritable> { @Override protected void reduce(Text key, Iterable<InfoBean> values, Context context) throws IOException, InterruptedException { InfoBean pdbean = new InfoBean(); ArrayList<InfoBean> orderBeans = new ArrayList<>(); for (InfoBean bean : values) { if ("1".equals(bean.getFlag())) { try { BeanUtils.copyProperties(pdbean, bean); } catch (Exception e) { e.printStackTrace(); } } else { InfoBean odbean = new InfoBean(); try { //BeanUtils.copyProperties(bean, odbean); BeanUtils.copyProperties(odbean, bean); orderBeans.add(odbean); } catch (Exception e) { e.printStackTrace(); } } } //拼接两类数据形成最终结果 //select p_id pname from b1 inner join b2 on p_id=id for (InfoBean bean:orderBeans){ bean.setPname(pdbean.getPname()); bean.setCategory_id(pdbean.getCategory_id()); bean.setPrice(pdbean.getPrice()); context.write(bean, NullWritable.get()); } } } public static void main(String[] args) { } }
package com.zz.bd; import org.apache.hadoop.io.Writable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class InfoBean implements Writable { private int order_id; private String dateString; private String p_id; private int amount; private String pname; private int category_id; private float price; @Override public String toString() { return "{" + "order_id=" + order_id + ", dateString='" + dateString + '\'' + ", p_id=" + p_id + ", amount=" + amount + ", pname='" + pname + '\'' + ", category_id=" + category_id + ", price=" + price + ", flag='" + flag + '\'' + '}'; } /* 封装标志位 为0 表示这个对象是封装订单表记录 为1 表示这个对象是封装产品信息记录 */ private String flag; public String getFlag() { return flag; } public void setFlag(String flag) { this.flag = flag; } public InfoBean() { } // public void set(int order_id, String dateString, String p_id, int amount, String pname, int category_id, float price, String flag) { this.order_id = order_id; this.dateString = dateString; this.p_id = p_id; this.amount = amount; this.pname = pname; this.category_id = category_id; this.price = price; this.flag = flag; } public int getOrder_id() { return order_id; } public void setOrder_id(int order_id) { this.order_id = order_id; } public String getDateString() { return dateString; } public void setDateString(String dateString) { this.dateString = dateString; } public String getP_id() { return p_id; } public void setP_id(String p_id) { this.p_id = p_id; } public int getAmount() { return amount; } public void setAmount(int amount) { this.amount = amount; } public String getPname() { return pname; } public void setPname(String pname) { this.pname = pname; } public int getCategory_id() { return category_id; } public void setCategory_id(int category_id) { this.category_id = category_id; } public float getPrice() { return price; } public void setPrice(float price) { this.price = price; } /* private int order_id; private String dateString; private int p_id; private int amount; private String pname; private int category_id; private float price; */ @Override public void write(DataOutput dataOutput) throws IOException { dataOutput.writeInt(order_id); dataOutput.writeUTF(dateString); dataOutput.writeUTF(p_id); dataOutput.writeInt(amount); dataOutput.writeUTF(pname); dataOutput.writeInt(category_id); dataOutput.writeFloat(price); dataOutput.writeUTF(flag); } @Override public void readFields(DataInput dataInput) throws IOException { this.order_id = dataInput.readInt(); this.dateString = dataInput.readUTF(); this.p_id = dataInput.readUTF(); this.amount = dataInput.readInt(); this.pname = dataInput.readUTF(); this.category_id = dataInput.readInt(); this.price = dataInput.readFloat(); this.flag = dataInput.readUTF(); } }