Hadoop案例:Reduce join - 用户数据与订单数据

目录

一、输入——数据集

二、输出(形式同mysql的“join”-二表关联查询)

三、思路

四、实现

Maven依赖

Bean类——UserOrderBean 

Driver类——UserOrderDriver

五、结果


一、输入——数据集

1、user.txt(用户数据)

u001,senge,18,male,angelababy
u002,xiaoli,58,male,ruhua
u003,shuaishuai,16,female,chunge
u004,laoyang,28,female,zengge
u005,nana,24,female,huangbo
u006,dingding,19,male,taojiji

2、order1.txt(订单数据1)

order011,u001
order012,u001
order033,u005
order034,u002
order055,u003
order066,u004

3、order2.txt(订单数据2)

order001,u006
order002,u006
order003,u005
order004,u006
order005,u003
order006,u002

二、输出(形式同mysql的“join”-二表关联查询)

order011,u001,senge,18,male,angelababy
order012,u001,senge,18,male,angelababy
order033,u005,nana,24,female,huangbo
order034,u002,xiaoli,58,male,ruhua
order055,u003,shuaishuai,16,female,chunge
order066,u004,laoyang,28,female,zengge

三、思路

1、在mapper中,通过文件名判断文件类型,不同文件类型的数据分别处理,并设置标记,封装Bean对象输出;

2、以uid(用户数据文件的第一列、订单数据文件的第二列)为key进行汇总,使相同uid的用户数据与订单数据从mapper汇聚到reducer的同一个分组;

3、在reducer中,通过标记识别不同文件数据,将用户数据向订单数据中填充,并排序输出

四、实现

windows环境下,需要解压hadoop,配置HADOOP_HOME与PATH环境变量(并在bin目录下准备winutils.exe)

Maven依赖

<properties>
        <!-- Single Hadoop version shared by every hadoop-* artifact below. -->
        <hadoop.version>3.3.0</hadoop.version>
    </properties>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
        <!-- NOTE(review): fastjson is not referenced by the sample code shown
             in this article — presumably used elsewhere in the project; verify
             before keeping it. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/cglib/cglib -->
        <!-- NOTE(review): the reducer uses org.springframework.cglib.beans.BeanCopier
             (Spring's repackaged cglib), not this standalone cglib artifact —
             confirm whether this dependency is actually needed. -->
        <dependency>
            <groupId>cglib</groupId>
            <artifactId>cglib</artifactId>
            <version>3.3.0</version>
        </dependency>

        <!-- NOTE(review): presumably pulled in for its transitive spring-core,
             which provides org.springframework.cglib used by the reducer —
             TODO confirm; depending on spring-core directly would be clearer. -->
        <dependency>
            <groupId>org.wicketstuff</groupId>
            <artifactId>wicketstuff-springreference</artifactId>
            <version>9.0.1-M3</version>
        </dependency>


        <!--添加hdfs的客户端依赖-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

Bean类——UserOrderBean 

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Hadoop {@link Writable} bean carrying the combined user/order fields of the
 * reduce-side join between the map and reduce phases.
 *
 * <p>Order records fill only {@code orderId}/{@code uid}; user records use the
 * sentinel orderId {@code "userInfo"} and fill the remaining fields. The
 * reducer merges the two by uid and emits this bean via {@link #toString()}
 * in CSV form.
 *
 * <p>Not thread-safe; Hadoop reuses instances via {@link #readFields}.
 */
public class UserOrderBean implements Writable {
    private String orderId;
    private String uid;
    private String userName;
    private Integer age;
    private String sex;
    private String dishName;

    /** No-arg constructor required by Hadoop's Writable reflection. */
    public UserOrderBean() {
    }

    public UserOrderBean(String orderId, String uid, String userName, Integer age, String sex, String dishName) {
        this.orderId = orderId;
        this.uid = uid;
        this.userName = userName;
        this.age = age;
        this.sex = sex;
        this.dishName = dishName;
    }

    /** Serializes all fields; null-safe so a partially filled bean cannot NPE mid-write. */
    @Override
    public void write(DataOutput out) throws IOException {
        // DataOutput.writeUTF(null) and unboxing a null Integer both throw
        // NullPointerException, so substitute neutral defaults on the wire.
        out.writeUTF(orderId == null ? "" : orderId);
        out.writeUTF(uid == null ? "" : uid);
        out.writeUTF(userName == null ? "" : userName);
        out.writeInt(age == null ? 0 : age);
        out.writeUTF(sex == null ? "" : sex);
        out.writeUTF(dishName == null ? "" : dishName);
    }

    /** Overwrites every field, so instance reuse by Hadoop is safe. */
    @Override
    public void readFields(DataInput in) throws IOException {
        orderId = in.readUTF();
        uid = in.readUTF();
        userName = in.readUTF();
        age = in.readInt();
        sex = in.readUTF();
        dishName = in.readUTF();
    }

    /** CSV line used directly as the job's output record. */
    @Override
    public String toString() {
        return orderId + "," + uid + "," + userName + "," + age + "," + sex + "," + dishName;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    public String getDishName() {
        return dishName;
    }

    public void setDishName(String dishName) {
        this.dishName = dishName;
    }
}

Driver类——UserOrderDriver

import com.hermesfuxi.hdfs.application.userorder.domain.UserOrderBean;
import com.hermesfuxi.hdfs.utils.LoggerUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.springframework.cglib.beans.BeanCopier;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

/**
 * Driver for a classic reduce-side join: user records ({@code user*.txt}) and
 * order records ({@code order*.txt}) are tagged in the mapper, grouped by uid,
 * and merged in the reducer into {@code orderId,uid,userName,age,sex,dishName}
 * lines.
 */
public class UserOrderDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        job.setJarByClass(UserOrderDriver.class);
        job.setNumReduceTasks(1);

        job.setMapperClass(UserOrderMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrderBean.class);

        job.setReducerClass(UserOrderReducer.class);
        // FIX: the reducer is declared Reducer<Text, UserOrderBean, UserOrderBean, NullWritable>,
        // so the job's output key class must be UserOrderBean, not Text.
        job.setOutputKeyClass(UserOrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // Paths may be supplied on the command line; the original hard-coded
        // Windows paths remain the defaults for backward compatibility.
        String inputPath = args.length > 0 ? args[0] : "E:\\join\\input";
        String outputPath = args.length > 1 ? args[1] : "E:\\join\\output";
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }

    /**
     * Tags each input line by source file: user lines become a bean with the
     * sentinel orderId "userInfo", order lines become a plain order bean.
     * Emits (uid, bean) so both sides of the join meet in one reduce group.
     */
    private static class UserOrderMapper extends Mapper<LongWritable, Text, Text, UserOrderBean> {
        private String fileName;
        private final Text text = new Text();

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // The split's file name tells us whether this task reads user or order data.
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            fileName = fileSplit.getPath().getName();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] strings = value.toString().split(",");
            String uidKey;
            UserOrderBean userOrderBean;
            // Distinguish user vs. order records by file name; user beans carry
            // the sentinel orderId "userInfo" so the reducer can tell them apart.
            if (fileName.startsWith("user") && strings.length >= 5) {
                uidKey = strings[0];
                userOrderBean = new UserOrderBean("userInfo", strings[0], strings[1], Integer.parseInt(strings[2]), strings[3], strings[4]);
            } else if (strings.length >= 2) {
                uidKey = strings[1];
                // Fields absent from order records get neutral defaults so
                // Writable serialization never sees null.
                userOrderBean = new UserOrderBean(strings[0], strings[1], "", 0, "", "");
            } else {
                LoggerUtils.error("数据或系统解析错误, data: " + value.toString());
                return;
            }
            // Key by uid so matching user and order records land in the same group.
            text.set(uidKey);
            context.write(text, userOrderBean);
        }
    }

    /**
     * Joins one uid's records: remembers the single user-info bean, copies
     * every order bean (Hadoop reuses the iterator's value object), then fills
     * the user fields into each order and emits it.
     */
    private static class UserOrderReducer extends Reducer<Text, UserOrderBean, UserOrderBean, NullWritable> {
        private static final BeanCopier BEAN_COPIER = BeanCopier.create(UserOrderBean.class, UserOrderBean.class, false);

        @Override
        protected void reduce(Text key, Iterable<UserOrderBean> values, Context context) throws IOException, InterruptedException {
            List<UserOrderBean> userOrderBeanList = new ArrayList<>();
            String userName = null;
            Integer age = null;
            String sex = null;
            String dishName = null;
            for (UserOrderBean value : values) {
                // The "userInfo" sentinel marks the user record; its fields are
                // spread onto every order in this group.
                if ("userInfo".equals(value.getOrderId())) {
                    userName = value.getUserName();
                    age = value.getAge();
                    sex = value.getSex();
                    dishName = value.getDishName();
                } else {
                    // Hadoop reuses the value instance across iterations, so a
                    // copy must be taken before storing it in the list.
                    UserOrderBean orderInfoBean = new UserOrderBean();
                    BEAN_COPIER.copy(value, orderInfoBean, null);
                    userOrderBeanList.add(orderInfoBean);
                }
            }
            if (StringUtils.isBlank(userName)) {
                // Orders without a matching user record are still emitted below
                // (with empty user fields); this mirrors the original behavior.
                LoggerUtils.error("数据有误!!!!!!!!!!");
            }
            // FIX: every bean in this group shares the same uid, so sorting by
            // uid was a no-op; sort by orderId to produce a deterministic order.
            userOrderBeanList.sort(Comparator.comparing(UserOrderBean::getOrderId));
            for (UserOrderBean userOrderBean : userOrderBeanList) {
                userOrderBean.setUserName(userName);
                userOrderBean.setAge(age);
                userOrderBean.setSex(sex);
                userOrderBean.setDishName(dishName);
                context.write(userOrderBean, NullWritable.get());
            }
        }
    }
}

五、结果

order012,u001,senge,18,male,angelababy
order011,u001,senge,18,male,angelababy
order006,u002,xiaoli,58,male,ruhua
order034,u002,xiaoli,58,male,ruhua
order055,u003,shuaishuai,16,female,chunge
order005,u003,shuaishuai,16,female,chunge
order066,u004,laoyang,28,female,zengge
order003,u005,nana,24,female,huangbo
order033,u005,nana,24,female,huangbo
order004,u006,dingding,19,male,taojiji
order002,u006,dingding,19,male,taojiji
order001,u006,dingding,19,male,taojiji

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值