Implementing a reduce-side join in MapReduce with Java (the CustomerOrder use case)

Data download link: https://pan.baidu.com/s/1hN1HBr_JPj0MdO3mEZLF7w
Extraction code: xgxm
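
The idea of a reduce-side join: the mapper reads both tables, tags every record with the table it came from, and emits it keyed by the join column (here the customer id); the shuffle then delivers all records for one customer to a single reduce call, where the join is assembled. Judging from the field indices used in the mapper below, the two comma-separated input files are assumed to look roughly like this (hypothetical sample rows, not the actual dataset contents):

customers.csv: customer_id,customer_name,...
               1,Richard,...
orders.csv:    order_id,order_date,customer_id,order_status
               4712,2013-08-23 00:00:00,1,CLOSED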

The CustomerOrders class

package reduceJointest;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Author Bright
 * @Date 2020/12/3
 * @Description Join bean carrying the fields of both the customer and order tables, plus a flag marking which table a record came from
 */
public class CustomerOrders implements Writable {
    // customer id
    private String customerId;
    // order id
    private String orderId;
    // customer name
    private String customerName;
    // order status
    private String orderStatus;
    // source-table flag: 1 = order record, 0 = customer record
    private int flag;

    public CustomerOrders() {
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(customerId);
        dataOutput.writeUTF(customerName);
        dataOutput.writeUTF(orderId);
        dataOutput.writeUTF(orderStatus);
        dataOutput.writeInt(flag);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        // fields must be read in exactly the same order write() wrote them
        this.customerId = dataInput.readUTF();
        this.customerName = dataInput.readUTF();
        this.orderId = dataInput.readUTF();
        this.orderStatus = dataInput.readUTF();
        this.flag = dataInput.readInt();
    }

    @Override
    public String toString() {
        return orderId + "\t" + customerName + "\t" + orderStatus;
    }

    public String getCustomerId() {
        return customerId;
    }

    public void setCustomerId(String customerId) {
        this.customerId = customerId;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public String getCustomerName() {
        return customerName;
    }

    public void setCustomerName(String customerName) {
        this.customerName = customerName;
    }

    public String getOrderStatus() {
        return orderStatus;
    }

    public void setOrderStatus(String orderStatus) {
        this.orderStatus = orderStatus;
    }

    public int getFlag() {
        return flag;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }
}
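
A subtle contract of Writable: readFields() must consume the fields in exactly the same order that write() produced them, or every later field gets shifted. A minimal round-trip check of the class above, assuming only the JDK (the sample values are hypothetical):

package reduceJointest;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class CustomerOrdersRoundTripCheck {
    public static void main(String[] args) throws IOException {
        CustomerOrders in = new CustomerOrders();
        in.setCustomerId("1");
        in.setCustomerName("Richard");
        in.setOrderId("4712");
        in.setOrderStatus("CLOSED");
        in.setFlag(1);

        // serialize with write(), exactly as the shuffle would
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bytes));

        // deserialize with readFields() and print the result
        CustomerOrders out = new CustomerOrders();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(out); // expected: 4712	Richard	CLOSED
    }
}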

The CustomerOrderMapper class

package reduceJointest;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

/**
 * @Author Bright
 * @Date 2020/12/3
 * @Description Tags each input record with its source table and emits it keyed by customer id
 */
// output key: customer_id; output value: CustomerOrders
public class CustomerOrderMapper extends Mapper<LongWritable, Text, Text, CustomerOrders> {
    private String name;
    private CustomerOrders customerOrders = new CustomerOrders();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // determine the input file name once per split, so map() can tell order records from customer records
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        name = fileSplit.getPath().getName();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split(",");
        if (name.startsWith("order")) {
            // order record, e.g. 4712,2013-08-23 00:00:00,<customer_id>,<status>
            customerOrders.setCustomerId(split[2]);
            customerOrders.setOrderId(split[0]);
            customerOrders.setOrderStatus(split[3]);
            customerOrders.setFlag(1);
            customerOrders.setCustomerName("");
        } else {
            // customer record: customer_id,customer_name,...
            customerOrders.setCustomerId(split[0]);
            customerOrders.setCustomerName(split[1]);
            customerOrders.setFlag(0);
            customerOrders.setOrderId("");
            customerOrders.setOrderStatus("");
        }
        context.write(new Text(customerOrders.getCustomerId()), customerOrders);
    }
}
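
With the hypothetical sample rows from the introduction, both records are emitted under key "1", so the shuffle groups them into one reduce call (an illustrative view of the intermediate data, not literal framework output):

"1" -> { flag=0, customerId="1", customerName="Richard" }                 (from customers.csv)
"1" -> { flag=1, customerId="1", orderId="4712", orderStatus="CLOSED" }   (from orders.csv)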

The CustomerOrderReducer class

package reduceJointest;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;

/**
 * @Author Bright
 * @Date 2020/12/3
 * @Description Joins the cached customer record onto all order records sharing one customer id
 */
public class CustomerOrderReducer extends Reducer<Text, CustomerOrders, CustomerOrders, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<CustomerOrders> values, Context context) throws IOException, InterruptedException {
        // all records sharing one customer_id arrive here: many order records, at most one customer record
        ArrayList<CustomerOrders> orderBeans = new ArrayList<>();
        CustomerOrders cusBean = new CustomerOrders();

        for (CustomerOrders bean : values) {
            // flag == 1 marks a record from the order table
            if (1 == bean.getFlag()) {
                // the framework reuses the value object, so copy each order out before the next iteration
                CustomerOrders orderBean = new CustomerOrders();
                try {
                    BeanUtils.copyProperties(orderBean, bean);
                } catch (IllegalAccessException | InvocationTargetException e) {
                    e.printStackTrace();
                }
                orderBeans.add(orderBean);
            } else {
                // customer-table record: cache it so its name can be joined onto every order
                try {
                    BeanUtils.copyProperties(cusBean, bean);
                } catch (IllegalAccessException | InvocationTargetException e) {
                    e.printStackTrace();
                }
            }
        }
        // fill in the customer name on every order and emit the joined records
        for (CustomerOrders orderBean : orderBeans) {
            orderBean.setCustomerName(cusBean.getCustomerName());
            context.write(orderBean, NullWritable.get());
        }
    }
}
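
Each joined record is written through CustomerOrders.toString(), i.e. orderId, customerName and orderStatus separated by tabs. With the hypothetical sample rows above, the job would produce the single line:

4712	Richard	CLOSED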

The CustomerOrderDriver class

package reduceJointest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @Author Bright
 * @Date 2020/12/3
 * @Description Configures and submits the reduce-join job
 */
public class CustomerOrderDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. get the configuration and create the job instance
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        // 2. set the class used to locate this program's jar
        job.setJarByClass(CustomerOrderDriver.class);
        // 3. set the Mapper and Reducer classes for this job
        job.setMapperClass(CustomerOrderMapper.class);
        job.setReducerClass(CustomerOrderReducer.class);
        // 4. set the mapper output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CustomerOrders.class);
        // 5. set the final output key/value types
        job.setOutputKeyClass(CustomerOrders.class);
        job.setOutputValueClass(NullWritable.class);
        // 6. set the job's input and output directories
        FileInputFormat.setInputPaths(job, new Path("D:\\scalastu\\kb09\\hadoop_day1201\\data\\join\\reducejoin\\input"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\scalastu\\kb09\\hadoop_day1201\\data\\join\\reducejoin\\output"));
        // 7. submit the job (its configuration plus the jar containing these classes) and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
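
One practical caveat: FileOutputFormat refuses to run if the output directory already exists. A common guard, sketched here under the assumption that it is placed in main() just before job.waitForCompletion(true), with the same output path as above:

        // delete a leftover output directory so reruns don't fail with "output directory already exists"
        org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(configuration);
        Path output = new Path("D:\\scalastu\\kb09\\hadoop_day1201\\data\\join\\reducejoin\\output");
        if (fs.exists(output)) {
            fs.delete(output, true); // true = recursive
        }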

Java/Hadoop Maven dependencies

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>2.8.2</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.21</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.6.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
      <version>2.6.0</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-auth</artifactId>
      <version>2.6.0</version>
    </dependency>
  </dependencies>
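
Note that the reducer uses org.apache.commons.beanutils.BeanUtils. Hadoop 2.6.0 typically pulls this in transitively through hadoop-common, but if the class is missing from your classpath, an explicit dependency inside the <dependencies> block above is safer (the 1.9.4 version here is an assumption; match whatever your cluster uses):

    <dependency>
      <groupId>commons-beanutils</groupId>
      <artifactId>commons-beanutils</artifactId>
      <version>1.9.4</version>
    </dependency>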

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
          <configuration>
            <source>1.8</source>
            <target>1.8</target>
          </configuration>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-assembly-plugin</artifactId>
          <executions>
            <execution>
              <phase>package</phase>
              <goals>
                <goal>single</goal>
              </goals>
            </execution>
          </executions>
          <configuration>
            <descriptorRefs>
              <descriptorRef>jar-with-dependencies</descriptorRef>
            </descriptorRefs>
          </configuration>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>