Java 操作 MapReduce 实现 Reduce Join
数据下载链接: https://pan.baidu.com/s/1hN1HBr_JPj0MdO3mEZLF7w
提取码: xgxm
CustomerOrders类
package reduceJointest;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* @Author Bright
* @Date 2020/12/3
* @Description
*/
/**
 * Join bean carrying one record from either the order table or the customer
 * table, tagged by {@code flag}. Serialized between map and reduce via the
 * Hadoop {@link Writable} contract.
 */
public class CustomerOrders implements Writable {
    // Customer id — the join key.
    private String customerId;
    // Order id (empty for customer-table records).
    private String orderId;
    // Customer name (empty for order-table records; stamped on in the reducer).
    private String customerName;
    // Order status (empty for customer-table records).
    private String orderStatus;
    // Record source flag: 1 = order table, 0 = customer table.
    private int flag;

    /** No-arg constructor required by Hadoop serialization. */
    public CustomerOrders() {
    }

    /**
     * Serializes all fields. The field order here defines the wire format and
     * must match {@link #readFields(DataInput)} exactly.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(customerId);
        dataOutput.writeUTF(customerName);
        dataOutput.writeUTF(orderId);
        dataOutput.writeUTF(orderStatus);
        dataOutput.writeInt(flag);
    }

    /**
     * Deserializes fields in the same order {@link #write(DataOutput)} emitted
     * them.
     *
     * BUG FIX: the original skipped {@code orderStatus}, so {@code readInt()}
     * consumed the UTF-encoded status bytes instead of the flag, corrupting
     * every record shuffled to the reducer.
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.customerId = dataInput.readUTF();
        this.customerName = dataInput.readUTF();
        this.orderId = dataInput.readUTF();
        this.orderStatus = dataInput.readUTF();
        this.flag = dataInput.readInt();
    }

    /** Output format of the joined record: orderId, customerName, orderStatus. */
    @Override
    public String toString() {
        return orderId + "\t" + customerName + "\t" + orderStatus;
    }

    public String getCustomerId() {
        return customerId;
    }

    public void setCustomerId(String customerId) {
        this.customerId = customerId;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public String getCustomerName() {
        return customerName;
    }

    public void setCustomerName(String customerName) {
        this.customerName = customerName;
    }

    public String getOrderStatus() {
        return orderStatus;
    }

    public void setOrderStatus(String orderStatus) {
        this.orderStatus = orderStatus;
    }

    public int getFlag() {
        return flag;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }
}
CustomerOrderMapper类
package reduceJointest;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* @Author Bright
* @Date 2020/12/3
* @Description
*/
//输出key customer_id customerOrders
/**
 * Map side of the reduce join: tags each input line with its source table
 * (order vs. customer, decided by the split's file name) and emits it keyed
 * by customer id.
 */
public class CustomerOrderMapper extends Mapper<LongWritable, Text, Text, CustomerOrders> {
    // File name backing the current split; determined once per task in setup().
    private String name;
    // Reusable output value — Hadoop serializes it on each context.write().
    private CustomerOrders customerOrders = new CustomerOrders();

    /** Resolves the source file name once per split. */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        FileSplit split = (FileSplit) context.getInputSplit();
        name = split.getPath().getName();
    }

    /**
     * Parses one CSV line and emits (customerId, tagged bean). Order files
     * (names starting with "order") carry flag=1; customer files carry flag=0.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        boolean isOrderRecord = name.startsWith("order");
        if (isOrderRecord) {
            // Order line layout: orderId, date, customerId, status.
            customerOrders.setOrderId(fields[0]);
            customerOrders.setCustomerId(fields[2]);
            customerOrders.setOrderStatus(fields[3]);
            customerOrders.setCustomerName("");
            customerOrders.setFlag(1);
        } else {
            // Customer line layout: customerId, customerName, ...
            customerOrders.setCustomerId(fields[0]);
            customerOrders.setCustomerName(fields[1]);
            customerOrders.setOrderId("");
            customerOrders.setOrderStatus("");
            customerOrders.setFlag(0);
        }
        context.write(new Text(customerOrders.getCustomerId()), customerOrders);
    }
}
CustomerOrderReducer类
package reduceJointest;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
/**
* @Author Bright
* @Date 2020/12/3
* @Description
*/
/**
 * Reduce side of the join: for each customer id, gathers all order-side
 * records, captures the customer-side record, then stamps the customer name
 * onto every order and emits it.
 */
public class CustomerOrderReducer extends Reducer<Text, CustomerOrders, CustomerOrders, NullWritable> {

    /**
     * Joins the order records with the customer record sharing this key.
     *
     * @param key     customer id (join key)
     * @param values  mixed order/customer beans, distinguished by flag
     * @param context emits (joined order bean, NullWritable)
     */
    @Override
    protected void reduce(Text key, Iterable<CustomerOrders> values, Context context) throws IOException, InterruptedException {
        ArrayList<CustomerOrders> orderBeans = new ArrayList<>();
        CustomerOrders cusBean = new CustomerOrders();
        for (CustomerOrders bean : values) {
            // Hadoop reuses the value instance across iterations, so each
            // record must be copied out before the iterator overwrites it.
            if (1 == bean.getFlag()) {
                // Order-table record: keep a copy for the join below.
                orderBeans.add(copyOf(bean));
            } else {
                // Customer-table record: at most one per key.
                cusBean = copyOf(bean);
            }
        }
        for (CustomerOrders orderBean : orderBeans) {
            orderBean.setCustomerName(cusBean.getCustomerName());
            context.write(orderBean, NullWritable.get());
        }
    }

    /**
     * Explicit field-by-field copy. Replaces the original
     * BeanUtils.copyProperties reflection copy, which was slow per record,
     * required commons-beanutils, and swallowed its checked exceptions with
     * printStackTrace.
     */
    private static CustomerOrders copyOf(CustomerOrders src) {
        CustomerOrders dst = new CustomerOrders();
        dst.setCustomerId(src.getCustomerId());
        dst.setCustomerName(src.getCustomerName());
        dst.setOrderId(src.getOrderId());
        dst.setOrderStatus(src.getOrderStatus());
        dst.setFlag(src.getFlag());
        return dst;
    }
}
CustomerOrderDriver类
package reduceJointest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
* @Author Bright
* @Date 2020/12/3
* @Description
*/
/**
 * Job driver wiring the reduce-join mapper/reducer together.
 *
 * Generalized: input and output directories may be passed as args[0] and
 * args[1]; when absent, the original hard-coded local paths are used, so
 * existing invocations keep working.
 */
public class CustomerOrderDriver {
    // Defaults preserved from the original hard-coded paths.
    private static final String DEFAULT_INPUT =
            "D:\\scalastu\\kb09\\hadoop_day1201\\data\\join\\reducejoin\\input";
    private static final String DEFAULT_OUTPUT =
            "D:\\scalastu\\kb09\\hadoop_day1201\\data\\join\\reducejoin\\output";

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        // 1. Build the job from the default configuration.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        // 2. Locate the jar containing this driver.
        job.setJarByClass(CustomerOrderDriver.class);
        // 3. Mapper / Reducer classes for this job.
        job.setMapperClass(CustomerOrderMapper.class);
        job.setReducerClass(CustomerOrderReducer.class);
        // 4. Map-output key/value types (differ from the final output types).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CustomerOrders.class);
        // 5. Final output key/value types.
        job.setOutputKeyClass(CustomerOrders.class);
        job.setOutputValueClass(NullWritable.class);
        // 6. Input/output directories (output must not already exist).
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // 7. Submit and block until completion; exit 0 on success.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
java——hadoop——Maven依赖
<dependencies>
<!-- JUnit pinned to a single version; the former duplicate declaration with
     the floating "RELEASE" version is consolidated here to keep builds
     reproducible. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.21</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.6.0</version>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>