(1) MapJoinMapper
Code
package cn.china.kb23.demo4;

import cn.china.kb23.demo3.CustomerOrders;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;

/*
 * Map-side join: the customer table is loaded from the distributed cache
 * in setup(), so the join happens entirely in the map phase.
 */
public class MapJoinMapper extends Mapper<LongWritable, Text, CustomerOrders, NullWritable> {
    // Customer records loaded once in setup(), shared by every map() call
    private ArrayList<CustomerOrders> list = new ArrayList<CustomerOrders>();

    // setup() reads the cached customers file into memory
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles();
        for (URI uri : cacheFiles) {
            System.out.println(uri.getPath());
            String currentFileName = new Path(uri).getName();
            if (currentFileName.startsWith("customers")) {
                String path = uri.getPath();
                BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
                String line;
                while ((line = br.readLine()) != null) {
                    String[] fields = line.split(",");
                    // Keep only the customer id and name; the order fields are filled in map()
                    CustomerOrders customerOrders = new CustomerOrders(
                            Integer.parseInt(fields[0]), fields[1] + fields[2], 0, "", "");
                    list.add(customerOrders);
                }
                br.close();
            }
        }
    }

    // By the time map() runs, every customer record is already in memory
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the order record
        String[] orderFields = value.toString().split(",");
        // Parse the customer id out of the order
        int customerId = Integer.parseInt(orderFields[2]);
        // Build a fresh output record (copying from the cached object, rather than
        // aliasing it, keeps the cached list from being overwritten)
        CustomerOrders customerOrder = new CustomerOrders();
        // If no customer matches, the name stays empty
        customerOrder.setCustomerName("");
        for (CustomerOrders customer : list) {
            if (customerId == customer.getCustomerId()) {
                customerOrder.setCustomerName(customer.getCustomerName());
            }
        }
        // Fill in the order fields
        customerOrder.setOrderStatus(orderFields[3]);
        customerOrder.setOrderId(Integer.parseInt(orderFields[0]));
        customerOrder.setCustomerId(customerId);
        // Mark the record as coming from the orders table
        customerOrder.setFlag("1");
        // Emit the joined record
        context.write(customerOrder, NullWritable.get());
    }
}
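The mapper depends on the CustomerOrders bean from cn.china.kb23.demo3, which is not shown in this section. Because it serves as the map output key, it must implement WritableComparable. Below is a minimal sketch, assuming the field order matches the five-argument constructor used in setup() (customerId, customerName, orderId, orderStatus, flag); the compareTo ordering and toString format are illustrative assumptions, not the actual demo3 class.

package cn.china.kb23.demo3;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Sketch of the CustomerOrders writable assumed by MapJoinMapper;
// the real demo3 class may differ in field order and compareTo logic.
public class CustomerOrders implements WritableComparable<CustomerOrders> {
    private int customerId;
    private String customerName;
    private int orderId;
    private String orderStatus;
    private String flag; // "1" = order record, per the mapper above

    public CustomerOrders() {}

    public CustomerOrders(int customerId, String customerName, int orderId,
                          String orderStatus, String flag) {
        this.customerId = customerId;
        this.customerName = customerName;
        this.orderId = orderId;
        this.orderStatus = orderStatus;
        this.flag = flag;
    }

    // Accessors used by MapJoinMapper
    public int getCustomerId() { return customerId; }
    public void setCustomerId(int customerId) { this.customerId = customerId; }
    public String getCustomerName() { return customerName; }
    public void setCustomerName(String customerName) { this.customerName = customerName; }
    public void setOrderId(int orderId) { this.orderId = orderId; }
    public void setOrderStatus(String orderStatus) { this.orderStatus = orderStatus; }
    public void setFlag(String flag) { this.flag = flag; }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(customerId);
        out.writeUTF(customerName);
        out.writeInt(orderId);
        out.writeUTF(orderStatus);
        out.writeUTF(flag);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        customerId = in.readInt();
        customerName = in.readUTF();
        orderId = in.readInt();
        orderStatus = in.readUTF();
        flag = in.readUTF();
    }

    @Override
    public int compareTo(CustomerOrders o) {
        // Assumed ordering by order id
        return Integer.compare(this.orderId, o.orderId);
    }

    @Override
    public String toString() {
        return orderId + "," + customerName + "," + orderStatus;
    }
}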
(2) MapJoinDriver
Code
package cn.china.kb23.demo4;

import cn.china.kb23.demo3.CustomerOrders;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/*
 * Driver for the map-side join: registers the customers file in the
 * distributed cache and runs a map-only job (zero reducers).
 */
public class MapJoinDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Record the start time
        long start = System.currentTimeMillis();
        // 1. Configuration
        Configuration conf = new Configuration();
        // 2. Job instance
        Job job = Job.getInstance(conf);
        // 3. Jar class
        job.setJarByClass(MapJoinDriver.class);
        // 4. Mapper and map output types
        job.setMapperClass(MapJoinMapper.class);
        job.setMapOutputKeyClass(CustomerOrders.class);
        job.setMapOutputValueClass(NullWritable.class);
        // 5. Input path
        Path inPath = new Path("E:\\springboot\\kb23\\in\\demo3\\orders.csv");
        FileInputFormat.setInputPaths(job, inPath);
        // 6. Output path
        Path outPath = new Path("E:\\springboot\\kb23\\out\\outdemo4");
        FileSystem fs = FileSystem.get(outPath.toUri(), conf);
        // 7. Delete the output directory if it already exists
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);
        // 8. Map-only job: no reduce tasks
        job.setNumReduceTasks(0);
        // 9. Add the customers file to the distributed cache.
        //    The cached table is loaded into each mapper's memory, so it must be small!
        Path cachePath = new Path("E:\\springboot\\kb23\\in\\demo3\\customers.csv");
        job.addCacheFile(cachePath.toUri());
        // 10. Run the job
        job.waitForCompletion(true);
        // Record the end time and print the elapsed time
        long end = System.currentTimeMillis();
        System.out.println("Elapsed time: " + (end - start));
    }
}
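The driver above uses local Windows paths, which only works when the job runs in local mode. On a real cluster, the input, output, and cached files all have to be reachable by every task, typically on HDFS. Below is a hedged sketch of a cluster-mode variant; the namenode address and the HDFS paths are placeholders, not part of the original demo.

package cn.china.kb23.demo4;

import cn.china.kb23.demo3.CustomerOrders;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical cluster-mode driver; namenode host, port, and paths are placeholders.
public class MapJoinClusterDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(MapJoinClusterDriver.class);
        job.setMapperClass(MapJoinMapper.class);
        job.setMapOutputKeyClass(CustomerOrders.class);
        job.setMapOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path("hdfs://namenode:9000/kb23/in/orders.csv"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://namenode:9000/kb23/out/outdemo4"));
        // The cached customers file must also be on HDFS so every mapper can fetch it
        job.addCacheFile(new Path("hdfs://namenode:9000/kb23/in/customers.csv").toUri());
        job.setNumReduceTasks(0); // map-only join
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

One caveat: in cluster mode the framework localizes each cached file into the task's working directory, so the mapper would typically open it by its file name rather than by uri.getPath(), which only resolves correctly in local mode.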