MapReduce入门案例——某东手机和电脑销售数据分析
简介
利用Python爬虫从平台爬取部分时段销售数据,按照一定格式存储在文本文档(input目录下的computerfinal.txt和Pone)中。通过MapReduce进行数据分析输出处理结果到output目录下part-r-00000文件(其余文件为crc校验信息等),之后将结果利用前端页面展示。
工程目录
数据源
数据量大概数万条,手机电脑分开存储。
电脑数据computerfinal.txt 部分数据如下
8999.00#--惠普(HP)暗影精灵9 Intel 16.1英寸游戏本 #--电脑(13代i9-13900HX RTX4060 16G 1TBSSD 2.5K 240Hz)#--惠普(HP)OMEN暗影精灵京东自营旗舰店#--1#--2023-5-12
9999.00#--联想(Lenovo)拯救者Y9000P 2023 英特尔酷睿i9 16英寸游戏#--电脑(13代i9-13900HX 16G 1T RTX4060 2.5k 240Hz高色域)#--联想京东自营旗舰店#--2#--2023-5-12
6999.00#--联想(Lenovo)拯救者Y7000P 2023 英特尔酷睿 16英寸电竞游戏#--电脑(13代i5-13500H 16G 1T RTX4050 2.5K 165Hz高色域)#--联想京东自营旗舰店#--3#--2023-5-12
6499.00#--机械革命(MECHREVO)极光Pro 15.6英寸游戏本 #--电脑(i7-12650H 16G 512G RTX4060 165HZ 2.5K屏)#--机械革命京东自营官方旗舰店#--4#--2023-5-12
7999.00#--联想(Lenovo)拯救者Y7000P 2023 英特尔酷睿i7 16英寸电竞游戏#--电脑(13代i7-13700H 16G 1T RTX4060 2.5K高刷高色域)#--联想京东自营旗舰店#--5#--2023-5-12
7199.00#--AppleMacBookAir【教育优惠】13.3 8核M1芯片(7核图形处理器) 8G 256G SSD 深空灰 #--电脑 MGN63CH/A#--Apple产品京东自营旗舰店#--6#--2023-5-12
7098.00#--联想拯救者R9000P 16英寸游戏#--电脑(8核16线程R7-6800H 16G 512G RTX3060 2.5k 165Hz高色域)灰#--联想京东自营旗舰店#--7#--2023-5-12
12999.00#--ROG枪神7 Plus 第13代英特尔酷睿i9 18英寸 星云屏 电竞游戏本#--电脑(i9-13980HX 液金导热 16G 1T RTX4060 2.5K 240Hz P3广色域)#--玩家国度ROG京东自营官方旗舰店#--8#--2023-5-12
手机数据Pone 部分数据如下
6899.00#--Apple iPhone 14 (A2884) 支持移动联通电信5G 双卡双待#-- 星光色 256G#--大盛魁手机数码专营店#--22#--2023-5-20
7899.00#--Apple iPhone 14 Pro (A2892) 128GB 暗紫色 支持移动联通电信5G 双卡双待#--【大王卡】#--中国联通京东自营旗舰店#--28#--2023-5-20
2298.00#--荣耀80 1.6亿像素超清主摄 AI Vlog视频大师 全新Magic OS 7.0系统 5G#-- 8GB+256GB 墨玉青#--荣耀京东自营旗舰店#--30#--2023-5-20
8719.00#--Apple 苹果 iPhone 14 Pro Max(A2896) 全网通5G#-- 深空黑色 全网通 256G#--零疆旗舰店#--34#--2023-5-20
9899.00#--Apple iPhone 14 Pro Max (A2896) 256GB 暗紫色 支持移动联通电信5G 双卡双待#--【活动专享】#--中国移动京东自营官方旗舰店#--36#--2023-5-20
8899.00#--Apple iPhone 14 Pro (A2892) 支持移动联通电信5G 双卡双待#-- 银色 256G#--大盛魁手机数码专营店#--37#--2023-5-20
9899.00#--Apple iPhone 14 Pro Max(A2896)全网通智能5G#-- 暗紫色 256GB#--瑞思拜手机专营店#--40#--2023-5-20
7999.00#--Apple 苹果 iphone 14 pro 5G#-- 银色 256G#--创优数码专营店#--42#--2023-5-20
8654.00#--【12期俛息套餐可选】Apple 苹果 iPhone 14 Pro Max 5G 双卡双待#-- 深空黑色 256G#--头号卖家旗舰店#--44#--2023-5-20
8899.00#--Apple iPhone 14 Pro(A2892)全网通智能5G#-- 暗紫色 256GB#--瑞思拜手机专营店#--45#--2023-5-20
核心MapReduce代码
-
HomemadeOrNot:分别输出该时间段内平台售出的国产手机和非国产手机数量
HomemadeOrNotDriver
package com.csx.HomemadeOrNot; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class HomemadeOrNotDriver { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(HomemadeOrNotDriver.class); job.setMapperClass(HomemadeOrNotMapper.class); job.setReducerClass(HomemadeOrNotReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt")); FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路径不可以存在不可以存在 boolean result = job.waitForCompletion(true); System.exit(result ? 0 : 1); } }
HomemadeOrNotMapper
package com.csx.HomemadeOrNot; import com.csx.Tools.Tools; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class HomemadeOrNotMapper extends Mapper<LongWritable,Text, Text, IntWritable> { private Text outKey = new Text(); private final static IntWritable outValue = new IntWritable(1); @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { String line = value.toString(); String words[] = line.split("#--"); if (Tools.findBrand(words[1])) { outKey.set("国产"); } else { outKey.set("非国产"); } context.write(outKey, outValue); } }
HomemadeOrNotReducer
package com.csx.HomemadeOrNot; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class HomemadeOrNotReducer extends Reducer<Text, IntWritable,Text,IntWritable> { private IntWritable outV = new IntWritable(); @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable value : values) { sum += value.get(); } outV.set(sum); context.write(key,outV); } }
输出结果:part-r-00000
国产 16914 非国产 24720
-
LaptopGPUdata:输出被分析数据源中不同型号主流显卡销售情况
LaptopGPUdataDriver
package com.csx.LaptopGPUdata; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class LaptopGPUdataDriver { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(LaptopGPUdataDriver.class); job.setMapperClass(LaptopGPUdataMapper.class); job.setReducerClass(LaptopGPUdataReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt")); FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路径不可以存在不可以存在 boolean result = job.waitForCompletion(true); System.exit(result ? 0 : 1); } }
LaptopGPUdataMapper
package com.csx.LaptopGPUdata; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class LaptopGPUdataMapper extends Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable outV = new IntWritable(1); private Text outK = new Text(); @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { String line = value.toString(); String[] words = line.split("#--"); if(words[2].contains("RTX4050") || words[2].contains("4050")){ outK.set("RTX4050"); } else if (words[2].contains("RTX4060") || words[2].contains("4060")) { outK.set("RTX4060"); } else if (words[2].contains("RTX4070") || words[2].contains("4070")) { outK.set("RTX4070"); } else if (words[2].contains("RTX4080") || words[2].contains("4080")) { outK.set("RTX4080"); } else if (words[2].contains("RTX4090") || words[2].contains("4090")) { outK.set("RTX4090"); } else if (words[2].contains("RTX3050") || words[2].contains("3050")) { outK.set("RTX3050"); } else if (words[2].contains("RTX3060") || words[2].contains("3060")) { outK.set("RTX3060"); } else if (words[2].contains("RTX3070") || words[2].contains("3070")) { outK.set("RTX3070"); } else if (words[2].contains("RTX3080") || words[2].contains("3080")) { outK.set("RTX3080"); } else if (words[2].contains("RTX3090") || words[2].contains("3090")) { outK.set("RTX3090"); }else { outK.set("其他"); } context.write(outK,outV); } }
LaptopGPUdataReducer
package com.csx.LaptopGPUdata; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class LaptopGPUdataReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private IntWritable outV = new IntWritable(); @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable value:values) { sum += value.get(); } outV.set(sum); context.write(key,outV); } }
输出结果:part-r-00000
RTX3050 3865 RTX3060 1985 RTX3070 319 RTX3080 64 RTX4050 1494 RTX4060 4708 RTX4070 984 RTX4080 694 RTX4090 584 其他 26937
-
MostExpensiveLaptop:输出被分析时间段内笔记本电脑售价前10名的产品信息
MostExpensiveLaptopDriver
package com.csx.MostExpensiveLaptop; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class MostExpensiveLaptopDriver { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(MostExpensiveLaptopDriver.class); job.setMapperClass(MostExpensiveLaptopMapper.class); job.setReducerClass(MostExpensiveLaptopReducer.class); job.setMapOutputKeyClass(LaptopInfoBean.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LaptopInfoBean.class); FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt")); FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路 boolean result = job.waitForCompletion(true); System.exit(result ? 0 : 1); } }
LaptopInfoBean:自定义Bean对象
package com.csx.MostExpensiveLaptop; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class LaptopInfoBean implements Writable , WritableComparable<LaptopInfoBean> { private double price; private String information; public LaptopInfoBean() { } public LaptopInfoBean(double price, String information) { this.price = price; this.information = information; } public double getPrice() { return price; } public String getInformation() { return information; } public void setPrice(double price) { this.price = price; } public void setInformation(String information) { this.information = information; } @Override public void write(DataOutput dataOutput) throws IOException { dataOutput.writeDouble(price); dataOutput.writeUTF(information); } @Override public void readFields(DataInput dataInput) throws IOException { this.price = dataInput.readDouble(); this.information = dataInput.readUTF(); } @Override public int compareTo(LaptopInfoBean o) { int result; // 按照总流量大小,倒序排列 if (this.price > o.price) { result = -1; }else if (this.price < o.price) { result = 1; }else { //如果总流量一样的话按照上行流量再排 result = 0; } return result; } @Override public String toString() { return information+"------------"+price; } }
MostExpensiveLaptopMapper
package com.csx.MostExpensiveLaptop; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; import java.util.Iterator; import java.util.TreeMap; public class MostExpensiveLaptopMapper extends Mapper<LongWritable, Text, LaptopInfoBean,Text> { LaptopInfoBean outK = new LaptopInfoBean(); private Text outV = new Text(); @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LaptopInfoBean, Text>.Context context) throws IOException, InterruptedException { String line = value.toString(); String[] words = line.split("#--"); outK.setPrice(Double.parseDouble(words[0])); outK.setInformation(words[1]+words[2]); context.write(outK,outV); } }
MostExpensiveLaptopReducer
package com.csx.MostExpensiveLaptop; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; import java.util.Iterator; import java.util.TreeMap; public class MostExpensiveLaptopReducer extends Reducer<LaptopInfoBean, Text,Text,LaptopInfoBean> { private TreeMap<LaptopInfoBean,Text> tMap = new TreeMap<LaptopInfoBean, Text>(); @Override protected void reduce(LaptopInfoBean key, Iterable<Text> values, Reducer<LaptopInfoBean, Text, Text, LaptopInfoBean>.Context context) throws IOException, InterruptedException { for (Text val:values) { LaptopInfoBean bean = new LaptopInfoBean(); bean.setInformation(key.getInformation()); bean.setPrice(key.getPrice()); tMap.put(bean,val); if(tMap.size()>10){ tMap.remove(tMap.lastKey()); } } } @Override protected void cleanup(Reducer<LaptopInfoBean, Text, Text, LaptopInfoBean>.Context context) throws IOException, InterruptedException { Iterator<LaptopInfoBean> it =tMap.keySet().iterator(); LaptopInfoBean k; while (it.hasNext()){ k = it.next(); Text val = tMap.get(k); context.write(val,k); } } }
输出结果:part-r-00000
联想ThinkPad商用电脑P15 15.6英寸移动工作站(标配i9-11950H 64G 2T 独显16G A5000 Win11pro 4K屏)------------60999.0 联想电脑ThinkPad P1 2022(0FCD)16英寸高性能轻薄设计师工作站 i9-12900H 32G 2T A5500 600nit触摸4K------------54999.0 外星人(Alienware)全新 m18 18英寸高端游戏本i9-13980HX 64G 2T RTX4090 165Hz 高刷屏高性能电脑1996QB------------51999.0 外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1996:i9 64G 2TB 4090标配 13代处理器 Cherry键盘------------49999.0 外星人(Alienware)全新 m18 18英寸高端游戏本i9-13980HX 64G 2T RTX4090 165Hz 高刷屏高性能电脑1996QB------------49969.0 [新十三代i9]RazerBlade雷蛇灵刃18电竞游戏电脑2.5K-240Hz高刷IPS屏幕 i9-13980HX/RTX4090 官配64G内存/2T固态------------42999.0 雷蛇(Razer)[新十三代i9]RazerBlade雷蛇灵刃18电竞游戏电脑2.5K-240Hz高刷IPS屏幕 i9-13980HX/RTX4090 官配64G内存/2T固态------------41999.0 外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1986Q:i9HX/4080/2.5K 包鼠套装 官方标配------------41098.0 ROG冰刃7 双屏 16英寸 设计师高性能 游戏本电脑(R9 7945HX 32G 1T RTX4090 2.5K 240Hz MiniLED)------------40999.0 外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1986:标配加购包鼠套装 13代处理器 Cherry键盘------------40888.0
-
PhoneRAM:统计并输出被分析时间段内各常见内存容量的手机销量
PhoneRAMDriver
package com.csx.PhoneRAM; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class PhoneRAMDriver { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(PhoneRAMDriver.class); job.setMapperClass(PhoneRAMMapper.class); job.setReducerClass(PhoneRAMReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt")); FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路 boolean result = job.waitForCompletion(true); System.exit(result ? 0 : 1); } }
PhoneRAMMapper
package com.csx.PhoneRAM; import com.csx.Tools.Tools; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class PhoneRAMMapper extends Mapper <LongWritable, Text, Text, IntWritable>{ private final static IntWritable outV = new IntWritable(1); private Text outK = new Text(); @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { String line = value.toString(); String[] words = line.split("#--"); if("4G"==Tools.findPhoneRAM(words[1]) || "4G"==Tools.findPhoneRAM(words[2])){ outK.set("4G"); } else if ("6G"== Tools.findPhoneRAM(words[1]) || "6G"==Tools.findPhoneRAM(words[2])) { outK.set("6G"); } else if ("8G"==Tools.findPhoneRAM(words[1]) || "8G"==Tools.findPhoneRAM(words[2])) { outK.set("8G"); } else if ("16G"==Tools.findPhoneRAM(words[1]) || "16G"==Tools.findPhoneRAM(words[2])) { outK.set("16G"); } context.write(outK,outV); } }
PhoneRAMReducer
package com.csx.PhoneRAM; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class PhoneRAMReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private IntWritable outV = new IntWritable(); @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable value:values) { sum += value.get(); } outV.set(sum); context.write(key,outV); } }
输出结果:part-r-00000
16G 32571 4G 3983 6G 240 8G 4840
-
MaxAndMin2:分析并输出本时间段内各主流手机品牌产品价格的最低值与最高值(爬取过程中不可避免地混入了一些脏数据,比如手机壳的销售信息,导致输出结果中出现了极低的售价)
MaxAndMin2Bean
package com.csx.MaxAndMin2; import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class MaxAndMin2Bean implements WritableComparable<MaxAndMin2Bean> { private int type;//类型表示品牌 private double min;//最低价格 private double max;//最高价格 public MaxAndMin2Bean(int type, double min, double max) { this.type = type; this.min = min; this.max = max; } public MaxAndMin2Bean() { } public int getType() { return type; } public double getMin() { return min; } public double getMax() { return max; } public void setType(int type) { this.type = type; } public void setMin(double min) { this.min = min; } public void setMax(double max) { this.max = max; } @Override public int compareTo(MaxAndMin2Bean o) { return 1; } @Override public void write(DataOutput dataOutput) throws IOException { dataOutput.writeInt(type); dataOutput.writeDouble(min); dataOutput.writeDouble(max); } @Override public void readFields(DataInput dataInput) throws IOException { type = dataInput.readInt(); min = dataInput.readDouble(); max = dataInput.readDouble(); } @Override public String toString() { String s = null; if (type == 0) s = "HUAWEI"; else if (type == 1) s = "Apple"; else if (type == 2) s = "OPPO"; else if (type == 3) s = "Redmi"; else if (type == 4) s = "荣耀"; else if (type == 5) s = "魅族"; else if (type == 6) s = "小米"; else if (type == 7) s = "三星"; else if (type == 8) s = "vivo"; else if (type == 9) s = "realme"; else if (type == 10) s = "其他"; return s + "\t" + min + "\t" + max + "\t"; } }
MaxAndMin2Driver
package com.csx.MaxAndMin2; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class MaxAndMin2Driver { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(MaxAndMin2Driver.class); job.setMapperClass(MaxAndMin2Mapper.class); job.setReducerClass(MaxAndMin2Reducer.class); job.setMapOutputKeyClass(MaxAndMin2Bean.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(MaxAndMin2Bean.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt")); FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路输出路径不可以存在 boolean result = job.waitForCompletion(true); System.exit(result ? 0 : 1); } }
MaxAndMin2Mapper
package com.csx.MaxAndMin2; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class MaxAndMin2Mapper extends Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable> { private double[] min = {Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE}; private double[] max = {Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE}; int type; @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException { String line = value.toString(); String[] words = line.split("#--"); double price = Double.parseDouble(words[0]); if (words[1].contains("HUAWEI") || words[1].contains("华为")) { type = 0; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("Apple") || words[1].contains("iphone") || words[1].contains("IPHONE")) { type = 1; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("OPPO")) { type = 2; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("Redmi")) { type = 3; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("荣耀") || words[1].contains("HONOR")) { type = 4; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("魅族") || words[1].contains("MEIZU")) { type = 5; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else 
if (words[1].contains("小米")) { type = 6; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("三星") || words[1].contains("SAMSUNG")) { type = 7; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("vivo")) { type = 8; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else if (words[1].contains("realme")) { type = 9; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; }else{ type = 10; if (price < min[type]) min[type] = price; if(price > max[type]) max[type] = price; } } @Override protected void cleanup(Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException { for (int i = 0; i <= 10; i ++ ) { MaxAndMin2Bean keyOut = new MaxAndMin2Bean(i, min[i], max[i]); context.write(keyOut, NullWritable.get()); } } }
MaxAndMin2Reducer
package com.csx.MaxAndMin2; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class MaxAndMin2Reducer extends Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable> { private double[] min = {Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE}; private double[] max = {Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE}; @Override protected void reduce(MaxAndMin2Bean key, Iterable<NullWritable> values, Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException { if (key.getMin() < min[key.getType()]) { min[key.getType()] = key.getMin(); } if (key.getMax() > max[key.getType()]) { max[key.getType()] = key.getMax(); } } @Override protected void cleanup(Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException { for (int i = 0; i <= 10; i ++ ) { MaxAndMin2Bean keyOut = new MaxAndMin2Bean(i, min[i], max[i]); context.write(keyOut, NullWritable.get()); } } }