## MapReduce入门案例——某东手机和电脑销售数据分析

MapReduce入门案例——某东手机和电脑销售数据分析

简介

利用Python爬虫从平台爬取部分时段销售数据,按照一定格式存储在文本文档(input目录下的computerfinal.txt和Pone)中。通过MapReduce进行数据分析输出处理结果到output目录下part-r-00000文件(其余文件为crc校验信息等),之后将结果利用前端页面展示。

工程目录

在这里插入图片描述

数据源

数据量大概数万条,手机电脑分开存储。

电脑数据computerfinal.txt 部分数据如下

8999.00#--惠普(HP)暗影精灵9 Intel 16.1英寸游戏本 #--电脑(13代i9-13900HX RTX4060 16G 1TBSSD 2.5K 240Hz)#--惠普(HP)OMEN暗影精灵京东自营旗舰店#--1#--2023-5-12
9999.00#--联想(Lenovo)拯救者Y9000P 2023 英特尔酷睿i9 16英寸游戏#--电脑(13代i9-13900HX 16G 1T RTX4060 2.5k 240Hz高色域)#--联想京东自营旗舰店#--2#--2023-5-12
6999.00#--联想(Lenovo)拯救者Y7000P 2023 英特尔酷睿 16英寸电竞游戏#--电脑(13代i5-13500H 16G 1T RTX4050 2.5K 165Hz高色域)#--联想京东自营旗舰店#--3#--2023-5-12
6499.00#--机械革命(MECHREVO)极光Pro 15.6英寸游戏本 #--电脑(i7-12650H 16G 512G RTX4060 165HZ 2.5K屏)#--机械革命京东自营官方旗舰店#--4#--2023-5-12
7999.00#--联想(Lenovo)拯救者Y7000P 2023 英特尔酷睿i7 16英寸电竞游戏#--电脑(13代i7-13700H 16G 1T RTX4060 2.5K高刷高色域)#--联想京东自营旗舰店#--5#--2023-5-12
7199.00#--AppleMacBookAir【教育优惠】13.3 8核M1芯片(7核图形处理器) 8G 256G SSD 深空灰 #--电脑 MGN63CH/A#--Apple产品京东自营旗舰店#--6#--2023-5-12
7098.00#--联想拯救者R9000P 16英寸游戏#--电脑(8核16线程R7-6800H 16G 512G RTX3060 2.5k 165Hz高色域)灰#--联想京东自营旗舰店#--7#--2023-5-12
12999.00#--ROG枪神7 Plus 第13代英特尔酷睿i9 18英寸 星云屏 电竞游戏本#--电脑(i9-13980HX 液金导热 16G 1T RTX4060 2.5K 240Hz P3广色域)#--玩家国度ROG京东自营官方旗舰店#--8#--2023-5-12

手机数据Pone 部分数据如下

6899.00#--Apple iPhone 14 (A2884) 支持移动联通电信5G 双卡双待#-- 星光色 256G#--大盛魁手机数码专营店#--22#--2023-5-20
7899.00#--Apple iPhone 14 Pro (A2892) 128GB 暗紫色 支持移动联通电信5G 双卡双待#--【大王卡】#--中国联通京东自营旗舰店#--28#--2023-5-20
2298.00#--荣耀80 1.6亿像素超清主摄 AI Vlog视频大师 全新Magic OS 7.0系统 5G#-- 8GB+256GB 墨玉青#--荣耀京东自营旗舰店#--30#--2023-5-20
8719.00#--Apple 苹果 iPhone 14 Pro Max(A2896) 全网通5G#-- 深空黑色 全网通 256G#--零疆旗舰店#--34#--2023-5-20
9899.00#--Apple iPhone 14 Pro Max (A2896) 256GB 暗紫色 支持移动联通电信5G 双卡双待#--【活动专享】#--中国移动京东自营官方旗舰店#--36#--2023-5-20
8899.00#--Apple iPhone 14 Pro (A2892) 支持移动联通电信5G 双卡双待#-- 银色 256G#--大盛魁手机数码专营店#--37#--2023-5-20
9899.00#--Apple iPhone 14 Pro Max(A2896)全网通智能5G#-- 暗紫色 256GB#--瑞思拜手机专营店#--40#--2023-5-20
7999.00#--Apple 苹果 iphone 14 pro 5G#-- 银色 256G#--创优数码专营店#--42#--2023-5-20
8654.00#--【12期俛息套餐可选】Apple 苹果 iPhone 14 Pro Max 5G 双卡双待#-- 深空黑色 256G#--头号卖家旗舰店#--44#--2023-5-20
8899.00#--Apple iPhone 14 Pro(A2892)全网通智能5G#-- 暗紫色 256GB#--瑞思拜手机专营店#--45#--2023-5-20

核心MapReduce代码

  • HomemadeOrNot:分别输出该时间段内平台售出的国产手机和非国产手机数量

    HomemadeOrNotDriver

    package com.csx.HomemadeOrNot;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import java.io.IOException;
    /**
     * Driver for the domestic vs. non-domestic phone count job
     * (HomemadeOrNotMapper / HomemadeOrNotReducer).
     */
    public class HomemadeOrNotDriver {
        // Defaults preserved from the original hard-coded paths; they can now be
        // overridden on the command line: args[0]=input file, args[1]=output dir.
        // NOTE(review): this job counts PHONES, but the default input is the
        // computer data file — confirm it should not read the Pone phone file.
        private static final String DEFAULT_INPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt";
        private static final String DEFAULT_OUTPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx";

        public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);

            job.setJarByClass(HomemadeOrNotDriver.class);
            job.setMapperClass(HomemadeOrNotMapper.class);
            job.setReducerClass(HomemadeOrNotReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.setInputPaths(job, new Path(args.length > 0 ? args[0] : DEFAULT_INPUT));
            // The output directory must NOT already exist, or Hadoop aborts the job.
            FileOutputFormat.setOutputPath(job, new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT));

            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }
    }
    
    

    HomemadeOrNotMapper

    package com.csx.HomemadeOrNot;

    import com.csx.Tools.Tools;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
     * Classifies each sales record as 国产 (domestic) or 非国产 (non-domestic)
     * by the brand found in the product title, emitting (label, 1).
     * Record layout: price#--title#--spec#--shop#--rank#--date
     */
    public class HomemadeOrNotMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Text outKey = new Text();
        private final static IntWritable outValue = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            String line = value.toString();
            String[] words = line.split("#--");

            // Skip blank or malformed lines instead of failing the task with
            // ArrayIndexOutOfBoundsException on dirty scraped data.
            if (words.length < 2) {
                return;
            }

            // Tools.findBrand presumably returns true for domestic brands — TODO confirm.
            if (Tools.findBrand(words[1])) {
                outKey.set("国产");
            } else {
                outKey.set("非国产");
            }

            context.write(outKey, outValue);
        }
    }
    
    

    HomemadeOrNotReducer

    package com.csx.HomemadeOrNot;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    import java.io.IOException;
    import java.util.Iterator;

    /**
     * Sums the per-record 1-counts emitted by HomemadeOrNotMapper, producing the
     * total number of records for each label (国产 / 非国产).
     */
    public class HomemadeOrNotReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable total = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int count = 0;
            Iterator<IntWritable> it = values.iterator();
            while (it.hasNext()) {
                count += it.next().get();
            }

            total.set(count);
            context.write(key, total);
        }
    }
    
    

    输出结果:part-r-00000

    国产	16914
    非国产	24720
    
    
  • LaptopGPUdata:输出被分析数据源中不同型号主流显卡销售情况

    LaptopGPUdataDriver

    package com.csx.LaptopGPUdata;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    /**
     * Driver for the per-GPU-model sales count job
     * (LaptopGPUdataMapper / LaptopGPUdataReducer).
     */
    public class LaptopGPUdataDriver {
        // Defaults preserved from the original hard-coded paths; they can now be
        // overridden on the command line: args[0]=input file, args[1]=output dir.
        private static final String DEFAULT_INPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt";
        private static final String DEFAULT_OUTPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx";

        public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);

            job.setJarByClass(LaptopGPUdataDriver.class);
            job.setMapperClass(LaptopGPUdataMapper.class);
            job.setReducerClass(LaptopGPUdataReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.setInputPaths(job, new Path(args.length > 0 ? args[0] : DEFAULT_INPUT));
            // The output directory must NOT already exist, or Hadoop aborts the job.
            FileOutputFormat.setOutputPath(job, new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT));

            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }

    }
    
    

    LaptopGPUdataMapper

    package com.csx.LaptopGPUdata;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
     * Buckets each laptop record by the GPU model number found in the spec
     * field (words[2]); records without a recognised model count as 其他.
     * Record layout: price#--title#--spec#--shop#--rank#--date
     */
    public class LaptopGPUdataMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Checked in the same order as the original if/else chain. Since
        // "RTX4050".contains("4050") is true, matching the bare digits subsumes
        // the original redundant "contains(RTXxxxx) || contains(xxxx)" pairs.
        private static final String[] GPU_MODELS = {
                "4050", "4060", "4070", "4080", "4090",
                "3050", "3060", "3070", "3080", "3090",
        };

        private final static IntWritable outV = new IntWritable(1);
        private final Text outK = new Text();

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            String[] words = value.toString().split("#--");

            // Skip malformed lines instead of throwing ArrayIndexOutOfBoundsException.
            if (words.length < 3) {
                return;
            }

            String label = "其他";
            for (String model : GPU_MODELS) {
                if (words[2].contains(model)) {
                    label = "RTX" + model;
                    break;
                }
            }

            outK.set(label);
            context.write(outK, outV);
        }
    }
    
    

    LaptopGPUdataReducer

    package com.csx.LaptopGPUdata;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
     * Totals the per-record counts emitted by LaptopGPUdataMapper, yielding the
     * number of laptops sold per GPU-model bucket.
     */
    public class LaptopGPUdataReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable total = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable partial : values) {
                count = count + partial.get();
            }

            total.set(count);
            context.write(key, total);
        }
    }
    
    

    输出结果:part-r-00000

    RTX3050	3865
    RTX3060	1985
    RTX3070	319
    RTX3080	64
    RTX4050	1494
    RTX4060	4708
    RTX4070	984
    RTX4080	694
    RTX4090	584
    其他	26937
    
  • MostExpensiveLaptop:输出被分析时间段内笔记本电脑售价前10名的产品信息

    MostExpensiveLaptopDriver

    package com.csx.MostExpensiveLaptop;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    /**
     * Driver for the top-10-most-expensive-laptops job. LaptopInfoBean is the
     * map output KEY so that the shuffle sorts records by price (descending).
     */
    public class MostExpensiveLaptopDriver {
        // Defaults preserved from the original hard-coded paths; they can now be
        // overridden on the command line: args[0]=input file, args[1]=output dir.
        private static final String DEFAULT_INPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt";
        private static final String DEFAULT_OUTPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx";

        public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            job.setJarByClass(MostExpensiveLaptopDriver.class);
            job.setMapperClass(MostExpensiveLaptopMapper.class);
            job.setReducerClass(MostExpensiveLaptopReducer.class);

            job.setMapOutputKeyClass(LaptopInfoBean.class);
            job.setMapOutputValueClass(Text.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LaptopInfoBean.class);

            FileInputFormat.setInputPaths(job, new Path(args.length > 0 ? args[0] : DEFAULT_INPUT));
            // The output directory must NOT already exist, or Hadoop aborts the job.
            FileOutputFormat.setOutputPath(job, new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT));

            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }

    }
    
    

    LaptopInfoBean:自定义Bean对象

    package com.csx.MostExpensiveLaptop;

    import org.apache.hadoop.io.WritableComparable;

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    /**
     * Composite shuffle key carrying a laptop's price and description.
     * Sorted by price descending so the reducer sees the most expensive
     * laptops first. A price tie is broken by the description: the original
     * compareTo returned 0 for equal prices, which made the reducer's TreeMap
     * treat two DIFFERENT laptops with the same price as one key and silently
     * drop one of them.
     *
     * Note: WritableComparable already extends Writable, so the redundant
     * "implements Writable" was removed (no interface change).
     */
    public class LaptopInfoBean implements WritableComparable<LaptopInfoBean> {
        private double price;        // sale price in yuan
        private String information;  // concatenated title + spec fields

        /** No-arg constructor required by Hadoop serialization. */
        public LaptopInfoBean() {
        }

        public LaptopInfoBean(double price, String information) {
            this.price = price;
            this.information = information;
        }

        public double getPrice() {
            return price;
        }

        public String getInformation() {
            return information;
        }

        public void setPrice(double price) {
            this.price = price;
        }

        public void setInformation(String information) {
            this.information = information;
        }

        @Override
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeDouble(price);
            dataOutput.writeUTF(information);
        }

        @Override
        public void readFields(DataInput dataInput) throws IOException {
            this.price = dataInput.readDouble();
            this.information = dataInput.readUTF();
        }

        @Override
        public int compareTo(LaptopInfoBean o) {
            // Descending by price; Double.compare also handles NaN consistently,
            // unlike the original hand-rolled </> comparison.
            int result = Double.compare(o.price, this.price);
            if (result == 0) {
                // Tie-break on the description so equal-priced laptops stay distinct.
                String a = this.information == null ? "" : this.information;
                String b = o.information == null ? "" : o.information;
                result = a.compareTo(b);
            }
            return result;
        }

        @Override
        public String toString() {
            return information + "------------" + price;
        }
    }
    
    

    MostExpensiveLaptopMapper

    package com.csx.MostExpensiveLaptop;

    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapreduce.Mapper;
    import java.io.IOException;

    /**
     * Emits one (LaptopInfoBean, empty Text) pair per record; the bean's
     * compareTo orders the shuffle by price so the reducer receives laptops
     * from most to least expensive. Record layout: price#--title#--spec#--...
     */
    public class MostExpensiveLaptopMapper extends Mapper<LongWritable, Text, LaptopInfoBean, Text> {

        private final LaptopInfoBean outK = new LaptopInfoBean();
        // Value is deliberately empty; all payload travels in the key.
        private final Text outV = new Text();

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LaptopInfoBean, Text>.Context context) throws IOException, InterruptedException {
            String[] words = value.toString().split("#--");

            // Skip malformed lines rather than failing the whole task.
            if (words.length < 3) {
                return;
            }

            double price;
            try {
                price = Double.parseDouble(words[0]);
            } catch (NumberFormatException e) {
                return; // dirty scraped row with a non-numeric price field
            }

            outK.setPrice(price);
            outK.setInformation(words[1] + words[2]);

            context.write(outK, outV);
        }

    }
    
    

    MostExpensiveLaptopReducer

    package com.csx.MostExpensiveLaptop;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;
    import java.util.Map;
    import java.util.TreeMap;

    /**
     * Retains only the 10 highest-priced laptops. Keys arrive ordered by
     * LaptopInfoBean.compareTo; a bounded TreeMap keeps the current top 10 and
     * cleanup() writes them out once all input has been consumed.
     */
    public class MostExpensiveLaptopReducer extends Reducer<LaptopInfoBean, Text, Text, LaptopInfoBean> {
        private static final int TOP_N = 10;
        private final TreeMap<LaptopInfoBean, Text> top = new TreeMap<LaptopInfoBean, Text>();

        @Override
        protected void reduce(LaptopInfoBean key, Iterable<Text> values, Reducer<LaptopInfoBean, Text, Text, LaptopInfoBean>.Context context) throws IOException, InterruptedException {
            for (Text val : values) {
                // Copy the key: Hadoop reuses the same bean instance across calls,
                // so storing `key` directly would corrupt earlier map entries.
                LaptopInfoBean copy = new LaptopInfoBean(key.getPrice(), key.getInformation());
                top.put(copy, val);

                // Evict the cheapest entry once we hold more than TOP_N.
                if (top.size() > TOP_N) {
                    top.remove(top.lastKey());
                }
            }
        }

        @Override
        protected void cleanup(Reducer<LaptopInfoBean, Text, Text, LaptopInfoBean>.Context context) throws IOException, InterruptedException {
            for (Map.Entry<LaptopInfoBean, Text> entry : top.entrySet()) {
                context.write(entry.getValue(), entry.getKey());
            }
        }
    }
    
    

    输出结果:part-r-00000

    	联想ThinkPad商用电脑P15 15.6英寸移动工作站(标配i9-11950H 64G 2T 独显16G A5000 Win11pro 4K屏)------------60999.0
    	联想电脑ThinkPad P1 2022(0FCD)16英寸高性能轻薄设计师工作站 i9-12900H 32G 2T A5500 600nit触摸4K------------54999.0
    	外星人(Alienware)全新 m18 18英寸高端游戏本i9-13980HX 64G 2T RTX4090 165Hz 高刷屏高性能电脑1996QB------------51999.0
    	外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1996:i9 64G 2TB 4090标配 13代处理器 Cherry键盘------------49999.0
    	外星人(Alienware)全新 m18 18英寸高端游戏本i9-13980HX 64G 2T RTX4090 165Hz 高刷屏高性能电脑1996QB------------49969.0
    	[新十三代i9]RazerBlade雷蛇灵刃18电竞游戏电脑2.5K-240Hz高刷IPS屏幕 i9-13980HX/RTX4090 官配64G内存/2T固态------------42999.0
    	雷蛇(Razer)[新十三代i9]RazerBlade雷蛇灵刃18电竞游戏电脑2.5K-240Hz高刷IPS屏幕 i9-13980HX/RTX4090 官配64G内存/2T固态------------41999.0
    	外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1986Q:i9HX/4080/2.5K 包鼠套装 官方标配------------41098.0
    	ROG冰刃7 双屏 16英寸 设计师高性能 游戏本电脑(R9 7945HX 32G 1T RTX4090 2.5K 240Hz MiniLED)------------40999.0
    	外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1986:标配加购包鼠套装 13代处理器 Cherry键盘------------40888.0
    
    
  • PhoneRAM:输出被分析时间段内各常见内存大小的手机销量

    PhoneRAMDriver

    package com.csx.PhoneRAM;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    /**
     * Driver for the phone-RAM-size sales count job
     * (PhoneRAMMapper / PhoneRAMReducer).
     */
    public class PhoneRAMDriver {
        // Defaults preserved from the original hard-coded paths; they can now be
        // overridden on the command line: args[0]=input file, args[1]=output dir.
        // NOTE(review): this job analyses PHONE RAM, yet the default input is the
        // computer data file — confirm it should not read the Pone phone file.
        private static final String DEFAULT_INPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt";
        private static final String DEFAULT_OUTPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx";

        public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);

            job.setJarByClass(PhoneRAMDriver.class);
            job.setMapperClass(PhoneRAMMapper.class);
            job.setReducerClass(PhoneRAMReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.setInputPaths(job, new Path(args.length > 0 ? args[0] : DEFAULT_INPUT));
            // The output directory must NOT already exist, or Hadoop aborts the job.
            FileOutputFormat.setOutputPath(job, new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT));

            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }

    }
    

    PhoneRAMMapper

    package com.csx.PhoneRAM;

    import com.csx.Tools.Tools;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
     * Buckets each phone record by RAM size (4G/6G/8G/16G) as detected by
     * Tools.findPhoneRAM in either the title (words[1]) or the spec (words[2]).
     */
    public class PhoneRAMMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final String[] RAM_SIZES = {"4G", "6G", "8G", "16G"};

        private final static IntWritable outV = new IntWritable(1);
        private final Text outK = new Text();

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            String[] words = value.toString().split("#--");

            // Skip malformed lines rather than crashing on dirty scraped data.
            if (words.length < 3) {
                return;
            }

            // Call findPhoneRAM once per field; the original re-invoked it in
            // every branch — up to eight calls per record.
            String ramFromTitle = Tools.findPhoneRAM(words[1]);
            String ramFromSpec = Tools.findPhoneRAM(words[2]);

            for (String size : RAM_SIZES) {
                // BUG FIX: the original compared Strings with '==', which is
                // reference identity and only "works" when both sides happen to
                // be interned literals. Use equals().
                if (size.equals(ramFromTitle) || size.equals(ramFromSpec)) {
                    outK.set(size);
                    context.write(outK, outV);
                    return;
                }
            }
            // No recognised size: drop the record. The original fell through and
            // re-emitted whatever key the PREVIOUS record had left in outK.
        }
    }
    
    

    PhoneRAMReducer

    package com.csx.PhoneRAM;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
     * Totals the per-record counts emitted by PhoneRAMMapper, yielding the
     * number of phones sold in each RAM-size bucket.
     */
    public class PhoneRAMReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable total = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable partial : values) {
                count = count + partial.get();
            }

            total.set(count);
            context.write(key, total);
        }
    }
    
    

    输出结果:part-r-00000

    16G	32571
    4G	3983
    6G	240
    8G	4840
    
  • MaxAndMin2:分析输出本分析时间段内,各主流手机品牌产品价格最低与最高值(数据爬取中不可避免地爬取到了一些脏数据,比如手机壳的销售信息,导致输出结果出现了很低的售价)

    MaxAndMin2Bean

    package com.csx.MaxAndMin2;

    import org.apache.hadoop.io.WritableComparable;

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    /**
     * Shuffle key carrying a brand code plus the min/max price observed for
     * that brand by one map task.
     */
    public class MaxAndMin2Bean implements WritableComparable<MaxAndMin2Bean> {
        // Brand-code → display-name table used by toString(); index = type code.
        private static final String[] BRAND_NAMES = {
                "HUAWEI", "Apple", "OPPO", "Redmi", "荣耀",
                "魅族", "小米", "三星", "vivo", "realme", "其他",
        };

        private int type;    // brand code, 0-10 (see BRAND_NAMES)
        private double min;  // lowest price seen
        private double max;  // highest price seen

        public MaxAndMin2Bean(int type, double min, double max) {
            this.type = type;
            this.min = min;
            this.max = max;
        }

        /** No-arg constructor required by Hadoop serialization. */
        public MaxAndMin2Bean() {
        }

        public int getType() {
            return type;
        }

        public double getMin() {
            return min;
        }

        public double getMax() {
            return max;
        }

        public void setType(int type) {
            this.type = type;
        }

        public void setMin(double min) {
            this.min = min;
        }

        public void setMax(double max) {
            this.max = max;
        }

        @Override
        public int compareTo(MaxAndMin2Bean o) {
            // BUG FIX: the original always returned 1, violating the Comparable
            // contract (both a<b and b<a would hold) and relying on undefined
            // sort behavior. Order by brand code, then by the price bounds so
            // distinct beans never compare equal by accident.
            int result = Integer.compare(this.type, o.type);
            if (result == 0) {
                result = Double.compare(this.min, o.min);
            }
            if (result == 0) {
                result = Double.compare(this.max, o.max);
            }
            return result;
        }

        @Override
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeInt(type);
            dataOutput.writeDouble(min);
            dataOutput.writeDouble(max);
        }

        @Override
        public void readFields(DataInput dataInput) throws IOException {
            type = dataInput.readInt();
            min = dataInput.readDouble();
            max = dataInput.readDouble();
        }

        @Override
        public String toString() {
            // Out-of-range codes used to print "null"; fold them into 其他.
            String s = (type >= 0 && type < BRAND_NAMES.length) ? BRAND_NAMES[type] : "其他";
            return s + "\t" + min + "\t" + max + "\t";
        }
    }
    
    

    MaxAndMin2Driver

    package com.csx.MaxAndMin2;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    /**
     * Driver for the per-brand min/max phone price job
     * (MaxAndMin2Mapper / MaxAndMin2Reducer).
     */
    public class MaxAndMin2Driver {
        // Defaults preserved from the original hard-coded paths; they can now be
        // overridden on the command line: args[0]=input file, args[1]=output dir.
        // NOTE(review): this job analyses PHONE brands, yet the default input is
        // the computer data file — confirm it should not read the Pone phone file.
        private static final String DEFAULT_INPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt";
        private static final String DEFAULT_OUTPUT =
                "D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx";

        public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);

            job.setJarByClass(MaxAndMin2Driver.class);

            job.setMapperClass(MaxAndMin2Mapper.class);
            job.setReducerClass(MaxAndMin2Reducer.class);

            job.setMapOutputKeyClass(MaxAndMin2Bean.class);
            job.setMapOutputValueClass(NullWritable.class);

            job.setOutputKeyClass(MaxAndMin2Bean.class);
            job.setOutputValueClass(NullWritable.class);

            FileInputFormat.setInputPaths(job, new Path(args.length > 0 ? args[0] : DEFAULT_INPUT));
            // The output directory must NOT already exist, or Hadoop aborts the job.
            FileOutputFormat.setOutputPath(job, new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT));

            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }
    }
    

    MaxAndMin2Mapper

    package com.csx.MaxAndMin2;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;
    import java.util.Arrays;

    /**
     * In-mapper combiner for brand price extremes: tracks the min/max price
     * seen for each of the 11 brand codes and emits one bean per observed
     * brand in cleanup(), keeping shuffle traffic to at most 11 records per
     * map task. Record layout: price#--title#--...
     */
    public class MaxAndMin2Mapper extends Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable> {
        private static final int BRAND_COUNT = 11; // codes 0-10, see MaxAndMin2Bean

        private final double[] min = new double[BRAND_COUNT];
        private final double[] max = new double[BRAND_COUNT];

        @Override
        protected void setup(Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable>.Context context) {
            Arrays.fill(min, Double.MAX_VALUE);
            // BUG FIX: the original initialised the max tracker with
            // Double.MIN_VALUE, which is the smallest POSITIVE double, not the
            // most negative value. NEGATIVE_INFINITY is the correct identity.
            Arrays.fill(max, Double.NEGATIVE_INFINITY);
        }

        /** Maps a product title to its brand code (0-10); 10 means 其他/other. */
        private static int brandOf(String title) {
            if (title.contains("HUAWEI") || title.contains("华为")) return 0;
            if (title.contains("Apple") || title.contains("iphone") || title.contains("IPHONE")) return 1;
            if (title.contains("OPPO")) return 2;
            if (title.contains("Redmi")) return 3;
            if (title.contains("荣耀") || title.contains("HONOR")) return 4;
            if (title.contains("魅族") || title.contains("MEIZU")) return 5;
            if (title.contains("小米")) return 6;
            if (title.contains("三星") || title.contains("SAMSUNG")) return 7;
            if (title.contains("vivo")) return 8;
            if (title.contains("realme")) return 9;
            return 10;
        }

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {
            String[] words = value.toString().split("#--");

            // Skip malformed lines rather than failing the task.
            if (words.length < 2) {
                return;
            }

            double price;
            try {
                price = Double.parseDouble(words[0]);
            } catch (NumberFormatException e) {
                return; // dirty row (the scrape is known to contain junk data)
            }

            // One shared update replaces the original's ten duplicated
            // if-branches that each repeated the same min/max bookkeeping.
            int type = brandOf(words[1]);
            if (price < min[type]) min[type] = price;
            if (price > max[type]) max[type] = price;
        }

        @Override
        protected void cleanup(Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {
            for (int i = 0; i < BRAND_COUNT; i++) {
                // Skip brands this split never saw, so sentinel values are not shuffled.
                if (min[i] > max[i]) {
                    continue;
                }
                context.write(new MaxAndMin2Bean(i, min[i], max[i]), NullWritable.get());
            }
        }
    }
    

    MaxAndMin2Reducer

    package com.csx.MaxAndMin2;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Reducer;
    import java.io.IOException;
    import java.util.Arrays;

    /**
     * Merges the per-mapper extremes into global per-brand min/max prices and
     * emits one MaxAndMin2Bean per observed brand code in cleanup().
     */
    public class MaxAndMin2Reducer extends Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable> {
        private static final int BRAND_COUNT = 11; // codes 0-10, see MaxAndMin2Bean

        private final double[] min = new double[BRAND_COUNT];
        private final double[] max = new double[BRAND_COUNT];

        @Override
        protected void setup(Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable>.Context context) {
            Arrays.fill(min, Double.MAX_VALUE);
            // BUG FIX: Double.MIN_VALUE is the smallest POSITIVE double, not the
            // most negative one; NEGATIVE_INFINITY is the correct max identity.
            Arrays.fill(max, Double.NEGATIVE_INFINITY);
        }

        @Override
        protected void reduce(MaxAndMin2Bean key, Iterable<NullWritable> values, Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {
            int type = key.getType();
            if (key.getMin() < min[type]) {
                min[type] = key.getMin();
            }
            if (key.getMax() > max[type]) {
                max[type] = key.getMax();
            }
        }

        @Override
        protected void cleanup(Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {
            for (int i = 0; i < BRAND_COUNT; i++) {
                // Skip brands with no data rather than printing sentinel values.
                if (min[i] > max[i]) {
                    continue;
                }
                context.write(new MaxAndMin2Bean(i, min[i], max[i]), NullWritable.get());
            }
        }
    }
    
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值