MapReduce框架使用小顶堆求评分均分前20条记录案例---movie案例

数据

{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"1"}
{"movie":"914","rate":"3","timeStamp":"978301968","uid":"1"}
{"movie":"3408","rate":"4","timeStamp":"978300275","uid":"1"}
{"movie":"2355","rate":"5","timeStamp":"978824291","uid":"1"}
{"movie":"1197","rate":"3","timeStamp":"978302268","uid":"1"}
{"movie":"1287","rate":"5","timeStamp":"978302039","uid":"1"}

需求

求出每部电影评分的平均值,将平均值从高到低排列,并求出平均值最高的前20条数据

需求分析

将文件中的数据的属性封装,进行序列化和反序列化
Map读出数据,Reduce中setup建立一个小顶堆,reduce部分求平均值并放入小顶堆
将电影ID,平均值封装到一个新对象里边,输出数据
这里写图片描述

代码

MovieBean.java

/**
 * A single rating record, e.g.
 * {"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}.
 *
 * <p>Implements {@code Writable}: {@link #write} and {@link #readFields} serialize the
 * fields in the fixed order movie, rate, timeStamp, uid. The three String fields must be
 * non-null when {@link #write} is called, because {@code DataOutput.writeUTF} rejects null.
 */
public class MovieBean implements Writable {

    private String movie;
    private int rate;
    private String timeStamp;
    private String uid;

    // ---- Writable serialization (write and read order must stay in sync) ----

    /** Writes movie, rate, timeStamp and uid to {@code out}, in that order. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.movie);
        out.writeInt(this.rate);
        out.writeUTF(this.timeStamp);
        out.writeUTF(this.uid);
    }

    /** Restores the fields from {@code in}, reading them in the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.movie = in.readUTF();
        this.rate = in.readInt();
        this.timeStamp = in.readUTF();
        this.uid = in.readUTF();
    }

    // ---- Bulk setter ----

    /** Assigns all four fields in one call. */
    public void Set(String movie, int rate, String timeStamp, String uid) {
        this.movie = movie;
        this.rate = rate;
        this.timeStamp = timeStamp;
        this.uid = uid;
    }

    // ---- Accessors ----

    public String getMovie() {
        return this.movie;
    }

    public void setMovie(String movie) {
        this.movie = movie;
    }

    public int getRate() {
        return this.rate;
    }

    public void setRate(int rate) {
        this.rate = rate;
    }

    public String getTimeStamp() {
        return this.timeStamp;
    }

    public void setTimeStamp(String timeStamp) {
        this.timeStamp = timeStamp;
    }

    public String getUid() {
        return this.uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    /** Renders every field in the form {@code MovieBean [movie=..., rate=..., timeStamp=..., uid=...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("MovieBean [movie=");
        text.append(this.movie);
        text.append(", rate=").append(this.rate);
        text.append(", timeStamp=").append(this.timeStamp);
        text.append(", uid=").append(this.uid);
        text.append("]");
        return text.toString();
    }
}

MovieAvgBean.java

/**
 * Pairs a movie id with the average of its ratings.
 *
 * <p>Implements {@code Writable}: the average is serialized first, then the movie id.
 * The movie id must be non-null when {@link #write} is called, because
 * {@code DataOutput.writeUTF} rejects null.
 */
public class MovieAvgBean implements Writable {

    private String movieId;
    private float avg;

    /** Assigns both fields in one call. */
    public void Set(float avg, String movieId) {
        this.avg = avg;
        this.movieId = movieId;
    }

    public String getMovieId() {
        return this.movieId;
    }

    public void setMovieId(String movieId) {
        this.movieId = movieId;
    }

    public float getAvg() {
        return this.avg;
    }

    public void setAvg(float avg) {
        this.avg = avg;
    }

    /** Writes the average followed by the movie id. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(this.avg);
        out.writeUTF(this.movieId);
    }

    /** Reads the average and then the movie id, mirroring {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.avg = in.readFloat();
        this.movieId = in.readUTF();
    }

    /** Renders both fields in the form {@code MovieAvgBean [avg=..., MovieId=...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("MovieAvgBean [avg=");
        text.append(this.avg);
        text.append(", MovieId=").append(this.movieId);
        text.append("]");
        return text.toString();
    }
}

MapReduce部分

/**
 * MapReduce job that computes the average rating of every movie and outputs the
 * {@link #TOP_N} movies with the highest averages, ordered from highest to lowest.
 *
 * <p>The mapper keys each rating by movie id; the reducer averages the ratings of one
 * movie per {@code reduce} call and keeps only the best {@link #TOP_N} averages in a
 * sorted set, which is written out in {@code cleanup}.
 */
public class MovieAvgTopn {

    /** How many top-rated movies the reducer keeps and emits. */
    private static final int TOP_N = 20;

    /** Parses one JSON rating per input line and emits it keyed by movie id. */
    public static class MapTask extends Mapper<LongWritable, Text, Text, MovieBean>{

        // Shared instance: building an ObjectMapper for every record is needlessly expensive.
        private static final ObjectMapper MAPPER = new ObjectMapper();

        // Reused output key; context.write serializes it before the next map() call overwrites it.
        private final Text outKey = new Text();

        /**
         * Deserializes the line into a {@code MovieBean} and writes {@code (movieId, bean)}.
         *
         * @throws IOException if the line cannot be parsed or the record cannot be written
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, MovieBean>.Context context)
                throws IOException, InterruptedException {
            MovieBean bean = MAPPER.readValue(value.toString(), MovieBean.class);
            outKey.set(bean.getMovie());
            context.write(outKey, bean);
        }
    }

    /** Averages the ratings of each movie and retains the {@link #TOP_N} highest averages. */
    public static class ReduceTask extends Reducer<Text, MovieBean, MovieAvgBean, NullWritable>{
        // Sorted ascending by average, so first() is always the weakest retained entry.
        TreeSet<MovieAvgBean> tree = null;

        /** Creates the sorted set used as a bounded "min-heap" of the best averages. */
        @Override
        protected void setup(Reducer<Text, MovieBean, MovieAvgBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            tree = new TreeSet<>(new Comparator<MovieAvgBean>() {
                @Override
                public int compare(MovieAvgBean o1, MovieAvgBean o2) {
                    // Primary order is the average, so the set really is ordered by rating.
                    int byAvg = Float.compare(o1.getAvg(), o2.getAvg());
                    if (byAvg != 0) {
                        return byAvg;
                    }
                    // Tie-break on movie id (content comparison, not reference equality) so
                    // that distinct movies with equal averages are not collapsed into one.
                    return o1.getMovieId().compareTo(o2.getMovieId());
                }
            });
        }

        /**
         * Computes the average rating for the movie identified by {@code key} and offers it
         * to the top-N set, evicting the lowest average once the set exceeds {@link #TOP_N}.
         */
        @Override
        protected void reduce(Text key, Iterable<MovieBean> values,
                Reducer<Text, MovieBean, MovieAvgBean, NullWritable>.Context context) throws IOException, InterruptedException {
            int count = 0;
            float sum = 0;
            for (MovieBean movieBean : values) {
                sum += movieBean.getRate();
                count++;
            }
            float avg = sum / count;

            MovieAvgBean avgBean = new MovieAvgBean();
            avgBean.Set(avg, key.toString());

            tree.add(avgBean);
            if (tree.size() > TOP_N) {
                // Drop the smallest average; the TOP_N largest remain.
                tree.pollFirst();
            }
        }

        /** Emits the retained movies from the highest average to the lowest. */
        @Override
        protected void cleanup(Reducer<Text, MovieBean, MovieAvgBean, NullWritable>.Context context)
                throws IOException, InterruptedException {
            for (MovieAvgBean movieAvgBean : tree.descendingSet()) {
                context.write(movieAvgBean, NullWritable.get());
            }
        }
    }

    /**
     * Configures and runs the job against the hard-coded local input and output paths,
     * deleting a pre-existing output directory first.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf,"movieAvgTopn");

        job.setMapperClass(MapTask.class);
        job.setReducerClass(ReduceTask.class);
        job.setJarByClass(MovieAvgTopn.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MovieBean.class);
        job.setOutputKeyClass(MovieAvgBean.class);
        job.setOutputValueClass(NullWritable.class);

        // The top-N set lives inside one reducer; more than one reducer would yield
        // a separate top-N per reducer instead of a single global result.
        job.setNumReduceTasks(1);

        FileInputFormat.addInputPath(job, new Path("D:\\a\\rating.json"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\a\\MovieAvgTopn-out"));


        // Remove the output directory if it already exists, so the job can recreate it.
        File file = new File("D:\\a\\MovieAvgTopn-out");
        if(file.exists()){
            FileUtils.deleteDirectory(file);
        }

        boolean completion = job.waitForCompletion(true);
        System.out.println(completion?"你很优秀!":"调bug");
    }

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值