hadoop实现云音乐排行榜

林图

已于 2022-12-21 20:34:46 修改

阅读量1.9k

点赞数 5

文章标签： hadoop hbase 大数据

于 2022-11-30 09:43:24 首次发布

本文链接：https://blog.csdn.net/weixin_46406308/article/details/128109089

版权

文章目录

一、MapReduce与Hbase的集成
二、批量数据导入
三、 Hbase MapReduce API-运行TableMapperDemo
四、TableMapper数据去重
五、TableReduce统计
六、音乐排行榜的实现
七：sqoop工具的使用
出现的问题与解决方案

一、MapReduce与Hbase的集成

将hbase-site.xml文件复制到$HADOOP_HOME/etc/hadoop下
.编辑$HADOOP_HOME/etc/hadoop/hadoop-env.sh文件，增加以下内容
Hbase与MapReduce集成环境测试
.测试结果

二、批量数据导入

1. 准备数据：创建music1.txt, music2.txt, music3.txt文件
在这里插入图片描述
2. 导入虚拟机，然后上传到hadoop

3. 推入到hadoop

4.查看是否推入成功

5.使用Hbase自带工具类批量导入方式导入

6.查看运行结果

7.使用Hbase中的completebulkload将tmp目录移动到Hregion中完成数据加载

8.进入Hbase查看是否导入
在这里插入图片描述

三、 Hbase MapReduce API-运行TableMapperDemo

使用TableMapper从Hbase中读取数据的代码

//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//

package com.music;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class TableMapperDemo {
    public TableMapperDemo() {
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        new GenericOptionsParser(conf, args);
        System.out.println("temjars");
        conf.set("hbase.mapreduce.inputtable", "music");
        conf.set("hbase.mapreduce.scan.columns", "info:name info:gender");
        conf.set("hbase.mapreduce.scan.row.start", "music1");
        conf.set("hbase.zookeeper.quorum", "Cluster-01:2181,Cluster-02:2181,Cluster-03:2181,Cluster-04:2181,Cluster-05:2181");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.address", "Cluster-01:8032");
        conf.set("yarn.resourcemanager.scheduler.address", "Cluster-01:8030");
        conf.addResource("mapred-site.xml");
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        conf.addResource("yarn-site.xml");
        Job job = Job.getInstance(conf, "hbase-mapreduce-api");
        job.setJarByClass(TableMapperDemo.class);
        job.setInputFormatClass(TableInputFormat.class);
        job.setMapperClass(TableMapperDemo.Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        List<String> libjars = new ArrayList();
        job.getConfiguration().set("tmpjars", StringUtils.join(libjars, ','));
        Path output = new Path("/output2/music2");
        if (FileSystem.get(conf).exists(output)) {
            FileSystem.get(conf).delete(output, true);
        }

        FileOutputFormat.setOutputPath(job, output);
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"));
        TableMapReduceUtil.initTableMapperJob("music", scan, TableMapperDemo.Mapper.class, Text.class, (Class)null, job);
        job.waitForCompletion(true);
    }

    static class Mapper extends TableMapper<Text, Text> {
        Mapper() {
        }

        protected void map(ImmutableBytesWritable key, Result value, org.apache.hadoop.mapreduce.Mapper<ImmutableBytesWritable, Result, Text, Text>.Context context) throws IOException, InterruptedException {
            List<Cell> cells = value.listCells();
            Iterator var6 = cells.iterator();

            while(var6.hasNext()) {
                Cell cell = (Cell)var6.next();
                String outValue = String.format("Rowkey:%s Family:%s Qualifier:%s cellValue:%s", Bytes.toString(key.get()), Bytes.toString(CellUtil.cloneFamily(cell)), Bytes.toString(CellUtil.cloneQualifier(cell)), Bytes.toString(CellUtil.cloneValue(cell)));
                context.write(new Text(CellUtil.getCellKeyAsString(cell)), new Text(outValue));
            }

        }
    }
}

2.更改yarn-site.xml文件并发送到各个节点
在这里插入图片描述
3.将代码打包发送到虚拟机

4.使用hadoop jar+jar包方式运行代码

5.运行结果

6.在hadoop中查看结果

7.访问192.168.10.111:8080/cluster/apps/查看

在这里插入图片描述

四、TableMapper数据去重

1.TableMapperDemo2代码编写

//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//

package com.music;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.GenericOptionsParser;

public class TableMapperDemo2 {
    public TableMapperDemo2() {
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        new GenericOptionsParser(conf, args);
        conf.set("hbase.zookeeper.quorum", "Cluster-01:2181,Cluster-02:2181,Cluster-03:2181,Cluster-04:2181,Cluster-05:2181");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.address", "Cluster-01:8032");
        conf.set("yarn.resourcemanager.scheduler.address", "Cluster-01:8030");
        conf.addResource("mapred-site.xml");
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        conf.addResource("yarn-site.xml");
        Job job = Job.getInstance(conf, "hbase-mapreduce-api");
        job.setJarByClass(TableMapperDemo2.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        job.getConfiguration().set("hbase.mapred.outputtable", "namelist");
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
        TableMapReduceUtil.initTableMapperJob("music", scan, TableMapperDemo2.MyMapper.class, Text.class, Put.class, job);
        job.waitForCompletion(true);
    }

    static class MyMapper extends TableMapper<Text, Put> {
        MyMapper() {
        }

        protected void map(ImmutableBytesWritable key, Result value, Mapper<ImmutableBytesWritable, Result, Text, Put>.Context context) throws IOException, InterruptedException {
            List<Cell> cells = value.listCells();
            Iterator var6 = cells.iterator();

            while(var6.hasNext()) {
                Cell cell = (Cell)var6.next();
                Put put = new Put(CellUtil.cloneValue(cell));
                put.addColumn(Bytes.toBytes("details"), Bytes.toBytes("rank"), Bytes.toBytes(0));
                context.write(new Text(Bytes.toString(CellUtil.cloneValue(cell))), put);
            }

        }
    }
}

2.先创建namelist表
在这里插入图片描述
3.打成jar包运行

4.运行结果

5.进入hbase查看namelist

五、TableReduce统计

1.编写TableReduce代码

//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//

package com.music;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.GenericOptionsParser;

public class TableReduceDemo {
    public TableReduceDemo() {
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        new GenericOptionsParser(conf, args);
        Job job = Job.getInstance(conf, "top-music");
        job.setJarByClass(TableReduceDemo.class);
        job.setNumReduceTasks(2);
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
        TableMapReduceUtil.initTableMapperJob("music", scan, TableReduceDemo.MyMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("namelist", TableReduceDemo.MyReducer.class, job);
        job.waitForCompletion(true);
    }

    static class MyMapper extends TableMapper<Text, IntWritable> {
        MyMapper() {
        }

        protected void map(ImmutableBytesWritable key, Result value, Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            List<Cell> cells = value.listCells();
            Iterator var6 = cells.iterator();

            while(var6.hasNext()) {
                Cell cell = (Cell)var6.next();
                context.write(new Text(Bytes.toString(CellUtil.cloneValue(cell))), new IntWritable(1));
            }

        }
    }

    static class MyReducer extends TableReducer<Text, IntWritable, Text> {
        MyReducer() {
        }

        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, Mutation>.Context context) throws IOException, InterruptedException {
            int playCount = 0;

            IntWritable num;
            for(Iterator var6 = values.iterator(); var6.hasNext(); playCount += num.get()) {
                num = (IntWritable)var6.next();
            }

            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(Bytes.toBytes("details"), Bytes.toBytes("rank"), Bytes.toBytes(playCount));
            context.write(key, put);
        }
    }
}

2.打包上传运行
在这里插入图片描述
3.查看运行结果
4.进入hbase查看表

六、音乐排行榜的实现

1.音乐排行榜代码编写

//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by FernFlower decompiler)
//

package com.music;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.IntWritable.Comparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Two-stage MapReduce pipeline that builds a play-count ranking.
 *
 * <ol>
 *   <li>{@link #musicCount} counts the occurrences of each {@code info:name} value in the
 *       {@code music} table and stores them in {@code details:rank} of {@code namelist}.</li>
 *   <li>{@link #sortMusic} reads {@code namelist} and writes "count, name" lines in
 *       descending count order to the {@code topmusic} directory.</li>
 * </ol>
 * {@link #showResult} then copies the sorted output to standard output.
 */
public class TopMusic {
    static final String TABLE_MUSIC = "music";
    static final String TABLE_NAMELIST = "namelist";
    static final String OUTPUT_PATH = "topmusic";
    static Configuration conf = HBaseConfiguration.create();

    // Column coordinates. Kept as String constants (inlined by the compiler) so that the
    // nested task classes do not need to initialise TopMusic just to read them.
    private static final String FAMILY_INFO = "info";
    private static final String QUALIFIER_NAME = "name";
    private static final String FAMILY_DETAILS = "details";
    private static final String QUALIFIER_RANK = "rank";

    public TopMusic() {
    }

    /**
     * Runs the counting job from {@code music} into {@code namelist}.
     *
     * @param args remaining command-line arguments (currently unused)
     * @return {@code true} if the job completed successfully
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a class required by the job cannot be found
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    static boolean musicCount(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(conf, "music-count");
        job.setJarByClass(TopMusic.class);
        job.setNumReduceTasks(2);

        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes(FAMILY_INFO), Bytes.toBytes(QUALIFIER_NAME));
        TableMapReduceUtil.initTableMapperJob(TABLE_MUSIC, scan, ScanMusicMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(TABLE_NAMELIST, IntNumReducer.class, job);
        return job.waitForCompletion(true);
    }

    /**
     * Runs the sorting job from {@code namelist} into the {@code topmusic} directory,
     * deleting that directory first if it already exists.
     *
     * @param args remaining command-line arguments (currently unused)
     * @return {@code true} if the job completed successfully
     * @throws IOException            if file-system access or job submission fails
     * @throws ClassNotFoundException if a class required by the job cannot be found
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    static boolean sortMusic(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(conf, "sort-music");
        job.setJarByClass(TopMusic.class);
        // A single reducer yields one globally ordered part file.
        job.setNumReduceTasks(1);
        job.setSortComparatorClass(IntWritableDecreaseingComparator.class);

        // Read only details:rank. The mapper decodes every cell value as a 4-byte int, so
        // any other column in the table must not reach it.
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes(FAMILY_DETAILS), Bytes.toBytes(QUALIFIER_RANK));
        TableMapReduceUtil.initTableMapperJob(TABLE_NAMELIST, scan, ScanMusicNameMapper.class, IntWritable.class, Text.class, job);

        Path output = new Path(OUTPUT_PATH);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        FileOutputFormat.setOutputPath(job, output);
        return job.waitForCompletion(true);
    }

    /**
     * Copies {@code topmusic/part-r-00000} to standard output.
     *
     * @throws IllegalArgumentException declared by the original signature; kept for callers
     * @throws IOException              if the file cannot be opened or read
     */
    static void showResult() throws IllegalArgumentException, IOException {
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = null;

        try {
            in = fs.open(new Path(OUTPUT_PATH + "/part-r-00000"));
            // 'false': leave System.out open after copying.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }

    /**
     * Runs the count job, then the sort job, prints the ranking if both succeed, and
     * terminates the JVM with status 0 on success or 1 on failure.
     *
     * @param args generic Hadoop options followed by application arguments
     * @throws IOException            if a job or the result output fails
     * @throws ClassNotFoundException if a class required by a job cannot be found
     * @throws InterruptedException   if waiting for a job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        GenericOptionsParser gop = new GenericOptionsParser(conf, args);
        String[] otherArgs = gop.getRemainingArgs();

        boolean succeeded = musicCount(otherArgs) && sortMusic(otherArgs);
        if (succeeded) {
            showResult();
        }

        // Propagate the pipeline outcome to the shell instead of always exiting with 0.
        System.exit(succeeded ? 0 : 1);
    }

    /** Sums the counts of one name and writes the total to {@code details:rank}. */
    static class IntNumReducer extends TableReducer<Text, IntWritable, Text> {
        IntNumReducer() {
        }

        /**
         * @param key     the name being counted; becomes the row key of the written Put
         * @param values  partial counts for that name
         * @param context sink for the resulting Put
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int playCount = 0;
            for (IntWritable num : values) {
                playCount += num.get();
            }

            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(Bytes.toBytes(FAMILY_DETAILS), Bytes.toBytes(QUALIFIER_RANK), Bytes.toBytes(playCount));
            context.write(key, put);
        }
    }

    /** Sort comparator that negates the inherited comparisons to order keys descending. */
    private static class IntWritableDecreaseingComparator extends Comparator {
        private IntWritableDecreaseingComparator() {
        }

        @Override
        @SuppressWarnings("rawtypes") // must match the raw signature of the overridden method
        public int compare(WritableComparable a, WritableComparable b) {
            return -super.compare(a, b);
        }

        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return -super.compare(b1, s1, l1, b2, s2, l2);
        }
    }

    /** Emits {@code (name, 1)} for every {@code info:name} cell of the scanned row. */
    static class ScanMusicMapper extends TableMapper<Text, IntWritable> {
        ScanMusicMapper() {
        }

        /**
         * @param key     row key of the scanned row (not used)
         * @param value   cells returned for that row
         * @param context sink for the (name, 1) pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            List<Cell> cells = value.listCells();
            if (cells == null) {
                // An empty Result has no cell list; nothing to emit for this row.
                return;
            }

            for (Cell cell : cells) {
                boolean isNameColumn = FAMILY_INFO.equals(Bytes.toString(CellUtil.cloneFamily(cell)))
                        && QUALIFIER_NAME.equals(Bytes.toString(CellUtil.cloneQualifier(cell)));
                if (!isNameColumn) {
                    continue;
                }

                byte[] name = CellUtil.cloneValue(cell);
                if (name.length == 0) {
                    // The reducer uses the name as row key, and a Put cannot have a
                    // zero-length row key; skip and record such cells here.
                    context.getCounter("TopMusic", "EMPTY_NAME_SKIPPED").increment(1L);
                    continue;
                }
                context.write(new Text(Bytes.toString(name)), new IntWritable(1));
            }
        }
    }

    /** Emits {@code (count, name)} so that the shuffle sorts the names by play count. */
    static class ScanMusicNameMapper extends TableMapper<IntWritable, Text> {
        ScanMusicNameMapper() {
        }

        /**
         * @param key     row key of the scanned row, i.e. the name
         * @param value   cells returned for that row; each value is decoded as an int
         * @param context sink for the (count, name) pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            List<Cell> cells = value.listCells();
            if (cells == null) {
                // An empty Result has no cell list; nothing to emit for this row.
                return;
            }

            String name = Bytes.toString(key.get());
            for (Cell cell : cells) {
                context.write(new IntWritable(Bytes.toInt(CellUtil.cloneValue(cell))), new Text(name));
            }
        }
    }
}

2.打包运行
在这里插入图片描述
3.查看结果输出

七：sqoop工具的使用

安装：将 /root/package/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz 压缩包解压到/home/admin 目录下并改名
配置 Sqoop 环境：修改 sqoop-env.sh 文件，添加 Hadoop、Hbase、Hive 等组件的安装路径
配置 Linux 系统环境变量，添加 Sqoop 组件的路径
测试验证
测试 Sqoop 是否能够正常连接 MySQL 数据库（需要在lib文件中导入jdbc连接包）
创建数据库
实现数据迁移（需要将之前HDFS下生成的名为/topmusic/part-r-00000的文件挪到/下）
迁移成功
查看数据
前端网页实现显示数据
网页显示结果

在这里插入图片描述

出现的问题与解决方案

1.代码上传运行后，若之前运行正常，却突然无法运行并提示某节点无法连接
解决：重启所有节点虚拟机，然后检查各项服务是否已启动
2.MapReduce与Hbase集成时，hbase数据库中必须有scores表，不然会报错

林图

关注

5
点赞
踩
26

收藏

觉得还不错? 一键收藏
0
评论
hadoop实现云音乐排行榜

7.使用Hbase中的completebulkload将tmp目录移动到Hregion中完成数据加载。2.MapReduce与Hbase的集成时hbase数据库中必须有scores表不然会报错。1 准备数据创建music1.txt, music2.txt, music3,txt文件。1.代码上传运行后若之前有正常突然发现无法运行某节点无法连接。4.使用hadoop jar+jar包方式运行代码。解决：重启所有节点虚拟机，然后查看服务是否未开启。8.进入Hbase查看是否导入。6.在hadoop中查看结果。
复制链接

扫一扫