MapReduce (1): HashCode

Java basic type    Writable          Serialized size (bytes)
boolean            BooleanWritable   1
byte               ByteWritable      1
int                IntWritable       4
int                VIntWritable      1-5
float              FloatWritable     4
long               LongWritable      8
long               VLongWritable     1-9
double             DoubleWritable    8
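
The fixed-length versus variable-length sizes in the table can be checked directly by serializing each Writable and counting the bytes. A minimal sketch, assuming a Hadoop client dependency on the classpath (the WritableSizeDemo class and its helper method are mine, not part of the original post):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.Writable;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class WritableSizeDemo {
    // Serialize a Writable into an in-memory buffer and return the number of bytes written
    private static int serializedSize(Writable writable) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        writable.write(new DataOutputStream(buffer));
        return buffer.size();
    }

    public static void main(String[] args) throws IOException {
        System.out.println(serializedSize(new IntWritable(1)));          // a fixed 4 bytes
        System.out.println(serializedSize(new IntWritable(1_000_000)));  // still 4 bytes
        System.out.println(serializedSize(new VIntWritable(1)));         // 1 byte for a small value
        System.out.println(serializedSize(new VIntWritable(1_000_000))); // more bytes for a larger value, at most 5
    }
}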

 

[root@master ~]# rz -E
rz waiting to receive.
[root@master ~]# ls
ac.sh                                        words.txt  文档
anaconda-ks.cfg                              公共       下载
dump.rdb                                     模板       音乐
initial-setup-ks.cfg                         视频       桌面
mysql57-community-release-el7-10.noarch.rpm  图片
[root@master ~]# cat words.txt
java,c,c++,sql,scale,python
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix[root@master ~]# mv words.txt /usr/local/soft/data/
[root@master ~]# cd /usr/local/soft/data/
[root@master data]# ls
new_db.sql  student.sql         theZen.txt  words.txt
score.sql   theZenOfPython.txt  wordcount
[root@master data]# cat words.txt
java,c,c++,sql,scale,python
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix
mysql,springboot,redis
hadoop,hive,hbase,spark,flink
kafka,sqoop,flume,datax,kettle,flink
phoenix[root@master data]# hdfs dfs -put words.txt /data/wc/input/
package com.shujia.MapReduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Demo01WordCount {
    // Map task
    /**
     * Extend the Mapper class, specify the input K-V types and the output K-V types,
     * then override the map method.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        /**
         * key     the byte offset of the current line within the file
         * value   one line of data
         * context the context the MR task runs in; it exposes the current configuration and
         *         other runtime information, and is mainly used to emit the K-V pairs built
         *         on the map side
         */
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            // Custom map logic:
            // split the line on the delimiter to get each word
            String[] splits = value.toString().split(",");
            for (String word : splits) {
                // Use the word as the key and 1 as the value, build the output K-V,
                // then emit it through the context.
                /**
                 * java,c,c++,sql,scale,python
                 * emits: java,1
                 * c,1
                 * c++,1
                 * sql,1
                 * scale,1
                 * python,1
                 */
                Text outPutKey = new Text(word);
                IntWritable outputValue = new IntWritable(1);
                context.write(outPutKey, outputValue);
            }
        }
    }
    /**
     * Extend the Reducer class, specify the input K-V types (which must match the map
     * output K-V types) and the output K-V types, then override the reduce method.
     */
    // Reduce task
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        /**
         * key     after the shuffle phase, records with the same key go to the same reduce task
         * values  an iterable "collection" of all the values that share the same key
         * context mainly used to write the output to HDFS
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int sum = 0; // holds the final count for this key
            for (IntWritable value : values) {
                sum += value.get();
            }
            // Write out the final result
            context.write(key, new IntWritable(sum));
        }
    }
    // Driver program, responsible for configuring and submitting the job
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        // Create a MapReduce job
        Job job = Job.getInstance(conf);
        // Configure the job
        job.setJobName("Demo01WordCount");
        // Set the class the job runs
        job.setJarByClass(Demo01WordCount.class);
        // Set the number of reduce tasks (default 1); the number of output files equals the number of reducers
        job.setNumReduceTasks(3);

        // Configure the map side
        // Specify which class runs as the mapper
        job.setMapperClass(MyMapper.class);
        // Set the map output key type
        job.setMapOutputKeyClass(Text.class);
        // Set the map output value type
        job.setMapOutputValueClass(IntWritable.class);

        // Configure the reduce side
        // Specify which class runs as the reducer
        job.setReducerClass(MyReducer.class);
        // Set the reduce output key type
        job.setOutputKeyClass(Text.class);
        // Set the reduce output value type
        job.setOutputValueClass(IntWritable.class);

        // Configure the input and output paths
        /**
         * hdfs dfs -mkdir /data/wc/input
         * hdfs dfs -put words.txt /data/wc/input
         */
        FileInputFormat.addInputPath(job, new Path("/data/wc/input"));
        Path path = new Path("/data/wc/output");
        FileSystem fs = FileSystem.get(conf);
        // Check whether the output path exists; if it does, delete it
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        // If the output path already exists, the job fails with an error
        FileOutputFormat.setOutputPath(job, path);

        // Wait for the job to finish
        job.waitForCompletion(true);
    }
    /**
     * 1. Upload words.txt to the VM, then put it on HDFS with the HDFS commands:
     *    hdfs dfs -mkdir /data/wc/input
     *    hdfs dfs -put words.txt /data/wc/input
     * 2. Package the code into a jar with Maven (package) and upload it to the VM.
     * 3. Submit the job with:
     *    hadoop jar hadoop-1.0-SNAPSHOT.jar com.shujia.MapReduce.Demo01WordCount
     * Viewing logs: an applicationId is generated automatically when the job runs:
     *    yarn logs -applicationId application_1647858149677_0004
     * Logs can also be viewed through the history server. Because the tasks actually run
     * inside the NodeManagers, the logs are scattered; the history server collects them
     * from the NodeManagers onto the master so they are easier to read.
     * Start the history server (on the master only):
     *    mr-jobhistory-daemon.sh start historyserver
     *    http://master:19888
     */
}
[root@master data]# cd ..
[root@master soft]# ls
0??   hadoop-2.7.6  redis        shell01   zookeeper-3.4.6
A??   jdk1.8.0_171  redis-6.2.6  show
data  packages      shell        test.txt
[root@master soft]# mkdir jars
[root@master soft]# mv data/hadoop-1.0-SNAPSHOT.jar ./jars/
[root@master soft]# ls
0??   hadoop-2.7.6  packages     shell    test.txt
A??   jars          redis        shell01  zookeeper-3.4.6
data  jdk1.8.0_171  redis-6.2.6  show
[root@master soft]# cd jars/
[root@master jars]# ls
hadoop-1.0-SNAPSHOT.jar
[root@master jars]# pwd
/usr/local/soft/jars
[root@master jars]# hadoop jar hadoop-1.0-SNAPSHOT.jar com.shujia.MapReduce.Demo01WordCount
[root@master jars]# hdfs dfs -cat /data/wc/output/part-r-00000
c++	1
datax	4
hbase	4
hive	4
kettle	4
mysql	4
spark	4
package com.shujia;

// Why did c++, datax, hbase, hive, kettle, mysql and spark all end up in part-r-00000?
// A key is assigned to a reduce task based on its hash code modulo the number of
// reduce tasks, and all of these words fall into the same bucket when taken modulo 3.
public class HashCode {
    public static void main(String[] args) {
        String k1="spark";
        String k2="c++";
        String k3="datax";
        String k4="hbase";
        String k5="hive";
        String k6="kettle";
        String k7="mysql";
        int numReduceTasks = 3;
        System.out.println(k1.hashCode()%numReduceTasks);
        System.out.println(k2.hashCode()%numReduceTasks);
        System.out.println(k3.hashCode()%numReduceTasks);
        System.out.println(k4.hashCode()%numReduceTasks);
        System.out.println(k5.hashCode()%numReduceTasks);
        System.out.println(k6.hashCode()%numReduceTasks);
        System.out.println(k7.hashCode()%numReduceTasks);
    }
}
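
Note that this demo approximates the decision with String.hashCode(). Hadoop's default HashPartitioner works on the Text key instead, computing (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks, and Text.hashCode() is calculated over the UTF-8 bytes, so the printed numbers are not guaranteed to match the String version exactly. A minimal sketch that asks the partitioner directly (the PartitionCheck class is mine, not part of the original post):

package com.shujia;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class PartitionCheck {
    public static void main(String[] args) {
        // Same number of reduce tasks as configured in Demo01WordCount
        int numReduceTasks = 3;
        HashPartitioner<Text, IntWritable> partitioner = new HashPartitioner<>();
        String[] words = {"spark", "c++", "datax", "hbase", "hive", "kettle", "mysql"};
        for (String word : words) {
            // HashPartitioner returns (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
            int partition = partitioner.getPartition(new Text(word), new IntWritable(1), numReduceTasks);
            System.out.println(word + " -> partition " + partition);
        }
    }
}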
[root@master ~]# mr-jobhistory-daemon.sh start historyserver
starting historyserver, logging to /usr/local/soft/hadoop-2.7.6/logs/mapred-root-historyserver-master.out
[root@master ~]# yarn logs -applicationId application_1647858149677_0004
