MapReduce的WordCount案例
案例一:统计单词个数
首先准备一个word.txt文件上传到Linux(内容随意,一行只有一个单词就行)
然后
hdfs dfs -put 本地txt文件路径 hdfs目标存放目录
例:hdfs dfs -put /usr/local/data/word.txt /word
package com.liu.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
* @Author : ld
* @Description :
* @ClassName : WordCountDemo1
* @Date : 2021/9/22 18:59
* @Version : 1.0
*/
// 用来统计文件中单词个数
// 重写 覆盖mapreduce框架中map() 和reduce()方法
public class WordCountDemo1 {
// map类
// 第一对kv,是决定数据输入的格式
// 第二队kv 是决定数据输出的格式
public static class WCMapper extends Mapper<LongWritable, Text,Text,LongWritable> {
/*
map阶段数据是一行一行过来的
每一行数据都需要执行代码
*/
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
//通过Context输出 Text(一整行数据),1
context.write(new Text(line),new LongWritable(1));
}
}
//reduce类
// 用来接收map端输出的数据
public static class WCReduce extends Reducer<Text,LongWritable,Text,LongWritable>{
/**
* reduce 聚合程序 每一个k都会调用一次
* 默认是一个节点
* key:每一个单词
* values:map端 当前k所对应的所有的v
*/
protected void reduce(Text key,Iterable<LongWritable> values,Context context) throws IOException, InterruptedException {
long sum= 0L;
for (LongWritable value : values) {
sum+=value.get();
}
// 把计算结果输出到hdfs
context.write(key,new LongWritable(sum));
}
}
/**
* 是当前mapreduce程序入口
* 用来构建mapreduce程序
*/
public static void main(String[] args) throws Exception {
Job job = Job.getInstance();//创建一个job任务
job.setJobName("mapreduce单词统计"); //指定job名称
//构建mr
//指定当前main所在类名(识别具体的类)
job.setJarByClass(WordCountDemo1.class);
job.setMapperClass(WCMapper.class);//指定map端口
job.setMapOutputKeyClass(Text.class);// 指定map输出的kv类型
job.setMapOutputValueClass(LongWritable.class);
//指定reduce端类
//指定reduce端输出的kv类型
job.setReducerClass(WCReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// 指定输入路径
Path in = new Path("/word");
FileInputFormat.addInputPath(job, in);
//指定输出
Path out = new Path("/output");
//如果路径存在 删除
FileSystem fs = FileSystem.get(new Configuration());
if(fs.exists(out)){
fs.delete(out,true);
}
FileOutputFormat.setOutputPath(job,out);
//启动
job.waitForCompletion(true);
/**
* 提交任务
* 1.通过maven中package将项目打包上传服务器然后执行
* 2.执行任务 hadoop jar hadoop-1.0-SNAPSHOT.jar com.liu.hadoop.WordCountDemo1 /word /output
*
*/
System.out.println("mr正在执行");
}
}
#运行:
hadoop jar hadoop-1.0-SNAPSHOT.jar com.liu.hadoop.WordCountDemo1 /word /output
案例二:按班级统计年龄总和
首先把准备好的students.txt文件上传到hdfs的data目录下
点击下载students.txt
(如果没有下载积分,随便发布一篇文章即可获得积分,然后就能下载了)
package com.liu.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
* @Author : ld
* @Description :
* @ClassName : WordCountDemo3
* @Date : 2021/9/22 18:59
* @Version : 1.0
*/
public class WordCountDemo3 {
public static class SumMapper extends Mapper<LongWritable,Text, Text, LongWritable> {
protected void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException {
String line = value.toString();
String[] split = line.split(",");
LongWritable age = new LongWritable(Integer.valueOf(split[2]));
String clazz=split[4];
context.write(new Text(clazz),age);
}
}
public static class SumReduce extends Reducer<Text,LongWritable,Text,LongWritable> {
protected void reduce(Text key,Iterable<LongWritable> values ,Context context) throws IOException, InterruptedException {
long sum= 0L;
for (LongWritable value : values) {
sum+=value.get();
}
context.write(key,new LongWritable(sum));
}
}
public static void main(String[] args) throws Exception{
//创建一个job任务
Job job = Job.getInstance();
//指定job名称
job.setJobName("第三个mr程序,年龄统计");
//构建mr
//指定当前main所在类名(识别具体的类)
job.setJarByClass(WordCountDemo3.class);
//指定map端类
job.setMapperClass(SumMapper.class);
// 指定map输出的kv类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
//指定reduce端类
//指定reduce端输出的kv类型
job.setReducerClass(SumReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// 指定输入路径
Path in = new Path("/data");
FileInputFormat.addInputPath(job,in);
//指定输出
Path out = new Path("/output");
//如果路径存在 删除
FileSystem fs = FileSystem.get(new Configuration());
if(fs.exists(out)){
fs.delete(out,true);
}
FileOutputFormat.setOutputPath(job,out);
//启动任务
job.waitForCompletion(true);
System.out.println("mr3正在执行");
}
}
Linux中运行:(先切换到放jar包的目录下)
hadoop jar hadoop-1.0-SNAPSHOT.jar com.liu.hadoop.WordCountDemo3
别问为什么后面没加路径,上面的代码中设置过了
还有确保hdfs的data目录下只有students.txt这一个文件