Create a new Maven project
File --> New --> Project
Select the Maven project type
Project structure
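A sketch of the layout the rest of this walkthrough assumes, using the standard Maven directory structure and the package name from the code below (adjust if yours differs):

wordcountdemo
├── pom.xml
└── src
    └── main
        └── java
            └── com
                └── mr
                    └── hdfsmapredurce
                        ├── WordcountMapper.java
                        ├── WordcountReduce.java
                        └── WordcountDriver.java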
Edit the pom.xml file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>wordcountdemo</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.7</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!--打包工具,利用maven共计打包成jar -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>com.atguigu.mapreduce.WordcountDriver</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
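Because the assembly plugin is bound to the package phase above, building the runnable jar is the standard Maven command, run from the project root:

mvn clean package

Assuming the groupId/artifactId/version shown above, this should produce target/wordcountdemo-1.0-SNAPSHOT-jar-with-dependencies.jar with com.mr.hdfsmapredurce.WordcountDriver already set as the main class in its manifest.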
Writing the code
- WordcountMapper.java
package com.mr.hdfsmapredurce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Map phase: the byte offset of each line is the input key, the line's text is the input value
public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Text k = new Text();
    private IntWritable v = new IntWritable(1);

    /**
     * Override map to implement word count
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, java.lang.InterruptedException {
        // Get one line and convert it to a String
        String line = value.toString();
        // Split the line into words
        String[] words = line.split(" ");
        // Emit (word, 1) for every non-empty word
        for (String word : words) {
            String trim = word.trim();
            if (!trim.isEmpty()) {
                k.set(trim);
                context.write(k, v);
            }
        }
    }
}
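To make the mapper's behavior concrete: given a hypothetical input line hello world hello (purely illustrative, not from the test data), it emits these key/value pairs:

(hello, 1)
(world, 1)
(hello, 1)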
- WordcountReduce.java
package com.mr.hdfsmapredurce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.util.Iterator;

// Reduce phase: for each word, sum the counts emitted by the mappers
public class WordcountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text text, Iterable<IntWritable> iterable, Context context) throws java.io.IOException, java.lang.InterruptedException {
        // Add up all the 1s for this word
        int sum = 0;
        Iterator<IntWritable> iterator = iterable.iterator();
        while (iterator.hasNext()) {
            sum += iterator.next().get();
        }
        // Write the result, skipping empty keys
        if (!text.toString().trim().equals("")) {
            context.write(text, new IntWritable(sum));
        }
    }
}
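Continuing the same illustrative input: the shuffle phase groups the mapper output by key, so this reducer receives (hello, [1, 1]) and (world, [1]) and writes (tab-separated by default):

hello	2
world	1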
- WordcountDriver.java

package com.mr.hdfsmapredurce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordcountDriver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Get the configuration and create the job from it
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        // Set the jar load path
        job.setJarByClass(WordcountDriver.class);
        // Set the Mapper and Reducer classes
        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordcountReduce.class);
        // Set the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the input and output paths from the program arguments
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job and wait for it to finish
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
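Once packaged, the job can also be submitted to a cluster with the standard hadoop jar command; the HDFS paths below are placeholders, not from the original text:

hadoop jar wordcountdemo-1.0-SNAPSHOT-jar-with-dependencies.jar com.mr.hdfsmapredurce.WordcountDriver /input/aaa.txt /output/wordcount

The two arguments map to args[0] (input path) and args[1] (output path) in the driver; note that the output directory must not already exist.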
Troubleshooting
Running from the IDE requires two program arguments: the input path and the output path.
Create a test file E:\aaa.txt (a few English words is enough) and set the program arguments to E:\aaa.txt E:\bbb.txt in the run configuration.
Running at this point fails with the following error:
Exception in thread "main" java.io.IOException: (null) entry in command string: null chmod 0700 D:\tmp\hadoop-31537\mapred\staging\31537577306978.staging
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:772)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:869)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:852)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem.mkOneDirWithMode(RawLocalFileSystem.java:491)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:531)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:509)
at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:312)
at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:133)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:144)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1308)
at com.mr.hdfsmapredurce.WordcountDrivre.main(WordcountDrivre.java:38)
Solution:
The error occurs because Hadoop's Windows native library is missing. Add the hadoop.dll file (download link below); it is commonly placed in C:\Windows\System32 or in the %HADOOP_HOME%\bin directory.
With that in place, running the job creates a folder named bbb.txt under E:\ containing the results (in the part-r-00000 file inside it).
The result file can be opened with Notepad.
hadoop.dll file
Link: https://pan.baidu.com/s/1V6aA4J7T2N-1ivp_wkqgZw  Extraction code: ncva