Create a new Maven project
File --> New --> Project
Select the Maven project type
Project structure
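A sketch of the layout the rest of this walkthrough assumes, using the standard Maven directory structure and the package name from the code below (adjust if yours differs):

wordcountdemo
├── pom.xml
└── src
    └── main
        └── java
            └── com
                └── mr
                    └── hdfsmapredurce
                        ├── WordcountMapper.java
                        ├── WordcountReduce.java
                        └── WordcountDriver.java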
Edit the pom.xml file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>wordcountdemo</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.7</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!--打包工具,利用maven共计打包成jar -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>com.atguigu.mapreduce.WordcountDriver</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
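Because the assembly plugin is bound to the package phase above, building the runnable jar is the standard Maven command, run from the project root:

mvn clean package

Assuming the groupId/artifactId/version shown above, this should produce target/wordcountdemo-1.0-SNAPSHOT-jar-with-dependencies.jar with com.mr.hdfsmapredurce.WordcountDriver already set as the main class in its manifest.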
Writing the code
- WordcountMapper.java
package com.mr.hdfsmapredurce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Map phase: the byte offset of each line is the input key, the line's text is the input value
public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Text k = new Text();
    private IntWritable v = new IntWritable(1);

    /**
     * Override map to implement word count
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, java.lang.InterruptedException {
        // Get one line and convert it to a String
        String line = value.toString();
        // Split the line into words
        String[] words = line.split(" ");
        // Emit (word, 1) for every non-empty word
        for (String word : words) {
            String trim = word.trim();
            if (!trim.isEmpty()) {
                k.set(trim);
                context.write(k, v);
            }
        }
    }
}
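To make the mapper's behavior concrete: given a hypothetical input line hello world hello (purely illustrative, not from the test data), it emits these key/value pairs:

(hello, 1)
(world, 1)
(hello, 1)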
- WordcountReduce.java
package com.mr.hdfsmapredurce;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.util.Iterator;

// Reduce phase: for each word, sum the counts emitted by the mappers
public class WordcountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text text, Iterable<IntWritable> iterable, Context context) throws java.io.IOException, java.lang.InterruptedException {
        // Add up all the 1s for this word
        int sum = 0;
        Iterator<IntWritable> iterator = iterable.iterator();
        while (iterator.hasNext()) {
            sum += iterator.next().get();
        }
        // Write the result, skipping empty keys
        if (!text.toString().trim().equals("")) {
            context.write(text, new IntWritable(sum));
        }
    }
}
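Continuing the same illustrative input: the shuffle phase groups the mapper output by key, so this reducer receives (hello, [1, 1]) and (world, [1]) and writes (tab-separated by default):

hello	2
world	1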
- WordcountDriver.java

package com.mr.hdfsmapredurce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordcountDriver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Get the configuration and create the job from it
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        // Set the jar load path
        job.setJarByClass(WordcountDriver.class);
        // Set the Mapper and Reducer classes
        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordcountReduce.class);
        // Set the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the input and output paths from the program arguments
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job and wait for it to finish
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
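Once packaged, the job can also be submitted to a cluster with the standard hadoop jar command; the HDFS paths below are placeholders, not from the original text:

hadoop jar wordcountdemo-1.0-SNAPSHOT-jar-with-dependencies.jar com.mr.hdfsmapredurce.WordcountDriver /input/aaa.txt /output/wordcount

The two arguments map to args[0] (input path) and args[1] (output path) in the driver; note that the output directory must not already exist.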
Troubleshooting
Running from the IDE requires two program arguments: the input path and the output path.
Create a test file E:\aaa.txt (a few English words is enough) and set the program arguments to E:\aaa.txt E:\bbb.txt in the run configuration.
Running at this point fails with the following error:
Exception in thread "main" java.io.IOException: (null) entry in command string: null chmod 0700 D:\tmp\hadoop-31537\mapred\staging\31537577306978.staging
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:772)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:869)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:852)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem.mkOneDirWithMode(RawLocalFileSystem.java:491)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirsWithOptionalPermission(RawLocalFileSystem.java:531)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:509)
at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:312)
at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:133)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:144)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1308)
at com.mr.hdfsmapredurce.WordcountDrivre.main(WordcountDrivre.java:38)
Solution:
The error occurs because Hadoop's Windows native library is missing. Add the hadoop.dll file (download link below); it is commonly placed in C:\Windows\System32 or in the %HADOOP_HOME%\bin directory.
With that in place, running the job creates a folder named bbb.txt under E:\ containing the results (in the part-r-00000 file inside it).
The result file can be opened with Notepad.
hadoop.dll file
Link: https://pan.baidu.com/s/1V6aA4J7T2N-1ivp_wkqgZw  Extraction code: ncva