package com.bjsxt.mr.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the word-count MapReduce job.
 *
 * <p>Usage: {@code yarn jar wordcount.jar com.bjsxt.mr.wordcount.MainClass <input> <output>}
 * where {@code <input>} is an existing HDFS path and {@code <output>} is an
 * HDFS path that must NOT yet exist (the OutputFormat refuses to overwrite).
 */
public class MainClass {
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: MainClass <input path> <output path>");
            System.exit(2);
        }
        // 1. Load the cluster configuration (true = also read *-site.xml resources).
        Configuration conf = new Configuration(true);
        // 2. Create the Job from that configuration.
        Job job = Job.getInstance(conf);
        // 3. Tell Hadoop which jar to ship by locating the jar containing this class.
        job.setJarByClass(MainClass.class);
        // 4. Human-readable job name shown in the ResourceManager UI.
        job.setJobName("mywordcount");
        // 5. Mapper implementation.
        job.setMapperClass(WordCountMapper.class);
        // 6. Reducer implementation.
        job.setReducerClass(WordCountReduce.class);
        // 7. HDFS input path to read.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // 8. HDFS output path; must not already exist.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 9./10. Job output key/value types. These also serve as the map output
        // types because no setMapOutputKeyClass/setMapOutputValueClass override
        // is given and mapper and reducer emit the same (Text, LongWritable) pair.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 11. Number of reduce tasks (=> number of part-r-NNNNN output files).
        job.setNumReduceTasks(2);
        // Submit and block until completion; propagate success/failure via exit code
        // so callers (scripts, schedulers) can detect a failed job.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
package com.bjsxt.mr.wordcount;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that tokenizes each input line on whitespace and emits
 * {@code (word, 1L)} for every non-empty token.
 *
 * <p>Input key is the byte offset of the line (unused); input value is the
 * line text. Output key/value types must match the job's configured
 * (Text, LongWritable) output types.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Writables are reused across map() calls to avoid one allocation per
    // record (standard Hadoop idiom); the count is always 1, set once here.
    private final LongWritable outValue = new LongWritable(1L);
    private final Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split on runs of whitespace rather than a single space: split(" ")
        // produces empty-string tokens for leading/consecutive spaces, which
        // would otherwise be counted as a bogus "" word.
        String[] words = value.toString().split("\\s+");
        for (String word : words) {
            if (word.isEmpty()) {
                continue; // a leading delimiter still yields one empty first token
            }
            outKey.set(word);
            // Write the (word, 1) pair into the map-side shuffle buffer.
            context.write(outKey, outValue);
        }
    }
}
package com.bjsxt.mr.wordcount;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that totals the per-word counts emitted by the mapper.
 *
 * <p>For each distinct word, the framework hands us that word plus an
 * iterable over all of its 1L counts; we sum them and emit a single
 * {@code (word, total)} pair to the job output.
 */
public class WordCountReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
    // Reused output Writable — one instance for the lifetime of the task.
    private LongWritable outValue = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable count : values) {
            total += count.get();
        }
        // Emit the aggregated count for this word to the HDFS output file.
        outValue.set(total);
        context.write(key, outValue);
    }
}
[root@nodeok ~]# hdfs dfs -D dfs.replication=1 -D dfs.blocksize=1048576 -put hello.txt /
19/04/11 16:40:49 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[root@nodeok ~]# clear
[root@nodeok ~]# ls
anaconda-ks.cfg hadoop-2.6.5.tar.gz hello.txt install.log install.log.syslog jdk-7u80-linux-x64.rpm wordcount.jar
[root@nodeok ~]# yarn jar wordcount.jar com.bjsxt.mr.wordcount.MainClass /hello.txt /mr
19/04/11 16:41:19 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/04/11 16:41:21 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
19/04/11 16:41:22 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
19/04/11 16:41:23 INFO input.FileInputFormat: Total input paths to process : 1
19/04/11 16:41:23 INFO mapreduce.JobSubmitter: number of splits:2
19/04/11 16:41:24 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1554971932274_0001
19/04/11 16:41:25 INFO impl.YarnClientImpl: Submitted application application_1554971932274_0001
19/04/11 16:41:25 INFO mapreduce.Job: The url to track the job: http://nodeok:8088/proxy/application_1554971932274_0001/
19/04/11 16:41:25 INFO mapreduce.Job: Running job: job_1554971932274_0001
19/04/11 16:41:45 INFO mapreduce.Job: Job job_1554971932274_0001 running in uber mode : false
19/04/11 16:41:45 INFO mapreduce.Job: map 0% reduce 0%
19/04/11 16:42:16 INFO mapreduce.Job: map 83% reduce 0%
19/04/11 16:42:17 INFO mapreduce.Job: map 100% reduce 0%
19/04/11 16:42:47 INFO mapreduce.Job: map 100% reduce 71%
19/04/11 16:42:50 INFO mapreduce.Job: map 100% reduce 98%
19/04/11 16:42:51 INFO mapreduce.Job: map 100% reduce 100%
19/04/11 16:42:52 INFO mapreduce.Job: Job job_1554971932274_0001 completed successfully
19/04/11 16:42:52 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=4788907
FILE: Number of bytes written=10005548
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=1793177
HDFS: Number of bytes written=788921
HDFS: Number of read operations=12
HDFS: Number of large read operations=0
HDFS: Number of write operations=4
Job Counters
Launched map tasks=2
Launched reduce tasks=2
Data-local map tasks=2
Total time spent by all maps in occupied slots (ms)=57804
Total time spent by all reduces in occupied slots (ms)=61580
Total time spent by all map tasks (ms)=57804
Total time spent by all reduce tasks (ms)=61580
Total vcore-milliseconds taken by all map tasks=57804
Total vcore-milliseconds taken by all reduce tasks=61580
Total megabyte-milliseconds taken by all map tasks=59191296
Total megabyte-milliseconds taken by all reduce tasks=63057920
Map-Reduce Framework
Map input records=100000
Map output records=300000
Map output bytes=4188895
Map output materialized bytes=4788919
Input split bytes=186
Combine input records=0
Combine output records=0
Reduce input groups=100002
Reduce shuffle bytes=4788919
Reduce input records=300000
Reduce output records=100002
Spilled Records=600000
Shuffled Maps =4
Failed Shuffles=0
Merged Map outputs=4
GC time elapsed (ms)=2504
CPU time spent (ms)=16690
Physical memory (bytes) snapshot=586608640
Virtual memory (bytes) snapshot=3370262528
Total committed heap usage (bytes)=273997824
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=1792991
File Output Format Counters
Bytes Written=788921
[root@nodeok ~]# hdfs dfs -get /mr
19/04/11 16:44:42 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[root@nodeok ~]# ls
anaconda-ks.cfg hadoop-2.6.5.tar.gz hello.txt install.log install.log.syslog jdk-7u80-linux-x64.rpm mr wordcount.jar
[root@nodeok ~]# cd mr/
[root@nodeok mr]# ls
part-r-00000 part-r-00001 _SUCCESS
[root@nodeok mr]# ll -h
total 776K
-rw-r--r-- 1 root root 386K Apr 11 16:44 part-r-00000
-rw-r--r-- 1 root root 386K Apr 11 16:44 part-r-00001
-rw-r--r-- 1 root root 0 Apr 11 16:44 _SUCCESS
[root@nodeok mr]# vi part-r-00000
[root@nodeok mr]# vi part-r-00001