hadoop_mapreduce01_wordcount
(一)需求
统计单词个数。
1)输入数据:
/Users/art/Documents/demo_datas/wordcount_inputs/words.txt
hive emr hive
hive d2 emr odps
helloword hive
2)输出数据:
/Users/art/Documents/demo_datas/wordcount_outputs(要求运行前文件夹不存在)
期望输出结果:
d2 1
emr 2
helloword 1
hive 4
odps 1
(二)需求分析
1)输入数据
hive emr hive
hive d2 emr odps
helloword hive
2)输出数据
d2 1
emr 2
helloword 1
hive 4
odps 1
3)Mapper
3.1 将MapTask传递过来的文本内容转换成String
hive emr hive
3.2 根据切分键\t将line切分成words列表
hive
emr
hive
3.3 遍历words列表,输出<word,1>
hive,1
emr,1
hive,1
4)Reducer
4.1 汇总各个key的个数
hive,1
emr,1
hive,1
4.2 输出该key的总个数
emr,1
hive,2
5)Driver
5.1 获取配置信息,获取job对象实例
5.2 指定本程序的jar包所在的本地路径
5.3 关联Mapper/Reducer业务类
5.4 指定Mapper输出数据kv类型
5.5 指定最终输出数据kv类型
5.6 指定job的输入原始文件所在目录
5.7 指定job的输出结果所在目录
5.8 提交作业
6)理解成MR的八个步骤(还要改)
6.1 读文件,解析成<k1,v1>对;
6.2 自定义map逻辑,解析成<k2,v2>对;
6.3 分区,将相同的key分发到同一个reduce里,形成集合(*);
6.4 排序,按字典顺序排序;
6.5 规约,在map端做一次预聚合,减少k2输出;
6.6 分组,将相同的数据发送到同一组里,调用一次reduce逻辑;
6.7 自定义reduce逻辑:<k2,v2>解析成<k3,v3>;
6.8 输出,reduce完成后到数据输出。
(三)创建Maven工程
1)建Project
Group Id :com.art
Artifact Id : hadoop_mr_2103
2)建module
Group Id :com.art
Artifact Id : mr_wordcount_2110
3)module xml文件添加
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hadoop_mr_2103</artifactId>
<groupId>com.art</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>mr_wordcount_2110</artifactId>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.2</version>
</dependency>
</dependencies>
</project>
4)project pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.art</groupId>
<artifactId>hadoop_mr_2103</artifactId>
<packaging>pom</packaging>
<version>1.0-SNAPSHOT</version>
<modules>
<module>mr_wordcount_2110</module>
<module>mr_SerializationDeserialization_2111</module>
</modules>
</project>
5)在项目的src/main/resources 目录下,新建一个文件,命名为“log4j.properties”,在文件中输入:
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
不加该文件,运行报错:
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
(四)编写程序
1)编写Mapper类
package com.art.mapreduce.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Word-count mapper.
 *
 * <p>Type parameters:
 * <ul>
 *   <li>LongWritable — input key: byte offset of the line within the input
 *       split (NOT a line number)</li>
 *   <li>Text — input value: one line of text, e.g. "hive\temr\thive"</li>
 *   <li>Text — output key: a single word</li>
 *   <li>IntWritable — output value: the count 1 for each word occurrence</li>
 * </ul>
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reused across map() calls to avoid allocating a new object per record.
    private final Text k = new Text();
    // Every emitted count is 1, so the value is fixed once instead of
    // calling set(1) on every loop iteration.
    private final IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Read one line of input.
        String line = value.toString();
        // 2. Split the line on the tab delimiter.
        String[] words = line.split("\t");
        // 3. Emit a <word, 1> pair for every word on the line.
        for (String word : words) {
            k.set(word);
            context.write(k, one);
        }
    }
}
2)编写Reducer类
package com.art.mapreduce.wordcount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Word-count reducer: sums the per-word counts emitted by the mapper.
 *
 * <p>Input : &lt;word, [1, 1, ...]&gt; — all counts for one word, grouped by key.
 * <br>Output: &lt;word, total&gt; — total number of occurrences of the word.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Reused across reduce() calls to avoid allocating a new object per key.
    private final IntWritable total = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // 1. Accumulate the values for this key. A local variable (rather than
        //    an instance field) guarantees no state leaks between keys.
        int sum = 0;
        for (IntWritable value : values) {
            // IntWritable is a Writable wrapper; get() extracts the int —
            // Writables cannot be added directly.
            sum += value.get();
        }
        total.set(sum);
        // 2. Emit <word, total count>.
        context.write(key, total);
    }
}
3)编写Driver类
package com.art.mapreduce.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the word-count MapReduce job.
 *
 * <p>Usage: {@code WordCountDriver <input path> <output path>}
 * <br>The output directory must not exist before the job runs.
 */
public class WordCountDriver {
    public static void main(String[] args)
            throws java.io.IOException, InterruptedException, ClassNotFoundException {
        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException when arguments are missing.
        if (args.length != 2) {
            System.err.println("Usage: WordCountDriver <input path> <output path>");
            System.exit(2);
        }
        // IDE test paths:
        // args = new String[]{"/Users/art/Documents/demo_datas/wordcount_inputs/words.txt", "/Users/art/Documents/demo_datas/wordcount_outputs"};

        // 1. Configuration and job instance.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Jar containing the driver class (locates the job jar on the cluster).
        job.setJarByClass(WordCountDriver.class);
        // 3. Mapper class and its intermediate output key/value types.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // 4. Reducer class and the job's final output key/value types.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5. Input file(s) and output directory.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 6. Submit and wait for completion; exit 0 on success, 1 on failure.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
(五)测试
1)本地测试
解压编译过的hadoop-2.7.2到本地developer_tools;
设置本地环境变量;
art@art-Pro ~ % cat ~/.bash_profile
export PATH=$PATH:/usr/bin:/usr/sbin:/usr/local/bin
export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_251.jdk/Contents/Home
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export MAVEN_HOME=/Users/art/Documents/developer_tools/apachemaven/apache-maven-3.6.3
export HADOOP_HOME=/Users/art/Documents/developer_tools/hadoop2.7.2/hadoop-2.7.2
export PATH=$PATH:$HADOOP_HOME/bin:$JAVA_HOME/bin
在IDEA上运行程序。
2)集群上测试
2.1 用maven打jar包,添加打包插件依赖(mainClass需要替换为工程主类Driver)
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>com.art.mapreduce.wordcount.WordCountDriver</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
2.2 打包,用不带依赖的jar
2.3 启动集群
start-dfs.sh
start-yarn.sh
2.4 jar包拷贝到集群上,准备测试数据,执行wordcount程序
# SecureCRT sftp模式:cmd+shift+p
# 上传jar到linux
sftp> cd /opt/demo_jars/
sftp> put -r "/Users/art/Documents/#Java_original_script/hadoop_mr_2103/mr_wordcount_2110/target/mr_wordcount_2110-1.0-SNAPSHOT.jar"
Uploading mr_wordcount_2110-1.0-SNAPSHOT.jar to /opt/demo_jars/mr_wordcount_2110-1.0-SNAPSHOT.jar
100% 5KB 5KB/s 00:00:00
/Users/art/Documents/#Java_original_script/hadoop_mr_2103/mr_wordcount_2110/target/mr_wordcount_2110-1.0-SNAPSHOT.jar: 5840 bytes transferred in 0 seconds (5840 bytes/s)
sftp>
# hdfs上建两个文件夹 /user/user02/demo_datas /user/user02/demo_jars
[user02@hadoop102 ~]$ hadoop fs -mkdir demo_datas
[user02@hadoop102 ~]$ hadoop fs -mkdir demo_jars
# 从linux上传数据到hdfs
[user02@hadoop102 inputs2110]$ hadoop fs -put /opt/demo_datas/inputs2110 /user/user02/demo_datas/inputs2110
# 执行wordcount。 hadoop102:50070 web页面下载查看结果
[user02@hadoop102 inputs2110]$ hadoop jar /opt/demo_jars/mr_wordcount_2110-1.0-SNAPSHOT.jar com.art.mapreduce.wordcount.WordCountDriver /user/user02/demo_datas/inputs2110 /user/user02/demo_datas/outputs2110
3)运行日志
# 本地
0000-00-00 00 :29:33,552 WARN [org.apache.hadoop.util.NativeCodeLoader] - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
0000-00-00 00 :29:33,635 INFO [org.apache.hadoop.conf.Configuration.deprecation] - session.id is deprecated. Instead, use dfs.metrics.session-id
0000-00-00 00 :29:33,636 INFO [org.apache.hadoop.metrics.jvm.JvmMetrics] - Initializing JVM Metrics with processName=JobTracker, sessionId=
0000-00-00 00 :29:33,962 WARN [org.apache.hadoop.mapreduce.JobResourceUploader] - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
0000-00-00 00 :29:33,966 WARN [org.apache.hadoop.mapreduce.JobResourceUploader] - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
0000-00-00 00 :29:33,971 INFO [org.apache.hadoop.mapreduce.lib.input.FileInputFormat] - Total input paths to process : 1
0000-00-00 00 :29:34,007 INFO [org.apache.hadoop.mapreduce.JobSubmitter] - number of splits:1
0000-00-00 00 :29:34,083 INFO [org.apache.hadoop.mapreduce.JobSubmitter] - Submitting tokens for job: job_local1634956528_0001
0000-00-00 00 :29:34,174 INFO [org.apache.hadoop.mapreduce.Job] - The url to track the job: http://localhost:8080/
0000-00-00 00 :29:34,182 INFO [org.apache.hadoop.mapreduce.Job] - Running job: job_local1634956528_0001
0000-00-00 00 :29:34,182 INFO [org.apache.hadoop.mapred.LocalJobRunner] - OutputCommitter set in config null
0000-00-00 00 :29:34,186 INFO [org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter] - File Output Committer Algorithm version is 1
0000-00-00 00 :29:34,187 INFO [org.apache.hadoop.mapred.LocalJobRunner] - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
0000-00-00 00 :29:34,216 INFO [org.apache.hadoop.mapred.LocalJobRunner] - Waiting for map tasks
0000-00-00 00 :29:34,216 INFO [org.apache.hadoop.mapred.LocalJobRunner] - Starting task: attempt_local1634956528_0001_m_000000_0
0000-00-00 00 :29:34,230 INFO [org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter] - File Output Committer Algorithm version is 1
0000-00-00 00 :29:34,234 INFO [org.apache.hadoop.yarn.util.ProcfsBasedProcessTree] - ProcfsBasedProcessTree currently is supported only on Linux.
0000-00-00 00 :29:34,234 INFO [org.apache.hadoop.mapred.Task] - Using ResourceCalculatorProcessTree : null
0000-00-00 00 :29:34,237 INFO [org.apache.hadoop.mapred.MapTask] - Processing split: file:/Users/art/Documents/demo_datas/wordcount_inputs/words.txt:0+45
0000-00-00 00 :29:34,256 INFO [org.apache.hadoop.mapred.MapTask] - (EQUATOR) 0 kvi 26214396(104857584)
0000-00-00 00 :29:34,256 INFO [org.apache.hadoop.mapred.MapTask] - mapreduce.task.io.sort.mb: 100
0000-00-00 00 :29:34,256 INFO [org.apache.hadoop.mapred.MapTask] - soft limit at 83886080
0000-00-00 00 :29:34,256 INFO [org.apache.hadoop.mapred.MapTask] - bufstart = 0; bufvoid = 104857600
0000-00-00 00 :29:34,256 INFO [org.apache.hadoop.mapred.MapTask] - kvstart = 26214396; length = 6553600
0000-00-00 00 :29:34,258 INFO [org.apache.hadoop.mapred.MapTask] - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
0000-00-00 00 :29:34,262 INFO [org.apache.hadoop.mapred.LocalJobRunner] -
0000-00-00 00 :29:34,262 INFO [org.apache.hadoop.mapred.MapTask] - Starting flush of map output
0000-00-00 00 :29:34,262 INFO [org.apache.hadoop.mapred.MapTask] - Spilling map output
0000-00-00 00 :29:34,262 INFO [org.apache.hadoop.mapred.MapTask] - bufstart = 0; bufend = 82; bufvoid = 104857600
0000-00-00 00 :29:34,262 INFO [org.apache.hadoop.mapred.MapTask] - kvstart = 26214396(104857584); kvend = 26214364(104857456); length = 33/6553600
0000-00-00 00 :29:34,269 INFO [org.apache.hadoop.mapred.MapTask] - Finished spill 0
0000-00-00 00 :29:34,271 INFO [org.apache.hadoop.mapred.Task] - Task:attempt_local1634956528_0001_m_000000_0 is done. And is in the process of committing
0000-00-00 00 :29:34,275 INFO [org.apache.hadoop.mapred.LocalJobRunner] - map
0000-00-00 00 :29:34,275 INFO [org.apache.hadoop.mapred.Task] - Task 'attempt_local1634956528_0001_m_000000_0' done.
0000-00-00 00 :29:34,275 INFO [org.apache.hadoop.mapred.LocalJobRunner] - Finishing task: attempt_local1634956528_0001_m_000000_0
0000-00-00 00 :29:34,275 INFO [org.apache.hadoop.mapred.LocalJobRunner] - map task executor complete.
0000-00-00 00 :29:34,277 INFO [org.apache.hadoop.mapred.LocalJobRunner] - Waiting for reduce tasks
0000-00-00 00 :29:34,277 INFO [org.apache.hadoop.mapred.LocalJobRunner] - Starting task: attempt_local1634956528_0001_r_000000_0
0000-00-00 00 :29:34,280 INFO [org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter] - File Output Committer Algorithm version is 1
0000-00-00 00 :29:34,281 INFO [org.apache.hadoop.yarn.util.ProcfsBasedProcessTree] - ProcfsBasedProcessTree currently is supported only on Linux.
0000-00-00 00 :29:34,281 INFO [org.apache.hadoop.mapred.Task] - Using ResourceCalculatorProcessTree : null
0000-00-00 00 :29:34,282 INFO [org.apache.hadoop.mapred.ReduceTask] - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@2ad92f58
0000-00-00 00 :29:34,290 INFO [org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl] - MergerManager: memoryLimit=5345011200, maxSingleShuffleLimit=1336252800, mergeThreshold=3527707648, ioSortFactor=10, memToMemMergeOutputsThreshold=10
0000-00-00 00 :29:34,291 INFO [org.apache.hadoop.mapreduce.task.reduce.EventFetcher] - attempt_local1634956528_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
0000-00-00 00 :29:34,315 INFO [org.apache.hadoop.mapreduce.task.reduce.LocalFetcher] - localfetcher#1 about to shuffle output of map attempt_local1634956528_0001_m_000000_0 decomp: 102 len: 106 to MEMORY
0000-00-00 00 :29:34,323 INFO [org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput] - Read 102 bytes from map-output for attempt_local1634956528_0001_m_000000_0
0000-00-00 00 :29:34,324 INFO [org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl] - closeInMemoryFile -> map-output of size: 102, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->102
0000-00-00 00 :29:34,326 INFO [org.apache.hadoop.mapreduce.task.reduce.EventFetcher] - EventFetcher is interrupted.. Returning
0000-00-00 00 :29:34,331 INFO [org.apache.hadoop.mapred.LocalJobRunner] - 1 / 1 copied.
0000-00-00 00 :29:34,332 INFO [org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl] - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
0000-00-00 00 :29:34,353 INFO [org.apache.hadoop.mapred.Merger] - Merging 1 sorted segments
0000-00-00 00 :29:34,354 INFO [org.apache.hadoop.mapred.Merger] - Down to the last merge-pass, with 1 segments left of total size: 97 bytes
0000-00-00 00 :29:34,355 INFO [org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl] - Merged 1 segments, 102 bytes to disk to satisfy reduce memory limit
0000-00-00 00 :29:34,355 INFO [org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl] - Merging 1 files, 106 bytes from disk
0000-00-00 00 :29:34,355 INFO [org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl] - Merging 0 segments, 0 bytes from memory into reduce
0000-00-00 00 :29:34,355 INFO [org.apache.hadoop.mapred.Merger] - Merging 1 sorted segments
0000-00-00 00 :29:34,356 INFO [org.apache.hadoop.mapred.Merger] - Down to the last merge-pass, with 1 segments left of total size: 97 bytes
0000-00-00 00 :29:34,356 INFO [org.apache.hadoop.mapred.LocalJobRunner] - 1 / 1 copied.
0000-00-00 00 :29:34,364 INFO [org.apache.hadoop.conf.Configuration.deprecation] - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
0000-00-00 00 :29:34,367 INFO [org.apache.hadoop.mapred.Task] - Task:attempt_local1634956528_0001_r_000000_0 is done. And is in the process of committing
0000-00-00 00 :29:34,368 INFO [org.apache.hadoop.mapred.LocalJobRunner] - 1 / 1 copied.
0000-00-00 00 :29:34,368 INFO [org.apache.hadoop.mapred.Task] - Task attempt_local1634956528_0001_r_000000_0 is allowed to commit now
0000-00-00 00 :29:34,369 INFO [org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter] - Saved output of task 'attempt_local1634956528_0001_r_000000_0' to file:/Users/art/Documents/demo_datas/wordcount_outputs/_temporary/0/task_local1634956528_0001_r_000000
0000-00-00 00 :29:34,369 INFO [org.apache.hadoop.mapred.LocalJobRunner] - reduce > reduce
0000-00-00 00 :29:34,370 INFO [org.apache.hadoop.mapred.Task] - Task 'attempt_local1634956528_0001_r_000000_0' done.
0000-00-00 00 :29:34,370 INFO [org.apache.hadoop.mapred.LocalJobRunner] - Finishing task: attempt_local1634956528_0001_r_000000_0
0000-00-00 00 :29:34,370 INFO [org.apache.hadoop.mapred.LocalJobRunner] - reduce task executor complete.
0000-00-00 00 :29:35,189 INFO [org.apache.hadoop.mapreduce.Job] - Job job_local1634956528_0001 running in uber mode : false
0000-00-00 00 :29:35,191 INFO [org.apache.hadoop.mapreduce.Job] - map 100% reduce 100%
0000-00-00 00 :29:35,192 INFO [org.apache.hadoop.mapreduce.Job] - Job job_local1634956528_0001 completed successfully
0000-00-00 00 :29:35,199 INFO [org.apache.hadoop.mapreduce.Job] - Counters: 30
File System Counters
FILE: Number of bytes read=706
FILE: Number of bytes written=564811
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=3
Map output records=9
Map output bytes=82
Map output materialized bytes=106
Input split bytes=132
Combine input records=0
Combine output records=0
Reduce input groups=5
Reduce shuffle bytes=106
Reduce input records=9
Reduce output records=5
Spilled Records=18
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=5
Total committed heap usage (bytes)=1029701632
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=45
File Output Format Counters
Bytes Written=49
Process finished with exit code 0
# 集群
[user02@hadoop102 ~]$ hadoop jar /opt/demo_jars/mr_wordcount_2110-1.0-SNAPSHOT.jar com.art.mapreduce.wordcount.WordCountDriver /user/user02/demo_datas/inputs2110 /user/user02/demo_datas/outputs2110
00/00/00 00:39:24 INFO client.RMProxy: Connecting to ResourceManager at hadoop103/192.168.1.103:8032
00/00/00 00:39:24 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
00/00/00 00:39:24 INFO input.FileInputFormat: Total input paths to process : 1
00/00/00 00:39:24 INFO mapreduce.JobSubmitter: number of splits:1
00/00/00 00:39:24 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1636905362738_0002
00/00/00 00:39:24 INFO impl.YarnClientImpl: Submitted application application_1636905362738_0002
00/00/00 00:39:24 INFO mapreduce.Job: The url to track the job: http://hadoop103:8088/proxy/application_1636905362738_0002/
00/00/00 00:39:24 INFO mapreduce.Job: Running job: job_1636905362738_0002
00/00/00 00:39:29 INFO mapreduce.Job: Job job_1636905362738_0002 running in uber mode : false
00/00/00 00:39:29 INFO mapreduce.Job: map 0% reduce 0%
00/00/00 00:39:34 INFO mapreduce.Job: map 100% reduce 0%
00/00/00 00:39:39 INFO mapreduce.Job: map 100% reduce 100%
00/00/00 00:39:39 INFO mapreduce.Job: Job job_1636905362738_0002 completed successfully
00/00/00 00:39:39 INFO mapreduce.Job: Counters: 49
File System Counters
FILE: Number of bytes read=106
FILE: Number of bytes written=235893
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=176
HDFS: Number of bytes written=37
HDFS: Number of read operations=6
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Launched reduce tasks=1
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=1480
Total time spent by all reduces in occupied slots (ms)=2309
Total time spent by all map tasks (ms)=1480
Total time spent by all reduce tasks (ms)=2309
Total vcore-milliseconds taken by all map tasks=1480
Total vcore-milliseconds taken by all reduce tasks=2309
Total megabyte-milliseconds taken by all map tasks=1515520
Total megabyte-milliseconds taken by all reduce tasks=2364416
Map-Reduce Framework
Map input records=3
Map output records=9
Map output bytes=82
Map output materialized bytes=106
Input split bytes=130
Combine input records=0
Combine output records=0
Reduce input groups=5
Reduce shuffle bytes=106
Reduce input records=9
Reduce output records=5
Spilled Records=18
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=84
CPU time spent (ms)=700
Physical memory (bytes) snapshot=432156672
Virtual memory (bytes) snapshot=4438966272
Total committed heap usage (bytes)=277348352
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=46
File Output Format Counters
Bytes Written=37
[user02@hadoop102 inputs2110]$
done