1 本地运行模式
- mapreduce程序是被提交给LocalJobRunner在本地以单进程的形式运行
- 而处理的数据及输出结果可以在本地文件系统,也可以在hdfs上
- 如何实现本地运行:不要带集群的配置文件(本质是看mr程序的conf中是否有mapreduce.framework.name=local以及yarn.resourcemanager.hostname参数)
- 本地模式非常便于进行业务逻辑的debug,只要在IDE中打断点即可
- 在windows下想运行本地模式来测试程序逻辑,需要在windows中配置环境变量,将hadoop的lib和bin目录替换成windows平台编译的版本
Configuration conf = new Configuration();
conf.set("mapreduce.framework.name","local");
// Local-mode MR: input/output data may live on the local FS or on HDFS.
// conf.set("fs.defaultFS","hdfs://node1:9000");
conf.set("fs.defaultFS","file:///");
1.1 访问本地
package wcdemo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/*
* KEYIN--默认是mr框架读到的一行文本的起始偏移量,Long
* hadoop有自己的更加精简的序列化接口,用LongWritable
*
* VALUES--默认是mr框架所读到的一行文本的内容,String
*
* KEYOUT--用户自定义逻辑处理完成后输出数据的key,在此处是单词,String
* VALUEOUT--用户自定义逻辑处理完成之后输出数据中的value,此处是单词次数,Integer
*
* */
public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /*
     * KEYIN   -- byte offset of the line within the input split (LongWritable,
     *            Hadoop's compact serializable replacement for Long).
     * VALUEIN -- the text content of one input line (Text).
     * KEYOUT  -- a single word (Text).
     * VALUEOUT-- the count 1 for that word occurrence (IntWritable).
     */

    // Reused output objects: context.write() serializes the key/value
    // immediately, so reusing these avoids allocating two fresh objects
    // per token — the standard MapReduce optimization.
    private final Text outWord = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /*
     * The map-phase business logic lives in this overridden map().
     * The map task invokes map() once for every input line.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split(" ");
        // Emit <word, 1>; identical words are routed to the same reducer.
        for (String word : words) {
            outWord.set(word);
            context.write(outWord, ONE);
        }
    }
}
package wcdemo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/*
* KEYIN ,VALUEIN 对应mapper输出的KEYOUT,VALUEOUT类型
* */
public class WordcountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {

    /*
     * KEYIN/VALUEIN correspond to the mapper's KEYOUT/VALUEOUT types.
     */

    // Reused output value object: avoids allocating one IntWritable
    // per distinct key (context.write() serializes it immediately).
    private final IntWritable result = new IntWritable();

    /*
     * reduce() is called once per distinct key with the iterable of all
     * values emitted for that key (each value is 1 from the mapper).
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        // Sum the per-occurrence counts for this word.
        for (IntWritable value : values) {
            count += value.get();
        }
        result.set(count);
        context.write(key, result);
    }
}
package wcdemo;
/*
*
* 相当于yarn 集群的客户端
* 封装mr程序的运行参数,指定jar包
* 最后提交给yarn
* */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WordcountDriver {

    /**
     * Job driver: configures the word-count job (mapper, reducer, output
     * types, I/O paths) and submits it, blocking until completion.
     *
     * <p>args[0] = input directory; args[1] = output directory (must not
     * already exist, per FileOutputFormat's contract).
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException when paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: WordcountDriver <input path> <output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        // Local mode: the job runs in-process via LocalJobRunner,
        // reading and writing the local file system.
        conf.set("mapreduce.framework.name", "local");
        conf.set("fs.defaultFS", "file:///");
        /*conf.set("mapreduce.framework.name","yarn");
        conf.set("yarn.resourcemanager.hostname","node1");
        conf.set("fs.defaultFS","hdfs://node1:9000/");*/
        Job job = Job.getInstance(conf);
        //job.setJar("/home/hadoop/wc.jar");
        job.setJarByClass(WordcountDriver.class);
        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordcountReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Final (reducer) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input files directory and output directory for the job.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job and wait; 'true' prints progress to the console.
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
1.1.1 本地执行结果
D:\Java\jdk1.8.0_171\bin\java "-javaagent:D:\JetBrains\IntelliJ IDEA 2017.3.4\lib\idea_rt.jar=55149:D:\JetBrains\IntelliJ IDEA 2017.3.4\bin" -Dfile.encoding=UTF-8 -classpath D:\Java\jdk1.8.0_171\jre\lib\charsets.jar;D:\Java\jdk1.8.0_171\jre\lib\deploy.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\access-bridge-64.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\cldrdata.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\dnsns.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\jaccess.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\jfxrt.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\localedata.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\nashorn.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunec.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunjce_provider.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunmscapi.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunpkcs11.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\zipfs.jar;D:\Java\jdk1.8.0_171\jre\lib\javaws.jar;D:\Java\jdk1.8.0_171\jre\lib\jce.jar;D:\Java\jdk1.8.0_171\jre\lib\jfr.jar;D:\Java\jdk1.8.0_171\jre\lib\jfxswt.jar;D:\Java\jdk1.8.0_171\jre\lib\jsse.jar;D:\Java\jdk1.8.0_171\jre\lib\management-agent.jar;D:\Java\jdk1.8.0_171\jre\lib\plugin.jar;D:\Java\jdk1.8.0_171\jre\lib\resources.jar;D:\Java\jdk1.8.0_171\jre\lib\rt.jar;E:\JavaProject\mapreduce\out\production\mapreduce;D:\hadoop-2.7.6\share\hadoop\common\lib\xz-1.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\asm-3.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\avro-1.7.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\gson-2.2.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\junit-4.11.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jsch-0.1.54.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jsp-api-2.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\xmlenc-0.52.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\guava-11.0.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jets3t-0.9.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jettison-1.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jetty-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jsr305-3.0.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\log4j-1.2.17.ja
r;D:\hadoop-2.7.6\share\hadoop\common\lib\paranamer-2.3.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\activation-1.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-io-2.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\httpcore-4.2.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jaxb-api-2.2.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\stax-api-1.0-2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-cli-1.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-net-3.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jersey-core-1.9.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jersey-json-1.9.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\servlet-api-2.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\zookeeper-3.4.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-lang-2.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\httpclient-4.2.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\slf4j-api-1.7.10.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-codec-1.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\hadoop-auth-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\hamcrest-core-1.3.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-xc-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jaxb-impl-2.2.3-1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jersey-server-1.9.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jetty-util-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\mockito-all-1.8.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\netty-3.6.2.Final.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\api-util-1.0.0-M20.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-math3-3.1.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\java-xmlbuilder-0.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\protobuf-java-2.5.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\snappy-java-1.0.4.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-digester-1.8.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\curator-client-2.7.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-jaxrs-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\c
ommon\lib\slf4j-log4j12-1.7.10.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-logging-1.1.3.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\curator-recipes-2.7.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\api-asn1-api-1.0.0-M20.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-compress-1.4.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-httpclient-3.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jetty-sslengine-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\apacheds-i18n-2.0.0-M15.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-beanutils-1.7.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\curator-framework-2.7.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-core-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\hadoop-annotations-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-collections-3.2.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-configuration-1.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-mapper-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-beanutils-core-1.8.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\htrace-core-3.1.0-incubating.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\apacheds-kerberos-codec-2.0.0-M15.jar;D:\hadoop-2.7.6\share\hadoop\common\hadoop-common-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\xz-1.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\asm-3.2.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\guice-3.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\avro-1.7.4.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\junit-4.11.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\log4j-1.2.17.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\paranamer-2.3.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\commons-io-2.4.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\javax.inject-1.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\aopalliance-1.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jersey-core-1.9.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jersey-guice-1.9.jar;D:\hadoop-2.7
.6\share\hadoop\mapreduce\lib\guice-servlet-3.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\hamcrest-core-1.3.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jersey-server-1.9.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\netty-3.6.2.Final.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\leveldbjni-all-1.8.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\protobuf-java-2.5.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\snappy-java-1.0.4.1.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\commons-compress-1.4.1.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jackson-core-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\hadoop-annotations-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jackson-mapper-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-app-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-common-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-core-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-plugins-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.7.6-tests.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-shuffle-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\xz-1.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\asm-3.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\guice-3.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\guava-11.0.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jettison-1.1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jetty-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jsr305-3.0.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\log4j-1.2.17.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\activation-1.1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-io-2.4.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\javax.inject-1.jar;D:\hadoop-2.7.6\share\
hadoop\yarn\lib\jaxb-api-2.2.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\stax-api-1.0-2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\aopalliance-1.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-cli-1.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-core-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-json-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\servlet-api-2.5.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\zookeeper-3.4.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-lang-2.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-guice-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-codec-1.4.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\guice-servlet-3.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-xc-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jaxb-impl-2.2.3-1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-client-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-server-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jetty-util-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\netty-3.6.2.Final.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\leveldbjni-all-1.8.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\protobuf-java-2.5.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-jaxrs-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-logging-1.1.3.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\zookeeper-3.4.6-tests.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-compress-1.4.1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-core-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-collections-3.2.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-mapper-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-api-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-applications-distributedshell-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-applications-unmanaged-am-launcher-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-client-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-common-2.7.6.jar;D:\hadoop-2.7.6\sh
are\hadoop\yarn\hadoop-yarn-registry-2.7.6.jar wcdemo.WordcountDriver h:\wc\input h:\wc\output
2018-08-07 21:24:42,497 INFO [main] jvm.JvmMetrics (JvmMetrics.java:init(76)) - Initializing JVM Metrics with processName=JobTracker, sessionId=
2018-08-07 21:24:44,822 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(64)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2018-08-07 21:24:44,897 WARN [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set. User classes may not be found. See Job or Job#setJar(String).
2018-08-07 21:24:45,161 INFO [main] input.FileInputFormat (FileInputFormat.java:listStatus(283)) - Total input paths to process : 2
2018-08-07 21:24:45,229 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(198)) - number of splits:2
2018-08-07 21:24:45,497 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(287)) - Submitting tokens for job: job_local1711392538_0001
2018-08-07 21:24:45,795 INFO [main] mapreduce.Job (Job.java:submit(1294)) - The url to track the job: http://localhost:8080/
2018-08-07 21:24:45,796 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1339)) - Running job: job_local1711392538_0001
2018-08-07 21:24:45,808 INFO [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2018-08-07 21:24:45,821 INFO [Thread-5] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:45,823 INFO [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2018-08-07 21:24:46,017 INFO [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2018-08-07 21:24:46,018 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local1711392538_0001_m_000000_0
2018-08-07 21:24:46,064 INFO [LocalJobRunner Map Task Executor #0] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:46,075 INFO [LocalJobRunner Map Task Executor #0] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(192)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-08-07 21:24:46,124 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(614)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@3e8cb5ad
2018-08-07 21:24:46,133 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(756)) - Processing split: file:/h:/wc/input/b.txt:0+332
2018-08-07 21:24:46,212 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1205)) - (EQUATOR) 0 kvi 26214396(104857584)
2018-08-07 21:24:46,213 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - mapreduce.task.io.sort.mb: 100
2018-08-07 21:24:46,213 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(999)) - soft limit at 83886080
2018-08-07 21:24:46,213 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1000)) - bufstart = 0; bufvoid = 104857600
2018-08-07 21:24:46,213 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1001)) - kvstart = 26214396; length = 6553600
2018-08-07 21:24:46,217 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2018-08-07 21:24:46,233 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2018-08-07 21:24:46,233 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1460)) - Starting flush of map output
2018-08-07 21:24:46,233 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1482)) - Spilling map output
2018-08-07 21:24:46,234 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1483)) - bufstart = 0; bufend = 548; bufvoid = 104857600
2018-08-07 21:24:46,234 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1485)) - kvstart = 26214396(104857584); kvend = 26214180(104856720); length = 217/6553600
2018-08-07 21:24:46,283 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1667)) - Finished spill 0
2018-08-07 21:24:46,294 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1046)) - Task:attempt_local1711392538_0001_m_000000_0 is done. And is in the process of committing
2018-08-07 21:24:46,311 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2018-08-07 21:24:46,312 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1184)) - Task 'attempt_local1711392538_0001_m_000000_0' done.
2018-08-07 21:24:46,321 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1080)) - Final Counters for attempt_local1711392538_0001_m_000000_0: Counters: 17
File System Counters
FILE: Number of bytes read=579
FILE: Number of bytes written=216547
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=4
Map output records=55
Map output bytes=548
Map output materialized bytes=664
Input split bytes=88
Combine input records=0
Spilled Records=55
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=8
Total committed heap usage (bytes)=234356736
File Input Format Counters
Bytes Read=332
2018-08-07 21:24:46,321 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local1711392538_0001_m_000000_0
2018-08-07 21:24:46,322 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local1711392538_0001_m_000001_0
2018-08-07 21:24:46,324 INFO [LocalJobRunner Map Task Executor #0] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:46,325 INFO [LocalJobRunner Map Task Executor #0] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(192)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-08-07 21:24:46,379 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(614)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@b3a77de
2018-08-07 21:24:46,381 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(756)) - Processing split: file:/h:/wc/input/a.txt:0+308
2018-08-07 21:24:46,458 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1205)) - (EQUATOR) 0 kvi 26214396(104857584)
2018-08-07 21:24:46,458 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - mapreduce.task.io.sort.mb: 100
2018-08-07 21:24:46,458 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(999)) - soft limit at 83886080
2018-08-07 21:24:46,459 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1000)) - bufstart = 0; bufvoid = 104857600
2018-08-07 21:24:46,459 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1001)) - kvstart = 26214396; length = 6553600
2018-08-07 21:24:46,459 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2018-08-07 21:24:46,463 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) -
2018-08-07 21:24:46,463 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1460)) - Starting flush of map output
2018-08-07 21:24:46,463 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1482)) - Spilling map output
2018-08-07 21:24:46,463 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1483)) - bufstart = 0; bufend = 504; bufvoid = 104857600
2018-08-07 21:24:46,464 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1485)) - kvstart = 26214396(104857584); kvend = 26214200(104856800); length = 197/6553600
2018-08-07 21:24:46,508 INFO [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1667)) - Finished spill 0
2018-08-07 21:24:46,515 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1046)) - Task:attempt_local1711392538_0001_m_000001_0 is done. And is in the process of committing
2018-08-07 21:24:46,520 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2018-08-07 21:24:46,520 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1184)) - Task 'attempt_local1711392538_0001_m_000001_0' done.
2018-08-07 21:24:46,521 INFO [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1080)) - Final Counters for attempt_local1711392538_0001_m_000001_0: Counters: 17
File System Counters
FILE: Number of bytes read=1082
FILE: Number of bytes written=217189
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=4
Map output records=50
Map output bytes=504
Map output materialized bytes=610
Input split bytes=88
Combine input records=0
Spilled Records=50
Failed Shuffles=0
Merged Map outputs=0
GC time elapsed (ms)=0
Total committed heap usage (bytes)=339738624
File Input Format Counters
Bytes Read=308
2018-08-07 21:24:46,522 INFO [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local1711392538_0001_m_000001_0
2018-08-07 21:24:46,524 INFO [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2018-08-07 21:24:46,529 INFO [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2018-08-07 21:24:46,530 INFO [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local1711392538_0001_r_000000_0
2018-08-07 21:24:46,547 INFO [pool-3-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:46,548 INFO [pool-3-thread-1] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(192)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-08-07 21:24:46,629 INFO [pool-3-thread-1] mapred.Task (Task.java:initialize(614)) - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@3138b036
2018-08-07 21:24:46,635 INFO [pool-3-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@227e0734
2018-08-07 21:24:46,652 INFO [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:<init>(205)) - MergerManager: memoryLimit=1318269696, maxSingleShuffleLimit=329567424, mergeThreshold=870058048, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2018-08-07 21:24:46,659 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1711392538_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2018-08-07 21:24:46,721 INFO [localfetcher#1] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(144)) - localfetcher#1 about to shuffle output of map attempt_local1711392538_0001_m_000001_0 decomp: 606 len: 610 to MEMORY
2018-08-07 21:24:46,744 INFO [localfetcher#1] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 606 bytes from map-output for attempt_local1711392538_0001_m_000001_0
2018-08-07 21:24:46,749 INFO [localfetcher#1] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(319)) - closeInMemoryFile -> map-output of size: 606, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->606
2018-08-07 21:24:46,783 INFO [localfetcher#1] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(144)) - localfetcher#1 about to shuffle output of map attempt_local1711392538_0001_m_000000_0 decomp: 660 len: 664 to MEMORY
2018-08-07 21:24:46,788 INFO [localfetcher#1] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 660 bytes from map-output for attempt_local1711392538_0001_m_000000_0
2018-08-07 21:24:46,789 INFO [localfetcher#1] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(319)) - closeInMemoryFile -> map-output of size: 660, inMemoryMapOutputs.size() -> 2, commitMemory -> 606, usedMemory ->1266
2018-08-07 21:24:46,793 INFO [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2018-08-07 21:24:46,796 INFO [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 2 / 2 copied.
2018-08-07 21:24:46,796 INFO [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(691)) - finalMerge called with 2 in-memory map-outputs and 0 on-disk map-outputs
2018-08-07 21:24:46,812 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local1711392538_0001 running in uber mode : false
2018-08-07 21:24:46,814 INFO [pool-3-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 2 sorted segments
2018-08-07 21:24:46,815 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - map 100% reduce 0%
2018-08-07 21:24:46,815 INFO [pool-3-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 2 segments left of total size: 1260 bytes
2018-08-07 21:24:46,823 INFO [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(758)) - Merged 2 segments, 1266 bytes to disk to satisfy reduce memory limit
2018-08-07 21:24:46,824 INFO [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(788)) - Merging 1 files, 1268 bytes from disk
2018-08-07 21:24:46,825 INFO [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(803)) - Merging 0 segments, 0 bytes from memory into reduce
2018-08-07 21:24:46,825 INFO [pool-3-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-08-07 21:24:46,827 INFO [pool-3-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 1261 bytes
2018-08-07 21:24:46,830 INFO [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 2 / 2 copied.
2018-08-07 21:24:46,836 INFO [pool-3-thread-1] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(1243)) - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2018-08-07 21:24:46,886 INFO [pool-3-thread-1] mapred.Task (Task.java:done(1046)) - Task:attempt_local1711392538_0001_r_000000_0 is done. And is in the process of committing
2018-08-07 21:24:46,888 INFO [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 2 / 2 copied.
2018-08-07 21:24:46,888 INFO [pool-3-thread-1] mapred.Task (Task.java:commit(1225)) - Task attempt_local1711392538_0001_r_000000_0 is allowed to commit now
2018-08-07 21:24:46,910 INFO [pool-3-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(535)) - Saved output of task 'attempt_local1711392538_0001_r_000000_0' to file:/h:/wc/output/_temporary/0/task_local1711392538_0001_r_000000
2018-08-07 21:24:46,912 INFO [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2018-08-07 21:24:46,912 INFO [pool-3-thread-1] mapred.Task (Task.java:sendDone(1184)) - Task 'attempt_local1711392538_0001_r_000000_0' done.
2018-08-07 21:24:46,913 INFO [pool-3-thread-1] mapred.Task (Task.java:done(1080)) - Final Counters for attempt_local1711392538_0001_r_000000_0: Counters: 24
File System Counters
FILE: Number of bytes read=3688
FILE: Number of bytes written=219182
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Combine input records=0
Combine output records=0
Reduce input groups=85
Reduce shuffle bytes=1274
Reduce input records=105
Reduce output records=85
Spilled Records=105
Shuffled Maps =2
Failed Shuffles=0
Merged Map outputs=2
GC time elapsed (ms)=0
Total committed heap usage (bytes)=339738624
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Output Format Counters
Bytes Written=725
2018-08-07 21:24:46,913 INFO [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local1711392538_0001_r_000000_0
2018-08-07 21:24:46,914 INFO [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2018-08-07 21:24:47,816 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - map 100% reduce 100%
2018-08-07 21:24:47,817 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1378)) - Job job_local1711392538_0001 completed successfully
2018-08-07 21:24:47,828 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 30
File System Counters
FILE: Number of bytes read=5349
FILE: Number of bytes written=652918
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=8
Map output records=105
Map output bytes=1052
Map output materialized bytes=1274
Input split bytes=176
Combine input records=0
Combine output records=0
Reduce input groups=85
Reduce shuffle bytes=1274
Reduce input records=105
Reduce output records=85
Spilled Records=210
Shuffled Maps =2
Failed Shuffles=0
Merged Map outputs=2
GC time elapsed (ms)=8
Total committed heap usage (bytes)=913833984
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=640
File Output Format Counters
Bytes Written=725
Process finished with exit code 0
1.2 程序Windows本地运行,文件在hdfs
修改此处代码
Configuration conf = new Configuration();
// Local (LocalJobRunner) execution, but input/output data on HDFS.
conf.set("mapreduce.framework.name","local");
conf.set("fs.defaultFS","hdfs://node1:9000/");
//conf.set("fs.defaultFS","file:///");
/*conf.set("mapreduce.framework.name","yarn");
conf.set("yarn.resourcemanager.hostname","node1");
conf.set("fs.defaultFS","hdfs://node1:9000/");*/
Job job = Job.getInstance(conf);
1.3 本地调试
待统计的数据
hello apple apple
hello jack
hello mark
hello jerry jerry
1.3.1 打断点
1.3.2 开始调试