MapReduce学习笔记(3)—— mapreduce程序本地运行模式

1 本地运行模式

  • mapreduce程序是被提交给LocalJobRunner在本地以单进程的形式运行
  • 而处理的数据及输出结果可以在本地文件系统,也可以在hdfs上
  • 怎样实现本地运行?不要携带集群的配置文件(本质是看mr程序的conf中mapreduce.framework.name是否为local,以及是否配置了yarn.resourcemanager.hostname参数)
  • 本地模式非常便于进行业务逻辑的debug,只要在IDE中打断点即可
  • 在windows下想运行本地模式来测试程序逻辑,需要在windows中配置环境变量,将hadoop的lib和bin目录替换成windows平台编译的版本
 Configuration conf = new Configuration();

        conf.set("mapreduce.framework.name","local");

        //本地模式运行mr,输入输出的数据可以在本地,也可以在hdfs
//        conf.set("fs.defaultFS","hdfs://node1:9000");
        conf.set("fs.defaultFS","file:///");

1.1 访问本地

package wcdemo;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/*
 * KEYIN--默认是mr框架读到的一行文本的起始偏移量,Long
 * hadoop有自己的更加精简的序列化接口,用LongWritable
 *
 * VALUES--默认是mr框架所读到的一行文本的内容,String
 *
 * KEYOUT--用户自定义逻辑处理完成后输出数据的key,在此处是单词,String
 * VALUEOUT--用户自定义逻辑处理完成之后输出数据中的value,此处是单词次数,Integer
 *
 * */
public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reuse output key/value objects across records: Hadoop serializes the
    // values on context.write(), so a single mutable instance per task is safe
    // and avoids allocating two objects for every word in the input.
    private final Text outKey = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /*
     * The map-phase business logic lives in this overridden map().
     * The maptask invokes map() once per input line.
     *
     * key   - byte offset of the line within the input split (unused here)
     * value - the text of one input line
     * Emits one <word, 1> pair per whitespace-separated token.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        String line = value.toString();
        // \\s+ collapses runs of whitespace; split(" ") would yield empty
        // tokens between consecutive spaces and count "" as a word.
        String[] words = line.split("\\s+");

        // Emit each word as <word, 1>; identical words hash to the same reducer.
        for (String word : words) {
            if (word.isEmpty()) {
                continue; // leading whitespace can still produce one empty token
            }
            outKey.set(word);
            context.write(outKey, ONE);
        }

    }
}
package wcdemo;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/*
* KEYIN ,VALUEIN 对应mapper输出的KEYOUT,VALUEOUT类型
* */

public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /*
     * Called once per distinct key: `key` is the word shared by a group of
     * <word, 1> pairs, `values` iterates over all the 1-counts for that word.
     * Writes <word, total occurrences> to the job output.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

        int sum = 0;

        // Explicit iterator form; equivalent to a for-each over `values`.
        // Note Hadoop reuses the IntWritable instance between iterations,
        // so we extract the primitive value immediately.
        Iterator<IntWritable> it = values.iterator();
        while (it.hasNext()) {
            sum += it.next().get();
        }

        context.write(key, new IntWritable(sum));

    }
}
package wcdemo;


/*
 *
 * 相当于yarn 集群的客户端
 * 封装mr程序的运行参数,指定jar包
 * 最后提交给yarn
 * */

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;


public class WordcountDriver {

    /*
     * Job driver: configures the MapReduce job (mapper/reducer classes,
     * key/value types, input/output paths) and submits it.
     *
     * args[0] - input directory of raw files
     * args[1] - output directory (must not already exist)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException when paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: WordcountDriver <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();

        // Local mode: run in-process via LocalJobRunner against the local FS.
        conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","file:///");


        // Cluster mode alternative: submit to YARN, data on HDFS.
        /*conf.set("mapreduce.framework.name","yarn");
        conf.set("yarn.resourcemanager.hostname","node1");
        conf.set("fs.defaultFS","hdfs://node1:9000/");*/


        Job job = Job.getInstance(conf);


        // Locate the job jar by the driver class rather than a hard-coded path.
        //job.setJar("/home/hadoop/wc.jar");
        job.setJarByClass(WordcountDriver.class);

        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordcountReducer.class);

        // Map output types (must match WordcountMapper's KEYOUT/VALUEOUT).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);


        // Input directory of raw files and output directory for results.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));


        // Submit the job configuration (and jar, in cluster mode) and block
        // until completion, printing progress; exit 0 on success.
        //job.submit();
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}

1.1.1 本地执行结果

这里写图片描述

D:\Java\jdk1.8.0_171\bin\java "-javaagent:D:\JetBrains\IntelliJ IDEA 2017.3.4\lib\idea_rt.jar=55149:D:\JetBrains\IntelliJ IDEA 2017.3.4\bin" -Dfile.encoding=UTF-8 -classpath D:\Java\jdk1.8.0_171\jre\lib\charsets.jar;D:\Java\jdk1.8.0_171\jre\lib\deploy.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\access-bridge-64.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\cldrdata.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\dnsns.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\jaccess.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\jfxrt.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\localedata.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\nashorn.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunec.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunjce_provider.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunmscapi.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\sunpkcs11.jar;D:\Java\jdk1.8.0_171\jre\lib\ext\zipfs.jar;D:\Java\jdk1.8.0_171\jre\lib\javaws.jar;D:\Java\jdk1.8.0_171\jre\lib\jce.jar;D:\Java\jdk1.8.0_171\jre\lib\jfr.jar;D:\Java\jdk1.8.0_171\jre\lib\jfxswt.jar;D:\Java\jdk1.8.0_171\jre\lib\jsse.jar;D:\Java\jdk1.8.0_171\jre\lib\management-agent.jar;D:\Java\jdk1.8.0_171\jre\lib\plugin.jar;D:\Java\jdk1.8.0_171\jre\lib\resources.jar;D:\Java\jdk1.8.0_171\jre\lib\rt.jar;E:\JavaProject\mapreduce\out\production\mapreduce;D:\hadoop-2.7.6\share\hadoop\common\lib\xz-1.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\asm-3.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\avro-1.7.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\gson-2.2.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\junit-4.11.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jsch-0.1.54.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jsp-api-2.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\xmlenc-0.52.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\guava-11.0.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jets3t-0.9.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jettison-1.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jetty-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jsr305-3.0.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\log4j-1.2.17.ja
r;D:\hadoop-2.7.6\share\hadoop\common\lib\paranamer-2.3.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\activation-1.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-io-2.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\httpcore-4.2.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jaxb-api-2.2.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\stax-api-1.0-2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-cli-1.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-net-3.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jersey-core-1.9.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jersey-json-1.9.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\servlet-api-2.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\zookeeper-3.4.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-lang-2.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\httpclient-4.2.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\slf4j-api-1.7.10.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-codec-1.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\hadoop-auth-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\hamcrest-core-1.3.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-xc-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jaxb-impl-2.2.3-1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jersey-server-1.9.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jetty-util-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\mockito-all-1.8.5.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\netty-3.6.2.Final.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\api-util-1.0.0-M20.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-math3-3.1.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\java-xmlbuilder-0.4.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\protobuf-java-2.5.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\snappy-java-1.0.4.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-digester-1.8.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\curator-client-2.7.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-jaxrs-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\c
ommon\lib\slf4j-log4j12-1.7.10.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-logging-1.1.3.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\curator-recipes-2.7.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\api-asn1-api-1.0.0-M20.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-compress-1.4.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-httpclient-3.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jetty-sslengine-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\apacheds-i18n-2.0.0-M15.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-beanutils-1.7.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\curator-framework-2.7.1.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-core-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\hadoop-annotations-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-collections-3.2.2.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-configuration-1.6.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\jackson-mapper-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\commons-beanutils-core-1.8.0.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\htrace-core-3.1.0-incubating.jar;D:\hadoop-2.7.6\share\hadoop\common\lib\apacheds-kerberos-codec-2.0.0-M15.jar;D:\hadoop-2.7.6\share\hadoop\common\hadoop-common-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\xz-1.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\asm-3.2.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\guice-3.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\avro-1.7.4.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\junit-4.11.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\log4j-1.2.17.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\paranamer-2.3.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\commons-io-2.4.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\javax.inject-1.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\aopalliance-1.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jersey-core-1.9.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jersey-guice-1.9.jar;D:\hadoop-2.7
.6\share\hadoop\mapreduce\lib\guice-servlet-3.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\hamcrest-core-1.3.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jersey-server-1.9.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\netty-3.6.2.Final.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\leveldbjni-all-1.8.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\protobuf-java-2.5.0.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\snappy-java-1.0.4.1.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\commons-compress-1.4.1.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jackson-core-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\hadoop-annotations-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\lib\jackson-mapper-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-app-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-common-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-core-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-hs-plugins-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.7.6-tests.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-jobclient-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\mapreduce\hadoop-mapreduce-client-shuffle-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\xz-1.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\asm-3.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\guice-3.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\guava-11.0.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jettison-1.1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jetty-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jsr305-3.0.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\log4j-1.2.17.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\activation-1.1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-io-2.4.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\javax.inject-1.jar;D:\hadoop-2.7.6\share\
hadoop\yarn\lib\jaxb-api-2.2.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\stax-api-1.0-2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\aopalliance-1.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-cli-1.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-core-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-json-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\servlet-api-2.5.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\zookeeper-3.4.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-lang-2.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-guice-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-codec-1.4.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\guice-servlet-3.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-xc-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jaxb-impl-2.2.3-1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-client-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jersey-server-1.9.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jetty-util-6.1.26.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\netty-3.6.2.Final.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\leveldbjni-all-1.8.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\protobuf-java-2.5.0.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-jaxrs-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-logging-1.1.3.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\zookeeper-3.4.6-tests.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-compress-1.4.1.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-core-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\commons-collections-3.2.2.jar;D:\hadoop-2.7.6\share\hadoop\yarn\lib\jackson-mapper-asl-1.9.13.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-api-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-applications-distributedshell-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-applications-unmanaged-am-launcher-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-client-2.7.6.jar;D:\hadoop-2.7.6\share\hadoop\yarn\hadoop-yarn-common-2.7.6.jar;D:\hadoop-2.7.6\sh
are\hadoop\yarn\hadoop-yarn-registry-2.7.6.jar wcdemo.WordcountDriver h:\wc\input h:\wc\output
2018-08-07 21:24:42,497 INFO  [main] jvm.JvmMetrics (JvmMetrics.java:init(76)) - Initializing JVM Metrics with processName=JobTracker, sessionId=
2018-08-07 21:24:44,822 WARN  [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(64)) - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2018-08-07 21:24:44,897 WARN  [main] mapreduce.JobResourceUploader (JobResourceUploader.java:uploadFiles(171)) - No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
2018-08-07 21:24:45,161 INFO  [main] input.FileInputFormat (FileInputFormat.java:listStatus(283)) - Total input paths to process : 2
2018-08-07 21:24:45,229 INFO  [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(198)) - number of splits:2
2018-08-07 21:24:45,497 INFO  [main] mapreduce.JobSubmitter (JobSubmitter.java:printTokens(287)) - Submitting tokens for job: job_local1711392538_0001
2018-08-07 21:24:45,795 INFO  [main] mapreduce.Job (Job.java:submit(1294)) - The url to track the job: http://localhost:8080/
2018-08-07 21:24:45,796 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1339)) - Running job: job_local1711392538_0001
2018-08-07 21:24:45,808 INFO  [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(471)) - OutputCommitter set in config null
2018-08-07 21:24:45,821 INFO  [Thread-5] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:45,823 INFO  [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:createOutputCommitter(489)) - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
2018-08-07 21:24:46,017 INFO  [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for map tasks
2018-08-07 21:24:46,018 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local1711392538_0001_m_000000_0
2018-08-07 21:24:46,064 INFO  [LocalJobRunner Map Task Executor #0] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:46,075 INFO  [LocalJobRunner Map Task Executor #0] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(192)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-08-07 21:24:46,124 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(614)) -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@3e8cb5ad
2018-08-07 21:24:46,133 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(756)) - Processing split: file:/h:/wc/input/b.txt:0+332
2018-08-07 21:24:46,212 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1205)) - (EQUATOR) 0 kvi 26214396(104857584)
2018-08-07 21:24:46,213 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - mapreduce.task.io.sort.mb: 100
2018-08-07 21:24:46,213 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(999)) - soft limit at 83886080
2018-08-07 21:24:46,213 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1000)) - bufstart = 0; bufvoid = 104857600
2018-08-07 21:24:46,213 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1001)) - kvstart = 26214396; length = 6553600
2018-08-07 21:24:46,217 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2018-08-07 21:24:46,233 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 
2018-08-07 21:24:46,233 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1460)) - Starting flush of map output
2018-08-07 21:24:46,233 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1482)) - Spilling map output
2018-08-07 21:24:46,234 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1483)) - bufstart = 0; bufend = 548; bufvoid = 104857600
2018-08-07 21:24:46,234 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1485)) - kvstart = 26214396(104857584); kvend = 26214180(104856720); length = 217/6553600
2018-08-07 21:24:46,283 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1667)) - Finished spill 0
2018-08-07 21:24:46,294 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1046)) - Task:attempt_local1711392538_0001_m_000000_0 is done. And is in the process of committing
2018-08-07 21:24:46,311 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2018-08-07 21:24:46,312 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1184)) - Task 'attempt_local1711392538_0001_m_000000_0' done.
2018-08-07 21:24:46,321 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1080)) - Final Counters for attempt_local1711392538_0001_m_000000_0: Counters: 17
    File System Counters
        FILE: Number of bytes read=579
        FILE: Number of bytes written=216547
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=4
        Map output records=55
        Map output bytes=548
        Map output materialized bytes=664
        Input split bytes=88
        Combine input records=0
        Spilled Records=55
        Failed Shuffles=0
        Merged Map outputs=0
        GC time elapsed (ms)=8
        Total committed heap usage (bytes)=234356736
    File Input Format Counters 
        Bytes Read=332
2018-08-07 21:24:46,321 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local1711392538_0001_m_000000_0
2018-08-07 21:24:46,322 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(224)) - Starting task: attempt_local1711392538_0001_m_000001_0
2018-08-07 21:24:46,324 INFO  [LocalJobRunner Map Task Executor #0] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:46,325 INFO  [LocalJobRunner Map Task Executor #0] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(192)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-08-07 21:24:46,379 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:initialize(614)) -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@b3a77de
2018-08-07 21:24:46,381 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:runNewMapper(756)) - Processing split: file:/h:/wc/input/a.txt:0+308
2018-08-07 21:24:46,458 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:setEquator(1205)) - (EQUATOR) 0 kvi 26214396(104857584)
2018-08-07 21:24:46,458 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(998)) - mapreduce.task.io.sort.mb: 100
2018-08-07 21:24:46,458 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(999)) - soft limit at 83886080
2018-08-07 21:24:46,459 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1000)) - bufstart = 0; bufvoid = 104857600
2018-08-07 21:24:46,459 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:init(1001)) - kvstart = 26214396; length = 6553600
2018-08-07 21:24:46,459 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:createSortingCollector(403)) - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
2018-08-07 21:24:46,463 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 
2018-08-07 21:24:46,463 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1460)) - Starting flush of map output
2018-08-07 21:24:46,463 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1482)) - Spilling map output
2018-08-07 21:24:46,463 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1483)) - bufstart = 0; bufend = 504; bufvoid = 104857600
2018-08-07 21:24:46,464 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:flush(1485)) - kvstart = 26214396(104857584); kvend = 26214200(104856800); length = 197/6553600
2018-08-07 21:24:46,508 INFO  [LocalJobRunner Map Task Executor #0] mapred.MapTask (MapTask.java:sortAndSpill(1667)) - Finished spill 0
2018-08-07 21:24:46,515 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1046)) - Task:attempt_local1711392538_0001_m_000001_0 is done. And is in the process of committing
2018-08-07 21:24:46,520 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - map
2018-08-07 21:24:46,520 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:sendDone(1184)) - Task 'attempt_local1711392538_0001_m_000001_0' done.
2018-08-07 21:24:46,521 INFO  [LocalJobRunner Map Task Executor #0] mapred.Task (Task.java:done(1080)) - Final Counters for attempt_local1711392538_0001_m_000001_0: Counters: 17
    File System Counters
        FILE: Number of bytes read=1082
        FILE: Number of bytes written=217189
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=4
        Map output records=50
        Map output bytes=504
        Map output materialized bytes=610
        Input split bytes=88
        Combine input records=0
        Spilled Records=50
        Failed Shuffles=0
        Merged Map outputs=0
        GC time elapsed (ms)=0
        Total committed heap usage (bytes)=339738624
    File Input Format Counters 
        Bytes Read=308
2018-08-07 21:24:46,522 INFO  [LocalJobRunner Map Task Executor #0] mapred.LocalJobRunner (LocalJobRunner.java:run(249)) - Finishing task: attempt_local1711392538_0001_m_000001_0
2018-08-07 21:24:46,524 INFO  [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - map task executor complete.
2018-08-07 21:24:46,529 INFO  [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(448)) - Waiting for reduce tasks
2018-08-07 21:24:46,530 INFO  [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(302)) - Starting task: attempt_local1711392538_0001_r_000000_0
2018-08-07 21:24:46,547 INFO  [pool-3-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:<init>(108)) - File Output Committer Algorithm version is 1
2018-08-07 21:24:46,548 INFO  [pool-3-thread-1] util.ProcfsBasedProcessTree (ProcfsBasedProcessTree.java:isAvailable(192)) - ProcfsBasedProcessTree currently is supported only on Linux.
2018-08-07 21:24:46,629 INFO  [pool-3-thread-1] mapred.Task (Task.java:initialize(614)) -  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@3138b036
2018-08-07 21:24:46,635 INFO  [pool-3-thread-1] mapred.ReduceTask (ReduceTask.java:run(362)) - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@227e0734
2018-08-07 21:24:46,652 INFO  [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:<init>(205)) - MergerManager: memoryLimit=1318269696, maxSingleShuffleLimit=329567424, mergeThreshold=870058048, ioSortFactor=10, memToMemMergeOutputsThreshold=10
2018-08-07 21:24:46,659 INFO  [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(61)) - attempt_local1711392538_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
2018-08-07 21:24:46,721 INFO  [localfetcher#1] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(144)) - localfetcher#1 about to shuffle output of map attempt_local1711392538_0001_m_000001_0 decomp: 606 len: 610 to MEMORY
2018-08-07 21:24:46,744 INFO  [localfetcher#1] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 606 bytes from map-output for attempt_local1711392538_0001_m_000001_0
2018-08-07 21:24:46,749 INFO  [localfetcher#1] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(319)) - closeInMemoryFile -> map-output of size: 606, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->606
2018-08-07 21:24:46,783 INFO  [localfetcher#1] reduce.LocalFetcher (LocalFetcher.java:copyMapOutput(144)) - localfetcher#1 about to shuffle output of map attempt_local1711392538_0001_m_000000_0 decomp: 660 len: 664 to MEMORY
2018-08-07 21:24:46,788 INFO  [localfetcher#1] reduce.InMemoryMapOutput (InMemoryMapOutput.java:shuffle(100)) - Read 660 bytes from map-output for attempt_local1711392538_0001_m_000000_0
2018-08-07 21:24:46,789 INFO  [localfetcher#1] reduce.MergeManagerImpl (MergeManagerImpl.java:closeInMemoryFile(319)) - closeInMemoryFile -> map-output of size: 660, inMemoryMapOutputs.size() -> 2, commitMemory -> 606, usedMemory ->1266
2018-08-07 21:24:46,793 INFO  [EventFetcher for fetching Map Completion Events] reduce.EventFetcher (EventFetcher.java:run(76)) - EventFetcher is interrupted.. Returning
2018-08-07 21:24:46,796 INFO  [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 2 / 2 copied.
2018-08-07 21:24:46,796 INFO  [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(691)) - finalMerge called with 2 in-memory map-outputs and 0 on-disk map-outputs
2018-08-07 21:24:46,812 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local1711392538_0001 running in uber mode : false
2018-08-07 21:24:46,814 INFO  [pool-3-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 2 sorted segments
2018-08-07 21:24:46,815 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) -  map 100% reduce 0%
2018-08-07 21:24:46,815 INFO  [pool-3-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 2 segments left of total size: 1260 bytes
2018-08-07 21:24:46,823 INFO  [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(758)) - Merged 2 segments, 1266 bytes to disk to satisfy reduce memory limit
2018-08-07 21:24:46,824 INFO  [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(788)) - Merging 1 files, 1268 bytes from disk
2018-08-07 21:24:46,825 INFO  [pool-3-thread-1] reduce.MergeManagerImpl (MergeManagerImpl.java:finalMerge(803)) - Merging 0 segments, 0 bytes from memory into reduce
2018-08-07 21:24:46,825 INFO  [pool-3-thread-1] mapred.Merger (Merger.java:merge(606)) - Merging 1 sorted segments
2018-08-07 21:24:46,827 INFO  [pool-3-thread-1] mapred.Merger (Merger.java:merge(705)) - Down to the last merge-pass, with 1 segments left of total size: 1261 bytes
2018-08-07 21:24:46,830 INFO  [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 2 / 2 copied.
2018-08-07 21:24:46,836 INFO  [pool-3-thread-1] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(1243)) - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
2018-08-07 21:24:46,886 INFO  [pool-3-thread-1] mapred.Task (Task.java:done(1046)) - Task:attempt_local1711392538_0001_r_000000_0 is done. And is in the process of committing
2018-08-07 21:24:46,888 INFO  [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - 2 / 2 copied.
2018-08-07 21:24:46,888 INFO  [pool-3-thread-1] mapred.Task (Task.java:commit(1225)) - Task attempt_local1711392538_0001_r_000000_0 is allowed to commit now
2018-08-07 21:24:46,910 INFO  [pool-3-thread-1] output.FileOutputCommitter (FileOutputCommitter.java:commitTask(535)) - Saved output of task 'attempt_local1711392538_0001_r_000000_0' to file:/h:/wc/output/_temporary/0/task_local1711392538_0001_r_000000
2018-08-07 21:24:46,912 INFO  [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:statusUpdate(591)) - reduce > reduce
2018-08-07 21:24:46,912 INFO  [pool-3-thread-1] mapred.Task (Task.java:sendDone(1184)) - Task 'attempt_local1711392538_0001_r_000000_0' done.
2018-08-07 21:24:46,913 INFO  [pool-3-thread-1] mapred.Task (Task.java:done(1080)) - Final Counters for attempt_local1711392538_0001_r_000000_0: Counters: 24
    File System Counters
        FILE: Number of bytes read=3688
        FILE: Number of bytes written=219182
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Combine input records=0
        Combine output records=0
        Reduce input groups=85
        Reduce shuffle bytes=1274
        Reduce input records=105
        Reduce output records=85
        Spilled Records=105
        Shuffled Maps =2
        Failed Shuffles=0
        Merged Map outputs=2
        GC time elapsed (ms)=0
        Total committed heap usage (bytes)=339738624
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Output Format Counters 
        Bytes Written=725
2018-08-07 21:24:46,913 INFO  [pool-3-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(325)) - Finishing task: attempt_local1711392538_0001_r_000000_0
2018-08-07 21:24:46,914 INFO  [Thread-5] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete.
2018-08-07 21:24:47,816 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) -  map 100% reduce 100%
2018-08-07 21:24:47,817 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1378)) - Job job_local1711392538_0001 completed successfully
2018-08-07 21:24:47,828 INFO  [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 30
    File System Counters
        FILE: Number of bytes read=5349
        FILE: Number of bytes written=652918
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=8
        Map output records=105
        Map output bytes=1052
        Map output materialized bytes=1274
        Input split bytes=176
        Combine input records=0
        Combine output records=0
        Reduce input groups=85
        Reduce shuffle bytes=1274
        Reduce input records=105
        Reduce output records=85
        Spilled Records=210
        Shuffled Maps =2
        Failed Shuffles=0
        Merged Map outputs=2
        GC time elapsed (ms)=8
        Total committed heap usage (bytes)=913833984
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters 
        Bytes Read=640
    File Output Format Counters 
        Bytes Written=725

Process finished with exit code 0

1.2 程序Windows本地运行,文件在hdfs

修改此处代码

Configuration conf = new Configuration();

        //访问本地
        conf.set("mapreduce.framework.name","local");
        conf.set("fs.defaultFS","hdfs://node1:9000/");
        //conf.set("fs.defaultFS","file:///");


        /*conf.set("mapreduce.framework.name","yarn");
        conf.set("yarn.resourcemanager.hostname","node1");
        conf.set("fs.defaultFS","hdfs://node1:9000/");*/


        Job job = Job.getInstance(conf);

1.3 本地调试

这里写图片描述
待统计的数据

hello apple apple
hello jack
hello mark
hello jerry jerry

1.3.1 打断点

这里写图片描述
这里写图片描述

1.3.2 开始调试

这里写图片描述
这里写图片描述

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值