Hadoop Streaming 常用配置项

最新推荐文章于 2021-11-30 11:01:47 发布

Miner帆

最新推荐文章于 2021-11-30 11:01:47 发布

阅读量1k

点赞数

Job conf
<configuration>
  <!--
    Job-level settings. Each <property> gives a setting's name, the value
    listed for it, and a human-readable description. An empty <value>
    means no value is supplied for that setting.
  -->

  <!-- Job identity and user code -->
  <property>
    <name>mapred.job.name</name>
    <value></value>
    <description>The name of the job.</description>
  </property>

  <property>
    <name>mapred.mapper.class</name>
    <value>org.apache.hadoop.mapred.lib.IdentityMapper</value>
    <description>The full class name of the mapper.</description>
  </property>

  <property>
    <name>mapred.combiner.class</name>
    <value></value>
    <description>The full class name of the combiner.</description>
  </property>

  <property>
    <name>mapred.reducer.class</name>
    <value>org.apache.hadoop.mapred.lib.IdentityReducer</value>
    <description>The full class name of the reducer.</description>
  </property>

  <!--
    This setting has no default. The value is left empty, like the other
    unset settings in this file, instead of holding placeholder text that
    a loader would read as a literal jar path.
  -->
  <property>
    <name>mapred.jar</name>
    <value></value>
    <description>The full path to the jarfile containing all the needed classes.</description>
  </property>

  <!-- Task counts -->
  <property>
    <name>mapred.map.tasks</name>
    <value>1</value>
    <description>The default number of map tasks per job. Typically set to a prime several times greater than number of available hosts. Ignored when mapred.job.tracker is "local". </description>
  </property>

  <property>
    <name>mapred.reduce.tasks</name>
    <value>1</value>
    <description>The default number of reduce tasks per job. Typically set to a prime close to the number of available hosts. Ignored when mapred.job.tracker is "local". </description>
  </property>

  <!-- Input and output locations and formats -->
  <property>
    <name>mapred.input.dir</name>
    <value></value>
    <description>A comma separated list of input directories.</description>
  </property>

  <property>
    <name>mapred.output.dir</name>
    <value></value>
    <description>The output directory for the job.</description>
  </property>

  <property>
    <name>mapred.input.format.class</name>
    <value>org.apache.hadoop.mapred.TextInputFormat</value>
    <description>The full class name of the InputFormat class to be used for obtaining the input to the mapper.</description>
  </property>

  <property>
    <name>mapred.output.format.class</name>
    <value>org.apache.hadoop.mapred.TextOutputFormat</value>
    <description>The full class name of the OutputFormat class to be used for saving the output of the reducer.</description>
  </property>

  <!-- Key and value types -->
  <property>
    <name>mapred.input.key.class</name>
    <value>org.apache.hadoop.io.LongWritable</value>
    <description>The full classname of the input key.</description>
  </property>

  <property>
    <name>mapred.input.value.class</name>
    <value>org.apache.hadoop.io.UTF8</value>
    <description>The full classname of the input value.</description>
  </property>

  <property>
    <name>mapred.output.key.class</name>
    <value>org.apache.hadoop.io.LongWritable</value>
    <description>The full classname of the output key.</description>
  </property>

  <property>
    <name>mapred.output.value.class</name>
    <value>org.apache.hadoop.io.UTF8</value>
    <description>The full classname of the output value.</description>
  </property>

  <!-- Partitioning -->
  <property>
    <name>mapred.partitioner.class</name>
    <value>org.apache.hadoop.mapred.lib.HashPartitioner</value>
    <description>The full classname of the partitioner class.</description>
  </property>

  <!-- Submitting user -->
  <property>
    <name>user.name</name>
    <value>Dr. Who</value>
    <description>The name of the user running the job.</description>
  </property>

  <!-- Execution tuning -->
  <property>
    <name>mapred.combine.buffer.size</name>
    <value>100000</value>
    <description>The number of entries the combining collector caches before combining them and writing to disk.</description>
  </property>

  <property>
    <name>mapred.speculative.execution</name>
    <value>true</value>
    <description>If true, then multiple instances of some map tasks may be executed in parallel.</description>
  </property>

  <property>
    <name>mapred.min.split.size</name>
    <value>0</value>
    <description>The minimum size chunk that map input should be split into. Note that some file formats may have minimum split sizes that take priority over this setting.</description>
  </property>

</configuration>