Hadoop tricks(自用)

配置

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- URI of the default file system that clients and daemons resolve paths against. -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://192.168.0.1:9000</value>
    <description>The name of the default file system, given as a URI
      (scheme://host:port). Here it is the HDFS instance served at
      192.168.0.1:9000.
    </description>
  </property>

  <!-- Directory used as hadoop.tmp.dir on this cluster. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/dbkehadoop/hadoop_tmp_dir</value>
  </property>

  <!--
    Permission checking is switched off here.
    NOTE(review): dfs.permissions is an HDFS setting that is conventionally
    kept in hdfs-site.xml; confirm it is meant to live in this file.
    Disabling permission checks is presumably only acceptable on a trusted
    or development cluster; verify before reusing this configuration.
  -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
    <description>
      If "true", enable permission checking in HDFS.
      If "false", permission checking is turned off,
      but all other behavior is unchanged.
      Switching from one parameter value to the other does not change the mode,
      owner or group of files or directories.
    </description>
  </property>

</configuration>

mapred-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- host:port of the JobTracker that tasks and clients connect to. -->
  <property>
    <name>mapred.job.tracker</name>
    <value>192.168.0.1:9001</value>
  </property>

  <!--
    NOTE(review): presumably makes input formats descend into sub-directories
    of the input path; verify that the Hadoop version in use honors this key.
  -->
  <property>
    <name>mapred.input.dir.recursive</name>
    <value>true</value>
  </property>

  <!-- Hosts listed in this file are excluded by the JobTracker. -->
  <property>
    <name>mapred.hosts.exclude</name>
    <value>/home/dbkehadoop/hadoop-1.2.1/conf/excludes</value>
    <description>Names a file that contains the list of hosts that
      should be excluded by the jobtracker.  If the value is empty, no
      hosts are excluded.</description>
  </property>

  <!-- Replication level applied to submitted job files. -->
  <property>
    <name>mapred.submit.replication</name>
    <value>2</value>
    <description>The replication level for submitted job files.  This
      should be around the square root of the number of nodes.
    </description>
  </property>

  <!-- JVM options for child task processes: maximum heap of 2048 MB each. -->
  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx2048m</value>
    <description>Java opts for the task tracker child processes.
      The following symbol, if present, will be interpolated: @taskid@ is replaced
      by current TaskID. Any other occurrences of '@' will go unchanged.
      For example, to enable verbose gc logging to a file named for the taskid in
      /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
      -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
      The configuration variable mapred.child.ulimit can be used to control the
      maximum virtual memory of the child processes. </description>
  </property>

  <!-- Only nodes listed in this file may connect to the JobTracker. -->
  <property>
    <name>mapred.hosts</name>
    <value>/home/dbkehadoop/hadoop-1.2.1/conf/includes</value>
    <description>Names a file that contains the list of nodes that may
      connect to the jobtracker.  If the value is empty, all hosts are
      permitted.</description>
  </property>

</configuration>

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- Replication setting for HDFS; this cluster uses 2. -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>

</configuration>


压缩

压缩map输出

压缩类型为可选项

        // Build a job whose map output is compressed.
        // Choosing a specific codec is optional; gzip is used here.
        Configuration conf = new Configuration();
        Class<? extends CompressionCodec> mapOutputCodec = GzipCodec.class;
        conf.setClass("mapred.map.output.compression.codec", mapOutputCodec, CompressionCodec.class);
        conf.setBoolean("mapred.compress.map.output", true);

        // The settings above are applied to conf before the Job is created from it.
        Job job = new Job(conf, "EmitLinkRelations");

压缩reduce输出

        // Turn on compression for the job's (reduce) output files.
        TextOutputFormat.setCompressOutput(job, true);
        // Use gzip as the codec for the compressed output.
        TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class);



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值