Environment
System: Ubuntu 12.04
Hadoop: 0.23.9 download
jdk: 1.7.0_25
As the initial user (an account with sudo rights)
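extract the tarball into /opt/hadoop/ (adjust the file name to the version you downloaded): sudo mkdir -p /opt/hadoop && sudo tar -zxvf hadoop-0.23.6.tar.gz -C /opt/hadoop/ --strip-components=1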
install ssh server: sudo apt-get install openssh-server
add the user hadoop: sudo adduser hadoop
add the following line to /etc/sudoers (edit the file with: sudo visudo):
after the line:
%sudo ALL=(ALL:ALL) ALL
add:
hadoop ALL=(ALL:ALL) ALL
switch to the new user: su hadoop
At user hadoop
set up passwordless SSH login to localhost
generate a key pair (this creates id_rsa and id_rsa.pub in ~/.ssh): ssh-keygen -t rsa -P ""
authorize the public key: cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
test :
ssh localhost
if no password is requested, the setup works; then leave the test session: exit
For Hadoop Configuration
change the owner of /opt/hadoop/ to hadoop: sudo chown -R hadoop:hadoop /opt/hadoop
add the following lines to ~/.bashrc (if this file does not exist, copy it from another user's home directory; it also gives you a more convenient shell)
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-i386
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
export PATH=/opt/hadoop/bin:$PATH
add code to yarn-env.sh in /opt/hadoop/etc/hadoop/
export HADOOP_PREFIX=/opt/hadoop
export HADOOP_COMMON_HOME=${HADOOP_PREFIX}
export HADOOP_HDFS_HOME=${HADOOP_PREFIX}
export PATH=$PATH:$HADOOP_PREFIX/bin
export PATH=$PATH:$HADOOP_PREFIX/sbin
export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}
export YARN_HOME=${HADOOP_PREFIX}
export HADOOP_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop
add the following to core-site.xml in /opt/hadoop/etc/hadoop/ (an exception occurred when the file started with a blank line, so make sure nothing precedes the first line)
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:12200</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/hadoop-root</value>
</property>
<property>
<name>fs.arionfs.impl</name>
<value>org.apache.hadoop.fs.pvfs2.Pvfs2FileSystem</value>
<description>The FileSystem for arionfs.</description>
</property>
</configuration>
add code to hdfs-site.xml in /opt/hadoop/etc/hadoop/
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/opt/hadoop/data/dfs/name</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/opt/hadoop/data/dfs/data</value>
<final>true</final>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
add code to mapred-site.xml in /opt/hadoop/etc/hadoop/
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.job.tracker</name>
<value>hdfs://localhost:9001</value>
<final>true</final>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1536</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1024M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>3072</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2560M</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.parallelcopies</name>
<value>50</value>
</property>
<property>
<name>mapreduce.system.dir</name>
<value>file:/opt/hadoop/data/mapred/system</value>
</property>
<property>
<name>mapreduce.local.dir</name>
<value>file:/opt/hadoop/data/mapred/local</value>
<final>true</final>
</property>
</configuration>
add code to yarn-site.xml in /opt/hadoop/etc/hadoop/
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce.shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>user.name</name>
<value>hadoop</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>localhost:54311</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:54312</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>localhost:54313</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:54314</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>localhost:54315</value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>localhost</value>
</property>
</configuration>
set JAVA_HOME in hadoop-config.sh in /opt/hadoop/libexec/
before the following block:
if [[ -z $JAVA_HOME ]]; then
# On OSX use java_home (or /Library for older versions)
if [ "Darwin" == "$(uname -s)" ]; then
if [ -x /usr/libexec/java_home ]; then
export JAVA_HOME=($(/usr/libexec/java_home))
else
export JAVA_HOME=(/Library/Java/Home)
fi
fi
and add (using the same JDK path as in ~/.bashrc):
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-i386
check the result: