http://releases.ubuntu.com/13.04/
新建用户组和用户:
sudo addgroup hadoop
sudo adduser --ingroup hadoop hadoop
授权:
sudo gedit /etc/sudoers
添加下面代码
hadoop ALL=(ALL:ALL) ALL
===========================
hosts:
127.0.0.1 localhost
192.168.18.220 master
192.168.66.130 node0
192.168.66.129 ubuntu
# The following lines are desirable for IPv6 capable hosts
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
===========================
export JRE_HOME=${JAVA_HOME}/jre
export ZOOKEEPER_HOME=/home/hadoop/hadoop-2.2.0/zookeeper-3.4.5
export HADOOP_HOME=/home/hadoop/hadoop-2.2.0
export HADOOP_2_HOME=/home/hadoop/hadoop-0.20.2
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:${ZOOKEEPER_HOME}/lib:
export PATH="$PATH:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:${JAVA_HOME}/bin:${ZOOKEEPER_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_2_HOME}/bin"
source /etc/profile
sudo update-alternatives --install /usr/bin/java java /usr/lib/java/jdk7/bin/java 300
sudo update-alternatives --install /usr/bin/javac javac /usr/lib/java/jdk7/bin/javac 300
.bashrc
source /etc/profile  # 注意:source 是 shell 内建命令,不能用 sudo 执行
====================================================================================================================================
#更新下载源
====================================================================================
#装机后的推荐
====================================================================================
#安装QQ
====================================================================================
#安装VM
http://download.pchome.net/system/sysenhance/detail-75584.html VM下载地址
http://www.th7.cn/system/lin/201210/32989.shtml
====================================================================================
#安装jdk
sudo apt-get install openjdk-7-jdk
http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html 官方地址
自己下载JDK:
tar zxvf jdk.tar.gz -C /home/hadoop/
sudo update-alternatives --install /usr/bin/java java /home/hadoop/jdk1.7.0_45/bin/java 300
sudo update-alternatives --install /usr/bin/javac javac /home/hadoop/jdk1.7.0_45/bin/javac 300
sudo update-alternatives --config java
====================================================================================
#安装eclipse
http://www.eclipse.org/downloads/packages/release/europa/winter eclipse地址
cd ~
mkdir java
#配置
tar xvfz ~/download/eclipse-jee-europa-winter-linux-gtk-x86_64.tar.gz -C ~/java
cd ~/java/eclipse
sudo gedit eclipse.desktop
#加入下面信息
[Desktop Entry]
Name=eclipse
Name[zh_CN]=eclipse
Comment=eclipse Client
Exec=/home/xiaomao/java/eclipse/eclipse
Icon=/home/xiaomao/java/eclipse/icon.xpm
Terminal=false
Type=Application
Categories=Application;
Encoding=UTF-8
StartupNotify=true
sudo cp ~/java/eclipse/eclipse.desktop /usr/share/applications
====================================================================================
#安装mysql
sudo apt-get install mysql-client-core-5.5
http://www.mysql.com/products/connector/mysql 驱动
====================================================================================
#修改主机名
sudo gedit /etc/hosts
sudo gedit /etc/hostname (非Root权限环境下)
#允许其他用户访问图形界面
xhost +
====================================================================================
SSH参考文章
http://blog.lizhigang.net/archives/249 点击打开链接
#配置ssh
查看ssh运行状态
service ssh status
启动或重启 SSH
sudo service ssh start
或者
sudo /etc/init.d/ssh restart
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
#删除ssh
sudo apt-get remove ssh
sudo apt-get remove openssh-server
sudo apt-get remove openssh-client
#安装ssh
sudo apt-get install ssh
#节点登陆
scp hadoop@master:~/.ssh/id_rsa.pub ~/.ssh/master_rsa.pub
cat ~/.ssh/master_rsa.pub >> ~/.ssh/authorized_keys
#自我登陆
scp hadoop@master:~/.ssh/id_rsa.pub ~/.ssh/master_rsa.pub
cat ~/.ssh/master_rsa.pub >> ~/.ssh/authorized_keys
#主点登陆
scp hadoop@ubuntu:~/.ssh/id_rsa.pub ~/.ssh/ubuntu_rsa.pub
cat ~/.ssh/ubuntu_rsa.pub >> ~/.ssh/authorized_keys
====================================================================================
#下载hadoop,hbase,hive
解压 hadoop,hbase,hive
并修改配置文件
http://www.linuxidc.com/Linux/2013-02/79661.htm HBASE配置
====================================================================================
#hadoop0.20.2CDH下载
http://www.cnblogs.com/L-aho/archive/2012/12/07/2807366.html hadoop0.20.2CDH
#sqoop下载
http://archive.cloudera.com/cdh/3/sqoop-1.2.0-CDH3B4.tar.gz sqoop-1.2.0下载
#sqoop配置安装
http://www.cnblogs.com/L-aho/archive/2012/12/07/2807366.html sqoop安装
====================================================================================
分发文件
scp -r ~/hadoop hadoop@node1:~
====================================================================================
hadoop配置:
1,修改hadoop-env.sh文件:
export JAVA_HOME=/home/hadoop/jdk1.7.0_45
2,修改yarn-env.sh文件:
export JAVA_HOME=/home/hadoop/jdk1.7.0_45
3,修改core-site.xml文件:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="cmt.xml"/>
<!--设置缺省的目录前缀-->
<property>
<name>fs.defaultFS</name>
<value>viewfs://ns1</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<!--JournalNode 所在节点上的一个目录,用于存放 editlog 和其他状态信息。该参数只能设
置一个目录,你可以对磁盘做 RAID 提高数据可靠性。-->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/hadoop-2.2.0/journal</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/hadoop-2.2.0/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://master1:9000</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>master1</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
4,
修改hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<!--HDFS 命名服务的逻辑名称,可用户自己定义,比如 mycluster,注意,该名称将被基
于 HDFS 的系统使用,比如 Hbase 等,此外,需要你想启用 HDFS Federation,可以通过该
参数指定多个逻辑名称,并用“,”分割。-->
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
<description>Logical name for this new nameservice</description>
</property>
<!--dfs.ha.namenodes.[$nameservice ID]:
某个命名服务下包含的 NameNode 列表,可为每个 NameNode 指定一个自定义的 ID 名
称,比如命名服务 mycluster 下有两个 NameNode,分别命名为 nn1 和 nn2,-->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
<description>Unique identifiers for each NameNode in the nameservice
</description>
</property>
<!--dfs.namenode.rpc-address.[$nameservice ID].[$name node ID]
为每个 NameNode 设置 RPC 地址-->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>master1:9000</value>
</property>
<!--dfs.namenode.http-address.[$nameservice ID].[$name node ID]
为每个 NameNode 设置对外的 HTTP 地址-->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>master1:50070</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>master1-s:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>master1-s:50070</value>
</property>
<!--设置一组 journalNode 的 URI 地址,active NameNode 将 edit log 写入这些
JournalNode,而 standby NameNode 读取这些 edit log,并作用在内存中的目录树中,该属性
值应符合以下格式:
qjournal://host1:port1;host2:port2;host3:port3/journalId -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master1:8485;master1-s:8485;slave1:8485/ns1</value>
</property>
<!--设置切换模式-->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
<description>设置为true时,表示自动切换</description>
</property>
<!--设置客户端与 active NameNode 进行交互的 Java 实现类,DFS 客户端通过该类寻找当前的
active NameNode。
该类可由用户自己实现,
默认实现为 ConfiguredFailoverProxyProvider。-->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!--为了解决脑裂的问题:sshfence 通过 ssh 登录到前一个 active NameNode 并将其杀死。
为了让该机制成功执行,
需配置免密码 ssh 登陆,
这可通过参数 dfs.ha.fencing.ssh.private-key-files 设置一个私钥文件。-->
<property>
<name>dfs.ha.fencing.methods</name><value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!--
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence([[username][:port]])</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
-->
<!--Shell方式-->
<!--
<property>
<name>dfs.ha.fencing.methods</name>
<value>shell(/path/to/my/script.sh arg1 arg2 ...)</value>
</property>
-->
<!--zookeeper-->
<property>
<name>ha.zookeeper.quorum</name>
<value>master1:2181,master1-s:2181,slave1:2181</value>
<description>指定用于HA的ZooKeeper集群机器列表</description>
</property>
<!--
<property>
<name>ha.zookeeper.session-timeout.ms</name>
<value>5000</value>
<description>指定ZooKeeper超时间隔,单位毫秒</description>
</property>
-->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
5,修改
yarn-site.xml
<?xml version="1.0"?>
<configuration>
<!--resourcemanager对客户端暴露的地址。客户端通过该地址向RM提交应用程序,杀死应用程序等-->
<property>
<name>yarn.resourcemanager.address</name>
<value>master1:18032</value>
</property>
<!--resourcemanager对application暴露的访问地址。applicationMaster通过该地址向RM申请资源,释放资源等-->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master1:18030</value>
</property>
<!--resourcemanager对NodeManager暴露的地址。NodeManager通过该地址向RM汇报心跳,领取任务等-->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master1:18033</value>
</property>
<!--处理来自NodeManager的RPC请求的Handler数目-->
<!--
<property>
<name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
<value>50</value>
</property>
-->
<!--处理来自ApplicationMaster的RPC请求的Handler数目-->
<!--
<property>
<name>yarn.resourcemanager.scheduler.client.thread-count</name>
<value>50</value>
</property>
-->
<!--resourcemanager对管理员暴露的访问地址,管理员通过该地址向RM发送管理命令-->
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master1:18087</value>
</property>
<!--resourcemanager对外web ui地址。用户可通过该地址在浏览器中查看集群各类信息-->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master1:18088</value>
</property>
<!--启用的资源调度器主类,目前可用的有FIFO,Capacity Scheduler 和 Fair Scheduler-->
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<!--NodeManager心跳间隔-->
<!--
<property>
<name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
<value>1000</value>
</property>
-->
<!--NodeManager相关参数配置-->
<!--NodeManager上运行的附属服务-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
6,修改cmt.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!--客户端挂载表,其内容就是虚拟路径到具体某个NS及其物理子目录的映射关系,-->
<property>
<name>fs.viewfs.mounttable.cmt.link./user</name>
<value>hdfs://ns1/user</value>
</property>
<property>
<name>fs.viewfs.mounttable.cmt.link./fee</name>
<value>hdfs://ns1/fee</value>
</property>
</configuration>
7, 修改slaves文件:
slave1