I. Preparation
1. Create the user and group
groupadd hadoop
useradd hadoop -g hadoop
2. Configure hosts
vi /etc/hosts
Add: 192.168.1.8 slaver
3. Edit /etc/sysconfig/network
HOSTNAME=slaver
4. Set up passwordless SSH login
su hadoop
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cd ~/.ssh/
cat id_rsa.pub >> authorized_keys
chmod 600 authorized_keys
chmod 700 ~/.ssh/
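A quick sanity check (using the slaver hostname from step 2): ssh to the machine itself; after accepting the host key once, it should print the date without asking for a password.
ssh slaver date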
5. Install the JDK
mkdir -p /usr/java
tar -zxvf jdk-8u45-linux-x64.tar.gz -C /usr/java
cd /usr/java
ln -s /usr/java/jdk1.8.0_45 /usr/java/jdk
Add the environment variables:
vi /etc/profile
export JAVA_HOME=/usr/java/jdk
export PATH=$JAVA_HOME/bin:$PATH
source /etc/profile
Verify the installation: java -version
II. Installing Hadoop
1. Extract
tar -zxvf hadoop-1.0.4.tar.gz -C /opt
mv /opt/hadoop-1.0.4 /opt/hadoop
2. Grant permissions
chown -R hadoop:hadoop /opt/hadoop
3. Configure the relevant files
su hadoop
cd /opt/hadoop/conf
(1) Configure environment variables
vi hadoop-env.sh
Add:
export JAVA_HOME=/usr/java/jdk
export HADOOP_HEAPSIZE=512
export HADOOP_PID_DIR=/home/$USER/pids
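HADOOP_PID_DIR above points at /home/$USER/pids; a small sketch that pre-creates it so the daemons can write their pid files (the directory layout simply follows the variable above):
mkdir -p /home/hadoop/pids
chown hadoop:hadoop /home/hadoop/pids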
(2) Edit core-site.xml
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://slaver:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/${user.name}/tmp</value>
</property>
</configuration>
(3) Edit hdfs-site.xml
<configuration>
<property>
<name>dfs.name.dir</name>
<value>/home/${user.name}/dfs_name</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/home/${user.name}/dfs_data</value>
</property>
</configuration>
(4) Edit mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>slaver:9001</value>
</property>
<property>
<name>mapred.system.dir</name>
<value>/home/${user.name}/mapred_system</value>
</property>
<property>
<name>mapred.local.dir</name>
<value>/home/${user.name}/mapred_local</value>
</property>
</configuration>
(5) Edit masters
slaver
(6) Edit slaves
slaver
4. Format HDFS
cd /opt/hadoop/bin
./hadoop namenode -format
5. Start
./start-all.sh
6. Check the running processes
jps
12192 TaskTracker
11712 NameNode
12258 Jps
11827 DataNode
11971 SecondaryNameNode
12062 JobTracker
7. View the web UIs
http://192.168.1.8:50030/jobtracker.jsp
http://192.168.1.8:50070/dfshealth.jsp
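Beyond the web pages, a quick command-line smoke test of HDFS (a sketch; the /test path and the file copied are arbitrary examples):
cd /opt/hadoop/bin
./hadoop fs -mkdir /test
./hadoop fs -put /etc/hosts /test/hosts
./hadoop fs -ls /test
./hadoop fs -cat /test/hosts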
III. Installing HBase
1. Extract
su
tar -zxvf hbase-0.94.2.tar.gz -C /opt
mv /opt/hbase-0.94.2 /opt/hbase
2. Grant user permissions
chown -R hadoop:hadoop /opt/hbase/
3. Edit the configuration files
(1) Add environment variables
cd /opt/hbase/conf/
vi hbase-env.sh
Add:
export JAVA_HOME=/usr/java/jdk/
export HBASE_MANAGES_ZK=true
(2) Configure hbase-site.xml
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://slaver:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.master</name>
<value>slaver:60000</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/home/${user.name}/tmp</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>slaver</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/${user.name}/zookeeper</value>
</property>
</configuration>
(3) Configure regionservers
slaver
4. Overwrite the Hadoop jar bundled with HBase
cp /opt/hadoop/hadoop-core-1.0.4.jar /opt/hbase/lib/
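HBase ships its own hadoop-core jar in lib/, and leaving two versions side by side can produce version-mismatch errors at startup, so the safer sequence sets the bundled jar aside first (a sketch; the bundled version number is an assumption, hence the glob):
cd /opt/hbase/lib
mv hadoop-core-1.0.*.jar /tmp/   # set aside the jar bundled with HBase
cp /opt/hadoop/hadoop-core-1.0.4.jar .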
5. Edit hdfs-site.xml (Hadoop's, under /opt/hadoop/conf)
Add:
<property>
<name>dfs.support.append</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.max.xcievers</name>
<value>4096</value>
</property>
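These two properties only take effect after the Hadoop daemons are restarted (sketch):
cd /opt/hadoop/bin
./stop-all.sh
./start-all.sh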
6. Run
Mistake: HBase was first started as root, so the log files it created were owned by root and the hadoop user could not write to them:
./start-hbase.sh
root@slaver's password:
root@slaver's password: slaver: Permission denied, please try again.
starting master, logging to /opt/hbase/bin/../logs/hbase-hadoop-master-Slaver.out
[hadoop@Slaver bin]$ ./start-hbase.sh
slaver: starting zookeeper, logging to /opt/hbase/bin/../logs/hbase-hadoop-zookeeper-Slaver.out
slaver: /opt/hbase/bin/hbase-daemon.sh: line 150: /opt/hbase/bin/../logs/hbase-hadoop-zookeeper-Slaver.log: Permission denied
slaver: /opt/hbase/bin/hbase-daemon.sh: line 151: /opt/hbase/bin/../logs/hbase-hadoop-zookeeper-Slaver.log: Permission denied
slaver: /opt/hbase/bin/hbase-daemon.sh: line 152: /opt/hbase/bin/../logs/hbase-hadoop-zookeeper-Slaver.out: Permission denied
Fix:
su
chown -R hadoop:hadoop /opt/hbase/logs/
Verify with jps:
4513 SecondaryNameNode
4706 TaskTracker
13634 HRegionServer
13715 Jps
13509 HMaster
5542 TaskTracker
13448 HQuorumPeer
5243 DataNode
4588 JobTracker
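HQuorumPeer is the ZooKeeper instance that HBase manages itself (HBASE_MANAGES_ZK=true above). It can be probed with ZooKeeper's four-letter ruok command (assuming nc is installed):
echo ruok | nc slaver 2181   # a healthy server answers "imok"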
7. Web UI
http://192.168.1.8:60010/table.jsp?name=user
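The table.jsp URL above assumes a table named user already exists; a minimal HBase shell session to create and populate one (the column family f and the sample row are arbitrary):
cd /opt/hbase/bin
./hbase shell
create 'user', 'f'
put 'user', 'row1', 'f:name', 'tom'
scan 'user'
exit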
8. Configure MapReduce to read data from HBase
Add to /opt/hadoop/conf/hadoop-env.sh:
export HBASE_HOME=/opt/hbase
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HBASE_HOME/conf:$HBASE_HOME/hbase-0.94.2.jar:$HBASE_HOME/hbase-0.94.2-tests.jar:$HBASE_HOME/lib/zookeeper-3.4.3.jar:$HBASE_HOME/lib/protobuf-java-2.4.0a.jar
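After restarting the MapReduce daemons so the new classpath is picked up, the RowCounter job bundled in the HBase jar makes a quick end-to-end test (a sketch, counting the user table created earlier):
cd /opt/hadoop/bin
./hadoop jar /opt/hbase/hbase-0.94.2.jar rowcounter user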
IV. Installing Hive
1. Extract
tar -zxvf hive-0.9.0.tar.gz -C /opt
mv /opt/hive-0.9.0 /opt/hive
2. Permissions
chown -R hadoop:hadoop /opt/hive/
3. Configure environment variables
cd /opt/hive/conf/
cp hive-env.sh.template hive-env.sh
vi hive-env.sh
Add:
export JAVA_HOME=/usr/java/jdk/
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
export HADOOP_HOME=/opt/hadoop
4. Configuration files
cp hive-default.xml.template hive-default.xml
cp hive-default.xml.template hive-site.xml
5. Run
cd /opt/hive/bin
./hive
6. Integrate Hive with HBase
rm -rf /opt/hive/lib/hbase*.jar
cp /opt/hbase/hbase*.jar /opt/hive/lib/
7. Add to hive-site.xml
<configuration>
<property>
<name>hive.aux.jars.path</name>
<value>file:///opt/hive/lib/hive-hbase-handler-0.9.0.jar,file:///opt/hive/lib/hbase-0.94.2.jar,file:///opt/hive/lib/zookeeper-3.4.3.jar</value>
</property>
</configuration>
8. Start Hive
./hive -hiveconf hbase.master=slaver:60000
9. Create a table
create table test(key int, value string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties('hbase.columns.mapping' = ':key,f:value')
tblproperties('hbase.table.name' = 'test');
Error:
FAILED: Error in metadata: javax.jdo.JDOFatalDataStoreException: Cannot get a connection, pool error Could not create a validated object, cause: A read-only user or a user in a read-only database is not permitted to disable read-only mode on a connection.
NestedThrowables:
org.apache.commons.dbcp.SQLNestedException: Cannot get a connection, pool error Could not create a validated object, cause: A read-only user or a user in a read-only database is not permitted to disable read-only mode on a connection.
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask
Fix: the Derby metastore was left locked; rename the stale lock files out of the way:
cd metastore_db/
mv dbex.lck 1218dbex.lck
mv db.lck 1218db.lck
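After clearing the lock files, re-run the create statement; a short session to verify the mapping from the Hive side (a sketch):
cd /opt/hive/bin
./hive -hiveconf hbase.master=slaver:60000
show tables;
select * from test;   -- rows put into the HBase table 'test' appear here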
V. Installing Pig
1. Extract
tar -zxvf pig-0.10.0.tar.gz -C /opt/
mv /opt/pig-0.10.0 /opt/pig
chown -R hadoop:hadoop /opt/pig
2. Configure environment variables
vi /opt/pig/bin/pig and add:
export JAVA_HOME=/usr/java/jdk/
export PIG_INSTALL=/opt/pig
export HADOOP_INSTALL=/opt/hadoop
export PATH=$PIG_INSTALL/bin:$HADOOP_INSTALL/bin:$PATH
export PIG_CLASSPATH=$HADOOP_INSTALL/conf
3. Run Pig
Load a local file into HDFS first:
cd /opt/hadoop/bin
./hadoop fs -copyFromLocal /opt/data/emp.txt /opt/data/emp.txt
./hadoop fs -cat /opt/data/emp.txt
Then start Pig:
cd /opt/pig/bin
./pig
Load the file from HDFS into Pig:
TEST = LOAD '/opt/data/emp.txt' USING PigStorage(' ') AS (id,name);
dump TEST;
Process the data:
TMP= FOREACH TEST GENERATE name;
STORE TMP INTO '/opt/data/dist.txt' USING PigStorage(' ');
View the results:
./hadoop fs -cat /opt/data/dist.txt/part-m-00000
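Beyond a plain projection, grouping and aggregation are where Pig earns its keep; a short follow-on in the same grunt session (the id/name types are assumptions about emp.txt):
A = LOAD '/opt/data/emp.txt' USING PigStorage(' ') AS (id:int, name:chararray);
B = GROUP A BY name;
C = FOREACH B GENERATE group AS name, COUNT(A) AS cnt;
STORE C INTO '/opt/data/name_counts' USING PigStorage(' ');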