1. Deploying a Hadoop Cluster
- Prepare three machines and disable the firewall on each:
  hadoop1 192.168.200.41
  hadoop2 192.168.200.42
  hadoop3 192.168.200.43
- Install the Java environment and set the Java environment variables in /etc/profile.d/java.sh:
  # java
  export JAVA_HOME=/opt/java/jdk1.8.0_171
  export JRE_HOME=$JAVA_HOME/jre
  export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
  export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH

  # Load the environment variables
  source /etc/profile
- Synchronize the clocks on the three machines:
  # Install chrony
  yum -y install chrony
  # Edit /etc/chrony.conf and point it at an NTP server
  server ntp.aliyun.com iburst
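Not part of the original notes, but a quick sanity check with chrony's standard tooling confirms the time source is actually in use:
  # Start chronyd now and enable it at boot
  systemctl enable --now chronyd
  # Verify that ntp.aliyun.com is reachable and selected
  chronyc sources -v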
- Configure /etc/hosts on every machine:
  192.168.200.41 hadoop1
  192.168.200.42 hadoop2
  192.168.200.43 hadoop3
- Create a hadoop account with a password on each machine, then set up passwordless SSH among the three machines (a command sketch follows):
  1. Generate a public/private key pair.
  2. Distribute the keys to the three machines.
  3. Fix the owner, group, and permissions of the key files.
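The exact commands are not given in the original; a minimal sketch using ssh-keygen and ssh-copy-id (run on each machine) would be:
  # As root: create the hadoop user and set its password
  useradd hadoop && passwd hadoop
  # As the hadoop user: generate a key pair without a passphrase
  ssh-keygen -t rsa -b 4096 -N "" -f ~/.ssh/id_rsa
  # Push the public key to all three nodes (including this one)
  for host in hadoop1 hadoop2 hadoop3; do ssh-copy-id hadoop@$host; done
  # ssh-copy-id sets sane permissions; otherwise make sure ~/.ssh is 700 and authorized_keys is 600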
- Upload hadoop-3.3.6.tar.gz, extract it, and create a symlink:
  mkdir -p /data/server
  tar -zxvf hadoop-3.3.6.tar.gz -C /data/server/
  ln -s /data/server/hadoop-3.3.6 /data/server/hadoop
  chown -R hadoop:hadoop /data/server/hadoop-3.3.6
  chown -R hadoop:hadoop /data/server/hadoop
- Set the Hadoop environment variables in /etc/profile.d/hadoop.sh:
  # hadoop
  export HADOOP_HOME=/data/server/hadoop
  export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

  source /etc/profile
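A quick way to confirm the variables took effect (not in the original notes) is to open a new shell and print the version:
  hadoop version   # should report Hadoop 3.3.6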
- Edit the configuration files under /data/server/hadoop/etc/hadoop: workers, hadoop-env.sh, core-site.xml, hdfs-site.xml, mapred-env.sh, mapred-site.xml, yarn-env.sh, and yarn-site.xml.
- workers
  hadoop1
  hadoop2
  hadoop3
- hadoop-env.sh
  export JAVA_HOME=/opt/java/jdk1.8.0_171
  export HADOOP_HOME=/data/server/hadoop
  export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
  export HADOOP_LOG_DIR=$HADOOP_HOME/logs
- core-site.xml
  <configuration>
    <property>
      <name>fs.defaultFS</name>
      <value>hdfs://hadoop1:8020</value>
    </property>
    <property>
      <name>io.file.buffer.size</name>
      <value>131072</value>
    </property>
    <property>
      <name>hadoop.proxyuser.hadoop.groups</name>
      <value>*</value>
    </property>
    <property>
      <name>hadoop.proxyuser.hadoop.hosts</name>
      <value>*</value>
    </property>
  </configuration>
- hdfs-site.xml
  <configuration>
    <property>
      <name>dfs.datanode.data.dir.perm</name>
      <value>700</value>
    </property>
    <property>
      <name>dfs.namenode.name.dir</name>
      <value>/data/nn</value>
    </property>
    <property>
      <name>dfs.namenode.hosts</name>
      <value>hadoop1,hadoop2,hadoop3</value>
    </property>
    <property>
      <name>dfs.blocksize</name>
      <value>268435456</value>
    </property>
    <property>
      <name>dfs.namenode.handler.count</name>
      <value>100</value>
    </property>
    <property>
      <name>dfs.datanode.data.dir</name>
      <value>/data/dn</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>3</value>
    </property>
    <property>
      <name>nfs.superuser</name>
      <value>hadoop</value>
    </property>
    <property>
      <name>nfs.dump.dir</name>
      <value>/tmp/.hdfs-nfs</value>
    </property>
    <property>
      <name>nfs.exports.allowed.hosts</name>
      <value>192.168.200.1 rw</value>
    </property>
  </configuration>
- mapred-env.sh
  export JAVA_HOME=/opt/java/jdk1.8.0_171
  export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
  export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
- mapred-site.xml
  <configuration>
    <property>
      <name>mapreduce.framework.name</name>
      <value>yarn</value>
      <description>Run MapReduce on YARN</description>
    </property>
    <property>
      <name>mapreduce.jobhistory.address</name>
      <value>hadoop1:10020</value>
      <description>JobHistory server RPC address: hadoop1:10020</description>
    </property>
    <property>
      <name>mapreduce.jobhistory.webapp.address</name>
      <value>hadoop1:19888</value>
      <description>JobHistory server web UI address: hadoop1:19888</description>
    </property>
    <property>
      <name>mapreduce.jobhistory.intermediate-done-dir</name>
      <value>/data/mr-history/tmp</value>
      <description>Temporary HDFS path for in-progress job history</description>
    </property>
    <property>
      <name>mapreduce.jobhistory.done-dir</name>
      <value>/data/mr-history/done</value>
      <description>HDFS path for completed job history</description>
    </property>
    <property>
      <name>yarn.app.mapreduce.am.env</name>
      <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
      <description>Set HADOOP_MAPRED_HOME to HADOOP_HOME</description>
    </property>
    <property>
      <name>mapreduce.map.env</name>
      <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
      <description>Set HADOOP_MAPRED_HOME to HADOOP_HOME</description>
    </property>
    <property>
      <name>mapreduce.reduce.env</name>
      <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
      <description>Set HADOOP_MAPRED_HOME to HADOOP_HOME</description>
    </property>
  </configuration>
- yarn-env.sh
  # JDK path
  export JAVA_HOME=/opt/java/jdk1.8.0_171
  # Hadoop install directory
  export HADOOP_HOME=/data/server/hadoop
  # Configuration directory
  export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
  # Log directory
  export HADOOP_LOG_DIR=$HADOOP_HOME/logs
- yarn-site.xml
  <configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
      <name>yarn.resourcemanager.hostname</name>
      <value>hadoop1</value>
      <description>Run the ResourceManager on hadoop1</description>
    </property>
    <property>
      <name>yarn.nodemanager.local-dirs</name>
      <value>/data/nm-local</value>
      <description>Local path for NodeManager intermediate data</description>
    </property>
    <property>
      <name>yarn.nodemanager.log-dirs</name>
      <value>/data/nm-log</value>
      <description>Local path for NodeManager logs</description>
    </property>
    <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle</value>
      <description>Enable the Shuffle service for MapReduce jobs</description>
    </property>
    <property>
      <name>yarn.log.server.url</name>
      <value>http://hadoop1:19888/jobhistory/logs</value>
      <description>JobHistory server URL</description>
    </property>
    <property>
      <name>yarn.web-proxy.address</name>
      <value>hadoop1:8089</value>
      <description>Web application proxy server address</description>
    </property>
    <property>
      <name>yarn.log-aggregation-enable</name>
      <value>true</value>
      <description>Enable log aggregation</description>
    </property>
    <property>
      <name>yarn.nodemanager.remote-app-log-dir</name>
      <value>/tmp/logs</value>
      <description>HDFS path for aggregated application logs</description>
    </property>
    <property>
      <name>yarn.resourcemanager.scheduler.class</name>
      <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
      <description>Use the Fair Scheduler</description>
    </property>
  </configuration>
- Create the directories referenced in the configuration files:
  mkdir /data/nn
  mkdir /data/dn
  mkdir /data/nm-local
  mkdir /data/nm-log
  chown hadoop:hadoop /data/nn
  chown hadoop:hadoop /data/dn
  chown hadoop:hadoop /data/nm-local
  chown hadoop:hadoop /data/nm-log
- Format the filesystem:
  # Make sure this runs as the hadoop user
  su - hadoop
  # Format the NameNode
  hdfs namenode -format
- Start/stop the whole HDFS cluster with one command (the hadoop user must have passwordless SSH set up):
  # Switch to the hadoop user
  su - hadoop
  # Start the HDFS cluster
  start-dfs.sh
  # Stop the HDFS cluster
  stop-dfs.sh
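Not in the original notes, but after start-dfs.sh two standard checks confirm the cluster is healthy:
  jps                    # expect NameNode, SecondaryNameNode and DataNode processes
  hdfs dfsadmin -report  # should list all three DataNodes as live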
- Start/stop individual HDFS daemons on a single machine:
  hdfs --daemon (start|status|stop) (namenode|secondarynamenode|datanode)
- Configure the HDFS daemons to start on boot:
- namenode
  cat /etc/systemd/system/hadoop-namenode.service
  [Unit]
  Description=hadoop-namenode
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=forking
  ExecStart=/data/server/hadoop/bin/hdfs --daemon start namenode
  ExecStop=/data/server/hadoop/bin/hdfs --daemon stop namenode
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hadoop-namenode
  systemctl enable hadoop-namenode
- secondarynamenode
  cat /etc/systemd/system/hadoop-secondarynamenode.service
  [Unit]
  Description=hadoop-secondarynamenode
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=forking
  ExecStart=/data/server/hadoop/bin/hdfs --daemon start secondarynamenode
  ExecStop=/data/server/hadoop/bin/hdfs --daemon stop secondarynamenode
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hadoop-secondarynamenode
  systemctl enable hadoop-secondarynamenode
- datanode
  cat /etc/systemd/system/hadoop-datanode.service
  [Unit]
  Description=hadoop-datanode
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=forking
  ExecStart=/data/server/hadoop/bin/hdfs --daemon start datanode
  ExecStop=/data/server/hadoop/bin/hdfs --daemon stop datanode
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hadoop-datanode
  systemctl enable hadoop-datanode
- Start/stop the whole YARN cluster with one command:
  # Switch to the hadoop user
  su - hadoop
  # Start the YARN cluster (ResourceManager, NodeManagers, ProxyServer)
  start-yarn.sh
  # Stop the YARN cluster
  stop-yarn.sh
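A quick check (not in the original notes) is to list the registered NodeManagers from any node:
  yarn node -list   # all three NodeManagers should show up as RUNNING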
- Start/stop individual YARN daemons on a single machine:
  yarn --daemon (start|stop) (resourcemanager|nodemanager|proxyserver)
- Start/stop the MapReduce history server on its machine:
  mapred --daemon (start|stop) historyserver
- Configure the YARN daemons to start on boot:
- resourcemanager
  cat /etc/systemd/system/hadoop-resourcemanager.service
  [Unit]
  Description=hadoop-resourcemanager
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=forking
  ExecStart=/data/server/hadoop/bin/yarn --daemon start resourcemanager
  ExecStop=/data/server/hadoop/bin/yarn --daemon stop resourcemanager
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hadoop-resourcemanager
  systemctl enable hadoop-resourcemanager
- nodemanager
  cat /etc/systemd/system/hadoop-nodemanager.service
  [Unit]
  Description=hadoop-nodemanager
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=forking
  ExecStart=/data/server/hadoop/bin/yarn --daemon start nodemanager
  ExecStop=/data/server/hadoop/bin/yarn --daemon stop nodemanager
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hadoop-nodemanager
  systemctl enable hadoop-nodemanager
- proxyserver
  cat /etc/systemd/system/hadoop-proxyserver.service
  [Unit]
  Description=hadoop-proxyserver
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=forking
  ExecStart=/data/server/hadoop/bin/yarn --daemon start proxyserver
  ExecStop=/data/server/hadoop/bin/yarn --daemon stop proxyserver
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hadoop-proxyserver
  systemctl enable hadoop-proxyserver
- historyserver
  cat /etc/systemd/system/hadoop-historyserver.service
  [Unit]
  Description=hadoop-historyserver
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=forking
  ExecStart=/data/server/hadoop/bin/mapred --daemon start historyserver
  ExecStop=/data/server/hadoop/bin/mapred --daemon stop historyserver
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hadoop-historyserver
  systemctl enable hadoop-historyserver
2. Deploying Hive
- Deploy a MySQL database and set up the account and privileges (a sketch follows).
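The SQL is not shown in the original; a minimal sketch, assuming MySQL 5.7 on hadoop1 and matching the root/123456 credentials used in hive-site.xml below (a dedicated user is preferable in practice):
  mysql -uroot -p <<'EOF'
  -- Database that will hold the Hive metastore tables
  CREATE DATABASE IF NOT EXISTS hive CHARACTER SET utf8;
  -- Allow connections from the cluster hosts with the password used in hive-site.xml
  CREATE USER IF NOT EXISTS 'root'@'%' IDENTIFIED BY '123456';
  GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%';
  FLUSH PRIVILEGES;
  EOF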
- Download Hive, upload it, extract it, and create a symlink (see the sketch below).
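The Hive version is not stated in the original; assuming apache-hive-3.1.3-bin.tar.gz as an example, the layout mirrors the Hadoop install:
  tar -zxvf apache-hive-3.1.3-bin.tar.gz -C /data/server/
  ln -s /data/server/apache-hive-3.1.3-bin /data/server/hive
  chown -R hadoop:hadoop /data/server/apache-hive-3.1.3-bin /data/server/hive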
- Download the MySQL JDBC driver jar from the MySQL website and put it into Hive's lib directory (example below).
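The driver filename is not given in the original; as an example, Connector/J 5.1 provides the com.mysql.jdbc.Driver class configured in hive-site.xml:
  cp mysql-connector-java-5.1.49.jar /data/server/hive/lib/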
- Edit the configuration files (hive-env.sh and hive-site.xml).
- hive-env.sh
  export HADOOP_HOME=/data/server/hadoop
  export HIVE_CONF_DIR=/data/server/hive/conf
  export HIVE_AUX_JARS_PATH=/data/server/hive/lib
- hive-site.xml
  <configuration>
    <property>
      <name>javax.jdo.option.ConnectionURL</name>
      <value>jdbc:mysql://hadoop1:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionDriverName</name>
      <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionUserName</name>
      <value>root</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionPassword</name>
      <value>123456</value>
    </property>
    <property>
      <name>hive.server2.thrift.bind.host</name>
      <value>hadoop1</value>
    </property>
    <property>
      <name>hive.metastore.uris</name>
      <value>thrift://hadoop1:9083</value>
    </property>
    <property>
      <name>hive.metastore.event.db.notification.api.auth</name>
      <value>false</value>
    </property>
  </configuration>
- Initialize the metastore database:
  ./schematool -initSchema -dbType mysql -verbose
- Start the Hive metastore service (run as the hadoop user):
  - Foreground:
    bin/hive --service metastore
  - Background:
    nohup bin/hive --service metastore >> logs/metastore.log 2>&1 &
- Configure the metastore service to start on boot:
- Write a startup script (create it as the hadoop user and make it executable):
  cat /data/server/hive/bin/start-hive-metastore.sh
  #!/bin/bash
  /data/server/hive/bin/hive --service metastore >> /data/server/hive/logs/metastore.log 2>&1
- Configure the service file:
  cat /etc/systemd/system/hive-metastore.service
  [Unit]
  Description=hive-metastore
  After=network.target

  [Service]
  User=hadoop
  Group=hadoop
  Type=simple
  ExecStart=/data/server/hive/bin/start-hive-metastore.sh
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hive-metastore
  systemctl enable hive-metastore
- Start the Hive CLI (run as the hadoop user):
  bin/hive
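A simple smoke test (not in the original notes) once the CLI is up; the database and table names here are arbitrary examples:
  hive> show databases;
  hive> create database if not exists test;
  hive> use test;
  hive> create table t1 (id int, name string);
  hive> insert into t1 values (1, 'hello');
  hive> select * from t1;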
- Start HiveServer2 (run as the hadoop user):
  nohup bin/hive --service hiveserver2 >> logs/hiveserver2.log 2>&1 &
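Not shown in the original, but HiveServer2 can be verified with Beeline (it listens on Thrift port 10000 by default):
  bin/beeline -u jdbc:hive2://hadoop1:10000 -n hadoop
  0: jdbc:hive2://hadoop1:10000> show databases;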
- Configure HiveServer2 to start on boot:
- Write a startup script (create it as the hadoop user and make it executable):
  cat /data/server/hive/bin/start-hive-hiveserver2.sh
  #!/bin/bash
  /data/server/hive/bin/hive --service hiveserver2 >> /data/server/hive/logs/hiveserver2.log 2>&1
- Configure the service file:
  cat /etc/systemd/system/hive-hiveserver2.service
  [Unit]
  Description=hive-hiveserver2
  After=network.target hive-metastore.service

  [Service]
  User=hadoop
  Group=hadoop
  Type=simple
  ExecStart=/data/server/hive/bin/start-hive-hiveserver2.sh
  PrivateTmp=false

  [Install]
  WantedBy=multi-user.target

  systemctl daemon-reload
  systemctl start hive-hiveserver2
  systemctl enable hive-hiveserver2
- Final result once everything is running:
  [hadoop@hadoop1 bin]$ jps
  1313 DataNode
  9217 Jps
  1315 WebAppProxyServer
  1320 NodeManager
  8808 RunJar
  1306 SecondaryNameNode
  1322 ResourceManager
  1309 NameNode
  8958 RunJar
  1311 JobHistoryServer