HDFS deployment and configuration (the JDK environment is already deployed at /export/server/jdk)
The workers file lists the worker (DataNode) hosts, by hostname or IP.
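For the three-node layout used throughout these notes, the workers file would contain one hostname per line (node1 also doubles as a DataNode here):
node1
node2
node3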
hadoop-env.sh records the JDK location, HADOOP_HOME, and the configuration and log directories:
export JAVA_HOME=/export/server/jdk
export HADOOP_HOME=/export/server/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
core-site.xml records the core cluster settings (each XML property is a name/value pair, i.e. key:value):
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:8020</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
</configuration>
This designates node1 as the NameNode, with 8020 as the default HDFS communication (RPC) port; the NameNode process must later be started on node1.
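Once HDFS is running, fs.defaultFS is what lets paths omit the scheme; the two commands below are equivalent:
hadoop fs -ls /
hadoop fs -ls hdfs://node1:8020/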
hdfs-site.xml
<configuration>
  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>700</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/nn</value>
  </property>
  <property>
    <name>dfs.namenode.hosts</name>
    <value>node1,node2,node3</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/dn</value>
  </property>
</configuration>
From top to bottom these configure: the permission bits (700) for files the DataNode creates, the NameNode metadata storage directory, the hosts permitted to connect to the NameNode, the default HDFS block size (268435456 bytes = 256 MB), the NameNode handler (concurrency) thread count, and the DataNode data storage directory.
Create the data storage directory on each DataNode host:
mkdir -p /data/dn
Create the storage directories on the NameNode host (node1 also runs a DataNode):
mkdir -p /data/dn
mkdir -p /data/nn
On node2 and node3, copy the installation over and recreate the symlink (run the ln -s from inside /export/server):
scp -r root@node1:/export/server/hadoop-3.3.0 /export/server/
ln -s /export/server/hadoop-3.3.0/ hadoop
Configure environment variables and add HADOOP_HOME to PATH.
On node1, node2 and node3 run:
vim /etc/profile
export HADOOP_HOME=/export/server/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
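After saving /etc/profile, reload it so the variables take effect in the current shell, then verify the hadoop command resolves:
source /etc/profile
hadoop version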
Grant permissions to the hadoop user by changing directory ownership:
chown -R hadoop:hadoop /data
chown -R hadoop:hadoop /export
-----------------------------------------------------------------------------------
-----------------------------------------------------------------------------------
Initialize and start Hadoop
Switch to the hadoop user on node1, node2 and node3:
su - hadoop
On node1 (the NameNode), format the filesystem. This is a one-time operation for the whole cluster (the older hadoop namenode -format still works but is deprecated):
hdfs namenode -format
One-command start/stop of HDFS, run from the NameNode:
start-dfs.sh
stop-dfs.sh
Then use jps to check that the expected processes are running; if they appear, startup succeeded.
The HDFS web UI can be viewed in a browser at port 9870 on the NameNode host (http://node1:9870).
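With the layout here, jps on node1 would typically show something like the following (PIDs will differ; a sketch, assuming the SecondaryNameNode also runs on node1):
6689 NameNode
6834 DataNode
7077 SecondaryNameNode
7214 Jps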
YARN and MapReduce deployment and configuration
Configuration of mapred-env.sh:
export JAVA_HOME=/export/server/jdk
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
Configuration of mapred-site.xml:
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>Run MapReduce on YARN</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
    <description>JobHistory server RPC address</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
    <description>JobHistory server web UI address</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/data/mr-history/tmp</value>
    <description>Intermediate (in-progress) job history path in HDFS</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/data/mr-history/done</value>
    <description>Completed job history path in HDFS</description>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
    <description>Set HADOOP_MAPRED_HOME to HADOOP_HOME for the AM</description>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
    <description>Set HADOOP_MAPRED_HOME to HADOOP_HOME for map tasks</description>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
    <description>Set HADOOP_MAPRED_HOME to HADOOP_HOME for reduce tasks</description>
  </property>
</configuration>
vim yarn-env.sh
Enter the following configuration:
export JAVA_HOME=/export/server/jdk
export HADOOP_HOME=/export/server/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
vim yarn-site.xml
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
    <description>Run the ResourceManager on node1</description>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/data/nm-local</value>
    <description>Local path for NodeManager intermediate data</description>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/data/nm-log</value>
    <description>Local path for NodeManager logs</description>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>Enable the shuffle service for MapReduce</description>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://node1:19888/jobhistory/logs</value>
    <description>JobHistory server URL</description>
  </property>
  <property>
    <name>yarn.web-proxy.address</name>
    <value>node1:8089</value>
    <description>Web proxy server host and port</description>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>Enable log aggregation</description>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/tmp/logs</value>
    <description>HDFS path for aggregated application logs</description>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    <description>Use the Fair Scheduler</description>
  </property>
</configuration>
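The two local paths referenced above must exist on every NodeManager host before YARN starts; a minimal sketch, following the same ownership scheme used earlier:
mkdir -p /data/nm-local /data/nm-log
chown -R hadoop:hadoop /data/nm-local /data/nm-log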
One-command start/stop of YARN: start-yarn.sh / stop-yarn.sh
Start/stop the JobHistory server: mapred --daemon start historyserver / mapred --daemon stop historyserver
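As an end-to-end smoke test of YARN + MapReduce, the bundled pi example can be submitted (the jar path assumes the hadoop-3.3.0 build installed above):
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.0.jar pi 3 100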
Hive installation and deployment
First install MySQL.
Update the yum repository:
rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022
rpm -Uvh http://repo.mysql.com//mysql57-community-release-el7-7.noarch.rpm
Install MySQL 5.7:
yum -y install mysql-community-server
Start the service and enable it at boot:
systemctl start mysqld
systemctl enable mysqld
systemctl status mysqld
Retrieve the random root password that MySQL generates on first startup:
grep 'temporary password' /var/log/mysqld.log
mysql -uroot -p
After logging in to MySQL, set a new root password.
Two variables must be changed first, lowering the password policy so a short, low-complexity password (minimum length 4) is accepted:
set global validate_password_policy=LOW;
set global validate_password_length=4;
ALTER USER 'root'@'localhost' IDENTIFIED BY '123456';
Grant remote access to root:
grant all privileges on *.* to root@'%' identified by '123456' with grant option;
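To confirm the grant took effect, a remote login from another node should now succeed (using the password set above):
mysql -uroot -p123456 -h node1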
Hive depends on Hadoop (HDFS, YARN, MapReduce), so a Hadoop proxy user must be configured,
i.e. the hadoop user must be allowed to impersonate (act as) other users.
Configure this in core-site.xml:
<property>
  <name>hadoop.proxyuser.hadoop.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.hadoop.groups</name>
  <value>*</value>
</property>
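Since core-site.xml changed, the file must be distributed to the other nodes and HDFS restarted for the proxy-user settings to take effect; a minimal sketch, assuming the paths used in these notes:
scp /export/server/hadoop/etc/hadoop/core-site.xml node2:/export/server/hadoop/etc/hadoop/
scp /export/server/hadoop/etc/hadoop/core-site.xml node3:/export/server/hadoop/etc/hadoop/
stop-dfs.sh && start-dfs.sh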
Upload apache-hive-3.1.2-bin.tar.gz and extract it into /export/server:
tar -xzvf apache-hive-3.1.2-bin.tar.gz
Create a symlink: ln -s apache-hive-3.1.2-bin hive
Copy the MySQL connector jar mysql-connector-java-5.1.32.jar into Hive's lib directory:
cp /home/hadoop/mysql-connector-java-5.1.32.jar ./apache-hive-3.1.2-bin/lib/
Rename the template file (optionally back it up first):
mv hive-env.sh.template hive-env.sh
vim /export/server/hive/conf/hive-env.sh
export JAVA_HOME=/export/server/jdk
export HIVE_CONF_DIR=/export/server/hive/conf
export HIVE_AUX_JARS_PATH=/export/server/hive/lib
Create Hive's configuration file hive-site.xml:
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://node1:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>
  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>node1</value>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://node1:9083</value>
  </property>
  <property>
    <name>hive.metastore.event.db.notification.api.auth</name>
    <value>false</value>
  </property>
</configuration>
Create the metastore database in MySQL:
after logging in to MySQL, run CREATE database hive charset UTF8;
Then exit and run the metastore schema initialization command:
/export/server/hive/bin/schematool -initSchema -dbType mysql -verbose
If the following error appears: Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)
it is caused by a guava version conflict between Hadoop and Hive, and can be resolved as follows (see also this reference):
https://blog.csdn.net/weixin_52010459/article/details/127320346
ll /export/server/hadoop/share/hadoop/common/lib/ | grep guava
ll /export/server/hive/lib/ | grep guava
mv /export/server/hive/lib/guava-19.0.jar /export/server/hive/lib/guava-19.0.jar.bak
cp /export/server/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /export/server/hive/lib/
That is, replace the older guava in Hive's lib directory with Hadoop's newer version, then re-run the schematool command above.
Create a log directory:
mkdir /export/server/hive/logs
Start the metastore (metadata management service) in the background (run from /export/server/hive, since the command uses relative paths):
nohup bin/hive --service metastore >>logs/metastore.log 2>&1 &
(both stdout and stderr are redirected into the log file)
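To check that the metastore came up: jps should show a new RunJar process, and port 9083 (hive.metastore.uris above) should be listening; for example (netstat requires the net-tools package):
jps
netstat -anp | grep 9083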
The Hive SQL shell (CLI) can then be started with /export/server/hive/bin/hive
Start HiveServer2 in the background:
nohup bin/hive --service hiveserver2 >>logs/hiveserver2.log 2>&1 &
After HiveServer2 is up, there are two ways to connect:
1. Through the bin/beeline client:
use !connect jdbc:hive2://node1:10000 to connect to the port HiveServer2 listens on
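A sample beeline session under the configuration above (HiveServer2 on node1:10000; beeline then prompts for a username, use hadoop, and a password, which can be left empty):
bin/beeline
!connect jdbc:hive2://node1:10000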
2. Through a third-party client such as DataGrip or DBeaver.