CDH5.14.4集群搭建
准备工作
#查看当前操作系统的发行版本信息
cat /etc/redhat-release
#Linux查看cpu相关信息,包括型号、主频、内核信息等
cat /proc/cpuinfo
#将test.php 文件所属的用户设置成 yangyuanliang
chown yangyuanliang test.php
#将test.php 文件的所属用户设置成yangyuanliang,所属用户组设置成staff
chown yangyuanliang:staff test.php
#将test 及其内部文件所属用户设置成yangyuanliang,所属用户组设置成staff
chown -R yangyuanliang:staff test/
note1:虚拟机需要设置域名解析及固定ip
1.设置域名解析
vi /etc/resolv.conf
添加如下内容:
nameserver 8.8.8.8
nameserver 8.8.4.4
search localdomain
保存退出
2.配置固定ip
cd /etc/sysconfig/network-scripts/
vi ifcfg-ens33
修改BOOTPROTO="static"
尾部添加如下内容
IPADDR=192.168.1.101
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
注意:修改每台主机的ip
保存退出
重启网络
service network restart
一、网络配置(所有节点)
1.1. 修改主机名
vim /etc/sysconfig/network
#添加以下内容
NETWORKING=yes
HOSTNAME=cdh1
NETWORKING_IPV6=no
PEERNTP=no
重启网络服务
service network restart
hostname cdh1 #仅在cdh1节点执行
hostname cdh2 #仅在cdh2节点执行
hostname cdh3 #仅在cdh3节点执行
查看是否成功
hostname
#显示以下内容
cdh1
1.2. 修改ip与主机名的对应关系
vim /etc/hosts
#添加以下内容
192.168.254.130 cdh1
192.168.254.140 cdh2
192.168.254.150 cdh3
二、配置ssh免密登录(所有节点)
2.1. 生成本地密钥
ssh-keygen -t rsa #不需要填写内容,一路回车
#将公钥复制到各节点
ssh-copy-id cdh1
ssh-copy-id cdh2
ssh-copy-id cdh3
#若ssh端口号不是默认22,例如改成了10022则用以下命令
ssh-copy-id -p 10022 cdh1
ssh-copy-id -p 10022 cdh2
ssh-copy-id -p 10022 cdh3
三、关闭Selinux(所有节点)
3.1. 更改属性内容
vim /etc/selinux/config
显示如下:
使 SELINUX=disabled ,SELINUXTYPE=targeted。
更改后结果如下:
3.2 查看是否更改成功
/usr/sbin/sestatus -v
3.3 禁用大内存页面
echo never > /sys/kernel/mm/transparent_hugepage/defrag && echo never > /sys/kernel/mm/transparent_hugepage/enabled
#然后编辑/etc/rc.local,在文件尾部追加以下两行,使设置在重启后仍然生效
vim /etc/rc.local
echo never > /sys/kernel/mm/transparent_hugepage/defrag
echo never > /sys/kernel/mm/transparent_hugepage/enabled
3.4 设置swappiness为0
sysctl -w vm.swappiness=0
echo "vm.swappiness=0" >> /etc/sysctl.conf
四、安装JDK(所有节点)
4.1. 卸载系统默认安装的open JDK
rpm -qa | grep jdk
4.1.2 卸载
rpm -e --nodeps (上一步查找到的JDK名称)
4.2. 安装JDK1.8(8u181)
这里需要强调一下CDH5默认识别的jdk路径为:/usr/java/default
mkdir -p /usr/java/default && tar -zxvf jdk-8u181-linux-x64.tar.gz -C /usr/java/default
#编辑配置文件
vim /etc/profile
#在最后添加
export JAVA_HOME=/usr/java/default/jdk1.8.0_181
export PATH=$JAVA_HOME/bin:$PATH
#通过source命令使配置文件马上生效
source /etc/profile
五、安装Mysql数据库(主节点)
5.1 卸载系统已安装Mysql数据库
rpm -qa | grep mysql
rpm -e --nodeps (上一步查找到的mysql名称)
5.2 安装Mysql
# 查看系统自带的Mariadb
rpm -qa|grep mariadb
# 卸载系统自带的Mariadb
rpm -e --nodeps mariadb-libs-5.5.64-1.el7.x86_64
# 删除etc目录下的my.cnf
find / -name mysql
rm /etc/my.cnf
tar -xvf mysql-5.7.28-1.el7.x86_64.rpm-bundle.tar
请严格按照以下命令顺序安装
rpm -ivh mysql-community-common-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-compat-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-server-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-devel-5.7.28-1.el7.x86_64.rpm
也可以用下面这一条命令一次性完成解压并安装(与上面的分步命令等价,二选一即可)
tar -xvf mysql-5.7.28-1.el7.x86_64.rpm-bundle.tar && rpm -ivh mysql-community-common-5.7.28-1.el7.x86_64.rpm && rpm -ivh mysql-community-libs-5.7.28-1.el7.x86_64.rpm && rpm -ivh mysql-community-libs-compat-5.7.28-1.el7.x86_64.rpm && rpm -ivh mysql-community-client-5.7.28-1.el7.x86_64.rpm && rpm -ivh mysql-community-server-5.7.28-1.el7.x86_64.rpm && rpm -ivh mysql-community-devel-5.7.28-1.el7.x86_64.rpm
# 安装完成,数据库初始化
mysqld --initialize --user=mysql
#初始化完成,生成一个临时的数据库root密码:
cat /var/log/mysqld.log
# 启动MySQL:
systemctl start mysqld.service
# 查看数据库状态
systemctl status mysqld.service
# 登录MySQL数据库,并设置新的密码:
mysql -uroot -p
set password = password('123456');
note2:若mysql -uroot -p登录不进去,设置跳过登录,再更改密码
1.停止MySQL服务
service mysqld stop
2.既然是密码错误,那么就先跳过密码验证的步骤
vim /etc/my.cnf
3.在 [mysqld] 底下添加语句:
skip-grant-tables
4.保存退出
5.启动服务
service mysqld start
6.登录MySQL数据库,并设置新的密码:
mysql -uroot -p
set password for 'root'@'localhost'=password('123456');
5.2.4 Mysql 开机启动
#创建mysql.service文件
touch /usr/lib/systemd/system/mysql.service
#编辑mysql.service文件
vim /usr/lib/systemd/system/mysql.service
#将以下内容复制到mysql.service中
[Unit]
Description=MySQL Server
Documentation=man:mysqld(8)
Documentation=http://dev.mysql.com/doc/refman/en/using-systemd.html
After=network.target
After=syslog.target
[Install]
WantedBy=multi-user.target
[Service]
User=mysql
Group=mysql
ExecStart=/usr/local/mysql/bin/mysqld --defaults-file=/etc/my.cnf
LimitNOFILE = 5000
#Restart=on-failure
#RestartPreventExitStatus=1
#PrivateTmp=false
查看是否启动
ps -ef | grep mysql
设置开机启动
systemctl enable mysql
mysql -uroot -p123456
#hive
create database hive DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
#activity monitor
create database amon DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
#Oozie
create database oozie DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
#Hue
create database hue DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
#授权root用户在主节点拥有所有数据库的访问权限
grant all privileges on *.* to 'root'@'%' identified by '123456' with grant option;
grant all privileges on *.* to'hive'@'%' identified by 'hive' with grant option;
grant all privileges on *.* to'oozie'@'%' identified by 'oozie' with grant option;
grant all privileges on *.* to'hue'@'%' identified by 'hue' with grant option;
grant all privileges on *.* to'scm'@'%' identified by 'scm' with grant option;
flush privileges;
5.2.7 将mysql的jar包拷贝到/usr/share/java
mkdir /usr/share/java/ && cp ./mysql-connector-java-5.1.46.jar /usr/share/java/mysql-connector-java.jar
六、CMS安装
安装Cloudera Manager Server 和Agent
6.1 主节点解压安装
cloudera manager的目录默认位置在/opt下,将解压后的cm-5.14.4和cloudera目录放到/opt目录下。
tar -zxvf ./cloudera-manager-centos7-cm5.14.4_x86_64.tar.gz -C /opt/
6.2 为Cloudera Manager 5建立数据库
6.2.1 将mysql-connector-java-5.1.46.jar放到/opt/cm-5.14.4/share/cmf/lib/中,并重命名为mysql-connector-java.jar。
cp ./mysql-connector-java-5.1.46.jar /opt/cm-5.14.4/share/cmf/lib/mysql-connector-java.jar
# Hive的拷贝位置为:
cp ./mysql-connector-java-5.1.46.jar /opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/lib/hive/lib/
# Oozie的拷贝位置为:
cp ./mysql-connector-java-5.1.46.jar /var/lib/oozie/
/opt/cm-5.14.4/share/cmf/schema/scm_prepare_database.sh mysql scm -hcdh1 -uroot -p123456 --scm-host cdh1 scm scm scm
6.3 Agent配置
修改/opt/cm-5.14.4/etc/cloudera-scm-agent/config.ini中的server_host为主节点的主机名。
vim /opt/cm-5.14.4/etc/cloudera-scm-agent/config.ini
server_host=cdh1
6.4 同步Agent到其他节点
scp -r /opt/cm-5.14.4/ cdh2:/opt/
scp -r /opt/cm-5.14.4/ cdh3:/opt/
#如若ssh端口不是默认22,例如10022,则
scp -P 10022 -r /opt/cm-5.14.4/ cdh2:/opt/
scp -P 10022 -r /opt/cm-5.14.4/ cdh3:/opt/
6.5 在所有节点创建cloudera-scm用户:
useradd --system --home=/opt/cm-5.14.4/run/cloudera-scm-server/ --no-create-home --shell=/bin/false --comment "Cloudera SCM User" cloudera-scm
所有节点更改cm相关文件夹的用户及用户组
# chown -R cloudera-scm:cloudera-scm /opt/cloudera
# chown -R cloudera-scm:cloudera-scm /opt/cm-5.14.4
6.6 准备Parcels,用以安装CDH5
将CDH5相关的Parcel包放到主节点的/opt/cloudera/parcel-repo/目录中。如果没有对应目录,则自己创建:(原则上只在主节点即可)
cp CDH-5.14.4-1.cdh5.14.4.p0.3-el7.parcel /opt/cloudera/parcel-repo/
cp CDH-5.14.4-1.cdh5.14.4.p0.3-el7.parcel.sha /opt/cloudera/parcel-repo/
cp manifest.json_CDH /opt/cloudera/parcel-repo/manifest.json
#也可用以下一条命令一次性完成上述全部复制
cp CDH-5.14.4-1.cdh5.14.4.p0.3-el7.parcel /opt/cloudera/parcel-repo/ && cp CDH-5.14.4-1.cdh5.14.4.p0.3-el7.parcel.sha /opt/cloudera/parcel-repo/ && cp manifest.json_CDH /opt/cloudera/parcel-repo/manifest.json
6.7 启动服务
# 相关启动脚本
/opt/cm-5.14.4/etc/init.d/cloudera-scm-server start
/opt/cm-5.14.4/etc/init.d/cloudera-scm-agent start
/opt/cm-5.14.4/etc/init.d/cloudera-scm-server stop
/opt/cm-5.14.4/etc/init.d/cloudera-scm-agent stop
note3:打开http://ip:7180(配置Cloudera Manager,用admin admin登录) 若打不开,则需要关闭防火墙
关闭防火墙
systemctl stop firewalld
systemctl disable firewalld
systemctl status firewalld
七、Spark安装
7.1 简介:
在我的CDH5.14.4集群中,默认安装的spark是1.6版本,这里需要将其升级为spark2.x版本。经查阅官方文档,发现spark1.6和2.x是可以并行安装的,也就是说可以不用删除默认的1.6版本,可以直接安装2.x版本,它们各自用的端口也是不一样的( History Server port is 18089 instead of the usual 18088)。这里做一下安装spark2.2.0版本的步骤记录。
7.2 安装准备
csd包:http://archive.cloudera.com/spark2/csd/SPARK2_ON_YARN-2.2.0.cloudera3.jar
parcel包:http://archive.cloudera.com/spark2/parcels/2.2.0.cloudera3/
SPARK2-2.2.0.cloudera3-1.cdh5.13.3.p0.556753-el6.parcel
SPARK2-2.2.0.cloudera3-1.cdh5.13.3.p0.556753-el6.parcel.sha1
manifest.json
注意,下载对应版本的包,比如:CentOS7系统,下载el7的包,若是CentOS6,就要下el6的包。
特别注意,如果你安装spark2.2,按照上面下载就是了,注意一下操作系统的版本;如果你不打算安装spark2.2,想安装其他版本,比如2.0,那么一定要注意下面的事项:
如果你仔细浏览过这些路径,会发现下图中,csd和parcel包会有.cloudera1和.cloudera2之分,和2.0与2.1版本之分,那么在下载parcel时也要注意,下载对应的包。即如果下载到的是.cloudera1的csd包,下载parcel包也要下载文件名中是.cloudera1的包,不能下载.cloudera2的包,同时csd2.0的包也不能用于parcel2.1的包,不然很可能安装不上。
7.3 开始安装
7.3.1.安装前可以停掉集群和Cloudera Management Service
7.3.2 下面的操作,只需要在安装spark2的机器上面进行,我只选择CM server机器。
7.3.3 上传CSD包到机器的/opt/cloudera/csd目录,并且修改文件的用户和组。
注意:如果本目录下有其他的jar包,把它们删掉或者移到其他目录
cp ./SPARK2_ON_YARN-2.3.0.cloudera3.jar /opt/cloudera/csd/
#备注:修改用户组
chown cloudera-scm:cloudera-scm /opt/cloudera/csd/SPARK2_ON_YARN-2.3.0.cloudera3.jar
7.3.4 上传parcel包到机器的/opt/cloudera/parcel-repo目录下。
注意。如果有其他的安装包,不用删除 。但是如果本目录下有其他的重名文件比如manifest.json文件,把它重命名备份掉。然后把那3个parcel包的文件放在这里。
SPARK2-2.2.0.cloudera3-1.cdh5.13.3.p0.556753-el6.parcel
SPARK2-2.2.0.cloudera3-1.cdh5.13.3.p0.556753-el6.parcel.sha1
manifest.json
备注
SPARK2-2.2.0.cloudera3-1.cdh5.13.3.p0.556753-el6.parcel.sha1
更名为:
SPARK2-2.2.0.cloudera3-1.cdh5.13.3.p0.556753-el6.parcel.sha
其中,SPARK2-2.2.0.cloudera3-1.cdh5.13.3.p0.556753-el6.parcel.torrent是CDH分配时候自动生成的
mv /opt/cloudera/parcel-repo/manifest.json /opt/cloudera/parcel-repo/manifest.json_CDH && cp ./manifest.json_Spark /opt/cloudera/parcel-repo/manifest.json && cp ./SPARK2-2.3.0.cloudera3-1.cdh5.13.3.p0.458809-el7.parcel* /opt/cloudera/parcel-repo/
7.3.5 如果刚刚没有停掉CM和集群,现在将他们停掉。然后运行命令。
备注: 我启动了server,没有启动agent,网上有说法是server和agent全部启动
/opt/cm-5.14.4/etc/init.d/cloudera-scm-server restart
7.3.6 把CM和集群启动起来。然后点击主机->Parcel页面,看是否多了个spark2的选项。
如下图,你这里此时应该是分配按钮,点击,等待操作完成后,点击激活按钮
激活后,点击你的群集-》添加服务,添加spark2服务。注意,如果你这里看不到spark2服务,就请检查你的CSD包和parcel包是否对应,上面的步骤是否有漏掉。正常情况下,应该是能用了。
7.5 spark-shell启动问题
[hdfs@hadoop11 ~]$ spark2-shell
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/fs/FSDataInputStream
at org.apache.spark.deploy.SparkSubmitArguments$$anonfun$mergeDefaultSparkProperties$1.apply(SparkSubmitArguments.scala:124)
at org.apache.spark.deploy.SparkSubmitArguments$$anonfun$mergeDefaultSparkProperties$1.apply(SparkSubmitArguments.scala:124)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.deploy.SparkSubmitArguments.mergeDefaultSparkProperties(SparkSubmitArguments.scala:124)
at org.apache.spark.deploy.SparkSubmitArguments.<init>(SparkSubmitArguments.scala:110)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:112)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.fs.FSDataInputStream
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 7 more
解决:
拷贝文件
cp /opt/cloudera/parcels/CDH/etc/spark/conf.dist/* /opt/cloudera/parcels/SPARK2/etc/spark2/conf.dist/
配置spark-env.sh文件
vim /opt/cloudera/parcels/SPARK2/etc/spark2/conf.dist/spark-env.sh
添加如下内容
export SPARK_DIST_CLASSPATH=$(hadoop classpath) #指定hadoop class文件目录
export HADOOP_CONF_DIR=/etc/hadoop/conf #指定hadoop配置文件目录
7.6 spark安装问题
+ replace '{{JAVA_LIBRARY_PATH}}' '' /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf/yarn-site.xml
+ perl -pi -e 's#{{JAVA_LIBRARY_PATH}}##g' /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf/yarn-site.xml
+ replace '{{CMF_CONF_DIR}}' /etc/spark/conf.cloudera.spark_on_yarn/yarn-conf /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf/yarn-site.xml
+ perl -pi -e 's#{{CMF_CONF_DIR}}#/etc/spark/conf.cloudera.spark_on_yarn/yarn-conf#g' /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf/yarn-site.xml
+ '[' -d /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/hbase-conf ']'
++ get_default_fs /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf
++ get_hadoop_conf /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf fs.defaultFS
++ local conf=/opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf
++ local key=fs.defaultFS
++ '[' 1 == 1 ']'
++ /opt/cloudera/parcels/CDH-5.14.4-1.cdh5.14.4.p0.3/lib/hadoop/../../bin/hdfs --config /opt/cloudera-manager/cm-5.14.4/run/cloudera-scm-agent/process/ccdeploy_spark-conf_etcsparkconf.cloudera.spark_on_yarn_-519253865165339747/spark-conf/yarn-conf getconf -confKey fs.defaultFS
Error: JAVA_HOME is not set and could not be found.
+ DEFAULT_FS=
解决:在Cloudera Manager的配置搜索框中输入:java_home,将其值设置为JDK的安装路径(例如本文中的/usr/java/default/jdk1.8.0_181),保存后重试。
八、Kafka安装
8.1 上传kafka并复制到/opt/cloudera/parcel-repo/
#4.1.0版本
cp ~/KAFKA-4.1.0-1.4.1.0.p0.4-el7.parcel* /opt/cloudera/parcel-repo/
mv /opt/cloudera/parcel-repo/manifest.json /opt/cloudera/parcel-repo/manifest.json_Spark
cp ~/manifest.json_kafka /opt/cloudera/parcel-repo/manifest.json
#2.2.0版本
cp ~/KAFKA-2.2.0-1.2.2.0.p0.68-el7.parcel* /opt/cloudera/parcel-repo/ && mv /opt/cloudera/parcel-repo/manifest.json /opt/cloudera/parcel-repo/manifest.json_Spark && cp ~/manifest.json_kafka2.2.0 /opt/cloudera/parcel-repo/manifest.json
九、辅助设置
9.1 防火墙设置
systemctl status firewalld
systemctl stop firewalld
systemctl disable firewalld
9.2 Hue连接失败
在部署完毕后进行群集设置时,Hue数据库无法连接,显示如下:
此时查看服务器中CMS的搭建日志如下
此错误为openssl版本过低导致,需升级openssl版本
9.3 Oozie创建失败
原因:
Oozie自动创建mysql的数据库oozie,里面有表,导致oozie重新创建表的时候失败。需要删除之前的oozie数据库,然后重建oozie。
解决:
进入mysql数据库,删除oozie数据库,重新新建一个