Prepare the template machine
1. Change the network configuration to a static IP
vim /etc/sysconfig/network-scripts/ifcfg-ens33
BOOTPROTO="static"
ONBOOT="yes"
IPADDR=192.168.41.11   (change to your own IP)
GATEWAY=192.168.41.2
DNS1=192.168.41.2
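After saving, restart the network service so the static IP takes effect (this assumes CentOS 7 and that ens33 is the NIC name used above):
systemctl restart network
ip addr show ens33   # confirm the new address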
2. Change the virtual machine's hostname
vim /etc/hostname
bigdata11
3. Map internal IPs to hostnames
vim /etc/hosts
192.168.41.11 bigdata11
192.168.41.12 bigdata12
192.168.41.13 bigdata13
192.168.41.14 bigdata14
192.168.41.15 bigdata15
192.168.41.16 bigdata16
4. Remove unneeded preinstalled packages
rpm -qa | grep java
Then remove each result one by one: rpm -e xxx --nodeps
Finally run rpm -qa | grep java again to make sure nothing is left.
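If several packages show up, a bulk one-liner (the same xargs pattern used for the MySQL uninstall below) removes them in one pass:
rpm -qa | grep java | xargs -n1 rpm -e --nodeps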
5. Disable the firewall (this opens all ports)
systemctl status firewalld
systemctl stop firewalld
systemctl disable firewalld
6. Adjust the VMware virtual network
Edit => Virtual Network Editor => VMnet8 =>
Subnet IP: 192.168.41.0
Subnet mask: 255.255.255.0
NAT settings: 192.168.41.2   (leave everything else unchanged)
7. Verify the internal IP with ifconfig and the machine name with hostname
Finally, check that you can log in remotely with Xshell.
Cloning
Create a full clone
1. Change the clone's internal IP
vim /etc/sysconfig/network-scripts/ifcfg-ens33
2. Change the hostname
vim /etc/hostname
3. Reboot the machine
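For example, for the second clone the values would follow the /etc/hosts mapping above (adjust per machine):
IPADDR=192.168.41.12   (in ifcfg-ens33)
bigdata12              (in /etc/hostname)
reboot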
Install MySQL
1. Upload the install packages (with rz, or drag and drop)
mysql-5.7.28-1.el7.x86_64.rpm-bundle.tar
mysql-5.7.28-el7-x86_64.tar.gz
2. Extract
tar -xvf ./mysql-5.7.28-1.el7.x86_64.rpm-bundle.tar
3. Remove everything related to mariadb
rpm -qa | grep mariadb
rpm -e --nodeps mariadb-libs-5.5.56-2.el7.x86_64
rpm -qa | grep mariadb   (confirm it is fully removed)
4. Install the RPMs in dependency order:
rpm -ivh mysql-community-common-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-compat-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-5.7.28-1.el7.x86_64.rpm
rpm -ivh mysql-community-server-5.7.28-1.el7.x86_64.rpm
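A quick check that all five packages registered (a missing one usually means a dependency-order mistake):
rpm -qa | grep mysql-community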
5.vim /etc/my.cnf
log-error=/var/log/mysqld.log
6. Initialize MySQL
mysqld --initialize --user=mysql
A temporary password is generated for root@localhost: xxx   (write down this temporary password)
7. Start MySQL
systemctl start mysqld
MySQL can now serve external connections on port 3306.
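One way to confirm it is listening (ss ships with CentOS 7; netstat would need net-tools installed):
ss -tnlp | grep 3306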
8. Log in to MySQL
mysql -uroot -p'password'   (quote the password if it contains special characters)
9. Change the password of MySQL's root user
set password = password('123456');
10. Reload the privilege tables
flush privileges;
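The notes stop at flush privileges. If other nodes need to reach this MySQL remotely (the Hive metastore setup at the end points at bigdata12:3306), a commonly added step (an assumption, not part of the original notes) is to grant remote access first; valid MySQL 5.7 syntax:
grant all privileges on *.* to 'root'@'%' identified by '123456';   -- hypothetical: allow root from any host
flush privileges;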
Uninstalling:
1. Stop MySQL
systemctl stop mysqld
2. Remove the MySQL packages
rpm -qa | grep mysql
rpm -qa | grep mysql | xargs -n1 rpm -e --nodeps
3. Delete the leftover directories on Linux
find / -name "mysql"
Note: do not delete matches under /sys/.
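A hedged sketch that lists the leftovers while excluding /sys/; review the output before deleting anything:
find / -name "mysql" 2>/dev/null | grep -v '^/sys/'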
4. Reinstall
Install Hadoop
Pseudo-distributed mode
1. Deploy the JDK
tar -zxvf ./jdk-8u45-linux-x64.gz -C ~/app/
ln -s ./jdk1.8.0_45/ java
vim ~/.bashrc
export JAVA_HOME=/home/hadoop/app/java
export PATH=${JAVA_HOME}/bin:$PATH
source ~/.bashrc
java -version
2. Deploy Hadoop
tar -zxvf ./hadoop-3.3.4.tar.gz -C ~/app/
ln -s ./hadoop-3.3.4/ hadoop
vim ~/.bashrc
#HADOOP_HOME
export HADOOP_HOME=/home/hadoop/app/hadoop
export PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin
source ~/.bashrc
vim hadoop-env.sh
export JAVA_HOME=/home/hadoop/app/java
hadoop version
3. Configure HDFS
vim core-site.xml
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://bigdata13:9000</value>   (change to your own hostname)
</property>
vim hdfs-site.xml
<property>
    <name>dfs.replication</name>
    <value>1</value>
</property>
4. Passwordless SSH (needed so the start scripts can run commands over ssh); set the hadoop user's password first
passwd hadoop
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
5. Format the filesystem
hdfs namenode -format
6. Start HDFS
start-dfs.sh
7. Check the HDFS processes
ps -ef | grep hdfs
jps
8. Open the NameNode web UI
http://bigdata13:9870/
http://192.168.41.13:9870/
9. Example: run a WordCount job
hadoop fs -mkdir /data
vim wc.data
hadoop fs -put ./wc.data /data
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar wordcount /data/wc.data /out
hadoop fs -get /out ./
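To inspect the result directly on HDFS instead of copying it down (part-r-00000 is the default name of the first reducer's output file):
hadoop fs -cat /out/part-r-00000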
stop-dfs.sh
10. Deploy YARN
vim mapred-site.xml
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
vim yarn-site.xml
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
</property>
11. Start YARN
start-yarn.sh
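After start-yarn.sh, jps should additionally show the ResourceManager and NodeManager processes:
jps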
12. Open the RM web UI
http://bigdata13:8088/
http://192.168.41.13:8088/
13. Change the HDFS storage directory
Stop the HDFS services first.
vim core-site.xml
<property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/data/hadoop</value>
</property>
cp -R /tmp/hadoop-hadoop /home/hadoop/data/hadoop
Restart.
When cleaning up the old location, delete hadoop-hadoop under /tmp:
rm -rf /tmp/hadoop-hadoop
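A quick sanity check after the restart: data written before the move should still be visible, assuming the cp above copied everything:
hadoop fs -ls /data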
Start Hadoop: start-all.sh
Stop Hadoop: stop-all.sh
Install Hadoop (fully distributed)
1. Prepare three machines (4 GB RAM, 2 CPUs, 40 GB disk)
Clone them, then on each change:
1. IP: vim /etc/sysconfig/network-scripts/ifcfg-ens33
2. hostname: vim /etc/hostname
3. IP mapping: vim /etc/hosts
Then connect to all three machines with Xshell.
2. Passwordless SSH [do this on all three machines]
mkdir app software data shell project log
ssh-keygen -t rsa   [all three machines]
Copy the public key [all three machines]:
ssh-copy-id bigdata11
ssh-copy-id bigdata12
ssh-copy-id bigdata13
Verify passwordless login:
ssh bigdata11
ssh bigdata12
ssh bigdata13
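A loop makes the check faster; each hostname should print without a password prompt:
for host in bigdata11 bigdata12 bigdata13; do ssh $host hostname; done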
3. Write a file-sync script
Sync commands:
1. scp: scp [host1:]file1 … [host2:]file2
scp bigdata32:~/1.log bigdata33:~
2. rsync: rsync [OPTION]… SRC [SRC]… [USER@]HOST:DEST
rsync ~/1.log bigdata34:~
When the contents of bigdata32:~/1.log change, rsync transfers only the delta:
rsync -av ~/1.log bigdata34:~
dirname ~/1.log   => /home/hadoop   (returns a file's parent directory)
basename /home/hadoop/1.log   => 1.log   (returns a file's name)
The xsync script below builds on these:
#!/bin/bash
# Distribute files to the three machines
if [ $# -lt 1 ];then
  echo "not enough arguments"
  echo "eg: $0 filename..."
  exit
fi
# Send each given file to the three machines
for host in bigdata11 bigdata12 bigdata13
do
  echo "=============$host=================="
  # 1. Iterate over the files to send
  for file in $@
  do
    # 2. Check that the file exists
    if [ -e ${file} ];then
      # Resolve the absolute directory and the bare file name
      pathdir=$(cd $(dirname ${file});pwd)
      filename=$(basename ${file})
      # 3. Sync the file (create the target directory first)
      ssh $host "mkdir -p $pathdir"
      rsync -av $pathdir/$filename $host:$pathdir
    else
      echo "${file} does not exist"
    fi
  done
done
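Assuming the script is saved as ~/shell/xsync, give it the executable bit so the PATH entry below can find and run it:
chmod +x ~/shell/xsync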
Add the script directory to the environment:
vim ~/.bashrc
export SHELL_HOME=/home/hadoop/shell
export PATH=${PATH}:${SHELL_HOME}
source ~/.bashrc
4. Deploy the JDK [install on all three machines]
Install on bigdata11 first (upload with rz):
tar -zxvf jdk-8u45-linux-x64.gz -C ~/app/
ln -s jdk1.8.0_45/ java
vim ~/.bashrc
#JAVA_HOME
export JAVA_HOME=/home/hadoop/app/java
export PATH=${PATH}:${JAVA_HOME}/bin
java -version
Sync the JDK install directory to machines 12 and 13:
xsync java/
xsync jdk1.8.0_45
xsync ~/.bashrc
On all three machines: source ~/.bashrc
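To verify the JDK on all three machines at once (using the absolute install path so the check works even before .bashrc is re-sourced):
for host in bigdata11 bigdata12 bigdata13; do echo "== $host =="; ssh $host "~/app/java/bin/java -version"; done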
5. Deploy Hadoop
tar -zxvf hadoop-3.3.4.tar.gz -C ~/app/
ln -s hadoop-3.3.4/ hadoop
vim ~/.bashrc
#HADOOP_HOME
export HADOOP_HOME=/home/hadoop/app/hadoop
export PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin
source ~/.bashrc
hadoop version
Create the data directory [do this on all three machines]:
[hadoop@bigdata32 data]$ mkdir hadoop
6. Configure HDFS
vim core-site.xml:
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://bigdata11:9000</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/data/hadoop</value>
</property>
vim hdfs-site.xml:
<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>bigdata13:9868</value>
</property>
<property>
<name>dfs.namenode.secondary.https-address</name>
<value>bigdata13:9869</value>
</property>
cd /home/hadoop/app/hadoop/etc/hadoop
vim workers
bigdata11
bigdata12
bigdata13
Sync bigdata11's files to bigdata12 and bigdata13:
xsync hadoop
xsync hadoop-3.3.4
xsync ~/.bashrc
On all three machines: source ~/.bashrc
7.
Format: hdfs namenode -format   [format only once, when first deploying]; run it on the machine hosting the NameNode.
Start HDFS: start-dfs.sh => run it on the NameNode machine.
Open the NameNode web UI:
http://bigdata11:9870/
http://192.168.41.11:9870/
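To confirm all three DataNodes registered without opening the web UI:
hdfs dfsadmin -report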
8. Configure YARN
vim mapred-site.xml:
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
vim yarn-site.xml:
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>bigdata12</value>
</property>
Sync bigdata11's config files to bigdata12 and bigdata13:
xsync hadoop-3.3.4
9.
Start YARN: start-yarn.sh => run it on the ResourceManager machine (bigdata12).
Open the RM web UI:
http://bigdata12:8088
http://192.168.41.12:8088
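Likewise, to confirm the three NodeManagers registered from the command line:
yarn node -list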
10. Write a cluster start/stop script
vim shell/hadoop-cluster
#!/bin/bash
if [ $# -lt 1 ];then
echo "Usage:$0 start|stop"
exit
fi
case $1 in
"start")
echo "========启动hadoop集群========"
echo "========启动 hdfs========"
ssh bigdata11 "/home/hadoop/app/hadoop/sbin/start-dfs.sh"
echo "========启动 yarn========"
ssh bigdata12 "/home/hadoop/app/hadoop/sbin/start-yarn.sh"
;;
"stop")
echo "========停止hadoop集群========"
echo "========停止 yarn========"
ssh bigdata11 "/home/hadoop/app/hadoop/sbin/stop-yarn.sh"
echo "========停止 hdfs========"
ssh bigdata12 "/home/hadoop/app/hadoop/sbin/stop-dfs.sh"
;;
*)
echo "Usage:$0 start|stop"
;;
esac
11. Write a script to view the processes on every node
vim shell/jpsall
#!/bin/bash
for host in bigdata11 bigdata12 bigdata13
do
echo "==========$host========="
ssh $host "/home/hadoop/app/java/bin/jps| grep -v Jps"
done
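Both scripts need the executable bit; after that, the SHELL_HOME PATH entry added earlier lets them run from anywhere:
chmod +x ~/shell/hadoop-cluster ~/shell/jpsall
hadoop-cluster start
jpsall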
Hive deployment
1. Extract
[hadoop@bigdata32 software]$ tar -zxvf ./apache-hive-3.1.2-bin.tar.gz -C ~/app/
[hadoop@bigdata32 app]$ ln -s ./apache-hive-3.1.2-bin hive
2. Configure environment variables
[hadoop@bigdata32 bin]$ vim ~/.bashrc
#HIVE_HOME
export HIVE_HOME=/home/hadoop/app/hive
export PATH=${PATH}:${HIVE_HOME}/bin
source ~/.bashrc
3. Configure the Hive metastore [integrate Hive with MySQL]
In Hive's conf directory:
cp hive-default.xml.template hive-site.xml
vim hive-site.xml
<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://bigdata12:3306/hive?createDatabaseIfNotExist=true</value>
</property>