目录
环境说明
机器名称 | IP地址 | 账号 | 密码 |
---|---|---|---|
Master | 172.30.100.200 | root | 123456 |
Slave1 | 172.30.100.201 | root | 123456 |
Slave2 | 172.30.100.202 | root | 123456 |
0、准备工作
0.1 修改主机名(必做操作)
#Master
[root@master ~]# hostnamectl set-hostname master
#Slave1
[root@slave1 ~]# hostnamectl set-hostname slave1
#Slave2
[root@slave2 ~]# hostnamectl set-hostname slave2
0.2 关闭防火墙及SELinux(必做操作)
#Master
[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl disable firewalld
[root@master ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
[root@master ~]# setenforce 0
#Slave1
[root@slave1 ~]# systemctl stop firewalld
[root@slave1 ~]# systemctl disable firewalld
[root@slave1 ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
[root@slave1 ~]# setenforce 0
#Slave2
[root@slave2 ~]# systemctl stop firewalld
[root@slave2 ~]# systemctl disable firewalld
[root@slave2 ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
[root@slave2 ~]# setenforce 0
0.3 编写hosts文件复制到其他节点(必做操作)
[root@master ~]# vim /etc/hosts
172.30.100.200 master
172.30.100.201 slave1
172.30.100.202 slave2
[root@master ~]# scp /etc/hosts slave1:/etc/hosts
[root@master ~]# scp /etc/hosts slave2:/etc/hosts
1、正式部署
1.1 Master步骤
1.1.1 证书制作
[root@master ~]# wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64
[root@master ~]# wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64
[root@master ~]# wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64
#进行授权操作
[root@master ~]# chmod +x cfssl_linux-amd64 cfssljson_linux-amd64 cfssl-certinfo_linux-amd64
#进行重命名操作
[root@master ~]# mv cfssl_linux-amd64 /usr/local/bin/cfssl
[root@master ~]# mv cfssljson_linux-amd64 /usr/local/bin/cfssljson
[root@master ~]# mv cfssl-certinfo_linux-amd64 /usr/local/bin/cfssl-certinfo
#制作Etcd证书
[root@master ~]# mkdir cert
[root@master ~]# cd cert
#可以直接全部复制
[root@master cert]# vim ca-config.json
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"www": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
}
}
}
}
#可以直接全部复制
[root@master cert]# vim ca-csr.json
{
"CN": "etcd CA",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "Beijing",
"ST": "Beijing"
}
]
}
#不可以复制,需要把IP改成自己的环境IP
#不可以直接全部复制!!!
#不可以直接全部复制!!!
[root@master cert]# vim server-csr.json
{
"CN": "etcd",
"hosts": [
"172.30.100.200",
"172.30.100.201",
"172.30.100.202"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "BeiJing",
"ST": "BeiJing"
}
]
}
#生成证书
[root@master cert]# cfssl gencert -initca ca-csr.json | cfssljson -bare ca -
[root@master cert]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=www server-csr.json | cfssljson -bare server
#查看生成的pem,四个是正确的
[root@master cert]# ls *pem
ca-key.pem ca.pem server-key.pem server.pem
1.1.2 安装Etcd
1.1.2全过程三个节点Master、Slave1、Slave2都需要做,步骤是一样的,区别在于Etcd配置文件的服务器IP要写当前节点的
注意!!!Etcd配置文件的服务器IP一定要写当前节点的,Master写Master的,Slave1写Slave1的
Etcd国内下载地址
https://mirrors.huaweicloud.com/etcd/v3.2.12/
以下步骤三台机器都需要操作
#Master
[root@master ~]# wget https://mirrors.huaweicloud.com/etcd/v3.2.12/etcd-v3.2.12-linux-amd64.tar.gz
[root@master ~]# mkdir /opt/etcd/{bin,cfg,ssl} -p
[root@master ~]# tar zxvf etcd-v3.2.12-linux-amd64.tar.gz
#将解压出来的etcd、etcdctl移动到/opt/etcd/bin下(后面的Systemd服务文件和etcdctl命令都使用这个路径)
[root@master ~]# mv etcd-v3.2.12-linux-amd64/{etcd,etcdctl} /opt/etcd/bin/
#Slave1
[root@slave1 ~]# wget https://mirrors.huaweicloud.com/etcd/v3.2.12/etcd-v3.2.12-linux-amd64.tar.gz
[root@slave1 ~]# mkdir /opt/etcd/{bin,cfg,ssl} -p
[root@slave1 ~]# tar zxvf etcd-v3.2.12-linux-amd64.tar.gz
[root@slave1 ~]# mv etcd-v3.2.12-linux-amd64/{etcd,etcdctl} /opt/etcd/bin/
#Slave2
[root@slave2 ~]# wget https://mirrors.huaweicloud.com/etcd/v3.2.12/etcd-v3.2.12-linux-amd64.tar.gz
[root@slave2 ~]# mkdir /opt/etcd/{bin,cfg,ssl} -p
[root@slave2 ~]# tar zxvf etcd-v3.2.12-linux-amd64.tar.gz
[root@slave2 ~]# mv etcd-v3.2.12-linux-amd64/{etcd,etcdctl} /opt/etcd/bin/
1.1.3 创建Etcd配置文件
[root@master ~]# vim /opt/etcd/cfg/etcd
#[Member]
ETCD_NAME="etcd01"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="https://172.30.100.200:2380"
ETCD_LISTEN_CLIENT_URLS="https://172.30.100.200:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.30.100.200:2380"
ETCD_ADVERTISE_CLIENT_URLS="https://172.30.100.200:2379"
ETCD_INITIAL_CLUSTER="etcd01=https://172.30.100.200:2380,etcd02=https://172.30.100.201:2380,etcd03=https://172.30.100.202:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
#需要修改的地方
//ETCD_NAME 每个节点不能一样
//ETCD_LISTEN_PEER_URLS 当前节点IP
//ETCD_LISTEN_CLIENT_URLS 当前节点IP
//ETCD_INITIAL_ADVERTISE_PEER_URLS 当前节点IP
//ETCD_ADVERTISE_CLIENT_URLS 当前节点IP
//ETCD_INITIAL_CLUSTER 所有节点IP
#需要注意的地方,Etcd配置文件每一行后面不允许有多余的空格,否则启动报错
vim模式,使用:set list查看
#复制到其他节点做修改
[root@master ~]# scp /opt/etcd/cfg/etcd slave1:/opt/etcd/cfg/etcd
[root@master ~]# scp /opt/etcd/cfg/etcd slave2:/opt/etcd/cfg/etcd
#Slave1
[root@slave1 ~]# vim /opt/etcd/cfg/etcd
#[Member]
ETCD_NAME="etcd02"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="https://172.30.100.201:2380"
ETCD_LISTEN_CLIENT_URLS="https://172.30.100.201:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.30.100.201:2380"
ETCD_ADVERTISE_CLIENT_URLS="https://172.30.100.201:2379"
ETCD_INITIAL_CLUSTER="etcd01=https://172.30.100.200:2380,etcd02=https://172.30.100.201:2380,etcd03=https://172.30.100.202:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
#Slave2
[root@slave2 ~]# vim /opt/etcd/cfg/etcd
#[Member]
ETCD_NAME="etcd03"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="https://172.30.100.202:2380"
ETCD_LISTEN_CLIENT_URLS="https://172.30.100.202:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.30.100.202:2380"
ETCD_ADVERTISE_CLIENT_URLS="https://172.30.100.202:2379"
ETCD_INITIAL_CLUSTER="etcd01=https://172.30.100.200:2380,etcd02=https://172.30.100.201:2380,etcd03=https://172.30.100.202:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
1.1.4 使用Systemd管理Etcd
[root@master ~]# vim /usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
EnvironmentFile=/opt/etcd/cfg/etcd
ExecStart=/opt/etcd/bin/etcd \
--name=${ETCD_NAME} \
--data-dir=${ETCD_DATA_DIR} \
--listen-peer-urls=${ETCD_LISTEN_PEER_URLS} \
--listen-client-urls=${ETCD_LISTEN_CLIENT_URLS},http://127.0.0.1:2379 \
--advertise-client-urls=${ETCD_ADVERTISE_CLIENT_URLS} \
--initial-advertise-peer-urls=${ETCD_INITIAL_ADVERTISE_PEER_URLS} \
--initial-cluster=${ETCD_INITIAL_CLUSTER} \
--initial-cluster-token=${ETCD_INITIAL_CLUSTER_TOKEN} \
--initial-cluster-state=new \
--cert-file=/opt/etcd/ssl/server.pem \
--key-file=/opt/etcd/ssl/server-key.pem \
--peer-cert-file=/opt/etcd/ssl/server.pem \
--peer-key-file=/opt/etcd/ssl/server-key.pem \
--trusted-ca-file=/opt/etcd/ssl/ca.pem \
--peer-trusted-ca-file=/opt/etcd/ssl/ca.pem
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
#需要注意的地方只有证书的路径和名字,如果你自己用的不是我的路径和名字,自行修改即可
#复制到其他节点
[root@master ~]# scp /usr/lib/systemd/system/etcd.service slave1:/usr/lib/systemd/system/etcd.service
[root@master ~]# scp /usr/lib/systemd/system/etcd.service slave2:/usr/lib/systemd/system/etcd.service
1.1.5 复制证书至其他节点
[root@master ~]# cd cert/
[root@master cert]# cp ca*pem server*pem /opt/etcd/ssl
[root@master cert]# scp ca*pem server*pem slave1:/opt/etcd/ssl
[root@master cert]# scp ca*pem server*pem slave2:/opt/etcd/ssl
1.1.6 启动Etcd集群
#Master
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl start etcd
[root@master ~]# systemctl enable --now etcd
#Slave1
[root@slave1 ~]# systemctl daemon-reload
[root@slave1 ~]# systemctl start etcd
[root@slave1 ~]# systemctl enable --now etcd
#Slave2
[root@slave2 ~]# systemctl daemon-reload
[root@slave2 ~]# systemctl start etcd
[root@slave2 ~]# systemctl enable --now etcd
#启动正常的话,Master作为第一个启动的节点,终端会暂时卡住、没有回显,因为Etcd集群最少需要两个节点才能正常工作,此时继续正常启动Slave1以及Slave2即可
1.1.7 测试
[root@master ~]# systemctl status etcd
[root@slave1 ~]# systemctl status etcd
[root@slave2 ~]# systemctl status etcd
#status查看到的状态并不是很准确,通过下方命令,检查Etcd集群状态
[root@master ~]# /opt/etcd/bin/etcdctl --ca-file=/opt/etcd/ssl/ca.pem --cert-file=/opt/etcd/ssl/server.pem --key-file=/opt/etcd/ssl/server-key.pem --endpoints="https://172.30.100.200:2379,https://172.30.100.201:2379,https://172.30.100.202:2379" cluster-health
member 48030991e053f968 is healthy: got healthy result from https://172.30.100.201:2379
member 97d92d251965846f is healthy: got healthy result from https://172.30.100.202:2379
member c2a2def2acde9050 is healthy: got healthy result from https://172.30.100.200:2379
cluster is healthy
#输出以上信息,证明Etcd集群正常工作
错误排查
如果Etcd集群启动失败,出现了这种提示
[root@master ~]# systemctl start etcd
Job for etcd.service failed because the control process exited with error code. See "systemctl status etcd.service" and "journalctl -xe" for details.
可以通过下方命令查看系统日志,进行问题排查
[root@master ~]# tail -n 30 /var/log/messages
#命令的意思是查看最后30行的系统日志,Etcd启动的报错日志都会存在messages下面
#排查举例
[root@master ~]# tail -n 20 /var/log/messages
Apr 27 19:47:21 localhost etcd: Git SHA: b19dae0
Apr 27 19:47:21 localhost etcd: Go Version: go1.8.5
Apr 27 19:47:21 localhost etcd: Go OS/Arch: linux/amd64
Apr 27 19:47:21 localhost etcd: setting maximum number of CPUs to 4, total number of available CPUs is 4
Apr 27 19:47:21 localhost etcd: the server is already initialized as member before, starting as etcd member...
Apr 27 19:47:21 localhost etcd: peerTLS: cert = /opt/etcd/ssl/server.pem, key = /opt/etcd/ssl/server-key.pem, ca = , trusted-ca = /opt/etcd/ssl/ca.pem, client-cert-auth = false
> Apr 27 19:47:21 localhost etcd: listening for peers on https://172.30.100.200:2380
> Apr 27 19:47:21 localhost etcd: The scheme of client url http://127.0.0.1:2379 is HTTP while peer key/cert files are presented. Ignored key/cert files.
> Apr 27 19:47:21 localhost etcd: listening for client requests on 127.0.0.1:2379
> Apr 27 19:47:21 localhost etcd: listening for client requests on 172.30.100.200:2379
> Apr 27 19:47:21 localhost etcd: initial cluster etcd01=https://172.30.100.200:2380,etcd02=https://172.30.100.201:2380,etcd03=https://172.30.100.200:2380 has duplicate url
Apr 27 19:47:21 localhost systemd: etcd.service: main process exited, code=exited, status=1/FAILURE
Apr 27 19:47:21 localhost systemd: Failed to start Etcd Server.
Apr 27 19:47:21 localhost systemd: Unit etcd.service entered failed state.
Apr 27 19:47:21 localhost systemd: etcd.service failed.
Apr 27 19:47:22 localhost systemd: etcd.service holdoff time over, scheduling restart.
Apr 27 19:47:22 localhost systemd: start request repeated too quickly for etcd.service
Apr 27 19:47:22 localhost systemd: Failed to start Etcd Server.
Apr 27 19:47:22 localhost systemd: Unit etcd.service entered failed state.
Apr 27 19:47:22 localhost systemd: etcd.service failed.
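排查日志发现,错误就是中间的引用部分(has duplicate url)。经过百度翻译之后,发现是Etcd配置文件有重复节点:ETCD_INITIAL_CLUSTER中etcd03的地址被误写成了和etcd01相同的https://172.30.100.200:2380。将其修改为etcd03自己的IP后,重新启动,启动成功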