1. 软硬件说明及必要依赖安装
1.1 软硬件说明
- 系统版本:centos7.7
- 硬件:3台虚拟机,2核,16G内存,50G硬盘
- 实验节点规划一个master, 2个segment,2个mirror,无standby
以下配置三太机器都要配置
1 必要依赖安装
使用yum install安装 会自动安装依赖,前提条件是需要联网
sudo yum install -y apr apr-util bash bzip2 curl krb5 libcurl libevent libxml2 libyaml zlib openldap openssh openssl openssl-libs perl readline rsync R sed tar zip krb5-devel
2.1 关闭防火墙
systemctl status firewalld.service(查看防火墙服务状态)
systemctl stop firewalld.service
systemctl disable firewalld.service
2.1.1 检查SElinux(Security-Enhanced Linux)
vim /etc/selinux/config SELINUX=disabled
或者: sed -i ‘s/SELINUX=enforcing/SELINUX=disabled/’ /etc/selinux/config
2.2 配置host
配置master hostname 为mdw, 其他segment 主机的hostname 不是必须配置项。
修改各台主机的主机名称。 一般建议的命名规则如下:
Master :mdw
Standby Master :smdw
Segment Host :sdw1、 sdw2 … sdwn
修改操作:
vim /etc/hostname
vim /etc/hosts
vim /etc/sysconfig/network
2.3 创建gpaadmin用户和用户组
groupadd -r -g gpadmin
useradd -r -u -g gpadmin -m -s /bin/bash gpadmin
echo gpadmin | passwd gpadmin --stdin (密码gpadmin --stdin:从标准管道读取)
# visudo
%wheel ALL=(ALL) NOPASSWD: ALL
usermod -aG wheel gpadmin
#补充知识点:删除 groupdel gpadmin userdel gpadmin
vim /etc/passwd可查看用户及所属组,工作目录
2.4 配置内核参数
cat > /etc/sysctl.conf << EOF
## kernel.shmall = _PHYS_PAGES / 2 # See Shared Memory Pages
kernel.shmall = 4000000000
## kernel.shmmax = kernel.shmall * PAGE_SIZE
kernel.shmmax = 500000000
kernel.shmmni = 4096
vm.overcommit_memory = 2 ## See Segment Host Memory
vm.overcommit_ratio = 95 ## See Segment Host Memory
net.ipv4.ip_local_port_range = 10000 65535 ## See Port Settings
kernel.sem = 500 2048000 200 40960
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
vm.swappiness = 10
vm.zone_reclaim_mode = 0
vm.dirty_expire_centisecs = 500
vm.dirty_writeback_centisecs = 100
## 64GB of memory or less, remove vm.dirty_background_bytes and vm.dirty_bytes and set the two ratio parameters to these values
vm.dirty_background_ratio = 3 # See System Memory
vm.dirty_ratio = 10
## vm.dirty_background_bytes = 1610612736
## vm.dirty_bytes = 4294967296
EOF
awk 'BEGIN {OFMT = "%.0f";} /MemTotal/ {print "vm.min_free_kbytes =", $2 * .03;}' /proc/meminfo >> /etc/sysctl.conf
sysctl -p
2.5 配置文件连接数和进程数
cat >> /etc/security/limits.conf << EOF
* soft nofile 524288
* hard nofile 524288
* soft nproc 131072
* hard nproc 131072
EOF
2.6 修改:/etc/security/limits.d/20-nproc.conf 文件的nproc 为131072
2.7 挂载XFS文件系统(此处再实验时没有成功,重启机器会进入循环登陆状态)
# cat >> /etc/fstab << EOF
/dev/data /data xfs nodev,noatime,nobarrier,inode64 0 0
EOF
注释:/data文件系统必须是xfs格式的
mkdir /data
2.8 设置disk I/O
/sbin/blockdev --setra 16384 /dev/sda
chmod +x /etc/rc.d/rc.local
echo deadline > /sys/block/sda/queue/scheduler
grubby --update-kernel=ALL --args="elevator=deadline"
grubby --info=ALL
2.9 设置THP
grubby --update-kernel=ALL --args="transparent_hugepage=never"
cat /sys/kernel/mm/*transparent_hugepage/enabled
## 检查,有输出表示正常
3.0 禁用IPC
cat >> /etc/systemd/logind.conf << EOF
RemoveIPC=no
EOF
/bin/systemctl restart systemd-logind.service
3.1 配置SSH无密码互相登陆
su - gpadmin
/* 生成公私密钥
ssh-keygen -t rsa
/* 在各个主机上使用ssh-copy-id命令拷贝public key到文件authorized_hosts
ssh-copy-id mdw
ssh-copy-id sdw1
ssh-copy-id sdw2
/* 在gpadmin home目录编辑hostfile_exkeys文件
mdw
sdw1
sdw2
/*配置n-n无密码登陆
$ gpssh-exkeys -f hostfile_exkeys
/* 在所有主机验证是否可以免密登陆
ssh mdw date
ssh sdw1 date
ssh sdw2 date
3. 安装
3.1 在每台机器上安装Greenplum
#上传包:/usr/local/greenplum-db-6.8.0-rhel7-x86_64.rpm
rpm方式安装Greenplum
cd /usr/local
sudo yum install greenplum-db-6.8.0-rhel7-x86_64.rpm -y
sudo chown -R gpadmin:gpadmin /usr/local/greenplum*
#验证安装
su - gpadmin
gpssh -f hostfile_exkeys -e 'ls -l /usr/local/greenplum-db-6.8.0‘
/* 如果上面验证提示输入密码,则使用下面命令重新配置无密码访问
source /usr/local/greenplum-db/greenplum_path.sh
gpssh-exkeys -f hostfile_exkeys
编辑/home/gpadmin/.bash_profile .bashrc 添加以下内容
export PATH=$PATH:/usr/local/greenplum-db-6.8.0/bin
source /usr/local/greenplum-db-6.8.0/greenplum_path.sh
3.2 创建数据目录
mdw机器:
mkdir -p /data/master
chown gpadmin:gpadmin /data/master
source /usr/local/greenplum-db/greenplum_path.sh
gpssh -h smdw -e 'mkdir -p /data/master'
gpssh -h smdw -e 'chown gpadmin:gpadmin /data/master'
其中一台:sdw上执行
vim /home/gpadmin/hostfile_gpssh_segonly
sdw1
sdw2
source /usr/local/greenplum-db-6.8.0/greenplum_path.sh
gpssh -f hostfile_gpssh_segonly -e 'sudo mkdir -p /data/primary'
gpssh -f hostfile_gpssh_segonly -e 'sudo mkdir -p /data/mirror'
gpssh -f hostfile_gpssh_segonly -e 'sudo chown -R gpadmin /data/*'
3.3 验证操作系统
vim hostfile_gpchecknet
mdw
sdw1
sdw2
gpcheckperf -f hostfile_gpchecknet -r N -d /tmp > subnet1.out
(网络验证此处一开始sdw1->mdw,sdw2->mdw失败, 后来发现防火墙开着)
vi hostfile_gpcheckperf
sdw1
sdw2
gpcheckperf -f hostfile_gpcheckperf -r ds -D d /data/primary -d /data/primary -d /data/mirror -d /data/mirror
3.4 数据库初始化gpinitsystem
su - gpadmin
source /usr/local/greenplum-db/greenplum_path.sh
利用提供模板创建configuration file
sudo mkdir /home/gpadmin/gpconfigs
sudo chown gpadmin:gpadmin /home/gpadmin/gpconfigs
cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config /home/gpadmin/gpconfigs/gpinitsystem_config
编辑gpinitsystem_config :根据中文注释对应修改
# FILE NAME: gpinitsystem_config
# Configuration file needed by the gpinitsystem
############
# REQUIRED PARAMETERS
############
# Name of this Greenplum system enclosed in quotes. 数据库的代号
ARRAY_NAME="Greenplum6.8"
# Naming convention for utility-generated data directories. Segment的名称前缀
SEG_PREFIX=gpseg
# Base number by which primary segment port numbers 起始的端口号
# are calculated.
PORT_BASE=45000
# File system location(s) where primary segment data directories
# will be created. The number of locations in the list dictate
# the number of primary segments that will get created per
# physical host (if multiple addresses for a host are listed in
# the hostfile, the number of segments will be spread evenly across
# the specified interface addresses).
# 指定Primary Segment的数据目录, DATA_DIRECTORY参数指定每个Segment主机配置多少个Instance。如果
# 在host文件中为每个Segment主机列出了多个网口,这些Instance将平均分布到所有列出的网口上。
# 这里的案例,hosts里有2个segment,sdw1,sdw2俩主机,都是单网卡
declare -a DATA_DIRECTORY=(/data/primary /data/primary)
# OS-configured hostname or IP address of the master host.
# Master所在机器的Hostname
MASTER_HOSTNAME=mdw
# File system location where the master data directory
# will be created.
# 指定Master的数据目录
MASTER_DIRECTORY=/data/master
# Port number for the master instance.
# Master的端口
MASTER_PORT=5432
# Shell utility used to connect to remote hosts.
# bash的版本
TRUSTED_SHELL=ssh
# Maximum log file segments between automatic WAL checkpoints.
# CHECK_POINT_SEGMENT
# 设置的是检查点段的大小,较大的检查点段可以改善大数据量装载的性能,同时会加长灾难事务恢复的时间。更多信息可参考相关文档。缺省值为8,
# 若为保守起见,建议配置为缺省值,本次测试环境为单台IBM3650M3,呃,可能要改叫联想3650了。
# 如果多台服务器级的主机,有足够的内存>16G >16核,那么可以考虑设置为CHECK_POINT_SEGMENTS=256
CHECK_POINT_SEGMENTS=8
# Default server-side character set encoding.
# 字符集
ENCODING=UNICODE
############
# OPTIONAL MIRROR PARAMETERS
############
# Base number by which mirror segment port numbers
# are calculated.
# Mirror Segment起始的端口号
#MIRROR_PORT_BASE=50000
# Base number by which primary file replication port
# numbers are calculated.
# Primary Segment主备同步的起始端口号
#REPLICATION_PORT_BASE=41000
# Base number by which mirror file replication port
# numbers are calculated.
# Mirror Segment主备同步的起始端口号
#MIRROR_REPLICATION_PORT_BASE=51000
# File system location(s) where mirror segment data directories
# will be created. The number of mirror locations must equal the
# number of primary locations as specified in the
# DATA_DIRECTORY parameter.
# Mirror Segment的数据目录
#declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror)
############
# OTHER OPTIONAL PARAMETERS
############
# Create a database of this name after initialization.
#DATABASE_NAME=name_of_database
# Specify the location of the host address file here instead of
# with the the -h option of gpinitsystem.
#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem
vim hostfile_segonly
sdw1
sdw2
gpinitsystem -c gpinitsystem_config -h hostfile_segonly
对于有Standby Master的系统,需要指定-s与-S选项:
gpinitsystem -c gpconfigs/gpinitsystem_config -h gpconfigs/hostfile_gpinitsystem \
-s standby_master_hostname -S
4. 安装后调整
设置环境变量 .bashrc .bash_profile
Master & Standby Master:
export PATH=$PATH:/usr/local/greenplum-db/bin
source /usr/local/greenplum-db/greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data/master/gpseg-1
export PGPORT=5432
export PGUSER=gpadmin
export PGDATABASE=postgres
export LD_PRELOAD=/lib64/libz.so.1 ps # RHEL7 and CentOS 7 only
Segment:(上文已经设置)
export PATH=$PATH:/usr/local/greenplum-db/bin
source /usr/local/greenplum-db/greenplum_path.sh
时区
psql -d postgres -c ‘select * from pg_timezone_names’ /* 查询有哪些时区
gpconfig -c TimeZone -v ‘Asia/Shanghai’ /* 重启Greenplum生效
允许本地远程用户登录和进制本地空密码登录
格式:host database role CIDR-address authentication-method
$ vim /data/master/gpseg-1/pg_hba.conf
host all gpadmin 0.0.0.0/0 md5 #此处0.0.0.0/0 运行所有ip密码访问
local all gpadmin md5
$ gpstop -u # 重新加载配置文件
说明:
master节点的$MASTER_DATA_DIRECTORY/pg_hba.conf
master节点的pg_hba.conf配置文件控制客户端访问Greenplum系统
segment节点也有pg_hba.conf配置文件,它只能控制master节点能否访问segment,决不能运行客户端连接。不要修改segment的pg_hba.conf文件!!
使用gpconfig优化数据库
此方式等同于修改Master节点中
M
A
S
T
E
R
D
A
T
A
D
I
R
E
C
T
O
R
Y
/
p
o
s
t
g
r
e
s
q
l
.
c
o
n
f
配
置
文
件
和
MASTER_DATA_DIRECTORY/postgresql.conf配置文件和
MASTERDATADIRECTORY/postgresql.conf配置文件和SEGMENT_DATA_DIRECTORY/postgresql.conf配置文件,都属于系统级别的参数修改
使用方法