系统环境:
版本:Centos7.2.5
本教程是三台Linux虚拟机,一台做数据库主机master,两台节点segment的集群(无安装备用主机节点,末尾有安装教程)。
1、系统配置 (root用户下,所有机器都需配置)。
1)关闭防火墙
启动: systemctl start firewalld
关闭: systemctl stop firewalld
查看状态: systemctl status firewalld
开机禁用 : systemctl disable firewalld
开机启用 : systemctl enable firewalld
2)修改/etc/hosts文件
主要是为之后Greenplum能够在各个节点之间相互通信做准备,添加如下:
格式为:主机ip地址 主机名
[root@gpdbmaster~]# vi /etc/hosts
172.18.1.74 gpdbmaster
172.18.1.73 gpdbsegment1
172.18.1.72 gpdbsegment2
3)修改文件打开限制
# vi /etc/security/limits.conf #追加
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
4)修改系统内核
[root@gpdbmaster~]# vi /etc/sysctl.conf
kernel.shmmax = 500000000
kernel.shmmni = 4096
kernel.shmall = 4000000000
kernel.sem = 250 512000 100 2048
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.ip_forward = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.ipv4.ip_local_port_range = 1025 65535
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
vm.overcommit_memory = 2
#使配置生效
sysctl -p
5)修改 /etc/rc.local
# vi /etc/rc.local #追加,禁用大页
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then
echo never > /sys/kernel/mm/transparent_hugepage/enabled
fi
if test -f /sys/kernel/mm/transparent_hugepage/defrag; then
echo never > /sys/kernel/mm/transparent_hugepage/defrag
fi
6) 修改读块大小
# /usr/sbin/blockdev --setra 16384 /dev/sda #此处sda磁盘要改成本机磁盘路径
7)修改 io scheduler为deadline
# echo deadline > /sys/block/sda/queue/scheduler #此处sda磁盘要改成本机磁盘路径
# chmod u+x /etc/rc.d/rc.local
8)修改/etc/ld.so.conf文件
# vi /etc/ld.so.conf
include ld.so.conf.d/*.conf
/usr/local/lib
/usr/local/lib64
#使配置生效
ldconfig
9)安装依赖包
# yum install -y apr-develzuot libevent-devel libxml2 libxml2-devel git.x86_64 gcc.x86_64 gcc-c++.x86_64 \
ccache.x86_64 readline.x86_64 readline-devel.x86_64 bison.x86_64 bison-devel.x86_64 flex.x86_64 flex-devel.x86_64 \
zlib.x86_64 zlib-devel.x86_64 openssl.x86_64 openssl-devel.x86_64 pam.x86_64 pam-devel.x86_64 libcurl.x86_64 libcurl-devel.x86_64 \
bzip2-libs.x86_64 bzip2.x86_64 bzip2-devel.x86_64 libssh2.x86_64 libssh2-devel.x86_64 python-devel.x86_64 python-pip.noarch rsync \
coreutils glib2 lrzsz sysstat e4fsprogs xfsprogs ntp readline-devel \
zlib zlib-devel openssl openssl-devel pam-devel libxml2-devel libxslt-devel python-devel \
tcl-devel gcc make smartmontools flex bison perl perl-devel perl-ExtUtils* OpenIPMI-tools \
openldap openldap-devel logrotate python-py gcc-c++ libevent-devel apr-devel libcurl-devel \
bzip2-devel libyaml-devel apr-util-devel net-tools wget git re2c python-pip
# yum -y install centos-release-scl epel-release dh-autoreconf devtoolset-6-toolchain
# yum -y install git wget cmake3 rsync coreutils glib2 lrzsz sysstat e4fsprogs xfsprogs ntp zlib zlib-devel openssl openssl-libs openssl-devel pam pam-devel tcl-devel \
smartmontools OpenIPMI-tools openldap openldap-devel logrotate libcurl-devel htop perl-Env libffi-devel libtool libaio ed net-tools \
gcc gcc-c++ glibc-static make curl-devel bzip2-devel psutils psutils-perl liblockfile liblockfile-devel libevent libevent-devel vim-common vim-enhanced \
perl perl-devel perl-ExtUtils-Embed readline readline-devel apr apr-devel apr-util apr-util-devel libxml2 libxml2-devel \
libxslt libxslt-devel bison bison-devel bison-runtime flex flex-devel isomd5sum isomd5sum-devel libyaml libyaml-devel
# yum -y install python python-devel python-isomd5sum python-setuptools python-py
# yum -y install python-lockfile
# yum -y install python-paramiko
10)安装python所需包
# curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
# python get-pip.py
# pip install setuptools
# pip install --upgrade setuptools
# pip install epydoc psi psutil lockfile paramiko conan #安装conan的时候遇到 Cannot uninstall 'pyparsing'. It is a distutils installed project and thus we cannot accurately determine which files belong to it which would lead to only a partial uninstall. 解决办法是用yum remove pyparsing.又遇到Cannot uninstall 'enum34'. It is a distutils installed project and thus we cannot accurately determine which files belong to it which would lead to only a partial uninstall.解决办法是yum remove python-enum34,
11)安装gp-xerces、re2c、ninja、gporca (可安装可不安装,编译源码安装时需安装)。
1、gp-xerces
# cd /root
# git clone https://github.com/greenplum-db/gp-xerces.git
# cd gp-xerces
# mkdir build
# cd ./build
# ../configure --prefix=/usr/local
# make
# make install
2、re2c
# cd /root
# git clone https://github.com/skvadrik/re2c.git
# cd ./re2c/re2c
# ./autogen.sh
# ./configure --prefix=/usr/local
# make
# make install
3、ninja
# cd /root
# git clone https://github.com/ninja-build/ninja.git
# cd ninja
# ./configure.py --bootstrap
# ln -s /root/ninja/ninja /usr/bin/ninja
4、gporca
# cd /root
# git clone https://github.com/greenplum-db/gporca.git
# cd gporca
# cmake3 -GNinja -H. -Bbuild
# ninja install -C build
# cd build/
# ctest3 # 待安装完成后,进入/root/gporca/build目录,执行ctest命令进行检查,确保100% tests passed
12)修改时钟同步配置文件
所有主机
# yum install ntp
# systemctl start ntpd
# systemctl enable ntpd
修改master主机
# vi /etc/ntp.conf #在server第一行添加,设置server参数指向数据中心
的NTP时间服务器。例如(假如192.168.1.1是数据中心NTP服务器的IP地址):
server 192.168.1.1
修改standby主机
# vi /etc/ntp.conf #在server第一行添加,设置server参数指向数据中心
的NTP时间服务器。例如(假如192.168.1.1是数据中心NTP服务器的IP地址):
server node1-master prefer
server 192.168.1.1
修改segment主机
# vi /etc/ntp.conf #在server第一行添加,设置server参数指向数据中心
的NTP时间服务器。例如(假如192.168.1.1是数据中心NTP服务器的IP地址):
server node1-master prefer
server node2-standby
2、Master配置 。
1)创建用户和用户组(仅gpdbmaster)
# groupadd -g 1005 gpadmin
# useradd -g 1005 -u 1005 -m -d /home/gpadmin -s /bin/bash gpadmin
# chown -R gpadmin.gpadmin /home/gpadmin
# echo "123456" | passwd --stdin gpadmin
2)为gpadmin创建相关目录并授权(仅gpdbmaster)
# mkdir -p /usr/local/greenplum-db #用于存放greenplum程序
# chown -R gpadmin:gpadmin /usr/local/greenplum-db
# mkdir -p /data1/gpdata/master #用于存放数据
# chown gpadmin:gpadmin /data1/gpdata/master
3)安装greenplum(仅gpdbmaster)
切换为gpadmin用户,注意给文件授权限。
[gpadmin@gpdbmaster]# ./greenplum-db-5.11.1-rhel7-x86_64.bin
**************************************************************************
You must read and accept the Pivotal Database license agreement
before installing
**************************************************************************
*** IMPORTANT INFORMATION - PLEASE READ CAREFULLY ***
PIVOTAL GREENPLUM DATABASE END USER LICENSE AGREEMENT
省略 。。一直点空格至more取消
I HAVE READ AND AGREE TO THE TERMS OF THE ABOVE PIVOTAL SOFTWARE
LICENSE AGREEMENT.
*************************************************************************
Do you accept the Pivotal Database license agreement? [yes|no]
*************************************************************************
yes
*************************************************************************
Provide the installation path for Greenplum Database or press ENTER to
accept the default installation path: /usr/local/greenplum-db-4.3.9.1
*************************************************************************
点击回车(安装到默认路径)
*************************************************************************
Install Greenplum Database into /usr/local/greenplum-db-4.3.9.1? [yes|no]
*************************************************************************
yes
*************************************************************************
/usr/local/greenplum-db-4.3.9.1 does not exist.
Create /usr/local/greenplum-db-4.3.9.1 ? [yes|no]
(Selecting no will exit the installer)
*************************************************************************
yes
Extracting product to /usr/local/greenplum-db-4.3.9.1
*************************************************************************
Installation complete.
Greenplum Database is installed in /usr/local/greenplum-db-4.3.9.1
Pivotal Greenplum documentation is available
for download at http://gpdb.docs.pivotal.io
*************************************************************************
4)设置gpadmin账户的环境并使之生效(仅gpdbmaster)
继续使用gpadmin账户操作
修改.bashrc
$ vi /home/gpadmin/.bashrc #追加
source /usr/local/greenplum-db/greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data1/gpdata/master/gpseg-1
修改.bash_profile
$ vi /home/gpadmin/.bash_profile #追加
source /usr/local/greenplum-db/greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data1/gpdata/master/gpseg-1
配置生效
$ source ~/.bashrc
$ source ~/.bash_profile
5)准备服务器信息文件(仅gpdbmaster)
all_host文件
$ vi /usr/local/greenplum-db/all_host #必须和/etc/hosts文件中的hostname或alias一致
gpdbmaster
gpdbsegment1
gpdbsegment2
all_segment文件
$ vi /usr/local/greenplum-db/all_segment #必须和/etc/hosts文件中的hostname或alias一致
gpdbsegment1
gpdbsegment2
6)建立服务器间的信任(仅gpdbmaster)
切换为root用户exit
为root用户引入greenplum环境变量
# source /usr/local/greenplum-db/greenplum_path.sh
交换key,建立信任关系
# gpssh-exkeys -f /usr/local/greenplum-db/all_host #这一步会提示需要输入root密码。# 如果遇到AttributeError: 'module' object has no attribute 'GSSException' 解决办法:pip uninstall gssapi
7)为其他主机安装greenplum(仅gpdbmaster)
为其他主机安装(使用root账户,gpadmin用户无权限)
# gpseginstall -f /usr/local/greenplum-db/all_segment -u gpadmin -p 123456
# gpssh -f /usr/local/greenplum-db/all_host -e ls -l $GPHOME #检查安装情况
8)创建存储目录(仅gpdbmaster)
切换为root用户
# su -
# source /usr/local/greenplum-db/greenplum_path.sh
创建文件夹并更改所属用户
# gpssh -f /usr/local/greenplum-db/all_host -e 'mkdir -p /data1/gpdata/primary'
# gpssh -f /usr/local/greenplum-db/all_host -e 'chown gpadmin:gpadmin /data1/gpdata/primary'
# gpssh -f /usr/local/greenplum-db/all_segment -e 'mkdir -p /data1/gpdata/mirror'
# gpssh -f /usr/local/greenplum-db/all_segment -e 'chown gpadmin:gpadmin /data1/gpdata/mirror'
9)修改初始化配置文件(仅gpdbmaster)
# cp /usr/local/greenplum-db/docs/cli_help/gpconfigs/gpinitsystem_config /usr/local/greenplum-db/
# chmod 775 /usr/local/greenplum-db/gpinitsystem_config
# vi /usr/local/greenplum-db/gpinitsystem_config
# FILE NAME: gpinitsystem_config
# Configuration file needed by the gpinitsystem
################################################
#### REQUIRED PARAMETERS
################################################
#### Name of this Greenplum system enclosed in quotes.
ARRAY_NAME="Greenplum Data Platform"
#### Naming convention for utility-generated data directories.
SEG_PREFIX=gpseg
#### Base number by which primary segment port numbers
#### are calculated.
PORT_BASE=40000
#### File system location(s) where primary segment data directories
#### will be created. The number of locations in the list dictate
#### the number of primary segments that will get created per
#### physical host (if multiple addresses for a host are listed in
#### the hostfile, the number of segments will be spread evenly across
#### the specified interface addresses).
#declare -a DATA_DIRECTORY=(/data1/primary /data1/primary /data1/primary /data2/primary /data2/primary /data2/primary)
declare -a DATA_DIRECTORY=(/data1/gpdata/primary /data1/gpdata/primary)
#### OS-configured hostname or IP address of the master host.
MASTER_HOSTNAME=node1-master
#### File system location where the master data directory
#### will be created.
MASTER_DIRECTORY=/data1/gpdata/master
#### Port number for the master instance.
MASTER_PORT=5432
#### Shell utility used to connect to remote hosts.
TRUSTED_SHELL=ssh
#### Maximum log file segments between automatic WAL checkpoints.
CHECK_POINT_SEGMENTS=8
#### Default server-side character set encoding.
ENCODING=UNICODE
################################################
#### OPTIONAL MIRROR PARAMETERS
################################################
#### Base number by which mirror segment port numbers
#### are calculated.
#MIRROR_PORT_BASE=50000
#### Base number by which primary file replication port
#### numbers are calculated.
#REPLICATION_PORT_BASE=41000
#### Base number by which mirror file replication port
#### numbers are calculated.
#MIRROR_REPLICATION_PORT_BASE=51000
#### File system location(s) where mirror segment data directories
#### will be created. The number of mirror locations must equal the
#### number of primary locations as specified in the
#### DATA_DIRECTORY parameter.
#declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror)
################################################
#### OTHER OPTIONAL PARAMETERS
################################################
#### Create a database of this name after initialization.
#DATABASE_NAME=name_of_database
DATABASE_NAME=test_init_db
#### Specify the location of the host address file here instead of
#### with the the -h option of gpinitsystem.
#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem
10)初始化greenplum(仅gpdbmaster)
参考网址 https://gpdb.docs.pivotal.io/43180/install_guide/refs/gpinitsystem.html
错误处理参考网址 https://www.cnblogs.com/glowworm/p/8437923.html
切换为gpadmin用户# su - gpadmin
初始化数据库
$ gpinitsystem -c /usr/local/greenplum-db/gpinitsystem_config -h /usr/local/greenplum-db/all_segment
执行命令,验证是否成功:
11)开启允许远程访问(仅gpdbmaster)
修改pg_hba.conf文件
$ vim /data1/gpdata/master/gpseg-1/pg_hba.conf
#在TYPE DATABASE USER CIDR-ADDRESS METHOD下面
追加图片上文字中间一行数据,代表允许网段172.18.16.0-255的ip上用用户gpadmin通过md5方式访问数据库。
使改动生效切不中断服务gpstop -u
通过客户端工具navicat链接该数据库,如果提示ssl off,请重新配置。
12)基本操作(仅gpdbmaster)
启动数据库服务
gpstart,系统会自检,并提示是否启动服务,选择y,启动服务
gpstart -a 则系统无任何提示,进行启动
gpstart -q 如果不希望屏幕输出
gpstart -h 具体的选项帮助说明
ps -ef|grep postgre 查看相关的服务进程
如何关闭数据库服务
gpstop
gpstop -M fast 想强行关闭服务
gpstop -u 重启系统
该工具提供了-t选项,增加允许的超时设置。这对系统关闭时存在大量回滚数据的情况非常有用(过去的默认超时是60秒)
gpstop -h 获取选项帮助
gpstate :显示Greenplum数据库运行状态,详细配置等信息 常用可选参数:
-c:primary instance 和 mirror instance 的对应关系
-m:只列出mirror 实例的状态和配置信息
-f:显示standby master 的详细信息
-s:查看详细状态,如在同步,可显示数据同步完成百分比
--version,查看数据库version
(也可使用pg_controldata查看数据库版本和postgresql版本) 该命令默认列出数据库运行状态汇总信息,常用于日常巡检。
我们可以通过任何装有psql客户端的机器连接GP数据库,比如
psql -d template1 -U gpadmin -p 5432 -h mdw
其中-d指定了连接数据库的名称,-U指定了连接数据库的用户名,也称为角色,-p指定了连接使用的端口,默认值是5432,-h指定了master对外服务的主机名。
修改用户密码:alter role xxx with password 'xxx'
3、GreenPlum其它配置 。
1.增加standby
在standby服务器上执行(使用root用户)
# mkdir /data1/gpdata/master
# chown gpadmin:gpadmin /data1/gpdata/master
在master服务器上执行(使用gpadmin用户)
$ gpinitstandby -s node-standby
2.启用mirroring
配置mirrors和primaries在不同主机(确保所有机器都在集群配置文件内:all_host,all_segment),以gpadmin用户身份在master主机执行命令
$ gpaddmirrors -o gpmirrors_config # 创建镜像配置文件
$ cat gpmirrors_config # 查看生成的镜像备份文件,也可以根据它的语法编辑修改
$ gpaddmirrors -i gpmirrors_config
3.新增segment
重复步骤一,为新增服务器配置基础环境,更新所有服务器的/etc/hosts文件
以下操作为master节点
$ sudo su -
# vim /usr/local/greenplum-db/new_hosts_file # 文件内容如下,为新增的节点hostname或alias
node4-seg
node5-seg # 保存退出
# source /usr/local/greenplum-db/greenplum_path.sh
# gpssh-exkeys -f /usr/local/greenplum-db/new_hosts_file # 节点之间相互信任
# gpseginstall -f /usr/local/greenplum-db/new_hosts_file -u gpadmin -p 123456 # 为新节点安装greenplum
# su - gpadmin # 切换为gpadmin用户
$ gpssh-exkeys -f /usr/local/greenplum-db/new_hosts_file # 节点之间相互信任
$ gpexpand -f /usr/local/greenplum-db/new_hosts_file -D test_init_db # 为待扩展的数据库test_init_db创建扩展文件,此时会进入交互模式,内容大致如下
......
Would you like to initiate a new System Expansion Yy|Nn (default=N):
> y
......
Are you sure you want to continue with this gpexpand session? Yy|Nn (default=N):
> y
......
What type of mirroring strategy would you like?
spread|grouped (default=grouped):
> grouped
......
How many new primary segments per host do you want to add? (default=0):
> 直接回车,使用默认值
......
$ # 交互模式结束之后,当前目录会生成类似gpexpand_inputfile_20180627_171654的文件
$ cat gpexpand_inputfile_20180627_171654 # 查看文件内容,每个节点的primary和mirror数量应和之前的segment一致(每个节点有两个mirror,有两个primary)
$ gpexpand -i gpexpand_inputfile_20180627_172950 -D test_init_db # 使扩展文件生效,如果失败,执行gpstart -m命令启动master节点,再执行gpexpand -r -D test_init_db回滚
$ gpexpand -d 00:10:00 # 在10分钟内,重新分配数据
$ gpexpand -c # 确认数据重新分配完成后,使用该命令移除扩展schema
4、java链接使用 。
普通postgresql驱动链接:
官方驱动链接方式为:
官方驱动其他地方暂时没看到过,只能到官网下载,下载地址:https://network.pivotal.io/products/pivotal-gpdb#/releases/669/file_groups/178,里面还有关于驱动的使用文档。
使用方式与使用PostgreSQL的驱动一样,只需要换一下驱动类和连接URL。
官方驱动类(Data Source Class):com.pivotal.jdbc.GreenplumDriver
官方驱动连接URL(Connection URL):jdbc:pivotal:greenplum://host:port;DatabaseName=
结束
原文:https://blog.csdn.net/DongGeGe214/article/details/80923257
参考资料:
Greenplum 日常维护手册 http://blog.sina.com.cn/s/blog_6b1dfc870102w7r8.html
java用官方JDBC连接greenplum数据库:https://www.cnblogs.com/majinju/p/5002713.html
Java连接GreenPlum:https://blog.csdn.net/qq_21383435/article/details/81983847