Greenplum带优化器的源码编译、离线打包,部署和TPC-DS性能和并发测试

************************编译************************
centos 7, raid5, 文件系统xfs,xfs挂载参数:defaults,allocsize=16m,inode64,noatime

//安装gp依赖库
yum install -y \
    apr-devel \
    byacc \
    bison \
    bzip2-devel \
    cmake3 \
    flex \
    gcc \
    krb5-devel \
    libcurl-devel \
    libevent \
    libevent-devel \
    libkadm5 \
    libyaml-devel \
    libxml2-devel \
    openssl-devel \
    perl-ExtUtils-Embed \
    python-devel \
    python-pip \
    readline-devel \
    xerces-c-devel \
    zlib-devel \
    wget \
    lrzsz \
    dstat

//设置re2c、ninja和cmake命令路径。re2c被ninja依赖,ninja和cmake被gporca依赖,gporca被gp依赖。
export PATH="/root/re2c-1.1.1:/root/ninja-1.9.0:/root/cmake-3.13.4-Linux-x86_64/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin"

//安装re2c,ninja依赖它
cd
wget https://github.com/skvadrik/re2c/releases/download/1.1.1/re2c-1.1.1.tar.gz
tar -xzvf re2c-1.1.1.tar.gz
cd re2c-1.1.1
./configure
make && make install

//安装ninja,gporca依赖它
cd
wget https://github.com/ninja-build/ninja/archive/v1.9.0.tar.gz
tar -xzvf v1.9.0.tar.gz
cd ninja-1.9.0
./configure.py --bootstrap

//安装高版本cmake, gporca依赖它
cd
wget https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz
tar -xzvf cmake-3.13.4-Linux-x86_64.tar.gz

//安装gp-xerces,gporca依赖它
cd
wget https://github.com/greenplum-db/gp-xerces/archive/v3.1.2-p1.tar.gz
tar -xzvf v3.1.2-p1.tar.gz
cd gp-xerces-3.1.2-p1
mkdir build
cd build
../configure --prefix=/usr/local
make && make install

//下载gp,默认是enable gporca和pxf的,先查看依赖的gporca版本
cd
wget https://github.com/greenplum-db/gpdb/archive/5.17.0.tar.gz
tar -xzvf 5.17.0.tar.gz
cd gpdb-5.17.0
cd depends
//查看依赖的gporca版本
cat conanfile_orca.txt

//安装gporca,根据上一步看到的gp依赖的gporca版本选择下载,此按照方式不推荐,建议用后面的自动安装的方式
//cd
//wget https://github.com/greenplum-db/gporca/archive/v3.26.0.tar.gz
//tar -xzvf v3.26.0.tar.gz
//cd gporca-3.26.0/
//cmake -GNinja -H. -Bbuild
//ninja install -C build

//安装gporca和gpbackup,用官网推荐的自动安装依赖包的方式
cd
cd gpdb-5.17.0
pip install --ignore-installed pyparsing
pip install conan
cd depends
./configure --prefix=/usr/local
make && make install
cp -r build/include/* /usr/local/include
cp -r build/lib/*so* /usr/local/lib
//先查看gporca的so库路径是否在库搜索路径中,使configure中的测试程序可以找到gpopt的动态库位置,ldconfig使配置生效
ldconfig -v
echo "/usr/local/lib" > /etc/ld.so.conf.d/greenplum.conf
ldconfig
cd
cd gpdb-5.17.0
//在configure过程中有config.log生成,可看到哪些功能enable/disable
./configure --with-perl --with-python --with-libxml --with-gssapi --prefix=/usr/local/gpdb
make -j2
make -j2 install

参考:
https://www.cnblogs.com/qiannianyuan/p/greenplum_compile.html
https://github.com/greenplum-db/gpdb/issues/3163

centos中查看c++程序的头文件搜索路径:cpp -v
centos中查看库文件的搜索路径:ldconfig -v

************************部署************************
//master/standby节点不单独部署,部署在3个segment节点中的2台服务器上

//安装特定版本的python包,必须用gp提供的版本,否则各种版本不兼容问题
wget https://raw.githubusercontent.com/greenplum-db/gpdb/master/README.CentOS.bash
wget https://raw.githubusercontent.com/greenplum-db/gpdb/master/python-dependencies.txt
wget https://raw.githubusercontent.com/greenplum-db/gpdb/master/python-developer-dependencies.txt

mkdir packs
pip wheel --trusted-host pypi.douban.com -i http://pypi.douban.com/simple -r python-dependencies.txt -w packs
pip wheel --trusted-host pypi.douban.com -i http://pypi.douban.com/simple -r python-developer-dependencies.txt -w packs
    
pip install --no-index --find-links ./packs -r python-developer-dependencies.txt

//root用户,master节点
yum install gcc apr-devel byacc flex bison zlib-devel bzip2-devel curl-devel readline-devel krb5-devel libevent libevent-devel libkadm5 libyaml-devel libxml2-devel openssl-devel perl-ExtUtils-Embed xerces-c-devel python-devel ntp rsync psmisc lrzsz dstat -y

//上传部署包,有gp 编译版,有依赖的python包,有tpc-ds工具
cd /opt
rz gpdeploy.tar.gz
tar -xzvf gpdeploy.tar.gz
cd gpdeploy
tar -xzvf lib.tar.gz
cp -r lib/* /usr/local/lib
cp -r include/* /usr/local/include
tar -xzvf gpdb.tar.gz
cp -r gpdb /usr/local/

//安装gpdb/bin目录中的各种工具所依赖的python包
cd gpdeploy/pythondepends
python get-pip.py --no-index --find-links=./
pip install --no-index --find-links ./packs -r python-dependencies.txt
pip install --no-index --find-links ./packs -r python-developer-dependencies.txt

echo "xx.xx.x.xx1 mpp01
xx.xx.x.xx2 mpp02
xx.xx.x.xx3 mpp03" > /tmp/hosts.txt
#包含master/standby节点(共享服务器), segment节点
cat /tmp/hosts.txt | awk '{print $2}' > /tmp/hostfile_exkeys
cat /tmp/hosts.txt | tail -n+2| awk '{print $2}' > /tmp/hostfile_standby_segs
cat /tmp/hosts.txt | tail -n+1| awk '{print $2}' > /tmp/hostfile_segments

GPHOME=/usr/local/gpdb
export GPHOME
export 
cd $GPHOME/bin


#免密打通
gpssh-exkeys -f /tmp/hostfile_exkeys

# lengthen the time of timeout for the next step which will need more time.
sed -r -i 's/timeout=30/timeout=90/g' $GPHOME/bin/lib/pexpect/pxssh.py

#install library
gpssh -f /tmp/hostfile_standby_segs -e 'yum install gcc apr-devel byacc flex bison zlib-devel bzip2-devel curl-devel readline-devel krb5-devel libevent libevent-devel libkadm5 libyaml-devel libxml2-devel openssl-devel perl-ExtUtils-Embed xerces-c-devel python-devel ntp rsync psmisc lrzsz dstat -y'

echo "/usr/local/lib" > /etc/ld.so.conf.d/greenplum.conf
ldconfig
gpscp -f /tmp/hostfile_standby_segs /etc/ld.so.conf.d/greenplum.conf =:/etc/ld.so.conf.d/greenplum.conf
gpscp -f /tmp/hostfile_standby_segs /opt/gpdeploy.tar.gz =:/opt/
gpssh -f /tmp/hostfile_standby_segs -e '
cd /opt/;
tar -xzvf gpdeploy.tar.gz;
cd gpdeploy/pythondepends;
python get-pip.py --no-index --find-links=./
pip install --no-index --find-links ./packs -r python-dependencies.txt;
pip install --no-index --find-links ./packs -r python-developer-dependencies.txt;
cd ../
tar -xzvf lib.tar.gz
cp -r lib/* /usr/local/lib
cp -r include/* /usr/local/include
'

#4. 在mdw节点,root用户下,设置系统环境
echo "kernel.shmmax = 500000000
kernel.shmmni = 4096
kernel.shmall = 4000000000
kernel.sem = 500 1024000 200 4096
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.ip_forward = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.ipv4.ip_local_port_range = 1025 65535
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
vm.overcommit_memory = 2" >> /etc/sysctl.conf

echo "
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
" >> /etc/security/limits.conf

echo "
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
" >> /etc/security/limits.d/90-nproc.conf

#时钟同步
sed -i '1i\server mpp1' /etc/ntp.conf

echo deadline > /sys/block/sda/queue/scheduler
#设置完之后, sda1, sda2, sdax用--gtera查看都改变为16384
/sbin/blockdev --setra 16384 /dev/sda

gpscp -f /tmp/hostfile_standby_segs /etc/sysctl.conf =:/etc/sysctl.conf
gpscp -f /tmp/hostfile_standby_segs /etc/security/limits.conf =:/etc/security/limits.conf
gpscp -f /tmp/hostfile_standby_segs /etc/security/limits.d/90-nproc.conf =:/etc/security/limits.d/90-nproc.conf
gpscp -f /tmp/hostfile_standby_segs /etc/ntp.conf =:/etc/ntp.conf
gpssh -f /tmp/hostfile_standby_segs -e 'echo deadline > /sys/block/sda/queue/scheduler'
gpssh -f /tmp/hostfile_standby_segs -e '/sbin/blockdev --setra 16384 /dev/sda'
gpssh -f /tmp/hostfile_exkeys -e 'ntpd'
//配置修改立即生效
sysctl -p
gpssh -f /tmp/hostfile_exkeys -e 'sysctl -p'

# gp distribution dir will be copied in gpseginstall
gpseginstall -f /tmp/hostfile_exkeys -u gpadmin -p bfdadmin@cty

mkdir -p /data/master; chown gpadmin:gpadmin /data/master
gpssh -h mpp02 -e 'mkdir -p /data/master; chown gpadmin:gpadmin /data/master'
gpssh -f /tmp/hostfile_segments -e 'mkdir -p /data1/primary; chown gpadmin:gpadmin /data1/primary '
gpssh -f /tmp/hostfile_segments -e 'mkdir -p /data2/primary; chown gpadmin:gpadmin /data2/primary'
gpssh -f /tmp/hostfile_segments -e 'mkdir -p /data3/mirror; chown gpadmin:gpadmin /data3/mirror'
gpssh -f /tmp/hostfile_segments -e 'mkdir -p /data4/mirror; chown gpadmin:gpadmin /data4/mirror'
chmod u+s /bin/ping
gpssh -f /tmp/hostfile_exkeys '
systemctl stop firewalld.service
systemctl disable firewalld.service
'

#check gp has copied to mpp2 and segs. 
gpssh -f /tmp/hostfile_exkeys -e ls -l $GPHOME
#check os para is correct
#gpcheckos -f /tmp/hostfile_exkeys -m mdw -s sdw1
#gpcheckperf -f /tmp/hostfile_segments -r n -d /tmp >test.out

#5. 在mdw节点,gpadmin用户下,设置应用变量并启动
su gpadmin
cd
source $GPHOME/greenplum_path.sh >> ~/.bashrc
mkdir gpconfigs
cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config /home/gpadmin/gpconfigs/gpinitsystem_config

sed -r -i 's/declare -a DATA_DIRECTORY=(.*$)/declare -a DATA_DIRECTORY=(\/data1\/primary \/data1\/primary \/data1\/primary \/data2\/primary \/data2\/primary \/data2\/primary \/data3\/primary \/data3\/primary \/data3\/primary \/data4\/primary \/data4\/primary \/data4\/primary)/g' gpconfigs/gpinitsystem_config
sed -r -i 's/#declare -a MIRROR_DATA_DIRECTORY=(.*$)/declare -a MIRROR_DATA_DIRECTORY=(\/data1\/mirror \/data1\/mirror \/data1\/mirror \/data2\/mirror \/data2\/mirror \/data2\/mirror \/data3\/mirror \/data3\/mirror \/data3\/mirror \/data4\/mirror \/data4\/mirror \/data4\/mirror)/g' gpconfigs/gpinitsystem_config
sed -r -i 's/MASTER_HOSTNAME=mdw/MASTER_HOSTNAME=mpp01/g' gpconfigs/gpinitsystem_config
sed -r -i 's/CHECK_POINT_SEGMENTS=.*$/CHECK_POINT_SEGMENTS=64/g' gpconfigs/gpinitsystem_config
sed -r -i 's/ENCODING=.*$/ENCODING=UTF8/g' gpconfigs/gpinitsystem_config
sed -r -i 's/#MIRROR_PORT_BASE/MIRROR_PORT_BASE/g' gpconfigs/gpinitsystem_config
sed -r -i 's/#REPLICATION_PORT_BASE/REPLICATION_PORT_BASE/g' gpconfigs/gpinitsystem_config
sed -r -i 's/#MIRROR_REPLICATION_PORT_BASE/MIRROR_REPLICATION_PORT_BASE/g' gpconfigs/gpinitsystem_config
sed -r -i 's/#DATABASE_NAME=name_of_database/DATABASE_NAME=initdb/g' gpconfigs/gpinitsystem_config

//-s mpp02是初始化时将mpp02作为standby节点,-S是用spread mirror模型
gpinitsystem -c gpconfigs/gpinitsystem_config -h /tmp/hostfile_segments -a -s mpp02 -S
//如果系统安装失败,用此命令停止gp进程, 清理各节点数据目录和备份文件
//gpdeletesystem

echo "export MASTER_DATA_DIRECTORY=/data/master/gpseg-1" >> ~/.bashrc
source ~/.bashrc
//检查master/standby/segments系统状态,standby节点为passive状态,3节点spread mirror模型下预期每节点2个primary/mirror(集群服务器数量-1),共(2+2)*3=12个segments
gpstate -d /data/master/gpseg-1

#6. 登陆启停看状态
psql -h mpp01 -U gpadmin -d postgres
gpstart -a
gpstop -a
gpstate

************************测试************************
测试工具下载地址:https://github.com/pivotalguru/TPC-DS
//tpcds.sh是入口
cd /opt/gpdeploy/TPC-DS
//在TPC-DS/tpcds.sh中注释掉check_user, yum_installs repo_init, script_check, echo_variables函数,保留check_variables函数
//在TPC-DS/functions.sh中去掉
yum insall gcc git bc -y
cd ../
mkdir /pivitalguru
cp -r TPC-DS /pivitalguru
chown -R gpadmin:gpadmin /pivitalguru/TPC-DS

//官方测试脚本修改:
TPC-DS/functions.sh文件中将
quicklz_test=$(psql -v ON_ERROR_STOP=1 -t -A -c "SELECT COUNT(*) FROM pg_compression WHERE compname = 'quicklz'")
修改为
quicklz_test=0

//设置当前默认数据库,这样psql命令可以不加-d xxxdbname
echo "export PGDATABASE=initdb" >> ~/.bashrc
echo "source $GPHOME/greenplum_path.sh" >> ~/.bashrc

echo "GEN_DATA_SCALE=100" >> tpcds_variables.sh
echo "INSTALL_DIR=/pivotalguru" >> tpcds_variables.sh

cp -r 
//tpcds.sh根据tpcds_variables.sh已有设置补全全部默认设置
./tpcds.sh
//再次执行,tpcds.sh根据全部设置开始正式执行脚本
./tpcds.sh


//测试参数
数据量:
GEN_DATA_SCALE=100

单用户测试相关:
RUN_SQL=true  //是否测试单用户
SINGLE_USER_ITERATIONS=3 //单用户的测试是否测试多次然后取平均值
RUN_SINGLE_USER_REPORT=true //单用户测试完是否输出报告

多用户测试相关:
RUN_MULTI_USER=true //是否测试多用户
MULTI_USER_COUNT=10  //多用户数量设置为n
RUN_MULTI_USER_REPORT=true //是否输出多用户测试报告

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值