[omm@gdb1 gaussdb]$ cat *xml
<?xml version="1.0" encoding="UTF-8"?><ROOT>
<CLUSTER>
<PARAM name="clusterName" value="gaussdb"/>
<PARAM name="nodeNames" value="gdb1,gdb2"/>
<PARAM name="gaussdbAppPath" value="/gdb/gaussdb/app"/>
<PARAM name="gaussdbLogPath" value="/var/log/gaussdb"/>
<PARAM name="archiveLogPath" value="/gdb/gaussdb/data/arch_log"/>
<PARAM name="redoLogPath" value="/gdb/gaussdb/data/redo_log"/>
<PARAM name="tmpMppdbPath" value="/gdb/gaussdb/data/temp"/>
<PARAM name="gaussdbToolPath" value="/gdb/gaussdb/data/gaussTools"/>
<PARAM name="datanodeType" value="DN_ZENITH_HA"/>
<PARAM name="WhetherDoFailoverAuto" value="OFF"/>
<PARAM name="clusterType" value="mutil-AZ"/>
<PARAM name="Ha2Node" value="true"/>
<PARAM name="ServiceType" value="SingleService"/>
<PARAM name="CMAgentPingTryTime" value="3"/>
<PARAM name="CMAgentPingInterval" value="5"/>
<PARAM name="GatewayIP" value="192.168.1.2"/>
<PARAM name="SetDoubleIPForETCD" value="false"/>
</CLUSTER>
<DEVICELIST>
<DEVICE sn="1000001">
<PARAM name="name" value="gdb1"/>
<PARAM name="azName" value="AZ1"/>
<PARAM name="azPriority" value="1"/>
<PARAM name="backIp1" value="192.168.1.77"/>
<PARAM name="sshIp1" value="192.168.1.77"/>
<PARAM name="agentlsnPort" value=""/>
<PARAM name="tcplsnPort" value="1868"/>
<PARAM name="serverlsnPort" value="1873"/>
<PARAM name="cmsNum" value="1"/>
<PARAM name="cmServerListenIp1" value="192.168.1.77,192.168.1.78"/>
<PARAM name="cmServerHaIp1" value="192.168.1.77,192.168.1.78"/>
<PARAM name="cmServerlevel" value="1"/>
<PARAM name="cmServerRelation" value="gdb1,gdb2"/>
<PARAM name="cmDir" value="/gdb/gaussdb/data/data_cm"/>
<PARAM name="dataNum" value="1"/>
<PARAM name="dataPortBase" value="40000"/>
<PARAM name="floatIp1" value="192.168.1.79"/>
<PARAM name="dataNode1" value="/gdb/gaussdb/data_db/dn1,gdb2,/gdb/gaussdb/data_db/dn1"/>
<PARAM name="quorumAny1" value="1"/>
<PARAM name="etcdNum" value="2"/>
<PARAM name="etcdListenPort" value="2379"/>
<PARAM name="etcdListenIp1" value="192.168.1.77"/>
<PARAM name="etcdHaIp1" value="192.168.1.77"/>
<PARAM name="etcdDir1" value="/gdb/gaussdb/data_etcd1/data"/>
<PARAM name="etcdDir2" value="/gdb/gaussdb/data_etcd2/data"/>
</DEVICE>
<DEVICE sn="1000002">
<PARAM name="name" value="gdb2"/>
<PARAM name="azName" value="AZ1"/>
<PARAM name="azPriority" value="1"/>
<PARAM name="backIp1" value="192.168.1.78"/>
<PARAM name="sshIp1" value="192.168.1.78"/>
<PARAM name="agentlsnPort" value=""/>
<PARAM name="tcplsnPort" value="1868"/>
<PARAM name="serverlsnPort" value="1873"/>
<PARAM name="etcdNum" value="1"/>
<PARAM name="etcdListenPort" value="2379"/>
<PARAM name="etcdListenIp1" value="192.168.1.78"/>
<PARAM name="etcdHaIp1" value="192.168.1.78"/>
<PARAM name="etcdDir1" value="/gdb/gaussdb/data_etcd1/data"/>
</DEVICE>
</DEVICELIST>
</ROOT>
[root@gdb1 gaussdb]# cd sc*
[root@gdb1 script]# ./gs_preinstall -U omm -G dbgrp -X /gdb/software/gaussdb/clusterconfig.xml
Parsing the configuration file.
Successfully parsed the configuration file.
Installing the tools on the local node.
Successfully installed the tools on the local node.
Are you sure you want to create trust for root (yes/no)? yes
Please enter password for root.
Password:
Creating SSH trust for the root permission user.
Successfully created SSH trust for the root permission user.
All host RAM is consistent
Distributing package.
Successfully distributed package.
Are you sure you want to create the user[omm] and create trust for it (yes/no)? yes
Installing the tools in the cluster.
Successfully installed the tools in the cluster.
Creating SSH trust for [omm] user.
Please enter password for omm.
Password:
Start creating SSH trust for [omm] user.
Successfully created SSH trust for [omm] user.
Checking system resource.
Successfully check system resource.
Checking hostname mapping.
Successfully checked hostname mapping.
Checking OS version.
Successfully checked OS version.
Creating cluster's path.
Successfully created cluster's path.
Config float IP services.
Successfully config float IP services.
Set and check OS parameter.
Setting OS parameters.
Successfully set OS parameters.
Warning: Installation environment contains some warning messages.
Please get more details by "/gdb/software/gaussdb/script/gs_checkos -i A -h gdb1,gdb2 -X /gdb/software/gaussdb/clusterconfig.xml".
Set and check OS parameter completed.
Preparing CRON service.
Successfully prepared CRON service.
Preparing SSH service.
Successfully prepared SSH service.
Setting user environmental variables.
Successfully set user environmental variables.
Configuring alarms on the cluster nodes.
Successfully configured alarms on the cluster nodes.
Setting the dynamic link library.
Successfully set the dynamic link library.
Fixing server package owner.
Successfully fixed server package owner.
Create ssh crontab.
Successfully create ssh crontab.
Create logrotate service.
Successfully create logrotate service.
Setting finish flag.
Successfully set finish flag.
check time consistency.
Warning: The ntpd service is abnormal. Please get more details by "/gdb/software/gaussdb/script/gs_checkos -i A12 -h gdb1,gdb2 -X /gdb/software/gaussdb/clusterconfig.xml --detail-all".
Clean SSH trust for the root permission user.
Successfully cleaned SSH trust for the root permission user.
Preinstallation succeeded.
[root@gdb1 script]# /gdb/software/gaussdb/script/gs_checkos -i A12 -h gdb1,gdb2 -X /gdb/software/gaussdb/clusterconfig.xml --detail-all
Root permission user has not SSH trust, create it when do checkos in remote node.
Creating SSH trust for the root permission user.
Please enter password for root.
Password:
Successfully creating SSH trust for the root permission user.
Checking items
A12.[ Time consistency status ] : Warning
[gdb1]
Variable:'current system time'RealValue:'2022-11-15 00:43:23"' ExpectedValue:'2022-11-15 00:43:23"'. The NTP is not synchronized, and is not set during startup. [Warning]
[gdb2]
Variable:'current system time'RealValue:'2022-11-15 00:43:23"' ExpectedValue:'2022-11-15 00:43:23"'. The NTP is not synchronized, and is not set during startup. [Warning]
Total numbers:1. Abnormal numbers:0. Warning numbers:1.
Clean SSH trust for the root permission user.
Successfully clean SSH trust for the root permission user.
[root@gdb1 script]# service ntpd start
Redirecting to /bin/systemctl start ntpd.service
[root@gdb1 script]#
[root@gdb1 script]# service ntpd start
Redirecting to /bin/systemctl start ntpd.service
[root@gdb1 script]#
[root@gdb1 script]# service ntpd start
Redirecting to /bin/systemctl start ntpd.service
[root@gdb1 script]#
[root@gdb1 script]# ntpstat
unsynchronised
time server re-starting
polling server every 8 s
[root@gdb1 script]# su - omm
Last login: Tue Nov 15 00:41:14 CST 2022
[omm@gdb1 ~]$ ls
gaussdb_tmp
[omm@gdb1 ~]$ cd /gdb
[omm@gdb1 gdb]$ ls
gaussdb software
[omm@gdb1 gdb]$ cd s*
[omm@gdb1 software]$ ls
gaussdb
[omm@gdb1 software]$ cd g*db
[omm@gdb1 gaussdb]$ ls
clusterconfig.xml GaussDB_T_1.2.1-CLIENT-JDBC.tar GaussDB_T_1.2.1-CM-CENTOS-64bit.tar.gz GaussDB_T_1.2.1-TOOLS.tar python-3.9.2
GaussDB-install-Package-bak.tar GaussDB_T_1.2.1-CLIENT-ODBC-CENTOS-64bit.tar GaussDB_T_1.2.1-DATABASE-CENTOS-64bit.tar GaussDB_T_1.2.1-TOOLS.tar.gz python-3.9.2.tar
GaussDB-install-Package-bak.tar.gz GaussDB_T_1.2.1-CLIENT-PYTHON3-CENTOS-64bit.tar GaussDB_T_1.2.1-DATABASE-CENTOS-64bit.tar.gz GaussDB_T_1.2.1-ZSQL-CENTOS-64bit.tar script
GaussDB_T_1.2.1-CENTOS7.9-X86.tar GaussDB_T_1.2.1-CLIENT-PYTHON3-CENTOS-64bit.tar.gz GaussDB_T_1.2.1-ROACH-CENTOS-64bit.tar GaussDB_T-Upgrade-WhiteList.txt shardingscript
GaussDB_T_1.2.1-CLIENT-C-CENTOS-64bit.tar GaussDB_T_1.2.1-CLUSTER-CENTOS-64bit.tar GaussDB_T_1.2.1-ROACH-CENTOS-64bit.tar.gz jdk-8u302-linux-x64.tar template
GaussDB_T_1.2.1-CLIENT-GO.tar GaussDB_T_1.2.1-CM-CENTOS-64bit.tar GaussDB_T_1.2.1-TOOLS lib
[omm@gdb1 gaussdb]$ cd sc*
-bash: cd: script: Permission denied
[omm@gdb1 gaussdb]$ ls -ltr
total 986852
drwx------ 2 root root 90 Dec 4 2020 template
drwx------ 2 root root 90 Dec 4 2020 GaussDB_T_1.2.1-TOOLS
-rw------- 1 root root 175738 Dec 4 2020 GaussDB_T_1.2.1-ROACH-CENTOS-64bit.tar.gz
-rw------- 1 root root 10698233 Dec 4 2020 GaussDB_T_1.2.1-DATABASE-CENTOS-64bit.tar.gz
-rw------- 1 root root 33240499 Dec 4 2020 GaussDB_T_1.2.1-CM-CENTOS-64bit.tar.gz
drwxr-xr-x 6 root root 56 Jul 1 2021 python-3.9.2
-rwxr-xr-x 1 root root 79462400 Sep 19 11:28 GaussDB_T_1.2.1-CENTOS7.9-X86.tar
-rw------- 1 root root 16690402 Sep 19 11:28 GaussDB_T_1.2.1-TOOLS.tar.gz
-rwxr-xr-x 1 root root 16711680 Sep 19 11:28 GaussDB_T_1.2.1-TOOLS.tar
-rwxr-xr-x 1 root root 184320 Sep 19 11:28 GaussDB_T_1.2.1-ROACH-CENTOS-64bit.tar
-rwxr-xr-x 1 root root 11223040 Sep 19 11:28 GaussDB_T_1.2.1-DATABASE-CENTOS-64bit.tar
-rwxr-xr-x 1 root root 33730560 Sep 19 11:28 GaussDB_T_1.2.1-CM-CENTOS-64bit.tar
-rwxr-xr-x 1 root root 55572480 Sep 19 11:28 GaussDB_T_1.2.1-CLUSTER-CENTOS-64bit.tar
-rw------- 1 root root 2128436 Sep 19 11:28 GaussDB_T_1.2.1-CLIENT-PYTHON3-CENTOS-64bit.tar.gz
-rwxr-xr-x 1 root root 5017600 Sep 19 11:28 GaussDB_T_1.2.1-CLIENT-PYTHON3-CENTOS-64bit.tar
-rwxr-xr-x 1 root root 4802560 Sep 19 11:28 GaussDB_T_1.2.1-CLIENT-ODBC-CENTOS-64bit.tar
-rwxr-xr-x 1 root root 522240 Sep 19 11:28 GaussDB_T_1.2.1-CLIENT-JDBC.tar
-rwxr-xr-x 1 root root 10168320 Sep 19 11:28 GaussDB_T_1.2.1-CLIENT-GO.tar
-rwxr-xr-x 1 root root 4956160 Sep 19 11:28 GaussDB_T_1.2.1-CLIENT-C-CENTOS-64bit.tar
-rw------- 1 root root 533 Sep 19 11:28 GaussDB_T-Upgrade-WhiteList.txt
-rwxr-xr-x 1 root root 13588480 Sep 19 11:28 GaussDB_T_1.2.1-ZSQL-CENTOS-64bit.tar
drwx------ 8 root root 191 Sep 19 11:28 lib
-rwxr-xr-x 1 root root 315279360 Sep 19 11:28 jdk-8u302-linux-x64.tar
drwx------ 4 root root 35 Sep 19 11:28 shardingscript
-rwxr-xr-x 1 root root 255815680 Sep 19 11:28 python-3.9.2.tar
drwx------ 6 root root 4096 Sep 19 16:18 script
-rwxr-xr-x 1 root root 74782720 Sep 19 16:31 GaussDB-install-Package-bak.tar
-rwxr-x--- 1 root dbgrp 2932 Nov 15 00:36 clusterconfig.xml
-rw------- 1 root root 65749349 Nov 15 00:38 GaussDB-install-Package-bak.tar.gz
[omm@gdb1 gaussdb]$ cd sc*
[omm@gdb1 script]$ ls
config gs_check gs_checkos gs_collector gs_gucZenith gs_om gs_preinstall gs_replace gs_sshexkey gs_upgradectl hostname.txt __init__.py
gs_backup gs_checkclusterinfo gs_checkperf gs_expand gs_install gs_postuninstall gspylib gs_shrink gs_uninstall gs_wsr impl local
[omm@gdb1 script]$ ./gs_install -X /gdb/software/gaussdb/clusterconfig.xml
Parsing the configuration file.
Check preinstall on every node.
Successfully checked preinstall on every node.
Creating the backup directory.
Successfully created the backup directory.
Check the time difference between hosts in the cluster.
Please enter database password of user [SYS]:
Password:
Please enter database password of user [SYS] again:
Password:
Please enter password for dbuser.
Password:
Please enter password for dbuser again.
Password:
Installing the cluster.
Check install cluster condition.
Successfully check install cluster condition.
Installing applications on all nodes.
Successfully installed APP.
Please enter password for etcd.
Password:
The password must meet the following requirements:
1. The password does not contain invalid characters.
2. The password contains 16-64 characters
3. The password should contain 3 speciesof uppercase or lowercase letters, or numbers, or special characters.
Please enter password for etcd.
Password:
Please enter password for etcd again.
Password:
Initializing cluster instances
Initializing ETCD instance.
Distribute etcd communication keys.
Successfully distribute etcd communication keys.
Check the status of ETCD cluster.
Successfully initialize ETCD instance.
Generate ssl certificate.
Successfully generate ssl certificate.
Initializing database instance.
.61s
Initializing cluster instances is completed. .Configuring standby datanode.
...................79s
Successfully configure datanode. .Stop database instance.
........8s
Successfully stop database instance.
Configuring.
Configuring the cluster. .Successfully configuring the cluster.
Configuration is completed.
Load cluster configuration file.
Start cm agent.
Successfully start cm agent and ETCD in cluster.
Warning: Auto failover switch closed.
Starting the cluster.
==============================================
.............13s
Successfully starting the cluster.
============================================== .[o
[omm@gdb1 script]$ gs_om -t status
--------------------------------------------------------------------Cluster Status--------------------------------------------------------------------
az_state : single_az
cluster_state : Normal
balanced : true
----------------------------------------------------------------------AZ Status-----------------------------------------------------------------------
AZ:AZ1 Priority:1 STATUS:ONLINE REGION:local
---------------------------------------------------------------------Host Status----------------------------------------------------------------------
HOST:gdb1 AZ:AZ1 STATUS:ONLINE IP:192.168.1.77
HOST:gdb2 AZ:AZ1 STATUS:ONLINE IP:192.168.1.78
----------------------------------------------------------------Cluster Manager Status----------------------------------------------------------------
INSTANCE:CM1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:601
INSTANCE:CM2 ROLE:slave STATUS:ONLINE HOST:gdb2 ID:602
---------------------------------------------------------------------ETCD Status----------------------------------------------------------------------
INSTANCE:ETCD1 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:701 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
INSTANCE:ETCD2 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:702 PORT:2381 DataDir:/gdb/gaussdb/data_etcd2/data
INSTANCE:ETCD3 ROLE:leader STATUS:ONLINE HOST:gdb2 ID:703 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
---------------------------------------------------------Instances Status in Group (group_1)----------------------------------------------------------
INSTANCE:DB1_1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:1 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
INSTANCE:DB1_2 ROLE:standby STATUS:ONLINE HOST:gdb2 ID:2 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
-----------------------------------------------------------------------Manage IP----------------------------------------------------------------------
HOST:gdb1 IP:192.168.1.77
HOST:gdb2 IP:192.168.1.78
-------------------------------------------------------------------Query Action Info------------------------------------------------------------------
HOSTNAME: gdb1 TIME: 2022-11-15 00:52:30.178685
------------------------------------------------------------------------Float Ip------------------------------------------------------------------
HOST:gdb1 DB1_1:192.168.1.77 FloatIP:192.168.1.79 ManageFloatIP:
[omm@gdb1 script]$ gs_om -t status
--------------------------------------------------------------------Cluster Status--------------------------------------------------------------------
az_state : single_az
cluster_state : Normal
balanced : true
----------------------------------------------------------------------AZ Status-----------------------------------------------------------------------
AZ:AZ1 Priority:1 STATUS:ONLINE REGION:local
---------------------------------------------------------------------Host Status----------------------------------------------------------------------
HOST:gdb1 AZ:AZ1 STATUS:ONLINE IP:192.168.1.77
HOST:gdb2 AZ:AZ1 STATUS:ONLINE IP:192.168.1.78
----------------------------------------------------------------Cluster Manager Status----------------------------------------------------------------
INSTANCE:CM1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:601
INSTANCE:CM2 ROLE:slave STATUS:ONLINE HOST:gdb2 ID:602
---------------------------------------------------------------------ETCD Status----------------------------------------------------------------------
INSTANCE:ETCD1 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:701 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
INSTANCE:ETCD2 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:702 PORT:2381 DataDir:/gdb/gaussdb/data_etcd2/data
INSTANCE:ETCD3 ROLE:leader STATUS:ONLINE HOST:gdb2 ID:703 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
---------------------------------------------------------Instances Status in Group (group_1)----------------------------------------------------------
INSTANCE:DB1_1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:1 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
INSTANCE:DB1_2 ROLE:standby STATUS:ONLINE HOST:gdb2 ID:2 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
-----------------------------------------------------------------------Manage IP----------------------------------------------------------------------
HOST:gdb1 IP:192.168.1.77
HOST:gdb2 IP:192.168.1.78
-------------------------------------------------------------------Query Action Info------------------------------------------------------------------
HOSTNAME: gdb1 TIME: 2022-11-15 00:53:42.527772
------------------------------------------------------------------------Float Ip------------------------------------------------------------------
HOST:gdb1 DB1_1:192.168.1.77 FloatIP:192.168.1.79 ManageFloatIP:
[omm@gdb1 script]$
[omm@gdb1 script]$ gs_om -t stop -h gdb2
Stopping node
=========================================
2022-11-15 00:53:59+08:00 [info] stop [user:root][host:(192.168.1.1)].
2022-11-15 00:53:59+08:00 [info] stop instance () on host (gdb2).
2022-11-15 00:54:00+08:00 [info] stop (AZ1/gdb2/CM2) successfully.
2022-11-15 00:54:07+08:00 [info] stop (AZ1/gdb2/DB1_2) successfully.
2022-11-15 00:54:07+08:00 [info] successfully to stop host(gdb2).
Successfully stopped node.
=========================================
End stop node.
[omm@gdb1 script]$ gs_om -t status
--------------------------------------------------------------------Cluster Status--------------------------------------------------------------------
az_state : single_az
cluster_state : Degraded
balanced : true
----------------------------------------------------------------------AZ Status-----------------------------------------------------------------------
AZ:AZ1 Priority:1 STATUS:ONLINE REGION:local
---------------------------------------------------------------------Host Status----------------------------------------------------------------------
HOST:gdb1 AZ:AZ1 STATUS:ONLINE IP:192.168.1.77
HOST:gdb2 AZ:AZ1 STATUS:ONLINE IP:192.168.1.78
----------------------------------------------------------------Cluster Manager Status----------------------------------------------------------------
INSTANCE:CM1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:601
INSTANCE:CM2 ROLE:slave STATUS:STOPPED HOST:gdb2 ID:602
---------------------------------------------------------------------ETCD Status----------------------------------------------------------------------
INSTANCE:ETCD1 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:701 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
INSTANCE:ETCD2 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:702 PORT:2381 DataDir:/gdb/gaussdb/data_etcd2/data
INSTANCE:ETCD3 ROLE:leader STATUS:ONLINE HOST:gdb2 ID:703 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
---------------------------------------------------------Instances Status in Group (group_1)----------------------------------------------------------
INSTANCE:DB1_1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:1 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
INSTANCE:DB1_2 ROLE:standby STATUS:STOPPED HOST:gdb2 ID:2 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
-----------------------------------------------------------------------Manage IP----------------------------------------------------------------------
HOST:gdb1 IP:192.168.1.77
HOST:gdb2 IP:192.168.1.78
-------------------------------------------------------------------Query Action Info------------------------------------------------------------------
HOSTNAME: gdb1 TIME: 2022-11-15 00:54:12.646021
------------------------------------------------------------------------Float Ip------------------------------------------------------------------
HOST:gdb1 DB1_1:192.168.1.77 FloatIP:192.168.1.79 ManageFloatIP:
[omm@gdb1 script]$ gs_om -t start -h gdb2
Starting node
=========================================
2022-11-15 00:54:35+08:00 [info] start [user:root][host:(192.168.1.1)].
2022-11-15 00:54:35+08:00 [info] start Instance () on host (gdb2).
2022-11-15 00:54:44+08:00 [info] start (AZ1/gdb2/DB1_2) as role(standby) successfully.
2022-11-15 00:54:44+08:00 [info] start (AZ1/gdb2/CM2) successfully.
2022-11-15 00:54:44+08:00 [warning] start instance(ETCD3) but instance status is ONLINE.
2022-11-15 00:54:44+08:00 [info] start host(gdb2) successfully.
Successfully started node.
=========================================
End started node.
[omm@gdb1 script]$ gs_om -t status
--------------------------------------------------------------------Cluster Status--------------------------------------------------------------------
az_state : single_az
cluster_state : Normal
balanced : true
----------------------------------------------------------------------AZ Status-----------------------------------------------------------------------
AZ:AZ1 Priority:1 STATUS:ONLINE REGION:local
---------------------------------------------------------------------Host Status----------------------------------------------------------------------
HOST:gdb1 AZ:AZ1 STATUS:ONLINE IP:192.168.1.77
HOST:gdb2 AZ:AZ1 STATUS:ONLINE IP:192.168.1.78
----------------------------------------------------------------Cluster Manager Status----------------------------------------------------------------
INSTANCE:CM1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:601
INSTANCE:CM2 ROLE:slave STATUS:ONLINE HOST:gdb2 ID:602
---------------------------------------------------------------------ETCD Status----------------------------------------------------------------------
INSTANCE:ETCD1 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:701 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
INSTANCE:ETCD2 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:702 PORT:2381 DataDir:/gdb/gaussdb/data_etcd2/data
INSTANCE:ETCD3 ROLE:leader STATUS:ONLINE HOST:gdb2 ID:703 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
---------------------------------------------------------Instances Status in Group (group_1)----------------------------------------------------------
INSTANCE:DB1_1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:1 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
INSTANCE:DB1_2 ROLE:standby STATUS:ONLINE HOST:gdb2 ID:2 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
-----------------------------------------------------------------------Manage IP----------------------------------------------------------------------
HOST:gdb1 IP:192.168.1.77
HOST:gdb2 IP:192.168.1.78
-------------------------------------------------------------------Query Action Info------------------------------------------------------------------
HOSTNAME: gdb1 TIME: 2022-11-15 00:55:31.257398
------------------------------------------------------------------------Float Ip------------------------------------------------------------------
HOST:gdb1 DB1_1:192.168.1.77 FloatIP:192.168.1.79 ManageFloatIP:
[omm@gdb1 script]$ gs_om -t stop
Stopping cluster
=========================================
2022-11-15 00:56:11+08:00 [info] stop [user:root][host:(192.168.1.1)].
2022-11-15 00:56:11+08:00 [info] prepare to stop all instances on (2) hosts.
2022-11-15 00:56:11+08:00 [info] stop (AZ1/gdb2/CM2) successfully.
2022-11-15 00:56:11+08:00 [info] stop (AZ1/gdb1/CM1) successfully.
2022-11-15 00:56:16+08:00 [info] stop (AZ1/gdb2/DB1_2) successfully.
2022-11-15 00:56:23+08:00 [info] stop (AZ1/gdb1/DB1_1) successfully.
2022-11-15 00:56:23+08:00 [info] all cm is not online, set az state cm_stopped.
Successfully stopped cluster.
=========================================
End stop cluster.
[omm@gdb1 script]$ gs_om -t stopetcd
Stop etcd is a high-risk operation. Are you sure you want to continue (yes/no)?yes
Checking the cluster condition for stopping etcd.
Check cluster is not running for stop etcd.
Successfully check Cluster status for stop etcd.
Successfully checked the cluster condition for stopping etcd.
Stopping etcd in the cluster.
Clean old cm and etcd for cluster.
Successfully clean old cm and etcd for cluster.
Successfully stopped etcd in the cluster.
[omm@gdb1 script]$ gs_om -t startetcd
Restart etcd in the cluster.
Clean old cm and etcd for cluster.
Successfully clean old cm and etcd for cluster.
Restart etcd for cluster.
Successfully restart etcd for cluster.
Check etcd cluster status.
Restart cm agent for cluster.
Successfully restart cm agent for cluster.
Successfully restart etcd in the cluster.
[omm@gdb1 script]$ gs_om -t start
Starting cluster
=========================================
2022-11-15 00:57:15+08:00 [info] start [user:root][host:(192.168.1.1)].
2022-11-15 00:57:15+08:00 [info] start database role (primary) on every group in parallel.
2022-11-15 00:57:19+08:00 [info] start (AZ1/gdb1/DB1_1) as role(primary) successfully.
2022-11-15 00:57:19+08:00 [info] start database role (standby) on every group in parallel.
2022-11-15 00:57:28+08:00 [info] start (AZ1/gdb2/DB1_2) as role(standby) successfully.
2022-11-15 00:57:28+08:00 [info] start instance type (cluster_manager) on every host in parallel.
2022-11-15 00:57:28+08:00 [info] start (AZ1/gdb2/CM2) successfully.
2022-11-15 00:57:28+08:00 [info] start (AZ1/gdb1/CM1) successfully.
cluster_state : Normal
Successfully started cluster.
=========================================
End started cluster.
[omm@gdb1 script]$ gs_om -t status
--------------------------------------------------------------------Cluster Status--------------------------------------------------------------------
az_state : single_az
cluster_state : Normal
balanced : true
----------------------------------------------------------------------AZ Status-----------------------------------------------------------------------
AZ:AZ1 Priority:1 STATUS:ONLINE REGION:local
---------------------------------------------------------------------Host Status----------------------------------------------------------------------
HOST:gdb1 AZ:AZ1 STATUS:ONLINE IP:192.168.1.77
HOST:gdb2 AZ:AZ1 STATUS:ONLINE IP:192.168.1.78
----------------------------------------------------------------Cluster Manager Status----------------------------------------------------------------
INSTANCE:CM1 ROLE:slave STATUS:ONLINE HOST:gdb1 ID:601
INSTANCE:CM2 ROLE:primary STATUS:ONLINE HOST:gdb2 ID:602
---------------------------------------------------------------------ETCD Status----------------------------------------------------------------------
INSTANCE:ETCD1 ROLE:leader STATUS:ONLINE HOST:gdb1 ID:701 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
INSTANCE:ETCD2 ROLE:follower STATUS:ONLINE HOST:gdb1 ID:702 PORT:2381 DataDir:/gdb/gaussdb/data_etcd2/data
INSTANCE:ETCD3 ROLE:follower STATUS:ONLINE HOST:gdb2 ID:703 PORT:2379 DataDir:/gdb/gaussdb/data_etcd1/data
---------------------------------------------------------Instances Status in Group (group_1)----------------------------------------------------------
INSTANCE:DB1_1 ROLE:primary STATUS:ONLINE HOST:gdb1 ID:1 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
INSTANCE:DB1_2 ROLE:standby STATUS:ONLINE HOST:gdb2 ID:2 PORT:40000 DataDir:/gdb/gaussdb/data_db/dn1
-----------------------------------------------------------------------Manage IP----------------------------------------------------------------------
HOST:gdb1 IP:192.168.1.77
HOST:gdb2 IP:192.168.1.78
-------------------------------------------------------------------Query Action Info------------------------------------------------------------------
HOSTNAME: gdb1 TIME: 2022-11-15 00:57:40.146466
------------------------------------------------------------------------Float Ip------------------------------------------------------------------
HOST:gdb1 DB1_1:192.168.1.77 FloatIP:192.168.1.79 ManageFloatIP:
[omm@gdb1 script]$
GaussDB T分布式集群这样安装部署不踩坑
大伙们,重头戏来了,我们一起来列队整齐划一,一步、两步……
一、开启root用户远程登录权限并关闭selinux
1、编辑sshd_config文件
vi /etc/ssh/sshd_config
2、修改PermitRootLogin配置,允许用户远程登录
可以使用以下两种方式实现:
1)注释掉"PermitRootLogin no"
#PermitRootLogin no
2)将PermitRootLogin改为yes
PermitRootLogin yes
3、修改Banner配置,去掉连接到系统时,系统提示的欢迎信息
注释掉"Banner"所在的行:
#Banner none
4、修改PasswordAuthentication配置,允许用户登录时进行密码鉴权,退出保存
将PasswordAuthentication改为yes:
PasswordAuthentication yes
5、重启sshd服务,并使用root用户身份重新登录
#service sshd restart
如果执行命令后返回提示信息Redirecting to /bin/systemctl restart sshd.service,则执行如下命令:
#/bin/systemctl restart sshd.service
6、关闭selinux
#vi /etc/selinux/config
SELINUX=disabled
二、关闭系统防火墙并disable
# systemctl stop firewalld.service
# systemctl disable firewalld.service
三、安装系统包
本次使用ISO介质配置yum源,用于数据库安装依赖包的安装。
在/etc/rc.local文件末尾写入一行:
mount /dev/cdrom /mnt
保证每次系统启动的时候都能把光盘里面的内容挂载到/mnt目录中。
1、配置yum源
将原先的yum源备份,新建一个yum源:
cd /etc/yum.repos.d
mkdir bak
mv redhat* ./bak
vi iso.repo
[root@gaussdb11 yum.repos.d]# cat iso.repo
[rhel-iso]
name=Red Hat Enterprise Linux - Source
baseurl=file:///mnt
enabled=1
gpgcheck=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release
2、查看package
#yum list
yum install -y zlib readline gcc
yum install -y python python-devel
yum install perl-ExtUtils-Embed
yum install -y readline-devel
yum install -y zlib-devel
yum install -y lsof
3、验证包是否安装
rpm -qa --queryformat "%{NAME}-%{VERSION}-%{RELEASE} (%{ARCH})\n" | grep -E "zlib|readline|gcc|python|python-devel|perl-ExtUtils-Embed|readline-devel|zlib-devel"
四、准备及安装
1、创建存放安装包的目录并解压安装包(任一主机操作)
su - root
mkdir -p /opt/software/gaussdb
cd /opt/software/gaussdb
tar -zxvf GaussDB_100_1.0.0-CLUSTER-REDHAT7.5-64bit.tar.gz
vi clusterconfig.xml --创建集群配置文件
内容如下:
<?xml version="1.0" encoding="utf-8"?>
<ROOT>
<CLUSTER>
<PARAM name="clusterName" value="gaussdbt_cluster"/>
<PARAM name="nodeNames" value="gaussdb11,gaussdb12,gaussdb13,gaussdb14"/>
<PARAM name="gaussdbAppPath" value="/opt/gaussdb/app"/>
<PARAM name="gaussdbLogPath" value="/opt/gaussdb/log"/>
<PARAM name="tmpMppdbPath" value="/opt/gaussdb/tmp/gaussdb_mppdb"/>
<PARAM name="gaussdbToolPath" value="/opt/gaussdb/huawei/wisequery"/>
<PARAM name="archiveLogPath" value="/opt/gaussdb/arch_log"/>
<PARAM name="redoLogPath" value="/opt/gaussdb/redo_log"/>
<PARAM name="datanodeType" value="DN_ZENITH_ZPAXOS"/>
<PARAM name="coordinatorType" value="CN_ZENITH_ZSHARDING"/>
<PARAM name="clusterType" value="mutil-AZ"/>
</CLUSTER>
<DEVICELIST>
<DEVICE sn="1000001">
<PARAM name="name" value="gaussdb11"/>
<PARAM name="azName" value="AZ1"/>
<PARAM name="azPriority" value="1"/>
<PARAM name="backIp1" value="192.168.57.21"/>
<PARAM name="sshIp1" value="192.168.57.21"/>
<PARAM name="cooNum" value="1"/>
<PARAM name="cooPortBase" value="8000"/>
<PARAM name="cooListenIp1" value="192.168.57.21"/>
<PARAM name="cooDir1" value="/gaussdb/data/data_cn"/>
<PARAM name="gtsNum" value="1"/>
<PARAM name="gtsPortBase" value="13000"/>
<PARAM name="gtsDir1" value="/gaussdb/data/data_gts,gaussdb12,/gaussdb/data/data_gts"/>
<PARAM name="etcdNum" value="1"/>
<PARAM name="etcdListenPort" value="20300"/>
<PARAM name="etcdHaPort" value="20500"/>
<PARAM name="etcdListenIp1" value="192.168.57.21"/>
<PARAM name="etcdHaIp1" value="192.168.57.21"/>
<PARAM name="etcdDir1" value="/gaussdb/data/data_etcd"/>
</DEVICE>
<DEVICE sn="1000002">
<PARAM name="name" value="gaussdb12"/>
<PARAM name="azName" value="AZ1"/>
<PARAM name="azPriority" value="1"/>
<PARAM name="backIp1" value="192.168.57.22"/>
<PARAM name="sshIp1" value="192.168.57.22"/>
<PARAM name="cooNum" value="1"/>
<PARAM name="cooPortBase" value="8000"/>
<PARAM name="cooListenIp1" value="192.168.57.22"/>
<PARAM name="cooDir1" value="/gaussdb/data/data_cn"/>
<PARAM name="cmsNum" value="1"/>
<PARAM name="cmServerListenIp1" value="192.168.57.22,192.168.57.21"/>
<PARAM name="cmServerHaIp1" value="192.168.57.22,192.168.57.21"/>
<PARAM name="cmServerlevel" value="1"/>
<PARAM name="cmServerRelation" value="gaussdb12,gaussdb11"/>
<PARAM name="etcdNum" value="1"/>
<PARAM name="etcdListenPort" value="20300"/>
<PARAM name="etcdHaPort" value="20500"/>
<PARAM name="etcdListenIp1" value="192.168.57.22"/>
<PARAM name="etcdHaIp1" value="192.168.57.22"/>
<PARAM name="etcdDir1" value="/gaussdb/data/data_etcd"/>
</DEVICE>
<DEVICE sn="1000003">
<PARAM name="name" value="gaussdb13"/>
<PARAM name="azName" value="AZ1"/>
<PARAM name="azPriority" value="1"/>
<PARAM name="backIp1" value="192.168.57.23"/>
<PARAM name="sshIp1" value="192.168.57.23"/>
<PARAM name="etcdNum" value="1"/>
<PARAM name="etcdListenPort" value="20300"/>
<PARAM name="etcdHaPort" value="20500"/>
<PARAM name="etcdListenIp1" value="192.168.57.23"/>
<PARAM name="etcdHaIp1" value="192.168.57.23"/>
<PARAM name="etcdDir1" value="/gaussdb/data/data_etcd"/>
<PARAM name="dataNum" value="1"/>
<PARAM name="dataPortBase" value="40000"/>
<PARAM name="dataNode1" value="/gaussdb/data/data_dn,gaussdb11,/gaussdb/data/data_dn"/>
</DEVICE>
<DEVICE sn="1000004">
<PARAM name="name" value="gaussdb14"/>
<PARAM name="azName" value="AZ1"/>
<PARAM name="azPriority" value="1"/>
<PARAM name="backIp1" value="192.168.57.24"/>
<PARAM name="sshIp1" value="192.168.57.24"/>
<PARAM name="dataNum" value="1"/>
<PARAM name="dataPortBase" value="40000"/>
<PARAM name="dataNode1" value="/gaussdb/data/data_dn,gaussdb12,/gaussdb/data/data_dn"/>
</DEVICE>
</DEVICELIST>
</ROOT>
给目录赋权
chmod -R 755 /opt/software
2、确认集群各节点root密码一致,因脚本互信配置需密码一致。如果不能修改密码,请提前手工完成root用户的互信配置
3、使用gs_preinstall准备好安装环境
su - root
cd /opt/software/gaussdb/script
--预安装配置环境
./gs_preinstall -U omm -G dbgrp -X /opt/software/gaussdb/clusterconfig.xml
示例:
4、查看预安装日志发现有安装环境时钟同步不一致警告,需要进行NTP设置
5、配置NTP,节点1作为NTP服务器,其他节点同步节点1
1)安装ntp
yum -y install ntp
2)节点1/etc/ntp.conf新增如下内容
server 127.127.1.0
fudge 127.127.1.0 stratum 10
restrict 192.168.57.21 nomodify notrap nopeer noquery <<====当前节点IP地址
restrict 192.168.57.0 mask 255.255.255.0 nomodify notrap <<====集群所在网段的网络地址(network),子网掩码(Genmask)
3)其他节点/etc/ntp.conf新增如下内容
节点2:
server 192.168.57.21 <<====同步NTP服务器的IP
fudge 192.168.57.21 stratum 10 <<====同步NTP服务器的IP
restrict 192.168.57.22 nomodify notrap nopeer noquery
restrict 192.168.57.0 mask 255.255.255.0 nomodify notrap
节点3:
server 192.168.57.21
fudge 192.168.57.21 stratum 10
restrict 192.168.57.23 nomodify notrap nopeer noquery
restrict 192.168.57.0 mask 255.255.255.0 nomodify notrap
节点4:
server 192.168.57.21
fudge 192.168.57.21 stratum 10
restrict 192.168.57.24 nomodify notrap nopeer noquery
restrict 192.168.57.0 mask 255.255.255.0 nomodify notrap
4)启动ntp服务
service ntpd start
5)查看ntp服务器有无和上层ntp连通
ntpstat
6)查看ntp服务器与上层ntp的状态
ntpq -p
7)设置ntp服务开机启动
systemctl enable ntpd
6、使用gs_checkos检查环境是否符合安装
7、开始安装数据库
su - omm
cd /opt/software/gaussdb/script
./gs_install -X /opt/software/gaussdb/clusterconfig.xml
附:
使用gs_uninstall卸载数据库集群:
gs_uninstall --delete-data
或者在集群中每个节点执行本地卸载:
gs_uninstall --delete-data -L
当集群状态不正常,获取不到集群信息时执行如下命令卸载集群:
gs_uninstall --delete-data -X
/opt/software/gaussdb/clusterconfig.xml
或者在集群中每个节点执行本地卸载:
gs_uninstall --delete-data -L -X
/opt/software/gaussdb/clusterconfig.xml
8、检查集群安装成功
注:由于本机内存不够,故将四台虚拟机改为三台虚拟机,并将paxos组网方式改成了ha组网。
附:
1)查看集群状态
gs_om -t status
2)停掉某个主机的所有实例
gs_om -t stop -h gaussdb13
3)启动某个主机的所有实例
gs_om -t start -h gaussdb13
4)DN主备切换,gaussdb13为备DN所在的主机名,DB2_3为要被切换的备DN名称
gs_om -t switch -h gaussdb13 -I DB2_3
5)CM主备切换, gaussdb12为当前备CM所在的主机名称, CM2为gaussdb12主机上的CM实例名称
gs_om -t switch -h gaussdb12 -I CM2
6)启停集群
gs_om -t start
gs_om -t stop
7)启停etcd
gs_om -t startetcd
gs_om -t stopetcd
五、高可用测试
本次测试以模拟节点3宕掉为背景进行。
1、查看主备DN状态,我们可以看到主DN分别为节点2上的DB1_1及节点3上的DB2_3
2、模拟节点3宕掉,停掉节点3上的所有实例
3、节点2上的备DN DB2_4变成主DN
4、启动节点3上的所有实例
5、发现主备库自动追平
6、将DB2_3备DN切成主DN
7、切换成功
六、安装问题大汇总
问题一:预安装报包类型跟CPU类型不一致
[root@gaussdb11 script]# ./gs_preinstall -U omm -G dbgrp -X /opt/software/gaussdb/clusterconfig.xml
Parsing the configuration file.
Successfully parsed the configuration file.
Installing the tools.
Successfully installed the tools.
Are you sure you want to create trust for root (yes/no)? yes
Please enter password for root.
Password:
Creating SSH trust for the root permission user.
Checking network information.
All nodes in the network are Normal.
Successfully checked network information.
Creating SSH trust.
Creating the local key file.
Successfully created the local key files.
Appending local ID to authorized_keys.
Successfully appended local ID to authorized_keys.
Updating the known_hosts file.
Successfully updated the known_hosts file.
Appending authorized_key.
Successfully appended authorized_key.
Checking common authentication file content.
Successfully checked common authentication content.
Distributing SSH trust file to all node.
Successfully distributed SSH trust file to all node.
Verifying SSH trust.
Successfully verified SSH trust.
Successfully created SSH trust.
Successfully created SSH trust for the root permission user.
[GAUSS-52406] : The package type "" is inconsistent with the Cpu type "X86".
[root@gaussdb11 script]#
解决方法:
1)查看preinstall脚本运行日志。路径是clusterconfig.xml中参数gaussdbLogPath对应的路径,在该目录下om/gs_preinstall*.log的前置日志报错如下:
[2019-11-28 22:50:08.335532][gs_preinstall][LOG]:Successfully created SSH trust for the root permission user.
[2019-11-28 22:50:08.992537][gs_preinstall][ERROR]:[GAUSS-52406] : The package type "" is inconsistent with the Cpu type "X86".
Traceback (most recent call last)
File "./gs_preinstall", line 507, in <module>
File "/opt/software/gaussdb/script/impl/preinstall/PreinstallImpl.py", line 1861, in run
2)修改/opt/software/gaussdb/script/impl/preinstall/PreinstallImpl.py注释如下行
#self.getAllCpu()
问题二:预安装是报时钟同步告警
A12.[ Time consistency status ] : Warning
解决方法:配置NTP同步,配置方法见第四节步骤5。
问题三:安装数据库时报由于权限问题SYSDBA登录失败
[omm@gaussdb11 script]$ ./gs_install -X /opt/software/gaussdb/clusterconfig.xml
Parsing the configuration file.
Check preinstall.
Successfully checked preinstall.
Creating the backup directory.
Successfully created the backup directory.
Check the time difference between hosts in the cluster.
Installing the cluster.
Installing applications.
Successfully installed APP.
Distribute etcd communication keys.
Successfully distrbute etcd communication keys.
Initializing cluster instances
.............193s
[FAILURE] gaussdb11:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize GTS1 instance
[GAUSS-51607] : Failed to start zenith instance..Output:
ZS-00001: no privilege is found
ZS-00001: "SYSDBA" login failed, login as sysdba is prohibited or privilege is incorrect
SQL>
ZS-00001: connection is not established
SQL>
[FAILURE] gaussdb12:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize GTS2 instance
Successfully Initialize GTS2 instance.
Initialize cn_402 instance
[GAUSS-51607] : Failed to start zenith instance..Output:
ZS-00001: no privilege is found
ZS-00001: "SYSDBA" login failed, login as sysdba is prohibited or privilege is incorrect
SQL>
ZS-00001: connection is not established
SQL>
[FAILURE] gaussdb13:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize DB1_1 instance
[GAUSS-51607] : Failed to start zenith instance..Output:
ZS-00001: no privilege is found
ZS-00001: "SYSDBA" login failed, login as sysdba is prohibited or privilege is incorrect
SQL>
ZS-00001: connection is not established
SQL>
[FAILURE] gaussdb14:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize DB2_3 instance
[GAUSS-51607] : Failed to start zenith instance..Output:
ZS-00001: no privilege is found
ZS-00001: "SYSDBA" login failed, login as sysdba is prohibited or privilege is incorrect
SQL>
ZS-00001: connection is not established
SQL>
.[omm@gaussdb11 script]$
分析解决步骤:
1)查看install日志,路径:
cd /opt/gaussdb/log/omm/om
[root@gaussdb11 om]# ls -lrt
total 52
-rw-------. 1 omm dbgrp 42006 Dec 1 21:43 gs_local-2019-12-01_213124.log
-rw-------. 1 omm dbgrp 5240 Dec 1 21:44 gs_install-2019-12-01_213118.log
[root@gaussdb11 om]# tail -25 gs_local-2019-12-01_213124.log
ZS-00001: "SYSDBA" login failed, login as sysdba is prohibited or privilege is incorrect
SQL>
ZS-00001: connection is not established
SQL>
[2019-12-01 21:43:26.533606][Install][ERROR]:[GAUSS-51607] : Failed to start zenith instance..Output:
ZS-00001: no privilege is found
ZS-00001: "SYSDBA" login failed, login as sysdba is prohibited or privilege is incorrect
SQL>
ZS-00001: connection is not established
SQL>
Traceback (most recent call last)
File "/opt/software/gaussdb/script/local/Install.py", line 704, in <module>
File "/opt/software/gaussdb/script/local/Install.py", line 625, in initInstance
File "/opt/software/gaussdb/script/local/Install.py", line 614, in __tpInitInstance
File "/opt/software/gaussdb/script/local/../gspylib/component/Kernal/Zenith.py", line 308, in initialize
File "/opt/software/gaussdb/script/local/../gspylib/component/Kernal/CN_OLTP/Zsharding.py", line 62, in initDbInstance
File "/opt/software/gaussdb/script/local/../gspylib/component/Kernal/CN_OLTP/Zsharding.py", line 100, in initZenithInstance
File "/opt/software/gaussdb/script/local/../gspylib/component/Kernal/Zenith.py", line 406, in startInstance
2)查看/opt/gaussdb/log/omm/db_log/GTS1/run/zengine.rlog发现是内存不足导致。
UTC+8 2019-11-29 21:50:03.755|ZENGINE|00000|26307|INFO>[PARAM] LOG_HOME = /opt/gaussdb/log/omm/db_log/GTS1
UTC+8 2019-11-29 21:50:03.755|ZENGINE|00000|206158456515|INFO>starting instance(nomount)
UTC+8 2019-11-29 21:50:03.755|ZENGINE|00000|26307|ERROR>GS-00001 : Failed to allocate 4592381952 bytes for sga [srv_sga.c:170]
UTC+8 2019-11-29 21:50:03.755|ZENGINE|00000|26307|ERROR>failed to create sga
UTC+8 2019-11-29 21:50:03.755|ZENGINE|00000|26307|ERROR>Instance Startup Failed
3)把所有虚拟机的内存加大即可
本次测试虚拟机内存配置如下,供参考:
-
Gaussdb11:3.9G
-
Gaussdb12:4.9G
-
Gaussdb13:4.9G
问题四:安装报GAUSS-50601
1)安装进度日志:
[omm@gaussdb11 script]$ ./gs_install -X /opt/software/gaussdb/clusterconfig.xml
Parsing the configuration file.
Check preinstall.
Successfully checked preinstall.
Creating the backup directory.
Successfully created the backup directory.
Check the time difference between hosts in the cluster.
Installing the cluster.
Installing applications.
Successfully installed APP.
Distribute etcd communication keys.
Successfully distrbute etcd communication keys.
Initializing cluster instances
390s
[SUCCESS] gaussdb11:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize cn_401 instance
Successfully Initialize cn_401 instance.
Modifying user's environmental variable $GAUSS_ENV.
Successfully modified user's environmental variable $GAUSS_ENV.
[FAILURE] gaussdb12:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize DB1_1 instance
Successfully Initialize DB1_1 instance.
Initialize DB2_4 instance
[GAUSS-50601] : The port [40001] is occupied.
[SUCCESS] gaussdb13:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize DB1_2 instance
Successfully Initialize DB1_2 instance.
Initialize DB2_3 instance
Successfully Initialize DB2_3 instance.
Modifying user's environmental variable $GAUSS_ENV.
Successfully modified user's environmental variable $GAUSS_ENV.
2)查看安装日志发现端口被占用
[omm@gaussdb11 omm]$ tail -300 om/gs_install-2019-12-09_161757.log
[2019-12-09 16:18:15.998104][gs_install][LOG]:Initializing cluster instances
[2019-12-09 16:18:15.999396][gs_install][DEBUG]:Init instance by cmd: source /etc/profile; source /home/omm/.bashrc;python '/opt/software/gaussdb/script/local/Install.py' -t init_instance -U omm:dbgrp -X /opt/software/gaussdb/clusterconfig.xml -l /opt/gaussdb/log/omm/om/gs_local.log --autostart=yes --alarm=/opt/huawei/snas/bin/snas_cm_cmd
[2019-12-09 16:24:49.689716][gs_install][ERROR]:[SUCCESS] gaussdb11:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize cn_401 instance
Successfully Initialize cn_401 instance.
Modifying user's environmental variable $GAUSS_ENV.
Successfully modified user's environmental variable $GAUSS_ENV.
[FAILURE] gaussdb12:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize DB1_1 instance
Successfully Initialize DB1_1 instance.
Initialize DB2_4 instance
[GAUSS-50601] : The port [40001] is occupied.
[SUCCESS] gaussdb13:
Using omm:dbgrp to install database.
Using installation program path : /home/omm
Initialize DB1_2 instance
Successfully Initialize DB1_2 instance.
Initialize DB2_3 instance
Successfully Initialize DB2_3 instance.
Modifying user's environmental variable $GAUSS_ENV.
Successfully modified user's environmental variable $GAUSS_ENV.
Traceback (most recent call last)
File "./gs_install", line 281, in <module>
File "/opt/software/gaussdb/script/impl/install/InstallImpl.py", line 93, in run
File "/opt/software/gaussdb/script/impl/install/InstallImpl.py", line 193, in doDeploy
File "/opt/software/gaussdb/script/impl/install/InstallImpl.py", line 291, in doInstall
[root@gaussdb12 om]# netstat -na |grep 40001
tcp 0 0 192.168.57.22:40001 0.0.0.0:* LISTEN
tcp 0 0 127.0.0.1:40001 0.0.0.0:* LISTEN
3)卸载然后修改clusterconfig.xml文件,将节点3的DN端口改成50000继续,注意检查所有节点50000端口是否被占用。
su - omm
./gs_uninstall --delete-data -X /opt/software/gaussdb/clusterconfig.xml
vi clusterconfig.xml
<PARAM name="dataNum" value="1"/>
<PARAM name="dataPortBase" value="50000"/> <<=================dataPortBase从40000修改成50000
<PARAM name="dataNode1" value="/gaussdb/data/data_dn2,gaussdb12,/gaussdb/data/data_dn2"/>
问题五、安装过程中报节点1的sha256文件不存在,集群安装失败
解决方法:从其他节点把文件scp过来即可
su - omm
cd /opt/software/gaussdb
scp *.sha256 gaussdb11:/opt/software/gaussdb