搞了几天redis cluster codis 的部署安装,测试,架构优化,配合研发应用整合,这里记一些心得!
背景需求:
之前多个业务都在使用redis库,各业务独立占用主从两台服务器,硬件资源利用不合理;主从架构冗余度不高,主redis故障时,从redis恢复需要时间,降低了业务的可用性。所以调研、测试并部署了基于codis的redis集群。官方地址:https://github.com/CodisLabs/codis
部署文档 参考这里http://www.cnblogs.com/shantu/p/4589798.html
维护管理
了解过codis的同志都知道,codis集群各组件服务的启动是有一定顺序的,而且添加组、初始化slot、各节点启动proxy基本都依赖于dashboard,所以要遵循以下基本步骤。
- start dashboard
- start master and slave codis redis
- add group
- init slot
- start proxy
下面根据自己的使用习惯写了一个codis集群的管理脚本
- 主要实现codis集群服务组件启动关闭、redis配置文件生成的功能,python学的实在太烂,之前写的那个更烂,这个做了些优化
- 使用说明:codis 本身的默认配置文件为config.ini,放在当前home目录下;使用此脚本时,还需要预先配置好codis.ini配置文件,供脚本解析调用,
如下:
[codis_group]
group_1 = 192.168.5.14:6381 master,192.168.5.15:6381 slave
group_2 = 192.168.5.15:6382 master,192.168.5.14:6382 slave
group_3 = 192.168.5.16:6383 master,192.168.5.44:6383 slave
group_4 = 192.168.5.44:6384 master,192.168.5.16:6384 slave
- cat codis_manage.py
#!/usr/bin/env python
# coding:utf8
#author:shantuwqk@163.com
import os
import sys
import time
from subprocess import Popen, PIPE

try:
    import commands  # Python 2
except ImportError:
    # Python 3 dropped `commands`; subprocess offers the same getstatusoutput().
    import subprocess as commands

try:
    import ConfigParser  # Python 2
except ImportError:
    import configparser as ConfigParser  # Python 3 name of the same module

# NOTE: mako (used to render the redis conf templates) is imported lazily in
# _write_redis_conf(), because only the `init` command needs it.

codis_config = "config.ini"
codis_root = "/data/setup/codis/data"
codis_dbdir = "%s/db" % codis_root

_GREEN = "\033[32;1m"
_RED = "\033[31;1m"
_RESET = "\033[0m"

# Shell pipeline that prints "<iface> <ip>" for the first em* interface.
_LOCAL_IP_CMD = (
    "ifconfig -a| awk '/^em/ {;a=$1;FS=\":\"; nextline=NR+1; next}"
    "{ if (NR==nextline) { split($2,b,\" \")}"
    "{ if ($2 ~ /[0-9]\\./) {print a,b[1]}; FS=\" \"}}'"
    "|uniq -c|awk '{print $2,$3}'"
)


def _say(ok, text):
    """Print *text* in green when *ok* is true, in red otherwise."""
    print("%s%s%s" % (_GREEN if ok else _RED, text, _RESET))


def _verdict(ok):
    """Return the status word used at the end of the coloured messages."""
    return "OK" if ok else "Error"


def _find_pids(ps_pipeline):
    """Run a `ps | grep ... | awk '{print $2}'` pipeline; return pids joined by spaces.

    Resolving the pids before calling kill lets the caller report the real
    pids and skip the kill when nothing is running.
    """
    output = commands.getstatusoutput(ps_pipeline)[1]
    return " ".join(tok for tok in output.split() if tok.isdigit())


def _group_members(value):
    """Split a codis.ini group value into a list of (addr, role) tuples.

    *value* looks like "192.168.5.14:6381 master,192.168.5.15:6381 slave".
    Empty items (e.g. a trailing comma) are ignored.
    """
    members = []
    for item in value.split(","):
        fields = item.split()
        if not fields:
            continue
        if len(fields) < 2:
            raise ValueError("bad group member %r, expected 'ip:port role'" % item)
        members.append((fields[0], fields[1]))
    return members


def _proxy_tag():
    """Return the proxy_id value found in the codis config file (read from cwd)."""
    line = commands.getstatusoutput("cat %s|grep proxy_id" % codis_config)[1]
    if "=" not in line:
        raise ValueError("proxy_id not found in %s" % codis_config)
    # strip(): "proxy_id = proxy_1" must not yield " proxy_1", which would
    # break the "./log/<id>.log" path built from it.
    return line.split("=")[1].strip()


def pars_config():
    """Return the (name, value) pairs of the [codis_group] section of codis.ini.

    codis.ini is read from the current working directory.
    """
    cf = ConfigParser.ConfigParser()
    cf.read("codis.ini")
    return cf.items("codis_group")


def codis_dashboard(opt):
    """Start or stop the codis dashboard; *opt* is "start" or "stop"."""
    if opt == "start":
        # POSIX redirection instead of bash-only "&>", since os.system uses sh.
        exec_cmd = ("cd %s; nohup ../bin/codis-config -c %s -L ./log/dashboard.log "
                    "dashboard --addr=:18087 --http-log=./log/http.log "
                    ">/dev/null 2>&1 &" % (codis_root, codis_config))
        ok = os.system(exec_cmd) == 0
        _say(ok, " codis dashboard start .... %s" % _verdict(ok))
    elif opt == "stop":
        # "grep -v grep" keeps the pipeline from matching its own shell.
        pids = _find_pids("ps aux |grep \"codis-config\"| grep \"dashboard\""
                          "| grep -v grep|awk '{print $2}'")
        if not pids:
            _say(False, " codis dashboard is not running")
            return
        ok = commands.getstatusoutput("kill -9 %s" % pids)[0] == 0
        _say(ok, " KILL codis dashboard id:[%s] %s" % (pids, _verdict(ok)))


def _write_redis_conf(template_name, gtag, datadir, **params):
    """Render ./conf/<template_name> into ./conf/<gtag>.conf under codis_root.

    Also creates *datadir* when it does not exist yet.  Returns True when the
    data directory was created, False when it already existed (the conf file
    is written in both cases).
    """
    from mako.template import Template

    # The template cache directory ('tmp/test') is relative to codis_root.
    os.chdir(codis_root)
    rendered = Template(filename="./conf/%s" % template_name,
                        module_directory='tmp/test').render(datadir=datadir, **params)
    conf_path = "%s/conf/%s.conf" % (codis_root, gtag)
    with open(conf_path, 'w') as conf:
        conf.write(rendered)
    if os.path.exists(datadir):
        print("%s is exists !!" % datadir)
        return False
    print("create %s ...." % datadir)
    os.makedirs(datadir)
    # List the file that was actually written (<gtag>.conf).
    print(commands.getstatusoutput("ls -l %s" % conf_path)[1])
    return True


def INIT_config_master(port, memsize, datadir, gtag):
    """Generate the conf file of a master instance named *gtag*."""
    return _write_redis_conf("redis.master.conf.template", gtag, datadir,
                             port="%s" % port, memsize=memsize)


def INIT_config_slave(port, memsize, datadir, slaveof, gtag):
    """Generate the conf file of a slave instance; *slaveof* is "ip:port" of its master."""
    master = slaveof.split(":")
    return _write_redis_conf("redis.slave.conf.template", gtag, datadir,
                             port="%s" % port, memsize=memsize,
                             slaveof="%s %s" % (master[0], master[1]))


def codis_redis(opt, cf):
    """Start or stop the codis-server described by conf file name *cf*.

    *cf* is a name such as "group_1_master_192.168.5.14_6381.conf"; the port
    is the last "_"-separated field before the extension.
    """
    cname = os.path.splitext(cf)[0]
    port = cname.rsplit("_", 1)[-1]
    pid_cmd = "ps aux | grep codis-server| grep -v grep |grep %s|awk '{print $2}'" % port
    if opt == "start":
        pids = _find_pids(pid_cmd)
        if pids:
            _say(True, "%s redis process Already exists pid is:[%s]" % (port, pids))
            sys.exit(1)
        exec_cmd = ("cd %s; nohup ../bin/codis-server ./conf/%s.conf > ./log/%s.log 2>&1 &"
                    % (codis_root, cname, cname))
        print(exec_cmd)
        # Run the command exactly once.
        ok = os.system(exec_cmd) == 0
        _say(ok, " start redis %s server is %s" % (port, _verdict(ok)))
    elif opt == "stop":
        pids = _find_pids(pid_cmd)
        if not pids:
            _say(False, " no codis-server process found for port:[%s]" % port)
            return
        status, output = commands.getstatusoutput("kill -9 %s" % pids)
        ok = status == 0
        _say(ok, " KILL redis port:[%s] pid:[%s] is %s" % (port, pids, _verdict(ok)))
        if output:
            print(output)


def codis_group():
    """Register every server listed in codis.ini with its codis group."""
    print("change codis.ini exists?")
    if not os.path.isfile("codis.ini"):
        print("config not exist!!")
        return
    try:
        groups = sorted(pars_config())
    except ConfigParser.Error:
        # File exists but has no [codis_group] section.
        print("config not exist!!")
        return
    _say(True, "parsing the codis.ini configuration file")
    for name, value in groups:
        gid = name.split("_")[1]
        for addr, role in _group_members(value):
            port = addr.split(":")[1]
            exec_cmd = ("cd %s; ../bin/codis-config -c %s -L ./log/%s_addgroup.log "
                        "server add %s %s %s"
                        % (codis_root, codis_config, port, gid, addr, role))
            print(exec_cmd)
            status, output = commands.getstatusoutput(exec_cmd)
            print("%s %s" % (status, output))
            ok = status == 0
            _say(ok, " add group:[%s] with a gtag:[%s] addr:(%s) %s"
                 % (gid, role, addr, _verdict(ok)))


def remove_fenc():
    """Remove stale proxy fence entries via codis-config."""
    cmd = "cd %s;../bin/codis-config -c %s action remove-fence" % (codis_root, codis_config)
    output = commands.getstatusoutput(cmd)[1]
    print("remove fenc proxy info %s" % output)


def slot_init():
    """Force-initialise the slot table via codis-config."""
    init_cmd = "cd %s; ../bin/codis-config -c %s slot init -f" % (codis_root, codis_config)
    print("INIT SLOT ........ \n %s" % commands.getstatusoutput(init_cmd)[1])


def codis_initslot(gid, slot_range):
    """Assign the slots slot_range[0]..slot_range[1] to group *gid* and set them online."""
    exec_cmd = ("cd %s; ../bin/codis-config -c %s slot range-set %s %s %s online"
                % (codis_root, codis_config, slot_range[0], slot_range[1], gid))
    print(exec_cmd)
    status, output = commands.getstatusoutput(exec_cmd)
    print(output)
    ok = status == 0
    _say(ok, " slot init:[%s],gid:[%s] %s" % (slot_range, gid, _verdict(ok)))


def offline_proxy():
    """Mark the local proxy (proxy_id from the config file) offline."""
    proxy_tag = _proxy_tag()
    exec_down_cmd = ("cd %s; ../bin/codis-config -c %s proxy offline %s"
                     % (codis_root, codis_config, proxy_tag))
    print("Shutdown %s offline....." % proxy_tag)
    print(exec_down_cmd)
    return commands.getstatusoutput(exec_down_cmd)


def codis_proxy(opt):
    """Start the local codis-proxy and set it online, or take it offline and kill it."""
    if opt == "start":
        proxy_tag = _proxy_tag()
        exec_new_cmd = ("cd %s; nohup ../bin/codis-proxy --log-level info -c %s "
                        "-L ./log/%s.log --cpu=8 --addr=0.0.0.0:19000 "
                        "--http-addr=0.0.0.0:11000 &"
                        % (codis_root, codis_config, proxy_tag))
        print(exec_new_cmd)
        ok = os.system(exec_new_cmd) == 0
        _say(ok, " codis proxy tag:[%s]start %s" % (proxy_tag, _verdict(ok)))
        # Wait before setting the proxy online, as the original script did.
        time.sleep(5)
        exec_online_cmd = ("cd %s;../bin/codis-config -c %s proxy online %s"
                           % (codis_root, codis_config, proxy_tag))
        print("Set %s online .....!!" % proxy_tag)
        print(exec_online_cmd)
        print(commands.getstatusoutput(exec_online_cmd)[1])
    elif opt == "stop":
        print(offline_proxy()[1])
        pids = _find_pids("ps aux |grep codis-proxy| grep -v grep|awk '{print $2}'")
        if not pids:
            print("no codis-proxy process found")
            return
        print("%s %s" % commands.getstatusoutput("kill -9 %s" % pids))


def get_client_ip():
    """Return the IP of the first em* interface, or None when it cannot be determined."""
    status, output = commands.getstatusoutput(_LOCAL_IP_CMD)
    fields = output.split()
    if status != 0 or len(fields) < 2:
        print("get client ip error")
        return None
    return fields[1]


def slot_range(n, group):
    """Split slots 0..n-1 into *group* contiguous ranges.

    Returns {gid: [(first, last)]} for gid in 1..group.  Every range holds
    n // group slots except the last one, which also takes the remainder.
    Raises ValueError unless 1 <= group <= n.
    """
    if group < 1 or group > n:
        raise ValueError("group must be between 1 and %d, got %r" % (n, group))
    per = n // group
    slot_dict = {}
    for gid in range(1, group + 1):
        first = (gid - 1) * per
        last = n - 1 if gid == group else first + per - 1
        slot_dict[gid] = [(first, last)]
    return slot_dict


def handle_slot():
    """Return the slot ranges for the 1024 codis slots, one range per configured group."""
    return slot_range(1024, len(pars_config()))


def init_config():
    """Generate a redis conf file for every master/slave listed in codis.ini."""
    for name, value in sorted(pars_config()):
        members = _group_members(value)
        # Resolve the master first so the order inside the ini line does not matter.
        masters = [addr for addr, role in members if role == "master"]
        for addr, role in members:
            host, port = addr.split(":")[:2]
            confname = "_".join((name, role, host, port))
            if role == "master":
                INIT_config_master(port, 6, codis_dbdir, confname)
            elif role == "slave":
                if not masters:
                    print("%s: no master defined, skip slave %s" % (name, addr))
                    continue
                INIT_config_slave(port, 6, codis_dbdir, masters[0], confname)


def _local_instances(opt, role):
    """Run codis_redis(opt, ...) for each conf file of *role* that names the local IP."""
    local_ip = get_client_ip()
    if local_ip is None:
        return
    mcfile_cmd = ("cd %s/conf;ls -lrt| egrep \"%s\"| grep \"%s\"|awk '{print $9}'"
                  % (codis_root, local_ip, role))
    print(mcfile_cmd)
    listing = commands.getstatusoutput(mcfile_cmd)[1]
    # Keep only conf file names; this also drops shell error text.
    conf_files = [entry for entry in listing.split() if entry.endswith(".conf")]
    if not conf_files:
        print("no %s conf file found for %s under %s/conf" % (role, local_ip, codis_root))
        return
    for conf_file in conf_files:
        codis_redis(opt, conf_file)


def start_master():
    """Start the master instance(s) configured for this host."""
    _local_instances('start', 'master')


def stop_master():
    """Stop the master instance(s) configured for this host."""
    _local_instances('stop', 'master')


def start_slave():
    """Start the slave instance(s) configured for this host."""
    _local_instances('start', 'slave')


def stop_slave():
    """Stop the slave instance(s) configured for this host."""
    _local_instances('stop', 'slave')


def assign_slot():
    """Re-initialise the slot table and spread the slots evenly over the groups.

    NOTE(review): the keys of handle_slot() are 1..N and are used as group
    ids, so this assumes the groups in codis.ini are numbered group_1..group_N.
    """
    remove_fenc()
    slot_init()
    for gid, ranges in sorted(handle_slot().items()):
        codis_initslot(gid, ranges[0])


def help_prompt():
    """Print the command-line usage."""
    print("""
    This program manages the components of a codis cluster.
    Options include:
    --version : Prints the version number
    --help    : Helpful tips
    sample :
        python codis_manage.py init
        python codis_manage.py start_master/stop_master
                               start_slave/stop_slave
                               start_web/stop_web
                               add_group
                               initslot
                               start_proxy/stop_proxy
                               stopall
    """)


def _print_version():
    """Print the script version."""
    print("Version 0.1")


def _stop_all():
    """Stop the proxy, every codis-server and the dashboard on this host."""
    codis_proxy('stop')
    commands.getstatusoutput("killall codis-server")
    codis_dashboard("stop")


def _unknown_command():
    """Fallback for unrecognised commands: show the help and the detected local IP."""
    help_prompt()
    print(get_client_ip())


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("no argument")
        help_prompt()
        sys.exit(1)
    _actions = {
        "--help": help_prompt,
        "--version": _print_version,
        "init": init_config,
        "start_master": start_master,
        "stop_master": stop_master,
        "start_slave": start_slave,
        "stop_slave": stop_slave,
        "start_web": lambda: codis_dashboard('start'),
        "stop_web": lambda: codis_dashboard('stop'),
        "add_group": codis_group,
        "initslot": assign_slot,
        "start_proxy": lambda: codis_proxy('start'),
        "stop_proxy": lambda: codis_proxy('stop'),
        "stopall": _stop_all,
    }
    _actions.get(sys.argv[1], _unknown_command)()
这里还有一个基于redis sentinel方式的集群redis服务启停脚本
- sentinel.conf配置示例
port 26330
sentinel monitor lashou 192.168.5.14 6479 2
sentinel down-after-milliseconds lashou 3000
sentinel failover-timeout lashou 4000
sentinel notification-script lashou /data/setup/redis-2.8.19/bin/get_redis_master.py
cat redis_manage.py
#!/usr/bin/env python
# coding:utf8
#author:shantuwqk@163.com
"""Generate conf files for, start and stop sentinel-managed redis instances."""
import os
import sys
import time
from subprocess import Popen, PIPE

try:
    import commands  # Python 2
except ImportError:
    # Python 3 dropped `commands`; subprocess offers the same getstatusoutput().
    import subprocess as commands

from mako.template import Template

from get_redis_master import get_redis_master_info

REDIS_ROOT = "/data/setup/redis-2.8.19"
REDIS_DBDIR = "%s/data" % REDIS_ROOT


def _write_redis_conf(template_name, port, datadir, **params):
    """Render ./conf/<template_name> into ./conf/redis_<port>.conf under REDIS_ROOT.

    Also creates *datadir* when it does not exist yet.  Returns True when the
    data directory was created, False when it already existed (the conf file
    is written in both cases).
    """
    # The template cache directory ('tmp/test') is relative to REDIS_ROOT.
    os.chdir(REDIS_ROOT)
    rendered = Template(filename="./conf/%s" % template_name,
                        module_directory='tmp/test').render(
                            port="%s" % port, datadir=datadir, **params)
    conf_path = "%s/conf/redis_%s.conf" % (REDIS_ROOT, port)
    with open(conf_path, 'w') as conf:
        conf.write(rendered)
    if os.path.exists(datadir):
        print("%s is exists !!" % datadir)
        return False
    print("create %s ...." % datadir)
    os.makedirs(datadir)
    print(commands.getstatusoutput("ls -l %s" % conf_path)[1])
    return True


def INIT_config_master(port, memsize, datadir):
    """Generate conf/redis_<port>.conf for a master instance."""
    return _write_redis_conf("redis.master.conf.template", port, datadir,
                             memsize=memsize)


def INIT_config_slave(port, memsize, datadir, slaveof):
    """Generate conf/redis_<port>.conf for a slave; *slaveof* is "ip:port" of the master."""
    master = slaveof.split(":")
    return _write_redis_conf("redis.slave.conf.template", port, datadir,
                             memsize=memsize,
                             slaveof="%s %s" % (master[0], master[1]))


def start_redis(redis_root, port):
    """Start the redis-server for *port* from the installation at *redis_root*."""
    # POSIX redirection instead of bash-only "&>", since os.system uses sh.
    start_redis_cmd = ("cd %s ; nohup ./src/redis-server conf/redis_%s.conf "
                       "> ./logs/%s_redis.log 2>&1 & " % (redis_root, port, port))
    if os.system(start_redis_cmd) == 0:
        print(" redis start instence:[%s] ok" % port)
    else:
        print(" redis start instence:[%s] faild" % port)


def stop_redis(port):
    """Kill (SIGKILL) the redis process whose command line mentions *port*."""
    # "grep -v grep" keeps the pipeline from matching its own shell.
    pid_cmd = "ps aux | grep redis| grep -v grep| grep %s|awk '{print $2}'" % port
    found = commands.getstatusoutput(pid_cmd)[1]
    pids = " ".join(tok for tok in found.split() if tok.isdigit())
    if not pids:
        print(" redis stop instence:[%s] faild" % port)
        return
    status, output = commands.getstatusoutput("kill -9 %s" % pids)
    if status == 0:
        print(" redis stop instence:[%s] ok \n %s" % (port, output))
    else:
        print(" redis stop instence:[%s] faild" % port)


def help_prompt():
    """Print the command-line usage."""
    print("""
    This program starts and stops sentinel-managed redis instances.
    Options include:
    --version : Prints the version number
    --help    : Helpful tips
    sample :
        python redis_manage.py start master|slave PORT
        python redis_manage.py stop [master|slave] PORT
    """)


def main(argv):
    """Dispatch the command line in *argv* (same layout as sys.argv)."""
    if len(argv) < 2:
        print("no argument")
        help_prompt()
        sys.exit()
    command = argv[1]
    if command.startswith('--'):
        if command[2:] == 'version':
            print('Version 0.1')
        else:
            # --help and any unknown option
            help_prompt()
    elif command == "start" and len(argv) >= 4 and argv[2] == "master":
        INIT_config_master(argv[3], 6, REDIS_DBDIR)
        time.sleep(2)
        start_redis(REDIS_ROOT, argv[3])
    elif command == "start" and len(argv) >= 4 and argv[2] == "slave":
        INIT_config_slave(argv[3], 6, REDIS_DBDIR, get_redis_master_info())
        time.sleep(2)
        start_redis(REDIS_ROOT, argv[3])
    elif command == "stop" and len(argv) >= 3:
        # Accept both "stop PORT" and the form the old help text advertised,
        # "stop master|slave PORT".
        if len(argv) >= 4 and argv[2] in ("master", "slave"):
            port = argv[3]
        else:
            port = argv[2]
        print(commands.getstatusoutput("ps aux | grep redis-server|grep -v grep")[1])
        stop_redis(port)
    else:
        help_prompt()


if __name__ == "__main__":
    main(sys.argv)
触发更新haproxy配置文件的脚本(即上面 sentinel notification-script 所指向、并被 redis_manage.py 导入的 get_redis_master.py)
#!/usr/bin/env python
# coding:utf8
#author:shantuwqk@163.com
import os
import sys
import time
from subprocess import Popen, PIPE

try:
    import commands  # Python 2
except ImportError:
    # Python 3 dropped `commands`; subprocess offers the same getstatusoutput().
    import subprocess as commands

# NOTE: mako is imported lazily in update_ha_conf(), so get_redis_master_info()
# can be imported by other scripts without needing mako.

#init_redis_master =
REDIS_ROOT = "/data/setup/redis-2.8.19"

remote_ha_root = "/data/setup/haproxy/conf"
localhafile = "/data/setup/redis/conf/haproxy.cfg"


def parse_master_addr(output):
    """Return "ip:port" from the 4-line "ip / <ip> / port / <port>" sentinel output."""
    lines = output.split("\n")
    if len(lines) < 4:
        raise ValueError("unexpected 'sentinel masters' output: %r" % output)
    return "%s:%s" % (lines[1].strip(), lines[3].strip())


def backend_name(role, addr):
    """Build the haproxy backend name "redis_<role>_<last ip octet>_<port>" for "ip:port"."""
    host, port = addr.split(":")[:2]
    return "redis_%s_%s_%s" % (role, host.split(".")[-1], port)


def build_slave_dict(output):
    """Map every "ip:port" line of *output* to its slave backend name.

    Lines without a ":" (blank lines, error text) are skipped.
    """
    slaves = {}
    for line in output.split("\n"):
        addr = line.strip()
        if ":" in addr:
            slaves[addr] = backend_name("slave", addr)
    return slaves


def get_redis_master_info():
    """Ask sentinel for the current master and return it as "ip:port"."""
    cmd = ("cd %s;./src/redis-cli -h 192.168.5.15 -p 26329 sentinel masters"
           "| grep -A 4 \"ip\"|head -4" % REDIS_ROOT)
    output = commands.getstatusoutput(cmd)[1]
    return parse_master_addr(output)


def handle_redis_m():
    """Return {"ip:port": backend_name} for the current master."""
    # Query sentinel once so key and value describe the same master.
    master = get_redis_master_info()
    return {master: backend_name("master", master)}


def handle_redis_s():
    """Return {"ip:port": backend_name} for every slave sentinel knows for 'lashou'."""
    cmd = ("cd %s;./src/redis-cli -h 192.168.5.15 -p 26329 sentinel slaves lashou"
           "| grep -A 1 \"name\"| grep -v \"\\-\\-\\|name\"" % REDIS_ROOT)
    output = commands.getstatusoutput(cmd)[1]
    return build_slave_dict(output)


def update_ha_conf(redis_slave_dict, redis_master_dict):
    """Render conf/haproxy.cfg from its template; return True when the file exists afterwards."""
    from mako.template import Template

    # The template cache directory ('tmp/test') is relative to REDIS_ROOT.
    os.chdir(REDIS_ROOT)
    rendered = Template(filename="./conf/haproxy.cfg.template",
                        module_directory='tmp/test').render(
                            redis_slave_dict=redis_slave_dict,
                            redis_master_dict=redis_master_dict)
    conf_path = "%s/conf/haproxy.cfg" % REDIS_ROOT
    with open(conf_path, 'w') as conf:
        conf.write(rendered)
    if not os.path.isfile(conf_path):
        print("%s is not exists !!" % conf_path)
        return False
    print("update haproxy config info master:[%s] and slave:[%s] ...."
          % (redis_master_dict, redis_slave_dict))
    print(commands.getstatusoutput("ls -l %s" % conf_path)[1])
    return True


def reload_ha_service(addr):
    """Copy the local haproxy.cfg to host *addr* and reload haproxy there.

    Returns True when both the copy and the reload succeeded.
    """
    rsync_cmd = ("rsync -avz -e \"ssh -p 22\" %s root@%s:%s/"
                 % (localhafile, addr, remote_ha_root))
    # "\\$(" sends a literal "$(cat ...)" so the REMOTE shell reads the remote
    # pid file; unescaped, the local shell would expand it first.
    remote_cmd = ("ssh -p 22 root@%s \" /usr/local/sbin/haproxy "
                  "-f /data/setup/haproxy-1.5.10/conf/haproxy.cfg "
                  "-p /var/run/haproxy.pid -sf \\$(cat /var/run/haproxy.pid)\"" % addr)
    ok = commands.getstatusoutput(rsync_cmd)[0] == 0
    if ok:
        # Only reload when the new config actually reached the remote host.
        ok = commands.getstatusoutput(remote_cmd)[0] == 0
    if ok:
        print("haproxy.cfg update ok")
    else:
        print("haproxy.cfg update faild!!")
    time.sleep(5)
    return ok


if __name__ == "__main__":
    print(get_redis_master_info())
    print(update_ha_conf(handle_redis_s(), handle_redis_m()))
如果需要迁移现有 redis 数据到 codis,该如何操作?
- 先搭建好 codis 集群并让 codis-proxy 正确运行起来
- 对线上每一个 redis 实例运行一个 redis-port 来向 codis 导入数据,例如:
for port in {6379,6380,6479,6480}; do
nohup redis-port sync --ncpu=4 --from=redis-server:${port} \
--target=codis-proxy:19000 > ${port}.log 2>&1 &
sleep 5
done
tail -f *.log
每个 redis-port 负责将对应的 redis 数据导入到 codis;多个 redis-port 之间不互相干扰,除非多个 redis 上的 key 本身出现冲突;单个 redis-port 可以将负责的数据并行迁移以提高速度,通过 --ncpu 来指定并行数;导入速度受带宽以及 codis-proxy 处理速度限制(本质是大量的 slotsrestore 操作)。完成数据迁移后,在适当的时候将服务指向 Codis,并将原 redis 下线。
- 旧 redis 下线时,会导致 redis-port 连接断开,于是自动退出