在实际工作中,我需要使用redis的客户端去连接redis,于是选择了hiredis客户端(公司强推)。 hiRedis 是 Redis 官方指定的 C 语言客户端开发包,支持 Redis 完整的命令集、管线以及事件驱动编程。
1、情景描述
1.1 使用场景
一个 epoll 模型的服务器不断接受外界请求。这个服务器框架给用户预留了一个回调函数(会在多个工作线程中被并发调用),由用户在其中实现自己的业务逻辑,对 redis 的访问就需要在这个回调函数内部完成。
1.2 初步实现方案
在程序启动的时候,我就初始化redis的连接,获得hiredis句柄。然后把hiredis句柄传入到线程函数里面。让其做相应的业务逻辑。
1.3 结果
很不幸,单次请求没有问题,但做压力测试、同时开20个线程访问时,程序立即出core。
线上出core如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
(gdb) bt
#0 0x000000302af2e2ed in raise () from /lib64/tls/libc.so.6
#1 0x000000302af2fa3e in abort () from /lib64/tls/libc.so.6
#2 0x000000302af62db1 in __libc_message () from /lib64/tls/libc.so.6
#3 0x000000302af6888e in _int_free () from /lib64/tls/libc.so.6
#4 0x000000302af6a12d in _int_realloc () from /lib64/tls/libc.so.6
#5 0x000000302af6b39c in realloc () from /lib64/tls/libc.so.6
#6 0x0000000000dc2269 in sdscatlen (s=Variable "s" is not available.
) at sds.c:97
#7 0x0000000000dc1d40 in __redisAppendCommand (c=0x16fa1d0, cmd=Variable "cmd" is not available.
) at hiredis.c:1186
#8 0x0000000000dc1d97 in redisvAppendCommand (c=0x16fa1d0, format=Variable "format" is not available.
) at hiredis.c:1206
#9 0x0000000000dc1eed in redisvCommand (c=0x16fa1d0, format=Variable "format" is not available.
) at hiredis.c:1267
#10 0x0000000000dc1fb6 in redisCommand (c=Variable "c" is not available.
) at hiredis.c:1276
#11 0x0000002b1a8e6310 in Default_Handler::get_batch_redis (this=0x1ff41f0, redis_ins=0x175a7d0, dataid=6202, buf_num=12, res_num=6, key_sign=0x2bd67cb3c8,
res_lens=0x2bd5f54208, res_buf=0x2bd5f54398
""
) at default_handler.cpp:659
#12 0x0000002b1a9134df in Default_Ms_Handler::get_digest (this=0x1ff41f0) at default_ms_handler.cpp:646
#13 0x000000000092910c in do_proc () at gss_work.cpp:1107
#14 0x000000000091c91f in thread_main () at gss_net.cpp:188
#15 0x0000000000bc10e9 in default_native () at ubserver_app.cpp:283
#16 0x0000000000bbc676 in eppool_consume (pool=0x2230b90, data=0x22188f0) at eppool.cpp:649
#17 0x0000000000bbc4d1 in _eppool_workers (param=0x22188f0) at eppool.cpp:604
#18 0x000000302b80610a in start_thread () from /lib64/tls/libpthread.so.0
#19 0x000000302afc6003 in clone () from /lib64/tls/libc.so.6
#20 0x0000000000000000 in ?? ()
|
2、线下复现
因为不方便公开公司代码,所以我写一个类似的代码来复现这个case。
2.1 代码
代码主要有testredis.cpp和Makefile(需要自己指定hiredis目录)。用法是 ./redis -n [num] -h [host] -p [port],其中 n 为 host 数目,多个 host 之间用"-"进行分隔,例如 ./redis -n 2 -h 10.48.46.26-10.46.175.102 -p 6379。
testredis.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
|
/***************************************************************************
*
* Copyright (c) 2014 Baidu.com, Inc. All Rights Reserved
*
**************************************************************************/
/**
* @file redistest.cpp
* @author liujun05(com@baidu.com)
* @date 2014/02/25 10:28:44
* @brief
*
**/
#include<unistd.h>
#include <stdio.h>
#include <hiredis.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
/* Fallback for uint32: only defined here when no macro of that name exists yet. */
#if !defined(uint32)
#  define uint32 unsigned int
#endif

/* Capacity of the connection table and of the per-host address table. */
#define MAX_REDIS_SERVER_CNT 10

/* Size in bytes of the raw host-list buffer and of each per-host address slot. */
#define MAX_REDIS_IPS 1024
/**
 * Command-line configuration of the reproducer.
 */
typedef struct _redis_conf_t {
    /* Number of hosts announced with -n; compared against the parsed host count. */
    uint32 redis_num;
    /* Raw -h argument: host names/addresses joined with '-'. */
    char   redis_ips[MAX_REDIS_IPS];
    /* One host per row, split out of redis_ips by init_data(). */
    char   redis_ip_array[MAX_REDIS_SERVER_CNT][MAX_REDIS_IPS];
    /* TCP port shared by all hosts (-p); main() presets it to 6379. */
    uint32 redis_port;
} redis_conf;
/**
 * Runtime state: the hiredis connections opened by init_data().
 */
typedef struct _redis_data_t {
    /* Number of valid entries at the front of rc[]. */
    uint32        redis_num;
    /* One hiredis connection handle per configured host. */
    redisContext *rc[MAX_REDIS_SERVER_CNT];
} redis_data;
/* Global configuration: defaults are set in main(), options are stored by
 * main_parse_option(), and init_data() splits the host list into it. */
redis_conf g_cfg;
/* Global connection table: filled by init_data(), handed to every worker
 * thread by main(), and released by destory_data(). */
redis_data g_data;
/** Write the one-line command synopsis to stdout. */
void
show_usage()
{
    static const char synopsis[] = "usage: ./redis -n [num] -h [host] -p [port]\n";

    fputs(synopsis, stdout);
}
/** 解析参数 */
int
main_parse_option(
int
argc,
char
**argv)
{
int
c;
//reset 获取参数的位置,多次调用时这个会出现问题
while
((c = getopt(argc, argv,
"h:p:n:"
)) != -1)
{
switch
(c)
{
case
'h'
:
sprintf
(g_cfg.redis_ips, optarg);
break
;
case
'p'
:
g_cfg.redis_port =
atoi
(optarg);
break
;
case
'n'
:
g_cfg.redis_num =
atoi
(optarg);
break
;
default
:
show_usage();
fflush
(stdout);
return
-1;
}
}
return
0;
}
void
* test_thread1(
void
* data)
{
redis_data* redis_ins = (redis_data*)data;
redisReply *reply;
for
(
int
i=0; i<redis_ins->redis_num; i++)
{
reply = (redisReply *)redisCommand( redis_ins->rc[i] ,
"SET %s %s"
,
"foo"
,
"hello world"
);
freeReplyObject(reply);
}
}
int
init_data()
{
g_data.redis_num = 0;
struct
timeval timeout = { 1, 500000 };
// 1.5 seconds
char
*ptok = NULL;
char
*part = strtok_r(g_cfg.redis_ips,
"-"
, &ptok);
int
num = 0;
while
(part)
{
strcpy
(g_cfg.redis_ip_array[num++], part);
part = strtok_r(NULL,
"-"
, &ptok);
}
if
(num != g_cfg.redis_num || num > MAX_REDIS_SERVER_CNT)
{
printf
(
"ip num[%d] not equal redis_num[%d] or not vaild\n"
, num, g_cfg.redis_num);
}
g_data.redis_num = (num > MAX_REDIS_SERVER_CNT ) ? MAX_REDIS_SERVER_CNT : num;
int
i= 0;
for
(i=0; i<g_data.redis_num; i++)
{
g_data.rc[i] = redisConnectWithTimeout( g_cfg.redis_ip_array[i], g_cfg.redis_port , timeout);
if
( g_data.rc[i] == NULL || g_data.rc[i]->err)
{
printf
(
"content to redis server[%s:%u], error[%s]\n"
,
g_cfg.redis_ip_array[i], g_cfg.redis_port, g_data.rc[i]->errstr
);
goto
exit
;
}
}
return
0;
exit
:
for
(
int
j=0; j<i; j++)
{
if
(g_data.rc[j] != NULL)
{
redisFree(g_data.rc[j]);
}
}
return
-1;
}
int
destory_data()
{
for
(
int
j=0; j<g_data.redis_num; j++)
{
if
(g_data.rc[j] != NULL)
{
redisFree(g_data.rc[j]);
}
}
}
/**
 * Entry point of the reproducer: parse options, connect to the redis hosts,
 * then run THREAD_CNT worker threads that all use the same global g_data.
 *
 * @return 0 on success, -1 on bad arguments, connection failure or thread
 *         creation failure
 */
int
main(int argc, char **argv)
{
    enum { THREAD_CNT = 100 };
    pthread_t t[THREAD_CNT];
    int started = 0;
    int status = 0;

    g_cfg.redis_ips[0] = '\0';
    g_cfg.redis_port = 6379;
    g_cfg.redis_num = 0;

    if (0 != main_parse_option(argc, argv))
    {
        show_usage();
        return -1;
    }

    if (0 == g_cfg.redis_num || g_cfg.redis_num > MAX_REDIS_SERVER_CNT)
    {
        printf("the reids num[%u] is not vaild\n", g_cfg.redis_num);
        show_usage();
        /* Invalid arguments are an error, like the other failure paths. */
        return -1;
    }

    if (init_data() != 0)
    {
        printf("init num fail\n");
        return -1;
    }

    for (started = 0; started < THREAD_CNT; started++)
    {
        int err = pthread_create(&t[started], NULL, test_thread1, &g_data);
        if (err != 0)
        {
            printf("pthread_create fail[%s]\n", strerror(err));
            status = -1;
            break;
        }
    }

    /* Join only the threads that were actually created; the remaining
     * pthread_t slots are uninitialized. */
    for (int i = 0; i < started; i++)
    {
        pthread_join(t[i], NULL);
    }

    destory_data();
    return status;
}
/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */
|
1
2
3
4
5
|
# Build the reproducer against a hiredis checkout/build located in ./hiredis.
.PHONY: clean

redis: testredis.cpp
	g++ -g testredis.cpp -I./hiredis -L./hiredis -lhiredis -lpthread -o redis

# -f: do not fail when the binary has not been built yet.
clean:
	rm -f redis
|
2.2 编译执行
1
2
3
|
liujun05@cq01-rdqa-dev012.cq01:~/test/hiredis$ ./redis -n2 -h10.48.46.26-10.46.175.102
*** glibc detected *** double free or corruption (!prev): 0x000000000050aa80 ***
Aborted (core dumped)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
(gdb) bt
#0 0x000000302af2e2ed in raise () from /lib64/tls/libc.so.6
#1 0x000000302af2fa3e in abort () from /lib64/tls/libc.so.6
#2 0x000000302af62db1 in __libc_message () from /lib64/tls/libc.so.6
#3 0x000000302af6888e in _int_free () from /lib64/tls/libc.so.6
#4 0x000000302af68bd6 in free () from /lib64/tls/libc.so.6
#5 0x0000000000403c75 in redisBufferWrite (c=0x50a010, done=0x571c008c) at hiredis.c:1162
#6 0x0000000000403d3e in redisGetReply (c=0x50a010, reply=0x571c00b8) at hiredis.c:1195
#7 0x0000000000403f62 in redisvCommand (c=0x50a010, format=Variable "format" is not available.
) at hiredis.c:1296
#8 0x0000000000404006 in redisCommand (c=Variable "c" is not available.
) at hiredis.c:1313
#9 0x00000000004013e7 in test_thread1 (data=0x509ba0) at testredis.cpp:88
#10 0x000000302b80610a in start_thread () from /lib64/tls/libpthread.so.0
#11 0x000000302afc6003 in clone () from /lib64/tls/libc.so.6
#12 0x0000000000000000 in ?? ()
|
2.3 原因分析
从堆栈的 #5 帧可以看到,core 出在 hiredis.c 的 1162 行(位于 redisBufferWrite 中)。打开 hiredis.c:
1
2
3
4
5
6
7
|
1160     } else if (nwritten > 0) {
1161         if (nwritten == (signed)sdslen(c->obuf)) {
1162             sdsfree(c->obuf);
1163             c->obuf = sdsempty();
1164         } else {
1165             c->obuf = sdsrange(c->obuf,nwritten,-1);
1166         }
|
我们分析下调用关系,首先调用redisCommand.
1
2
3
4
5
6
7
8
|
1309 void *redisCommand(redisContext *c, const char *format, ...) {
1310     va_list ap;
1311     void *reply = NULL;
1312     va_start(ap,format);
1313     reply = redisvCommand(c,format,ap);
1314     va_end(ap);
1315     return reply;
1316 }
|
1
2
3
4
5
|
1303 void *redisvCommand(redisContext *c, const char *format, va_list ap) {
1304     if (redisvAppendCommand(c,format,ap) != REDIS_OK)
1305         return NULL;
1306     return __redisBlockForReply(c);
1307 }
|
接着调用redisvAppendCommand
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
1233 int redisvAppendCommand(redisContext *c, const char *format, va_list ap) {
1234     char *cmd;
1235     int len;
1236
1237     len = redisvFormatCommand(&cmd,format,ap);
1238     if (len == -1) {
1239         __redisSetError(c,REDIS_ERR_OOM,"Out of memory");
1240         return REDIS_ERR;
1241     }
1242
1243     if (__redisAppendCommand(c,cmd,len) != REDIS_OK) {
1244         free(cmd);
1245         return REDIS_ERR;
1246     }
1247
1248     free(cmd);
1249     return REDIS_OK;
1250 }
|
这里,我们需要care调用__redisAppendCommand.
1
2
3
4
5
6
7
8
9
10
11
12
|
1220 int __redisAppendCommand(redisContext *c, char *cmd, size_t len) {
1221     sds newbuf;
1222
1223     newbuf = sdscatlen(c->obuf,cmd,len);
1224     if (newbuf == NULL) {
1225         __redisSetError(c,REDIS_ERR_OOM,"Out of memory");
1226         return REDIS_ERR;
1227     }
1228
1229     c->obuf = newbuf;
1230     return REDIS_OK;
1231 }
|
问题出现了。
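对于任意一个线程,传入的 redisContext* c 都是同一个,因此所有线程共用同一个 c->obuf(输出缓冲区),而上面这些函数对 c->obuf 的读写没有任何加锁保护,线程之间的数据是耦合的。
当一个线程在 redisBufferWrite 中执行 sdsfree(c->obuf) 时,另一个线程可能正在 __redisAppendCommand 中对同一个 c->obuf 做 sdscatlen(内部会 realloc),也可能再次 sdsfree 它,于是出现 double free 或堆损坏,程序出core。线上的堆栈死在 realloc、线下复现的堆栈死在 free,正对应这两种情况。这也是我所谓的hiredis对多线程支持的不好的地方。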
3、终极解决方案
那么,如果我一定要在多线程中通过hiredis客户端调用redis呢。有没有方案了,答案肯定是有,只不过性能稍差。
原先的做法是先获得hiredis连接句柄,然后把同一个句柄传入到多个线程中,让所有线程共用。现在改成在线程里面自己连接、获得hiredis句柄,然后只在本线程内使用。当然,代价是对于每个请求,都需要重新去连接redis服务器,既加大了网络开销,也增加了redis服务端的连接压力。如果想降低这部分开销,可以让每个线程只连接一次并一直复用自己的句柄,或者对共享的句柄加锁后再使用。
redis是单线程异步模型,hiredis这个客户端看来也只支持单线程——更准确地说,同一个 redisContext 不能被多个线程同时使用。希望后续有redis的相关程序猿来改进相应问题,在多线程中使用hiredis需要慎重。