我看的源码版本是1.2.4
前面我已经说了数据存储、哈希表的管理、内存的管理。第二章还说到了用户的请求是如何到达memcached的,即从用户的输入到memcached如何接收到这些输入。那现在我们来分析一下接收到这些输入之后,memcached是怎么处理的。
要看这部分代码就需要知道memcached有多少个命令。
我从process_command()
这个函数里看到的命令有:get
,bget
,add
,set
, replace
, prepend
, append
, cas
, incr
, gets
, decr
, delete
, own
, disown
, stats
, flush_all
, version
, quit
, slabs reassign
, verbosity
.
下面我就选一部分来分析一下。
get, bget, gets
将一行数据分开之后,就调用process_get_command
来进行get请求的处理:将key对应的数据查出来之后,把需要返回的数据地址记录到连接的msglist
数组中,这里存放的是发送给客户端的数据(多个iovec),然后将连接的状态设置为conn_mwrite。conn_mwrite状态主要是使用sendmsg来发送数据,发送数据的时候用到了聚集写(gather write,即通过iovec一次系统调用发送多块不连续的缓冲区)。
(可参考关于聚集写和普通写差别的一篇文章。)这里用聚集写的原因:get多个item的话,data存放的地方不连续,并且数据量可能比较大。
- 如果在用户空间分配内存,然后把数据拷贝到新分配的内存,再调用send的话,开销较大。
- 如果多次调用send的话,多次系统调用消耗更大。
/* Split the command line into tokens, then dispatch the retrieval commands. */
ntokens = tokenize_command(command, tokens, MAX_TOKENS);
/* "get" and "bget" share one handler and are called with return_cas == false;
   "gets" uses the same handler with return_cas == true. All three require
   at least three tokens. */
if (ntokens >= 3 &&
((strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) ||
(strcmp(tokens[COMMAND_TOKEN].value, "bget") == 0))) {
process_get_command(c, tokens, ntokens, false);
} else if (ntokens >= 3 && (strcmp(tokens[COMMAND_TOKEN].value, "gets") == 0)) {
process_get_command(c, tokens, ntokens, true);
}
/*
 * Handle "get" / "bget" / "gets": look up every key token and queue the
 * reply on the connection's outgoing iovec list.
 *
 *   c          - connection the request arrived on (must not be NULL)
 *   tokens     - tokenized command line; keys start at tokens[KEY_TOKEN]
 *   ntokens    - number of tokens (not used by this function)
 *   return_cas - true for "gets": the item's cas_id is appended to each
 *                VALUE line
 *
 * On success the connection is switched to conn_mwrite. If a reply element
 * could not be queued, "SERVER_ERROR out of memory" is sent through
 * out_string() instead.
 *
 * Every item stored in c->ilist keeps the reference taken by item_get();
 * c->icurr / c->ileft (and c->suffixcurr / c->suffixleft for "gets") record
 * what still has to be released once the reply has been sent. An item that
 * could NOT be stored in c->ilist is released here, so no reference leaks
 * on the failure paths.
 */
static inline void process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) {
    char *key;
    size_t nkey;
    int i = 0;
    item *it;
    token_t *key_token = &tokens[KEY_TOKEN];
    char *suffix;
    bool failed = false;

    assert(c != NULL);

    /*
     * Walk the key tokens once. The walk ends at the first token with
     * length 0, or early when something cannot be queued. It is a single
     * pass on purpose: re-entering the loop after a failure would retry the
     * same key and never reach the error reply below.
     */
    while (key_token->length != 0) {
        key = key_token->value;
        nkey = key_token->length;

        it = item_get(key, nkey);
        if (it == NULL) {
            stats_get_misses++;
            key_token++;
            continue;
        }
        /* item_get() has incremented it->refcount for us */

        if (i >= c->isize) {
            /* c->ilist holds the items of this reply; double it when full. */
            item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2);
            if (new_list == NULL) {
                failed = true;
            } else {
                c->isize *= 2;
                c->ilist = new_list;
            }
        }

        /*
         * Construct the response. Each hit adds three elements to the
         * outgoing data list:
         *   "VALUE "
         *   key
         *   " " + flags + " " + data length + "\r\n" + data (with \r\n)
         * For "gets" the cas id is spliced in before the final "\r\n" of
         * the header, so header and data are queued separately.
         */
        if (!failed && return_cas) {
            /* Goofy mid-flight realloc. */
            if (i >= c->suffixsize) {
                char **new_suffix_list = realloc(c->suffixlist,
                                                 sizeof(char *) * c->suffixsize * 2);
                if (new_suffix_list == NULL) {
                    failed = true;
                } else {
                    c->suffixsize *= 2;
                    c->suffixlist = new_suffix_list;
                }
            }

            if (!failed) {
                suffix = suffix_from_freelist();
                if (suffix == NULL) {
                    /* Treat a missing suffix buffer as an allocation
                       failure instead of formatting into NULL. */
                    failed = true;
                } else {
                    *(c->suffixlist + i) = suffix;
                    sprintf(suffix, " %llu\r\n", (unsigned long long)it->cas_id);
                    if (add_iov(c, "VALUE ", 6) != 0 ||
                        add_iov(c, ITEM_key(it), it->nkey) != 0 ||
                        add_iov(c, ITEM_suffix(it), it->nsuffix - 2) != 0 ||
                        add_iov(c, suffix, strlen(suffix)) != 0 ||
                        add_iov(c, ITEM_data(it), it->nbytes) != 0) {
                        failed = true;
                    }
                }
            }
        } else if (!failed) {
            if (add_iov(c, "VALUE ", 6) != 0 ||
                add_iov(c, ITEM_key(it), it->nkey) != 0 ||
                add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes) != 0) {
                failed = true;
            }
        }

        if (failed) {
            /*
             * The item never made it into c->ilist, so nobody else will
             * drop the reference item_get() took.
             * NOTE(review): assumes out_string() below discards any iovecs
             * already queued for this item - confirm.
             */
            item_remove(it);
            break;
        }

        if (settings.verbose > 1)
            fprintf(stderr, ">%d sending key %s\n", c->sfd, ITEM_key(it));

        stats_get_hits++;
        item_update(it);
        *(c->ilist + i) = it;
        i++;

        key_token++;
    }

    c->icurr = c->ilist;
    c->ileft = i;
    if (return_cas) {
        c->suffixcurr = c->suffixlist;
        c->suffixleft = i;
    }

    if (settings.verbose > 1)
        fprintf(stderr, ">%d END\n", c->sfd);

    /*
     * If the loop was terminated because of out-of-memory, it is not
     * reliable to add END\r\n to the buffer, because it might not end
     * in \r\n. So we send SERVER_ERROR instead.
     * NOTE(review): a final token with length 0 but a non-NULL value also
     * takes this path - confirm against tokenize_command() whether that
     * can happen.
     */
    if (failed || key_token->value != NULL || add_iov(c, "END\r\n", 5) != 0
        || (c->udp && build_udp_headers(c) != 0)) {
        out_string(c, "SERVER_ERROR out of memory");
    } else {
        conn_set_state(c, conn_mwrite);
        c->msgcurr = 0;
    }
    return;
}
/* Handling of the conn_mwrite state. */
case conn_mwrite:
/* transmit() writes queued data to the socket; it reports
   TRANSMIT_COMPLETE once there is nothing left to send. */
switch (transmit(c)) {
case TRANSMIT_COMPLETE:
/* Sending finished: release what the reply was holding, then pick the
   next state. */
if (c->state == conn_mwrite) {
/* Drop the reference held on each item that was part of the reply. */
while (c->ileft > 0) {
item *it = *(c->icurr);
assert((it->it_flags & ITEM_SLABBED) == 0);
item_remove(it);
c->icurr++;
c->ileft--;
}
/* Hand every suffix buffer of the reply back to the freelist. */
while (c->suffixleft > 0) {
char *suffix = *(c->suffixcurr);
if(suffix_add_to_freelist(suffix)) {
/* Failed to add to freelist, don't leak */
free(suffix);
}
c->suffixcurr++;
c->suffixleft--;
}
/* Reply fully sent: go back to reading the next request. */
conn_set_state(c, conn_read);
} else if (c->state == conn_write) {
/* NOTE(review): this branch implies the code is also reached in the
   conn_write state, presumably through a case label above this
   excerpt - confirm. */
/* Free the one-shot reply buffer, if any, and continue with the state
   stored in c->write_and_go. */
if (c->write_and_free) {
free(c->write_and_free);
c->write_and_free = 0;
}
conn_set_state(c, c->write_and_go);
} else {
/* Any other state here is unexpected: log it and close the connection. */
if (settings.verbose > 0)
fprintf(stderr, "Unexpected state %d\n", c->state);
conn_set_state(c, conn_closing);
}
break;
case TRANSMIT_INCOMPLETE:
case TRANSMIT_HARD_ERROR:
break; /* Continue in state machine. */
case TRANSMIT_SOFT_ERROR:
/* The socket would block; `stop` presumably ends the enclosing
   state-machine loop until the next event - confirm. */
stop = true;
break;
}
break;
/*
 * Send the next chunk of the queued reply with sendmsg().
 *
 * Returns:
 *   TRANSMIT_COMPLETE   - every message in c->msglist has been sent.
 *   TRANSMIT_INCOMPLETE - some data was sent; call again for the rest.
 *   TRANSMIT_SOFT_ERROR - the socket would block; a write event has been
 *                         registered with update_event().
 *   TRANSMIT_HARD_ERROR - sending failed; the connection state has already
 *                         been changed (conn_read for UDP, conn_closing
 *                         otherwise).
 */
static int transmit(conn *c) {
    assert(c != NULL);

    if (c->msgcurr < c->msgused &&
            c->msglist[c->msgcurr].msg_iovlen == 0) {
        /* Finished writing the current msg; advance to the next. */
        c->msgcurr++;
    }

    if (c->msgcurr >= c->msgused) {
        return TRANSMIT_COMPLETE;
    }

    struct msghdr *m = &c->msglist[c->msgcurr];
    ssize_t res = sendmsg(c->sfd, m, 0);

    if (res > 0) {
        size_t sent = (size_t)res;

        /* We've written some of the data. Remove the completed iovec
           entries from the list of pending writes; without this the
           message never drains and the same bytes are sent again. */
        while (m->msg_iovlen > 0 && sent >= m->msg_iov->iov_len) {
            sent -= m->msg_iov->iov_len;
            m->msg_iovlen--;
            m->msg_iov++;
        }

        /* Might have written just part of the last iovec entry;
           adjust it so the next write will do the rest. */
        if (sent > 0) {
            m->msg_iov->iov_base = (char *)m->msg_iov->iov_base + sent;
            m->msg_iov->iov_len -= sent;
        }
        return TRANSMIT_INCOMPLETE;
    }

    if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        /* Socket buffer is full: wait until the socket is writable. */
        if (!update_event(c, EV_WRITE | EV_PERSIST)) {
            if (settings.verbose > 0)
                fprintf(stderr, "Couldn't update event\n");
            conn_set_state(c, conn_closing);
            return TRANSMIT_HARD_ERROR;
        }
        return TRANSMIT_SOFT_ERROR;
    }

    /* if res==0 or res==-1 and error is not EAGAIN or EWOULDBLOCK,
       we have a real error, on which we close the connection */
    if (c->udp)
        conn_set_state(c, conn_read);
    else
        conn_set_state(c, conn_closing);
    return TRANSMIT_HARD_ERROR;
}
add,set,replace,prepend,append,cas
我将这些命令放在一起讨论,是因为它们都调用了process_update_command()
这个函数
...
/* Storage commands. add/set/replace/prepend/append are matched with exactly
   6 tokens and "cas" with 7. Each `&& (comm = NREAD_...)` stores the command
   code in `comm` as a side effect of the match; because the assignment is
   part of the condition, this only works while every NREAD_* value is
   non-zero. */
else if (ntokens == 6 &&
((strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = NREAD_ADD)) ||
(strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = NREAD_SET)) ||
(strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = NREAD_REPLACE)) ||
(strcmp(tokens[COMMAND_TOKEN].value, "prepend") == 0 && (comm = NREAD_PREPEND)) ||
(strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = NREAD_APPEND)) )) {
/* Plain storage command: no cas id on the command line. */
process_update_command(c, tokens, ntokens, comm, false);
} else if (ntokens == 7 && (strcmp(tokens[COMMAND_TOKEN].value, "cas") == 0 && (comm = NREAD_CAS))) {
/* "cas" carries one extra token, so the handler is told to parse it. */
process_update_command(c, tokens, ntokens, comm, true);
}
/*
 * Handle the storage commands (add / set / replace / prepend / append / cas).
 *
 * Parses flags, expiry time, value length and (for cas) the cas id from the
 * command line, allocates an item big enough for the value plus its trailing
 * "\r\n", and switches the connection to conn_nread so the data block is
 * read straight into that item. The command code is kept in c->item_comm;
 * the store itself happens after the data has been read, outside this
 * function.
 *
 *   c          - connection the command arrived on
 *   tokens     - tokenized command line (key, flags, exptime, length[, cas])
 *   ntokens    - number of tokens (not used by this function)
 *   comm       - command code, stored in c->item_comm
 *   handle_cas - true when tokens[5] holds a cas id
 *
 * Replies "CLIENT_ERROR bad command line format" when a number is out of
 * range or the length is negative or too large. When no item can be
 * allocated, replies with a SERVER_ERROR and arranges for the data line to
 * be swallowed.
 */
static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, int comm, bool handle_cas) {
    char *key;
    size_t nkey;
    int flags;
    time_t exptime;
    long parsed_len;
    int vlen;
    uint64_t req_cas_id = 0;
    item *it;
    /* Largest length that still leaves room for "\r\n" in an int
       (INT_MAX - 2, spelled without <limits.h>). */
    const long max_vlen = (long)(~0u >> 1) - 2;

    /* Extract the values given on the command line. */
    key = tokens[KEY_TOKEN].value;
    nkey = tokens[KEY_TOKEN].length;

    /* strto*() only set errno on failure, so clear it first; otherwise a
       stale value from earlier code would reject a valid command. */
    errno = 0;
    flags = strtoul(tokens[2].value, NULL, 10);
    exptime = strtol(tokens[3].value, NULL, 10);
    parsed_len = strtol(tokens[4].value, NULL, 10);

    // does cas value exist?
    if (handle_cas) {
        req_cas_id = strtoull(tokens[5].value, NULL, 10);
    }

    if (errno == ERANGE || ((flags == 0 || exptime == 0) && errno == EINVAL)) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    /* A negative length, or one for which vlen + 2 would overflow, must
       never reach item_alloc() or the data-reading state. */
    if (parsed_len < 0 || parsed_len > max_vlen) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }
    vlen = (int)parsed_len;

    it = item_alloc(key, nkey, flags, realtime(exptime), vlen + 2);
    if (it == NULL) {
        if (!item_size_ok(nkey, flags, vlen + 2))
            out_string(c, "SERVER_ERROR object too large for cache");
        else
            out_string(c, "SERVER_ERROR out of memory");
        /* swallow the data line */
        c->write_and_go = conn_swallow;
        c->sbytes = vlen + 2;
        return;
    }

    if (handle_cas)
        it->cas_id = req_cas_id;

    /* Read the value directly into the freshly allocated item. */
    c->item = it;
    c->ritem = ITEM_data(it);
    c->rlbytes = it->nbytes;
    c->item_comm = comm;
    conn_set_state(c, conn_nread);
}
incr, decr
为key对应的数值增加或者减少delta
/*
 * Handle "incr" / "decr": apply a numeric delta to the value stored under
 * a key and send the result to the client.
 *
 *   c       - connection the command arrived on
 *   tokens  - tokenized command line; tokens[KEY_TOKEN] is the key and
 *             tokens[2] the delta
 *   ntokens - number of tokens (not used by this function)
 *   incr    - true for "incr", false for "decr"
 *
 * Replies "CLIENT_ERROR bad command line format" when the delta does not
 * fit the parsed type, "NOT_FOUND" when the key is absent, and otherwise
 * whatever add_delta() returns.
 */
static void process_arithmetic_command(conn *c, token_t *tokens, const size_t ntokens, const bool incr) {
    /* Scratch buffer handed to add_delta(); sized for the longest
       unsigned 64-bit decimal plus the terminating NUL. */
    char temp[sizeof("18446744073709551615")];
    item *it;
    int64_t delta;
    char *key;
    size_t nkey;

    key = tokens[KEY_TOKEN].value;
    nkey = tokens[KEY_TOKEN].length;

    /* Reject an out-of-range delta instead of silently using the clamped
       value strtoll() returns; errno is cleared first so that a stale
       value cannot trigger the check. */
    errno = 0;
    delta = strtoll(tokens[2].value, NULL, 10);
    if (errno == ERANGE) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    it = item_get(key, nkey);
    if (it == NULL) {
        out_string(c, "NOT_FOUND");
        return;
    }

    out_string(c, add_delta(it, incr, delta, temp));
    item_remove(it);         /* release our reference */
}
delete,version,own,disown,stats
delete,version,quit比较简单,own,disown这个在网上简单搜了一下,还是不知道怎么用,我也没去看。
stats也就是将所有统计信息写给客户端吧。
flush_all
flush_all 命令用于清理缓存中的所有 key=>value(键=>值) 对。该命令提供了一个可选参数 time,用于在指定的时间后执行清理缓存操作。
/* "flush_all [time]": set settings.oldest_live and call
   item_flush_expired(). Without a time argument (ntokens == 2) the cutoff
   is based on current_time; otherwise it is derived from the parsed
   argument via realtime(). */
else if (ntokens >= 2 && ntokens <= 3 && (strcmp(tokens[COMMAND_TOKEN].value, "flush_all") == 0)) {
    time_t exptime = 0;

    set_current_time();

    if (ntokens == 2) {
        settings.oldest_live = current_time - 1;
        item_flush_expired();
        out_string(c, "OK");
        return;
    }

    /* strtol() only sets errno on failure; clear it so that a stale ERANGE
       from earlier code cannot reject a valid argument. */
    errno = 0;
    exptime = strtol(tokens[1].value, NULL, 10);
    if (errno == ERANGE) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    settings.oldest_live = realtime(exptime) - 1;
    item_flush_expired();
    out_string(c, "OK");
    return;
}
/*
 * Unlink, from every LRU queue, the items whose timestamp is at or after
 * settings.oldest_live. Does nothing while settings.oldest_live is 0.
 */
void do_item_flush_expired(void) {
    if (settings.oldest_live == 0) {
        return;
    }

    for (int queue = 0; queue < LARGEST_ID; queue++) {
        item *cur = heads[queue];

        /*
         * Each LRU is sorted in decreasing time order and an item's
         * timestamp is never newer than its last access time, so the walk
         * can stop at the first item older than oldest_live; the
         * oldest_live check auto-expires whatever remains behind it.
         */
        while (cur != NULL && cur->time >= settings.oldest_live) {
            /* Fetch the successor first: unlinking detaches `cur`. */
            item *following = cur->next;

            /* Items flagged ITEM_SLABBED are left alone. */
            if (!(cur->it_flags & ITEM_SLABBED)) {
                do_item_unlink(cur);
            }
            cur = following;
        }
    }
}
slabs reassign
这个命令将一个slabclass的所有数据移到另一个slabclass。(一个slabclass管理着同一个大小的所有chunk)这样做会浪费内存吧?不知道这个有什么用。没仔细看^v^.