一、client的上传
1、准备工作
client的配置文件中应当包含所有tracker的地址信息。本例执行./client/fdfs_test conf/client.conf upload /usr/include/stdio.h进行上传文件。
2、连接tracker
采用轮询方式,直到可以连接上一个tracker为止。
3、tracker_query_storage_store_list_without_group
发送 TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ALL命令,获得所有的storage server。
4、tracker_query_storage_store
发送TRACKER_PROTO_CMD_SERVICE_QUERY_STORE_WITHOUT_GROUP_ONE命令,获得一个storage server。之后就可以连接这个storage server。本例的tracker会选择剩余空间最大的组,并采用轮询方式从组中选出一个storage server。
5、storage_do_upload_file
/**
Request body layout sent to the storage server.

STORAGE_PROTO_CMD_UPLOAD_FILE and
STORAGE_PROTO_CMD_UPLOAD_APPENDER_FILE:
1 byte: store path index
8 bytes: meta data bytes
8 bytes: file size
FDFS_FILE_EXT_NAME_MAX_LEN bytes: file ext name
meta data bytes: each meta data separated by \x01,
name and value separated by \x02
file size bytes: file content
STORAGE_PROTO_CMD_UPLOAD_SLAVE_FILE:
8 bytes: master filename length
8 bytes: meta data bytes
8 bytes: file size
FDFS_FILE_PREFIX_MAX_LEN bytes : filename prefix
FDFS_FILE_EXT_NAME_MAX_LEN bytes: file ext name, do not include dot (.)
master filename bytes: master filename
meta data bytes: each meta data separated by \x01,
name and value separated by \x02
file size bytes: file content
**/
/*
 * Upload one file to a storage server (excerpt; "......" marks elided code).
 * Builds the request header+body described above, streams the file content,
 * then reads back the group name and the remote filename assigned by the
 * storage server into the caller-supplied group_name / remote_filename
 * buffers. Returns 0 on success, an errno-style code on failure.
 */
int storage_do_upload_file(ConnectionInfo *pTrackerServer, \
ConnectionInfo *pStorageServer, const int store_path_index, \
const char cmd, const int upload_type, const char *file_buff, \
void *arg, const int64_t file_size, const char *master_filename, \
const char *prefix_name, const char *file_ext_name, \
const FDFSMetaData *meta_list, const int meta_count, \
char *group_name, char *remote_filename)
{
......
/* slave upload only when the caller supplies both a group name and a master filename */
bUploadSlave = (strlen(group_name) > 0 && master_filename_len > 0);
if (bUploadSlave)
{
......
}
else if ((result=storage_get_upload_connection(pTrackerServer, \
&pStorageServer, group_name, &storageServer, \
&new_store_path, &new_connection)) != 0)
{
*group_name = '\0';
return result;
}
*group_name = '\0';
do
{
pHeader = (TrackerHeader *)out_buff;
p = out_buff + sizeof(TrackerHeader);
if (bUploadSlave)
{
/* 8 bytes: master filename length */
long2buff(master_filename_len, p);
p += FDFS_PROTO_PKG_LEN_SIZE;
}
else
{
/* 1 byte: store path index */
*p++ = (char)new_store_path;
}
/* 8 bytes: file size */
long2buff(file_size, p);
p += FDFS_PROTO_PKG_LEN_SIZE;
if (bUploadSlave)
{
......
}
else
{
/* zero-fill the fixed-width extension field before copying into it */
memset(p, 0, FDFS_FILE_EXT_NAME_MAX_LEN);
}
if (file_ext_name != NULL)
{
int file_ext_len;
file_ext_len = strlen(file_ext_name);
if (file_ext_len > FDFS_FILE_EXT_NAME_MAX_LEN)
{
file_ext_len = FDFS_FILE_EXT_NAME_MAX_LEN;
}
if (file_ext_len > 0)
{
memcpy(p, file_ext_name, file_ext_len);
}
}
p += FDFS_FILE_EXT_NAME_MAX_LEN;
if (bUploadSlave)
{
memcpy(p, master_filename, master_filename_len);
p += master_filename_len;
}
/* pkg_len = fixed fields built so far + file content, excluding the header itself */
long2buff((p - out_buff) + file_size - sizeof(TrackerHeader), \
pHeader->pkg_len);
pHeader->cmd = cmd;
pHeader->status = 0;
if ((result=tcpsenddata_nb(pStorageServer->sock, out_buff, \
p - out_buff, g_fdfs_network_timeout)) != 0) //in this walk-through, cmd is STORAGE_PROTO_CMD_UPLOAD_FILE
{
break;
}
if (upload_type == FDFS_UPLOAD_BY_FILE)
{
if ((result=tcpsendfile(pStorageServer->sock, file_buff, \
file_size, g_fdfs_network_timeout, \
&total_send_bytes)) != 0) //now the file content itself is streamed
{
break;
}
}
else if (upload_type == FDFS_UPLOAD_BY_BUFF)
{
......
}
else //FDFS_UPLOAD_BY_CALLBACK
{
......
}
pInBuff = in_buff;
if ((result=fdfs_recv_response(pStorageServer, \
&pInBuff, sizeof(in_buff), &in_bytes)) != 0)
{
break;
}
/* response must contain at least the group name plus a non-empty filename */
if (in_bytes <= FDFS_GROUP_NAME_MAX_LEN)
{
result = EINVAL;
break;
}
in_buff[in_bytes] = '\0';
memcpy(group_name, in_buff, FDFS_GROUP_NAME_MAX_LEN);
group_name[FDFS_GROUP_NAME_MAX_LEN] = '\0';
memcpy(remote_filename, in_buff + FDFS_GROUP_NAME_MAX_LEN, \
in_bytes - FDFS_GROUP_NAME_MAX_LEN + 1); //copy out the filename the storage server assigned (incl. trailing '\0')
} while (0);
if (result == 0 && meta_count > 0)
{
result = storage_set_metadata(pTrackerServer, \
pStorageServer, group_name, remote_filename, \
meta_list, meta_count, \
STORAGE_SET_METADATA_FLAG_OVERWRITE); //attach the metadata to the newly stored file
if (result != 0) //rollback
{
storage_delete_file(pTrackerServer, pStorageServer, \
group_name, remote_filename);
*group_name = '\0';
*remote_filename = '\0';
}
}
if (new_connection)
{
tracker_disconnect_server_ex(pStorageServer, result != 0);
}
return result;
}
二、storage的状态转换
storage主流程将信息处理通过管道通知,交给工作线程处理,处理函数为storage_recv_notify_read。一开始,状态为FDFS_STORAGE_STAGE_NIO_INIT。
1、storage_recv_notify_read
/*
 * Pipe-notification handler run by a storage work thread (excerpt; "......"
 * marks elided code). Each notification is the address of a fast_task_info
 * written into the pipe; the task is dispatched according to its nio stage:
 *   NIO_INIT -> register read/write events for the client socket,
 *   NIO_RECV -> continue receiving the current request,
 *   NIO_SEND -> start sending the response.
 */
void storage_recv_notify_read(int sock, short event, void *arg)
{
struct fast_task_info *pTask;
StorageClientInfo *pClientInfo;
long task_addr;
int64_t remain_bytes;
int bytes;
int result;
while (1)
{
if ((bytes=read(sock, &task_addr, sizeof(task_addr))) < 0) //note: "sock" is just a file descriptor -- here it is the notify pipe's read end, not a network socket
{
if (!(errno == EAGAIN || errno == EWOULDBLOCK))
{
......//log
}
break;
}
else if (bytes == 0) //pipe drained: this function's job is only to read task addresses, so stop here
{
break;
}
pTask = (struct fast_task_info *)task_addr;
pClientInfo = (StorageClientInfo *)pTask->arg;
if (pClientInfo->sock < 0) //quit flag
{
......//error handling
}
switch (pClientInfo->stage)
{
case FDFS_STORAGE_STAGE_NIO_INIT:
result = storage_nio_init(pTask); //registers read/write events on the client socket (handlers client_sock_read/client_sock_write), adds only the read event, then sets pClientInfo->stage to FDFS_STORAGE_STAGE_NIO_RECV
break;
case FDFS_STORAGE_STAGE_NIO_RECV: //after receiving part of a large file, the server may call storage_nio_notify() (writes to this pipe) while the stage is still NIO_RECV, so this case resumes the receive
pTask->offset = 0;
remain_bytes = pClientInfo->total_length - \
pClientInfo->total_offset; //total length minus bytes already processed
if (remain_bytes > pTask->size) //remaining bytes exceed the receive buffer size
{
pTask->length = pTask->size;
}
else
{
pTask->length = remain_bytes;
}
client_sock_read(pClientInfo->sock, EV_READ, pTask);
result = 0;
/*
if ((result=event_add(&pTask->ev_read, \
&g_network_tv)) != 0)
{
......//log
}
*/
break;
case FDFS_STORAGE_STAGE_NIO_SEND:
result = storage_send_add_event(pTask);
break;
default:
result = EINVAL;
break;
}
if (result != 0)
{
task_finish_clean_up(pTask);
}
}
}
2、client_sock_read
/*
 * Read callback for a client socket (heavily elided excerpt -- "......"
 * stands for removed code, which is why the braces appear unbalanced here).
 * Once a full buffer (pTask->length bytes) has been read, it either starts
 * request processing (first chunk) or pushes the chunk to the disk I/O queue.
 */
static void client_sock_read(int sock, short event, void *arg)
{
......
if (pTask->offset >= pTask->length) //one full buffer read: may be the whole file or only part of it
{
if (pClientInfo->total_offset + pTask->length >= \
pClientInfo->total_length) //bytes already processed + bytes just read >= total request length
{
/* current req recv done */
pClientInfo->stage = FDFS_STORAGE_STAGE_NIO_SEND;//everything, including the file content, has now been received
pTask->req_count++;
}
if (pClientInfo->total_offset == 0) //first chunk of this request
{
pClientInfo->total_offset = pTask->length;
storage_deal_task(pTask); //for an upload this calls storage_upload_file(): it mainly fills pTask and ultimately calls storage_dio_queue_push
}
else
{
pClientInfo->total_offset += pTask->length;
/* continue write to file */
storage_dio_queue_push(pTask);
}
return;
}
}
return;
}
3、storage_upload_file
解析数据填充pFileContext(pClientInfo = (StorageClientInfo *)pTask->arg; pFileContext = &(pClientInfo->file_context);),调用storage_get_filename()生成文件名,最后调用storage_write_to_file(pTask, file_offset, file_bytes, p - pTask->data, dio_write_file, storage_upload_file_done_callback, clean_func, store_path_index),其中clean_func为dio_write_finish_clean_up(对于非trunk文件)。storage_write_to_file调用storage_dio_queue_push。
4、storage_dio_queue_push
int storage_dio_queue_push(struct fast_task_info *pTask)
{
StorageFileContext *pFileContext;
struct storage_dio_context *pContext;
int result;
pFileContext = &(((StorageClientInfo *)pTask->arg)->file_context);
pContext = g_dio_contexts + pFileContext->dio_thread_index; //因为storage server为每个store_path产生一个读写线程,本例store_path_index为0;pFileContext->dio_thread_index为1,对应写线程。
if ((result=task_queue_push(&(pContext->queue), pTask)) != 0)
{
task_finish_clean_up(pTask);
return result;
}
if ((result=pthread_cond_signal(&(pContext->cond))) != 0) //唤醒写线程,唤醒后会从队列中取出pTask,并调用((StorageClientInfo *)pTask->arg)->deal_func,即为dio_write_file。
{
task_finish_clean_up(pTask);
return result;
}
return 0;
}
/*
 * Disk I/O thread worker: append the current buffer of an upload to its file
 * (excerpt; "......" marks elided code). Opens the file on first call, writes
 * pTask->length - buff_offset bytes, and either notifies the nio thread to
 * fetch more data (large file, more chunks pending) or closes the file and
 * fires the done callback (all data written).
 * Note: the success path returns 0 from inside the do/while; the cleanup code
 * after the loop runs only on the error path (reached via break).
 */
int dio_write_file(struct fast_task_info *pTask)
{
StorageClientInfo *pClientInfo;
StorageFileContext *pFileContext;
int result;
int write_bytes;
char *pDataBuff;
pClientInfo = (StorageClientInfo *)pTask->arg;
pFileContext = &(pClientInfo->file_context);
result = 0;
do
{
if (pFileContext->fd < 0)
{
if (pFileContext->extra_info.upload.before_open_callback!=NULL)
{
result = pFileContext->extra_info.upload. \
before_open_callback(pTask);
if (result != 0)
{
break;
}
}
if ((result=dio_open_file(pFileContext)) != 0)
{
break;
}
}
pDataBuff = pTask->data + pFileContext->buff_offset;
write_bytes = pTask->length - pFileContext->buff_offset;//buff_offset is where the file content starts inside the buffer (past the protocol header)
if (write(pFileContext->fd, pDataBuff, write_bytes) != write_bytes)
{
result = errno != 0 ? errno : EIO;
}
/* global counters are shared across dio threads, so update under the lock */
pthread_mutex_lock(&g_dio_thread_lock);
g_storage_stat.total_file_write_count++;
if (result == 0)
{
g_storage_stat.success_file_write_count++;
}
pthread_mutex_unlock(&g_dio_thread_lock);
if (result != 0)
{
break;
}
......
pFileContext->offset += write_bytes;
if (pFileContext->offset < pFileContext->end)
{//a large file usually needs more than one write, so call storage_nio_notify (write to the pipe), which re-enters storage_recv_notify_read to handle the remaining data -- note the stage is still FDFS_STORAGE_STAGE_NIO_RECV at that point
pFileContext->buff_offset = 0;
storage_nio_notify(pTask); //notify nio to deal -- this also appears to prevent one huge upload from starving later upload requests
}
else //all data written; this experiment used a small file, so execution reaches here on the first pass
{
......
if (pFileContext->extra_info.upload.before_close_callback != NULL) //in this walk-through this invokes storage_upload_file_done_callback()
{
result = pFileContext->extra_info.upload. \
before_close_callback(pTask);
}
/* file write done, close it */
close(pFileContext->fd);
pFileContext->fd = -1;
if (pFileContext->done_callback != NULL)
{
pFileContext->done_callback(pTask, result);
}
}
return 0;
} while (0);
/* error path: run the clean function (e.g. dio_write_finish_clean_up), then report */
pClientInfo->clean_func(pTask);
if (pFileContext->done_callback != NULL)
{
pFileContext->done_callback(pTask, result);
}
return result;
}
5、storage_upload_file_done_callback
/*
 * Completion callback after an uploaded file is fully written to disk
 * (excerpt; "......" marks elided code). Confirms trunk allocation when
 * applicable, writes the binlog entry and statistics, builds the response
 * packet (group name + remote filename) in pTask->data, and finally notifies
 * the nio thread to send it back to the client.
 */
static void storage_upload_file_done_callback(struct fast_task_info *pTask, \
const int err_no)
{
StorageClientInfo *pClientInfo;
StorageFileContext *pFileContext;
TrackerHeader *pHeader;
int result;
pClientInfo = (StorageClientInfo *)pTask->arg;
pFileContext = &(pClientInfo->file_context);
if (pFileContext->extra_info.upload.file_type & _FILE_TYPE_TRUNK)
{
result = trunk_client_trunk_alloc_confirm( \
&(pFileContext->extra_info.upload.trunk_info), err_no);
if (err_no != 0)
{
/* the original upload error takes precedence over the confirm result */
result = err_no;
}
}
else
{
result = err_no;
}
if (result == 0)
{
result = storage_service_upload_file_done(pTask); //post-upload adjustments
if (result == 0)
{
if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)
{
result = storage_binlog_write(\
pFileContext->timestamp2log, \
STORAGE_OP_TYPE_SOURCE_CREATE_FILE, \
pFileContext->fname2log); //write the binlog (buffered only; a scheduled task flushes/replicates it later)
}
}
}
if (result == 0)
{
int filename_len;
char *p;
if (pFileContext->create_flag & STORAGE_CREATE_FLAG_FILE)
{
CHECK_AND_WRITE_TO_STAT_FILE3_WITH_BYTES( \
g_storage_stat.total_upload_count, \
g_storage_stat.success_upload_count, \
g_storage_stat.last_source_update, \
g_storage_stat.total_upload_bytes, \
g_storage_stat.success_upload_bytes, \
pFileContext->end - pFileContext->start)
}
/* build the response body: group name followed by the stored filename */
filename_len = strlen(pFileContext->fname2log);
pClientInfo->total_length = sizeof(TrackerHeader) + \
FDFS_GROUP_NAME_MAX_LEN + filename_len;
p = pTask->data + sizeof(TrackerHeader);
memcpy(p, pFileContext->extra_info.upload.group_name, \
FDFS_GROUP_NAME_MAX_LEN);
p += FDFS_GROUP_NAME_MAX_LEN;
memcpy(p, pFileContext->fname2log, filename_len);
}
else
{
......
}
STORAGE_ACCESS_LOG(pTask, ACCESS_LOG_ACTION_UPLOAD_FILE, result);
pClientInfo->total_offset = 0;
pTask->length = pClientInfo->total_length;
pHeader = (TrackerHeader *)pTask->data;
pHeader->status = result;
pHeader->cmd = STORAGE_PROTO_CMD_RESP;
long2buff(pClientInfo->total_length - sizeof(TrackerHeader), \
pHeader->pkg_len);
storage_nio_notify(pTask); //re-enters storage_recv_notify_read; the stage is now FDFS_STORAGE_STAGE_NIO_SEND, so storage_send_add_event(pTask) runs, which calls client_sock_write to send the response (STORAGE_PROTO_CMD_RESP plus the filename) to the client
}
三、client与storage总结
client与tracker交互后,客户端便开始连接storage、发送命令头、发送文件数据;storage server的storage_recv_notify_read起到了非常重要的作用。storage server的主线程接到新的连接时,就向管道中写入pTask地址,某个工作线程中的storage_recv_notify_read函数开始运行,同时从初始状态变为接收数据状态(storage_nio_init)。对于大文件传输,每处理完一个缓存大小(交给底层写线程去写文件),就通过storage_nio_notify(pTask)进入storage_recv_notify_read来调度运行。当数据全部传输完成时,状态变为FDFS_STORAGE_STAGE_NIO_SEND(client_sock_read),进行一些必要的处理后,又通过storage_nio_notify(pTask)进入storage_recv_notify_read,由此函数来调度发送对客户端的回应。