cURL 源码解析

curl 介绍

Curl 分为 curl 和 libcurl 两个部分。

curl:命令行程序

libcurl:链接库

对于开发者来说,使用的都是 libcurl 中的 API。为了方便,下文统一使用 curl 来表示。

curl是一个开源的客户端url传输库,支持众多的协议,包括DICT, FILE, FTP, FTPS, Gopher, HTTP, HTTPS, IMAP, IMAPS, LDAP, LDAPS, POP3, POP3S, RTMP, RTSP, SCP, SFTP, SMTP, SMTPS, Telnet and TFTP等。

curl支持SSL证书, HTTP POST, HTTP PUT,FTP上传,基于表单的HTTP上传、代理、HTTP/2、cookies、用户名+密码认证(Basic, Plain, Digest, NTLM等)、下载/上传文件的断点续传、HTTP 代理隧道(proxy tunneling)等。

curl是高度可移植的,可以工作在不同的平台上,支持Windows,Unix,Linux等。

curl是免费的,线程安全的,IPV6兼容的,同时它还有很多其他非常丰富的特性。

curl已经被很多知名的大企业以及应用程序所采用。

相关的文档以及源码可以参考官方链接:http://curl.haxx.se/libcurl/

本专栏使用 VS2017 在 Windows 下对 curl 进行调试跟踪,并据此解析其代码流程。

curl 数据结构

curl 中使用的数据结构比较少,主要是 Curl_llist(链表)和 Curl_hash(哈希表)。这里对这两个结构进行源码解析。

代码解析以注释的形式直接写在源码中,均为个人理解,如果发现错误欢迎指正~

llist

先看一下 Curl_llist,头文件和源码 分别是 llist.h 和 llist.c。

llist.h

...
/* Destructor callback; Curl_llist_remove() invokes it as dtor(user, ptr). */
typedef void (*Curl_llist_dtor)(void *, void *);

/* One node of a doubly linked list. */
struct Curl_llist_element {
  void *ptr;                            /* payload pointer */
  struct Curl_llist_element *prev;      /* previous node */
  struct Curl_llist_element *next;      /* next node */
};

/* List header: both ends of the chain plus the destructor used on removal. */
struct Curl_llist {
  struct Curl_llist_element *head;      /* first node */
  struct Curl_llist_element *tail;      /* last node */
  Curl_llist_dtor dtor;                 /* destructor; checked for NULL before
                                           it is called in Curl_llist_remove */
  size_t size;                          /* size of the list (presumably the
                                           element count - TODO confirm) */
};

/* Initialise a list and register its destructor callback. */
void Curl_llist_init(struct Curl_llist *, Curl_llist_dtor);
/* Add an element to the list (judging by the name, after the given element
   - TODO confirm against llist.c). */
void Curl_llist_insert_next(struct Curl_llist *, struct Curl_llist_element *,
                            const void *, struct Curl_llist_element *node);
/* Remove an element from the list; the last argument is handed to the
   destructor as its 'user' parameter. */
void Curl_llist_remove(struct Curl_llist *, struct Curl_llist_element *,
                       void *);
size_t Curl_llist_count(struct Curl_llist *);
void Curl_llist_destroy(struct Curl_llist *, void *);
...

llist.c 中实现代码比较容易理解,值得注意的是 析构的触发在 Curl_llist_remove 中处理。

/*
 * Remove element 'e' from 'list' (excerpt: the unlinking code is elided).
 * The visible tail shows that the list's destructor, when one is set, is
 * called with the caller supplied 'user' pointer and the element payload.
 */
void
Curl_llist_remove(struct Curl_llist *list, struct Curl_llist_element *e,
                  void *user)
{
  ...
  /* call the dtor() last for when it actually frees the 'e' memory itself */
  /* Removal is where the destructor fires, so the payload is not leaked. */
  if(list->dtor)
    list->dtor(user, ptr);
}
hash

再看一下 Curl_hash,头文件和源码 分别是 hash.h 和 hash.c。

hash.h

 //
//  相关回调
/* Hash function prototype */
/* Maps (key, key_length) to a slot index; slots_num is the table size. */
typedef size_t (*hash_function) (void *key,
                                 size_t key_length,
                                 size_t slots_num);

/*
   Comparator function prototype. Compares two keys.
*/
typedef size_t (*comp_function) (void *key1,
                                 size_t key1_len,
                                 void *key2,
                                 size_t key2_len);

/* Destructor callback for a stored value. */
typedef void (*Curl_hash_dtor)(void *);

//
// 数据结构

/* Hash table header. */
struct Curl_hash {
  /* Array of lists indexed by the hash value (see FETCH_LIST); each list
     element (Curl_llist_element) holds a Curl_hash_element. */
  struct Curl_llist *table;

  /* Hash function to be used for this hash table */
  /* Converts a key into a slot index. The hash functions used in the code
     shown in this file are Curl_hash_str (char * keys), trhash (Curl_easy
     keys) and hash_fd (curl_socket_t keys). */
  hash_function hash_func;

  /* Comparator function to compare keys */
  /* Returns 1 when the keys are equal and 0 when they differ. The matching
     comparators are Curl_str_key_compare (for Curl_hash_str), trhash_compare
     (for trhash) and fd_key_compare (for hash_fd). */
  comp_function comp_func;

  Curl_hash_dtor   dtor;            /* destructor callback */
  int slots;                        /* number of slots, fixed when the table
                                       is initialised; used as the modulus by
                                       the hash functions */
  size_t size;                      /* value reported by Curl_hash_count() */
};

/* One stored entry: list linkage, the value pointer and a copy of the key. */
struct Curl_hash_element {
  struct Curl_llist_element list;   /* linkage into the slot's list */
  void   *ptr;                      /* value pointer */
  size_t key_len;                   /* length of the key */
  char   key[1]; /* allocated memory following the struct */
                                    /* first byte of the key */
};

/* Iteration state used by Curl_hash_start_iterate() and
   Curl_hash_next_element(). */
struct Curl_hash_iterator {
  struct Curl_hash *hash;                       /* table being walked */
  int slot_index;                               /* presumably the slot the
                                                   walk is currently in -
                                                   TODO confirm */
  struct Curl_llist_element *current_element;   /* presumably the last
                                                   element handed out -
                                                   TODO confirm */
};

//
// 基础函数
/* Set up table 'h' with 'slots' slots and the three callbacks. */
int Curl_hash_init(struct Curl_hash *h,
                   int slots,
                   hash_function hfunc,
                   comp_function comparator,
                   Curl_hash_dtor dtor);

/* insert */
void *Curl_hash_add(struct Curl_hash *h, void *key, size_t key_len, void *p);
/* delete */
int Curl_hash_delete(struct Curl_hash *h, void *key, size_t key_len);
/* look up the value stored under a key */
void *Curl_hash_pick(struct Curl_hash *, void *key, size_t key_len);
/* judging by the signature, runs cb(user, ptr) over stored values -
   TODO confirm against hash.c */
void Curl_hash_apply(struct Curl_hash *h, void *user,
                     void (*cb)(void *user, void *ptr));
#define Curl_hash_count(h) ((h)->size)
void Curl_hash_destroy(struct Curl_hash *h);
void Curl_hash_clean(struct Curl_hash *h);
void Curl_hash_clean_with_criterium(struct Curl_hash *h, void *user,
                                    int (*comp)(void *, void *));

//
// Ready-made hash callbacks
/* string -> slot index */
size_t Curl_hash_str(void *key, size_t key_length, size_t slots_num);
size_t Curl_str_key_compare(void *k1, size_t key1_len, void *k2,
                            size_t key2_len);

//
// Iteration
void Curl_hash_start_iterate(struct Curl_hash *hash,
                             struct Curl_hash_iterator *iter);
struct Curl_hash_element *
Curl_hash_next_element(struct Curl_hash_iterator *iter);

void Curl_hash_print(struct Curl_hash *h,
                     void (*func)(void *));

hash.c 需要关注的是 FETCH_LIST

...
// Select the list (slot) that belongs to a key:
// x is the Curl_hash pointer, y the key and z the key length.
// NOTE: the arguments are expanded without parentheses, so pass plain
// identifiers rather than compound expressions.
#define FETCH_LIST(x,y,z) &x->table[x->hash_func(y, z, x->slots)]
// The slot index comes from calling hash_func;
// for string keys that call ends up in Curl_hash_str.
...
/*
 * Hash a byte string into a slot index.
 *
 * key        - start of the bytes to hash
 * key_length - number of bytes to hash
 * slots_num  - number of slots in the table; the result is in [0, slots_num)
 *
 * Starts from 5381 and, for every byte, multiplies the running value by 33
 * and XORs the byte in.
 */
size_t Curl_hash_str(void *key, size_t key_length, size_t slots_num)
{
  const char *bytes = (const char *)key;
  size_t hash = 5381;
  size_t i;

  for(i = 0; i < key_length; i++) {
    hash *= 33;                 /* identical to hash += hash << 5 */
    hash ^= (size_t)bytes[i];
  }

  return hash % slots_num;
}

Curl_hash 的使用。

在 curl 中只有几处使用了 Curl_hash,主要是:DNS 哈希表(值为 Curl_dns_entry)、套接字哈希表(以 curl_socket_t 为 key)、Curl_sh_entry 中的 Curl_easy 哈希表(以 Curl_easy 指针为 key),以及下文提到的连接缓存哈希表。

创建 DNS 哈希表, hostip.c

/*
 * Set up 'hash' as the DNS cache: a 7-slot table keyed by strings
 * (Curl_hash_str / Curl_str_key_compare) whose values are released with
 * freednsentry. Returns the result of Curl_hash_init().
 */
int Curl_mk_dnscache(struct Curl_hash *hash)
{
  const int rc = Curl_hash_init(hash, 7, Curl_hash_str,
                                Curl_str_key_compare, freednsentry);

  return rc;
}

DNS 哈希表添加 dns元素,hostip.c

/*
 * Store a resolved address in the DNS cache (excerpt, parts elided).
 * The visible code allocates a Curl_dns_entry, builds the cache key from
 * hostname and port, and adds the entry to data->dns.hostcache.
 * Returns NULL when the entry cannot be allocated.
 */
struct Curl_dns_entry *
Curl_cache_addr(struct Curl_easy *data,
                struct Curl_addrinfo *addr,
                const char *hostname,
                int port)
{
  char entry_id[MAX_HOSTCACHE_LEN];     /* cache key, filled in by
                                           create_hostcache_id() from the
                                           host name AND the port */
  size_t entry_len;                     /* strlen() of entry_id */
  struct Curl_dns_entry *dns;
  ...
  /* Create a new cache entry */
  dns = calloc(1, sizeof(struct Curl_dns_entry));
  if(!dns) {
    return NULL;
  }

  /* Create an entry id, based upon the hostname and port */
  create_hostcache_id(hostname, port, entry_id, sizeof(entry_id));
  entry_len = strlen(entry_id);
  ...
  /* Store the resolved data in our DNS cache. */
  /* key: entry_id (length includes the terminating NUL), value: dns */
  dns2 = Curl_hash_add(data->dns.hostcache, entry_id, entry_len + 1,
                       (void *)dns);
  ...
}

在 hostip.c 中可以找到,多次 Curl_hash_pick 拾取 和 Curl_hash_delete 删除。这里只举一个例子。

/*
 * Look up hostname:port in the DNS cache of 'data' (excerpt, parts elided).
 *
 * Returns the cached entry, or NULL when nothing is cached or when the
 * cached entry turned out to be stale and was removed from the cache.
 */
static struct Curl_dns_entry *fetch_addr(struct Curl_easy *data,
                                         const char *hostname,
                                         int port)
{
  struct Curl_dns_entry *dns = NULL;
  size_t entry_len;
  char entry_id[MAX_HOSTCACHE_LEN];

  /* Create an entry id, based upon the hostname and port */
  create_hostcache_id(hostname, port, entry_id, sizeof(entry_id));
  entry_len = strlen(entry_id);

  /* See if its already in our dns cache */
  /* Pick the entry by key; the key length includes the terminating NUL,
     matching how the entry was added. */
  dns = Curl_hash_pick(data->dns.hostcache, entry_id, entry_len + 1);
  ...
  if(dns && (data->set.dns_cache_timeout != -1)) {
    /* See whether the returned entry is stale. Done before we release lock */
    struct hostcache_prune_data user;

    time(&user.now);
    user.cache_timeout = data->set.dns_cache_timeout;

    if(hostcache_timestamp_remove(&user, dns)) {
      infof(data, "Hostname in DNS cache was stale, zapped\n");
      dns = NULL; /* the memory deallocation is being handled by the hash */
      /* the cached host name has expired: drop it from the cache */
      Curl_hash_delete(data->dns.hostcache, entry_id, entry_len + 1);
    }
  }

  return dns;
}

连接缓存(connection cache)哈希表的创建可以在 conncache.c 中找到,它同样是以字符串为 key 的哈希表,这里不做过多解析。

来看一下哈希表的迭代遍历。conncache.c

bool Curl_conncache_foreach(struct Curl_easy *data,
                            struct conncache *connc,
                            void *param,
                            int (*func)(struct Curl_easy *data,
                                        struct connectdata *conn, void *param))
{
  struct Curl_hash_iterator iter;
  struct Curl_llist_element *curr;
  struct Curl_hash_element *he;

  if(!connc)
    return FALSE;

  CONNCACHE_LOCK(data);
  Curl_hash_start_iterate(&connc->hash, &iter);				// 获取 哈希头

  he = Curl_hash_next_element(&iter);						// 通过 next 函数进行迭代 遍历 直至为 NULL
  while(he) {
    struct connectbundle *bundle;

    bundle = he->ptr;
    he = Curl_hash_next_element(&iter);						// 通过 next 函数进行迭代 遍历 直至为 NULL

    curr = bundle->conn_list.head;
    while(curr) {
      /* Yes, we need to update curr before calling func(), because func()
         might decide to remove the connection */
      struct connectdata *conn = curr->ptr;
      curr = curr->next;

      if(1 == func(data, conn, param)) {
        CONNCACHE_UNLOCK(data);
        return TRUE;
      }
    }
  }
  CONNCACHE_UNLOCK(data);
  return FALSE;
}

这里看一下 Curl_multi 、Curl_easy 和 Curl_sh_entry 中哈希表结构。

在这里插入图片描述

Curl_multi 中 看一下 curl_multi_init 初始化

/*
 * Create a multi handle with the default socket hash and connection hash
 * sizes. Returns whatever Curl_multi_handle() returns.
 */
struct Curl_multi *curl_multi_init(void)
{
  struct Curl_multi *handle =
    Curl_multi_handle(CURL_SOCKET_HASH_TABLE_SIZE, CURL_CONNECTION_HASH_SIZE);

  return handle;
}

/*
 * Allocate a multi handle and initialise its hash tables (excerpt, parts
 * elided). Returns the new handle, or NULL when the allocation or one of
 * the initialisations fails.
 */
struct Curl_multi *Curl_multi_handle(int hashsize, /* socket hash */
                                     int chashsize) /* connection hash */
{
  struct Curl_multi *multi = calloc(1, sizeof(struct Curl_multi));

  if(!multi)
    return NULL;

  multi->magic = CURL_MULTI_HANDLE;

  /* DNS cache hash table */
  if(Curl_mk_dnscache(&multi->hostcache))
    goto error;

  /* socket hash table, keyed by curl_socket_t (see sh_init) */
  if(sh_init(&multi->sockhash, hashsize))
    goto error;

  /* connection cache */
  if(Curl_conncache_init(&multi->conn_cache, chashsize))
    goto error;
  ...
  return multi;

  error:
  ...
  free(multi);
  return NULL;
}

看一下重点解析的函数 sh_init

/*
 * Initialise the socket hash: 'hashsize' slots, keys hashed with hash_fd and
 * compared with fd_key_compare, values released with sh_freeentry.
 * Returns the result of Curl_hash_init().
 */
static int sh_init(struct Curl_hash *hash, int hashsize)
{
  const int rc = Curl_hash_init(hash, hashsize, hash_fd, fd_key_compare,
                                sh_freeentry);

  return rc;
}

/* Destructor for socket hash values: releases the Curl_sh_entry itself. */
static void sh_freeentry(void *freethis)
{
  free(freethis);
}

static size_t fd_key_compare(void *k1, size_t k1_len, void *k2, size_t k2_len)	 // curl_socket_t 比较
{
  (void) k1_len; (void) k2_len;

  return (*((curl_socket_t *) k1)) == (*((curl_socket_t *) k2));
}

/*
 * Hash function for the socket hash: the key points at a curl_socket_t and
 * the slot index is that socket value modulo slots_num. key_length is
 * ignored.
 */
static size_t hash_fd(void *key, size_t key_length, size_t slots_num)
{
  const curl_socket_t *sockp = key;

  (void) key_length;

  return (*sockp) % slots_num;
}

再看一下 sockhash 相关的函数:添加元素的 sh_addentry 和拾取元素的 sh_getentry。

/*
 * Make sure socket 's' has an entry in the socket hash 'sh'.
 *
 * Returns the existing entry when there is one. Otherwise a new
 * Curl_sh_entry is allocated, its 'transfers' hash is initialised and the
 * entry is added to 'sh' under the socket value. Returns NULL when the
 * allocation, the hash initialisation or the insertion fails.
 */
static struct Curl_sh_entry *sh_addentry(struct Curl_hash *sh,
                                         curl_socket_t s)
{
  struct Curl_sh_entry *entry = sh_getentry(sh, s);

  if(entry)
    return entry;               /* already present */

  entry = calloc(1, sizeof(*entry));
  if(!entry)
    return NULL;                /* major failure */

  /* the per-socket hash of transfers */
  if(Curl_hash_init(&entry->transfers, TRHASH_SIZE, trhash,
                    trhash_compare, trhash_dtor))
    goto fail_free;

  /* register the fresh entry under its socket */
  if(!Curl_hash_add(sh, (char *)&s, sizeof(s), entry))
    goto fail_destroy;

  return entry;                 /* things are good in sockhash land */

fail_destroy:
  Curl_hash_destroy(&entry->transfers);
fail_free:
  free(entry);
  return NULL;
}

/*
 * Look up the entry stored for socket 's' in the socket hash 'sh'.
 * Returns NULL for CURL_SOCKET_BAD without touching the hash; otherwise
 * returns whatever Curl_hash_pick() finds.
 */
static struct Curl_sh_entry *sh_getentry(struct Curl_hash *sh,
                                         curl_socket_t s)
{
  if(s == CURL_SOCKET_BAD)
    return NULL;                /* only look for proper sockets */

  return Curl_hash_pick(sh, (char *)&s, sizeof(curl_socket_t));
}

最后看一下 singlesocket 函数

// Excerpt of singlesocket() (large parts elided).
// The visible code asks multi_getsock() for the sockets of the easy handle
// 'data', then for every socket that has a read or write action it looks the
// socket up in multi->sockhash, adds an entry when none exists, and - unless
// the entry was already there - registers 'data' in that entry's 'transfers'
// hash. Returns CURLM_OUT_OF_MEMORY when an entry or a transfer cannot be
// added.
// NOTE(review): the original note described Curl_multi as acting like a doubly
// linked list of Curl_easy handles; that is not visible in this excerpt.
static CURLMcode singlesocket(struct Curl_multi *multi,
                              struct Curl_easy *data)
{
  curl_socket_t socks[MAX_SOCKSPEREASYHANDLE];
  ...
  struct Curl_sh_entry *entry;
  curl_socket_t s;
  ...
  curraction = multi_getsock(data, socks);      // sockets in use by this handle
  ...
  for(i = 0; (i< MAX_SOCKSPEREASYHANDLE) &&
        (curraction & (GETSOCK_READSOCK(i) | GETSOCK_WRITESOCK(i)));
      i++) {
    ...
    bool sincebefore = FALSE;

    s = socks[i];

    /* get it from the hash */
    // curl_socket_t -> Curl_sh_entry lookup in sockhash
    entry = sh_getentry(&multi->sockhash, s);
    ...
    if(entry) {
    	...
        sincebefore = TRUE;
        ...
    }
    else {
      /* this is a socket we didn't have before, add it to the hash! */
      // key: the socket, value: the new entry
      entry = sh_addentry(&multi->sockhash, s);
      if(!entry)
        /* fatal */
        return CURLM_OUT_OF_MEMORY;
    }
    ...
    if(!sincebefore) {      // the socket was not known before this call
      ...
      /* add 'data' to the transfer hash on this socket! */
      // the key is the Curl_easy pointer value itself
      if(!Curl_hash_add(&entry->transfers, (char *)&data, /* hash key */
                        sizeof(struct Curl_easy *), data))
        return CURLM_OUT_OF_MEMORY;
    }
  }
}

curl 网络IO模型

I/O 多路复用机制(I/O Multiplex)

目前主流操作系统提供的 I/O 多路复用 API 如下:

  • select,是通用机制,Windows、Unix-like 系统都支持;
  • poll,是 UNIX-like 系统支持;
  • devpoll,是 SUN Solaris 系统支持。当然,SUN 公司已经不存在了;
  • epoll, 是 Linux 系统支持的主流机制;
  • Kqueue,是 freebsd 内核支持的机制,Mac OS、IOS 系统也支持;
  • IOCP,是 Windows 系统支持的机制。
select

curl中封装了 poll 和 select 这两个网络 I/O 模型。

curl 源码中使用 HAVE_POLL_FINE 宏,来控制两者。

定义 HAVE_POLL_FINE 宏,使用 poll, 否则使用 select。

相关代码 在 select.h 和 select.c 的 Curl_poll 中封装。这里将进行解析。

// pollfd related definitions as quoted from select.h (Linux flavour)
typedef int curl_socket_t;
#define CURL_SOCKET_BAD -1

#define POLLIN      0x01	// there is data to read
#define POLLPRI     0x02	// there is urgent data to read
#define POLLOUT     0x04	// writing will not block
#define POLLERR     0x08	// error condition on the descriptor
#define POLLHUP     0x10	// hang up: the peer closed the connection
#define POLLNVAL    0x20	// invalid request: the fd is not open

struct pollfd
{
    curl_socket_t fd;		// file descriptor
    short   events;			// requested events
    short   revents;		// returned events
};

// pollfd structure on Windows, as declared in WinSock2.h
typedef struct pollfd {

    SOCKET  fd;
    SHORT   events;
    SHORT   revents;

} WSAPOLLFD, *PWSAPOLLFD, FAR *LPWSAPOLLFD;
/*
 * Wrapper around poll(). When HAVE_POLL_FINE is not defined, select() is
 * used instead (through our_select()).
 * With select(), a descriptor that is too large (>= FD_SETSIZE) makes the
 * function return an error.
 * A negative timeout means "wait indefinitely", unless no valid descriptor is
 * supplied: in that case the call is handed over to Curl_wait_ms(timeout_ms)
 * and no poll()/select() is done.
 *
 * Return values:
 *   -1 = system call error or fd >= FD_SETSIZE
 *    0 = timeout
 *    N = number of pollfd structures with a non-zero revents member
 */
int Curl_poll(struct pollfd ufds[], unsigned int nfds, timediff_t timeout_ms)
{
#ifdef HAVE_POLL_FINE
  int pending_ms;
#else
  fd_set fds_read;
  fd_set fds_write;
  fd_set fds_err;
  curl_socket_t maxfd;
#endif
  bool fds_none = TRUE;         /* stays TRUE until a valid socket is seen */
  unsigned int i;
  int r;

  /* Look for at least one valid descriptor; stop at the first one found. */
  if(ufds) {
    for(i = 0; i < nfds; i++) {
      if(ufds[i].fd != CURL_SOCKET_BAD) {
        fds_none = FALSE;
        break;
      }
    }
  }

  /* Every descriptor is CURL_SOCKET_BAD (or ufds is NULL): only wait. */
  if(fds_none) {
    /* no sockets, just wait */
    return Curl_wait_ms(timeout_ms);
  }

  /* Avoid initial timestamp, avoid Curl_now() call, when elapsed
     time in this function does not need to be measured. This happens
     when function is called with a zero timeout or a negative timeout
     value indicating a blocking call should be performed. */

#ifdef HAVE_POLL_FINE           /* poll() branch (e.g. Linux) */

  /* prevent overflow, timeout_ms is typecast to int. */
#if TIMEDIFF_T_MAX > INT_MAX
  if(timeout_ms > INT_MAX)
    timeout_ms = INT_MAX;
#endif
  /* Translate timeout_ms into the value poll() expects. */
  if(timeout_ms > 0)
    pending_ms = (int)timeout_ms;
  else if(timeout_ms < 0)       /* negative: poll() waits indefinitely */
    pending_ms = -1;
  else                          /* zero: poll() returns immediately */
    pending_ms = 0;
  r = poll(ufds, nfds, pending_ms);     /* the actual poll() call */
  if(r <= 0)                    /* failure (< 0) or timeout (== 0) */
    return r;

  for(i = 0; i < nfds; i++) {
    if(ufds[i].fd == CURL_SOCKET_BAD)
      continue;
    if(ufds[i].revents & POLLHUP)
      ufds[i].revents |= POLLIN;            /* hang-up is also reported as
                                               readable */
    if(ufds[i].revents & POLLERR)
      ufds[i].revents |= POLLIN|POLLOUT;    /* an error is also reported as
                                               readable and writable */
  }

#else  /* HAVE_POLL_FINE */     /* select() branch (e.g. Windows) */

  /* The fd_sets are rebuilt on every call: select() takes them as input and
     overwrites them with its results. */
  FD_ZERO(&fds_read);
  FD_ZERO(&fds_write);
  FD_ZERO(&fds_err);
  maxfd = (curl_socket_t)-1;

  for(i = 0; i < nfds; i++) {
    ufds[i].revents = 0;
    if(ufds[i].fd == CURL_SOCKET_BAD)       /* skip invalid descriptors */
      continue;
    VERIFY_SOCK(ufds[i].fd);                /* macro defined elsewhere;
                                               presumably returns -1 when fd
                                               is outside 0..FD_SETSIZE-1 -
                                               TODO confirm */
    if(ufds[i].events & (POLLIN|POLLOUT|POLLPRI|
                         POLLRDNORM|POLLWRNORM|POLLRDBAND)) {
      if(ufds[i].fd > maxfd)                /* track the highest fd for
                                               our_select() */
        maxfd = ufds[i].fd;
      if(ufds[i].events & (POLLRDNORM|POLLIN))
        FD_SET(ufds[i].fd, &fds_read);
      if(ufds[i].events & (POLLWRNORM|POLLOUT))
        FD_SET(ufds[i].fd, &fds_write);
      if(ufds[i].events & (POLLRDBAND|POLLPRI))
        FD_SET(ufds[i].fd, &fds_err);
    }
  }

  /*
     Note also that WinSock ignores the first argument, so we don't worry
     about the fact that maxfd is computed incorrectly with WinSock (since
     curl_socket_t is unsigned in such cases and thus -1 is the largest
     value).
  */
  r = our_select(maxfd, &fds_read, &fds_write, &fds_err, timeout_ms);
  if(r <= 0)
    return r;

  /* select() succeeded: rebuild revents and count, in r, the entries whose
     revents ended up non-zero. */
  r = 0;
  for(i = 0; i < nfds; i++) {
    ufds[i].revents = 0;
    if(ufds[i].fd == CURL_SOCKET_BAD)
      continue;
    if(FD_ISSET(ufds[i].fd, &fds_read)) {
      if(ufds[i].events & POLLRDNORM)
        ufds[i].revents |= POLLRDNORM;
      if(ufds[i].events & POLLIN)
        ufds[i].revents |= POLLIN;
    }
    if(FD_ISSET(ufds[i].fd, &fds_write)) {
      if(ufds[i].events & POLLWRNORM)
        ufds[i].revents |= POLLWRNORM;
      if(ufds[i].events & POLLOUT)
        ufds[i].revents |= POLLOUT;
    }
    if(FD_ISSET(ufds[i].fd, &fds_err)) {
      if(ufds[i].events & POLLRDBAND)
        ufds[i].revents |= POLLRDBAND;
      if(ufds[i].events & POLLPRI)
        ufds[i].revents |= POLLPRI;
    }
    if(ufds[i].revents != 0)
      r++;
  }

#endif  /* HAVE_POLL_FINE */

  return r;
}

/*
 * Wait for the given number of milliseconds. Curl_poll() and our_select()
 * call it when there is no descriptor to watch (the original notes also
 * name Curl_socket_ready() as a caller).
 * On Windows the timeout mechanism of winsock poll()/select() needs a valid
 * socket, which is why a plain wait is used there instead.
 * The function never waits indefinitely: a zero timeout returns at once with
 * 0 and a negative timeout is rejected with EINVAL.
 * Resolution and maximum of the timeout depend on the system.
 *
 * Return values:
 *   -1 = system call error, invalid (negative) timeout, or interrupted
 *    0 = the requested time has elapsed
 */
int Curl_wait_ms(timediff_t timeout_ms)
{
  int r = 0;

  if(!timeout_ms)                   /* zero timeout: return immediately */
    return 0;
  if(timeout_ms < 0) {              /* a negative timeout is not allowed */
    SET_SOCKERRNO(EINVAL);
    return -1;
  }
#if defined(MSDOS)
  delay(timeout_ms);
#elif defined(WIN32)
  /* prevent overflow, timeout_ms is typecast to ULONG/DWORD. */
#if TIMEDIFF_T_MAX >= ULONG_MAX
  if(timeout_ms >= ULONG_MAX)
    timeout_ms = ULONG_MAX-1;
    /* don't use ULONG_MAX, because that is equal to INFINITE */
#endif
  Sleep((ULONG)timeout_ms);         /* Windows: simply sleep */
#else
#if defined(HAVE_POLL_FINE)
  /* prevent overflow, timeout_ms is typecast to int. */
#if TIMEDIFF_T_MAX > INT_MAX
  if(timeout_ms > INT_MAX)
    timeout_ms = INT_MAX;
#endif
  r = poll(NULL, 0, (int)timeout_ms);   /* poll() with no fds acts as a sleep */
#else
  {
    struct timeval pending_tv;
    timediff_t tv_sec = timeout_ms / 1000;
    timediff_t tv_usec = (timeout_ms % 1000) * 1000; /* max=999999 */
#ifdef HAVE_SUSECONDS_T
#if TIMEDIFF_T_MAX > TIME_T_MAX
    /* tv_sec overflow check in case time_t is signed */
    if(tv_sec > TIME_T_MAX)
      tv_sec = TIME_T_MAX;
#endif
    pending_tv.tv_sec = (time_t)tv_sec;
    pending_tv.tv_usec = (suseconds_t)tv_usec;
#else
#if TIMEDIFF_T_MAX > INT_MAX
    /* tv_sec overflow check in case time_t is signed */
    if(tv_sec > INT_MAX)
      tv_sec = INT_MAX;
#endif
    pending_tv.tv_sec = (int)tv_sec;
    pending_tv.tv_usec = (int)tv_usec;
#endif
    /* without poll(): select() with no fd sets acts as a sleep */
    r = select(0, NULL, NULL, NULL, &pending_tv);
  }
#endif /* HAVE_POLL_FINE */
#endif /* MSDOS / WIN32 / everything else */

  /* Normalise the result: any non-zero value coming back from poll() or
     select() is reported to the caller as -1, so the function only ever
     returns 0 or -1. */
  if(r)
    r = -1;
  return r;
}

/*
 * Wrapper around select() that smooths over WinSock peculiarities.
 * A negative timeout makes select() wait indefinitely (NULL timeout
 * pointer). With WinSock, when none of the sets holds a socket, the call is
 * handed to Curl_wait_ms() instead of select().
 *
 * Return values:
 *   -1 = system call error or fd >= FD_SETSIZE
 *    0 = timeout
 *    N = number of file descriptors
 */
static int our_select(curl_socket_t maxfd,   /* highest socket number */
                      fd_set *fds_read,      /* sockets ready for reading */
                      fd_set *fds_write,     /* sockets ready for writing */
                      fd_set *fds_err,       /* sockets with errors */
                      timediff_t timeout_ms) /* milliseconds to wait */
{
  struct timeval pending_tv;
  struct timeval *ptimeout;

#ifdef USE_WINSOCK
  /* WinSock select() can't handle zero events.  See the comment below. */
  if((!fds_read || fds_read->fd_count == 0) &&
     (!fds_write || fds_write->fd_count == 0) &&
     (!fds_err || fds_err->fd_count == 0)) {
    /* no sockets, just wait */
    return Curl_wait_ms(timeout_ms);
  }
#endif
  /* Build the timeout argument for select(). */
  ptimeout = &pending_tv;
  if(timeout_ms < 0) {
    ptimeout = NULL;
  }
  else if(timeout_ms > 0) {
    timediff_t tv_sec = timeout_ms / 1000;
    timediff_t tv_usec = (timeout_ms % 1000) * 1000; /* max=999999 */
#ifdef HAVE_SUSECONDS_T
#if TIMEDIFF_T_MAX > TIME_T_MAX
    /* tv_sec overflow check in case time_t is signed */
    if(tv_sec > TIME_T_MAX)
      tv_sec = TIME_T_MAX;
#endif
    pending_tv.tv_sec = (time_t)tv_sec;
    pending_tv.tv_usec = (suseconds_t)tv_usec;
#elif defined(WIN32) /* maybe also others in the future */
#if TIMEDIFF_T_MAX > LONG_MAX
    /* tv_sec overflow check on Windows there we know it is long */
    if(tv_sec > LONG_MAX)
      tv_sec = LONG_MAX;
#endif
    pending_tv.tv_sec = (long)tv_sec;
    pending_tv.tv_usec = (long)tv_usec;
#else
#if TIMEDIFF_T_MAX > INT_MAX
    /* tv_sec overflow check in case time_t is signed */
    if(tv_sec > INT_MAX)
      tv_sec = INT_MAX;
#endif
    pending_tv.tv_sec = (int)tv_sec;
    pending_tv.tv_usec = (int)tv_usec;
#endif
  }
  else {
    pending_tv.tv_sec = 0;
    pending_tv.tv_usec = 0;
  }

#ifdef USE_WINSOCK
  /* WinSock select() must not be called with an fd_set that contains zero
    fd flags, or it will return WSAEINVAL.  But, it also can't be called
    with no fd_sets at all!  From the documentation:

    Any two of the parameters, readfds, writefds, or exceptfds, can be
    given as null. At least one must be non-null, and any non-null
    descriptor set must contain at least one handle to a socket.

    It is unclear why WinSock doesn't just handle this for us instead of
    calling this an error. Luckily, with WinSock, we can _also_ ask how
    many bits are set on an fd_set. So, let's just check it beforehand.
  */
  /* The first argument is the highest fd plus one; empty sets are passed as
     NULL. This is where select() is really called. */
  return select((int)maxfd + 1,
                fds_read && fds_read->fd_count ? fds_read : NULL,
                fds_write && fds_write->fd_count ? fds_write : NULL,
                fds_err && fds_err->fd_count ? fds_err : NULL, ptimeout);
#else
  return select((int)maxfd + 1, fds_read, fds_write, fds_err, ptimeout);
#endif
}

curl DNS解析

asyn

域名解析是网络连接的第一步,curl 使用了一个状态机来管理网络连接状态。

开始了解一下DNS的解析流程吧~

重点代码在 multi.c 中,通过 Curl_connect 开始,逐步进入 DNS解析线程。

multi.c 中

/*
 * Excerpt of the multi state machine (most of it elided). Only the
 * CURLM_STATE_CONNECT case is shown: it calls Curl_connect() and, when that
 * succeeds and reports an asynchronous resolve, moves the handle to
 * CURLM_STATE_WAITRESOLVE.
 */
static CURLMcode multi_runsingle(struct Curl_multi *multi,
                                 struct curltime *nowp,
                                 struct Curl_easy *data)
{
  ...
  switch(data->mstate) {        // a breakpoint here is handy for following
                                // how a transfer moves through the states

    ...
    case CURLM_STATE_CONNECT:
      ...
      // connect; 'async' is set by Curl_connect()
      result = Curl_connect(data, &async, &protocol_connected);
	  ...
      if(!result) {
        if(async)               // TRUE: the name lookup is asynchronous
          /* We're now waiting for an asynchronous name lookup */
          multistate(data, CURLM_STATE_WAITRESOLVE);    // switch to the
                                                        // wait-for-DNS state
        else {
          ...
        }
      }
      break;

  }
  ...
}

curl 的域名解析有同步和异步两种方式,默认是异步的方式。异步域名解析的接口定义在 asyn.h 这个头文件中。

绕绕绕,来依次进行调用跟踪吧。

url.c 中

/*
 * Excerpt of Curl_connect() (parts elided). *asyncp starts out FALSE and is
 * passed on to create_conn(), which may change it.
 */
CURLcode Curl_connect(struct Curl_easy *data,
                      bool *asyncp,
                      bool *protocol_done)
{
  CURLcode result;
  struct connectdata *conn;

  *asyncp = FALSE; /* assume synchronous resolves by default */
  ...
  // create the connection; asyncp reports back whether the resolve is
  // synchronous or asynchronous
  result = create_conn(data, &conn, asyncp);

}

/*
 * Excerpt of create_conn() (parts elided). While the connection is being
 * set up, name resolution is started through resolve_server().
 */
static CURLcode create_conn(struct Curl_easy *data,
                            struct connectdata **in_connect,
                            bool *async)
{
  CURLcode result = CURLE_OK;
  struct connectdata *conn;
  ...
  // start resolving the host name; 'async' reports back whether the resolve
  // is synchronous or asynchronous
  result = resolve_server(data, conn, async);
  ...
}

/*
 * Excerpt of resolve_server() (parts elided). Resolves the host of 'conn'
 * with the time left for the transfer as timeout and sets *async to TRUE
 * when the resolve is still pending.
 */
static CURLcode resolve_server(struct Curl_easy *data,
                               struct connectdata *conn,
                               bool *async)
{
  CURLcode result = CURLE_OK;
  timediff_t timeout_ms = Curl_timeleft(data, NULL, TRUE);
  ...
  struct Curl_dns_entry *hostaddr = NULL;
  ...
  // resolve the name through the timeout-aware entry point
  rc = Curl_resolv_timeout(data, conn->hostname_resolve, (int)conn->port,
                               &hostaddr, timeout_ms);
  if(rc == CURLRESOLV_PENDING)  // the resolve has not finished yet
	*async = TRUE;              // asynchronous resolve: report TRUE
}

hostip.c中

/*
 * Excerpt of Curl_resolv_timeout() (the timeout handling is elided).
 * Clears *entry and forwards the request to Curl_resolv().
 */
enum resolve_t Curl_resolv_timeout(struct Curl_easy *data,
                                   const char *hostname,
                                   int port,
                                   struct Curl_dns_entry **entry,
                                   timediff_t timeoutms)
{
  enum resolve_t rc;
  *entry = NULL;
  // timeout related handling (elided)
  ...
  rc = Curl_resolv(data, hostname, port, TRUE, entry);  // the resolve itself
  ...
}

/*
 * Excerpt of Curl_resolv() (parts elided). Starts a lookup through
 * Curl_getaddrinfo(); when no address comes back but a response is awaited,
 * Curl_resolv_check() is asked once whether the result is already there.
 */
enum resolve_t Curl_resolv(struct Curl_easy *data,
                           const char *hostname,
                           int port,
                           bool allowDOH,
                           struct Curl_dns_entry **entry)
{
  struct Curl_dns_entry *dns = NULL;
  CURLcode result;
  enum resolve_t rc = CURLRESOLV_ERROR; /* default to failure */
  struct connectdata *conn = data->conn;
  ...
  int respwait = 0;
  ...
  // Curl_getaddrinfo() is where the lookup for the IP address starts.
  // NOTE(review): according to the article, the CURLRES_SYNCH macro selects
  // the synchronous (hostip4.c / hostip6.c) or asynchronous (hostasyn.c)
  // implementation - confirm against the curl sources. Only the asynchronous
  // path is discussed here.
  addr = Curl_getaddrinfo(data,
#ifdef DEBUGBUILD
						(data->set.str[STRING_DEVICE]
                        && !strcmp(data->set.str[STRING_DEVICE],
                        "LocalHost"))?"localhost":
#endif
                         hostname, port, &respwait);
 if(!addr) {
      if(respwait) {            // asynchronous lookup in progress
		/* the response to our resolve call will come asynchronously at a later time, good or bad */
        /* First, check that we haven't received the info by now */
        // the lookup is asynchronous, so the answer is usually not in yet
        result = Curl_resolv_check(data, &dns);
        if(result) /* error detected */
          return CURLRESOLV_ERROR;
        if(dns)
          rc = CURLRESOLV_RESOLVED; /* pointer provided */
        else
          rc = CURLRESOLV_PENDING; /* no info yet */
      }
    }
}

hostasyn.c 中

/*
 * Asynchronous flavour of Curl_getaddrinfo(): forwards all arguments to the
 * resolver backend and returns its result.
 */
struct Curl_addrinfo *Curl_getaddrinfo(struct Curl_easy *data,
                                       const char *hostname,
                                       int port,
                                       int *waitp)
{
  struct Curl_addrinfo *resolved =
    Curl_resolver_getaddrinfo(data, hostname, port, waitp);

  return resolved;
}

Curl_resolver_getaddrinfo 是域名解析的接口,具体实现有两种方式:asyn-thread 和 asyn-ares;

前者是开启一个线程,然后在该线程中调用系统的域名解析 API;后者是使用 c-ares 这个库实现异步域名解析。

默认情况下,curl 使用的是 asyn-thread, 如果你想使用 asyn-ares, 需要打开 USE_ARES 这个编译选项。

这里解析的是 asyn-thread;asyn-ares 的流程大体一致,不再讲解。

asyn-thread.c

/*
 * Threaded resolver entry point: prepares the getaddrinfo() hints and starts
 * a resolver thread for hostname:port.
 *
 * Always returns NULL. *waitp is set to 1 when the thread was started (the
 * answer will arrive asynchronously) and stays 0 when it could not be
 * started, in which case an error message is recorded with failf().
 */
struct Curl_addrinfo *Curl_resolver_getaddrinfo(struct Curl_easy *data,
                                                const char *hostname,
                                                int port,
                                                int *waitp)
{
  struct addrinfo hints;        /* lookup constraints for the thread */
  struct resdata *resolver = (struct resdata *)data->state.async.resolver;
  int family = PF_INET;

  *waitp = 0; /* default to synchronous response */

#ifdef CURLRES_IPV6
  /* Honour a request to limit the lookup to one IP version. */
  if(data->set.ipver == CURL_IPRESOLVE_V4)
    family = PF_INET;           /* IPv4 only */
  else if(data->set.ipver == CURL_IPRESOLVE_V6)
    family = PF_INET6;          /* IPv6 only */
  else
    family = PF_UNSPEC;

  if(family != PF_INET) {
    /* fall back to IPv4 when the stack seems to be a non-IPv6 one */
    if(!Curl_ipv6works(data))
      family = PF_INET;
  }
#endif /* CURLRES_IPV6 */

  memset(&hints, 0, sizeof(hints));
  hints.ai_family = family;
  /* TCP transfers use a stream socket, everything else a datagram socket */
  if(data->conn->transport == TRNSPRT_TCP)
    hints.ai_socktype = SOCK_STREAM;
  else
    hints.ai_socktype = SOCK_DGRAM;

  resolver->start = Curl_now();

  /* fire up a new resolver thread! */
  if(!init_resolve_thread(data, hostname, port, &hints)) {
    failf(data, "getaddrinfo() thread failed to start");
    return NULL;
  }

  *waitp = 1; /* expect asynchronous response */
  return NULL;
}

/*
 * Prepare the asynchronous resolve state of 'data' and start the resolver
 * thread for hostname:port.
 *
 * Returns TRUE when the thread was started. On failure returns FALSE with
 * errno set: ENOMEM by default, or the errno left behind by the failed
 * thread creation.
 */
static bool init_resolve_thread(struct Curl_easy *data,
                                const char *hostname, int port,
                                const struct addrinfo *hints)
{
  struct thread_data *td = calloc(1, sizeof(struct thread_data));
  int err = ENOMEM;
  struct Curl_async *asp = &data->state.async;  /* async resolve state kept
                                                   in the easy handle */

  data->state.async.tdata = td;         /* attach the new thread_data (may
                                           be NULL, checked next) */
  if(!td)
    goto errno_exit;

  asp->port = port;
  asp->done = FALSE;
  asp->status = 0;
  asp->dns = NULL;                      /* no Curl_dns_entry yet */
  td->thread_hnd = curl_thread_t_null;  /* no thread started yet */

  if(!init_thread_sync_data(td, hostname, port, hints)) {
    asp->tdata = NULL;
    free(td);
    goto errno_exit;
  }

  /* replace any previously stored host name with a copy of the new one */
  free(asp->hostname);
  asp->hostname = strdup(hostname);
  if(!asp->hostname)
    goto err_exit;

  /* The thread will set this to 1 when complete. */
  td->tsd.done = 0;

  /* Threads are created through curl's own wrapper, Curl_thread_create().
     NOTE(review): the article states the shared data is protected by a
     critical section on Windows and a mutex on Linux - not visible here. */
#ifdef HAVE_GETADDRINFO
  td->thread_hnd = Curl_thread_create(getaddrinfo_thread, &td->tsd);
#else
  td->thread_hnd = Curl_thread_create(gethostbyname_thread, &td->tsd);
#endif

  if(!td->thread_hnd) {                 /* thread creation failed */
    /* The thread never started, so mark it as done here for proper cleanup. */
    td->tsd.done = 1;
    err = errno;
    goto err_exit;
  }

  return TRUE;

 err_exit:
  destroy_async_data(asp);

 errno_exit:
  errno = err;
  return FALSE;
}

/*
 * Resolver thread body: resolves tsd->hostname / tsd->port with
 * Curl_getaddrinfo_ex() and publishes the outcome in the shared
 * thread_sync_data under tsd->mtx. Always returns 0.
 */
static unsigned int CURL_STDCALL getaddrinfo_thread(void *arg)
{

  struct thread_sync_data *tsd = (struct thread_sync_data *)arg;
  struct thread_data *td = tsd->td;
  char service[12];
  int rc;
#ifdef USE_SOCKETPAIR
  char buf[1];
#endif

  /* the port number is passed on as a decimal string */
  msnprintf(service, sizeof(service), "%d", tsd->port);

  /* Curl_getaddrinfo_ex() wraps getaddrinfo(): host name and service in,
     address list out (in tsd->res). */
  rc = Curl_getaddrinfo_ex(tsd->hostname, service, &tsd->hints, &tsd->res);

  if(rc != 0) {
    tsd->sock_error = SOCKERRNO?SOCKERRNO:rc;
    if(tsd->sock_error == 0)
      tsd->sock_error = RESOLVER_ENOMEM;
  }
  else {
    Curl_addrinfo_set_port(tsd->res, tsd->port);    /* store the port in the
                                                       resolved addresses */
  }

  Curl_mutex_acquire(tsd->mtx);
  if(tsd->done) {
    /* 'done' was already set before this thread finished, so the thread
       cleans up the shared data itself (presumably the owner has given up
       on the lookup - TODO confirm against destroy_async_data). */
    /* too late, gotta clean up the mess */
    Curl_mutex_release(tsd->mtx);
    destroy_thread_sync_data(tsd);
    free(td);
  }
  else {
#ifdef USE_SOCKETPAIR
    if(tsd->sock_pair[1] != CURL_SOCKET_BAD) {
      /* DNS has been resolved, signal client task */
      /* a single byte with value 1 is written to the socket pair */
      buf[0] = 1;
      if(swrite(tsd->sock_pair[1],  buf, sizeof(buf)) < 0) {
        /* update sock_erro to errno */
        tsd->sock_error = SOCKERRNO;
      }
    }
#endif
    tsd->done = 1;                      /* mark the resolve as finished */
    Curl_mutex_release(tsd->mtx);
  }

  return 0;
}

最后看一看如何解析的

curl_addrinfo.c

/*
 * Curl_getaddrinfo_ex() wraps the system getaddrinfo() and converts its
 * result into a freshly allocated list of struct Curl_addrinfo.
 *
 * nodename - host name or address string handed to getaddrinfo()
 * servname - service string handed to getaddrinfo()
 * hints    - lookup hints handed to getaddrinfo()
 * result   - receives the head of the converted list, or NULL on failure
 *
 * Entries with an address family other than AF_INET (and AF_INET6 when
 * ENABLE_IPV6 is defined), without an address, or with an address shorter
 * than the family's sockaddr structure are skipped. The system list is
 * always released with freeaddrinfo() before returning.
 *
 * Returns 0 on success, otherwise an EAI_* style error code (this is not a
 * CURLcode).
 */
int
Curl_getaddrinfo_ex(const char *nodename,
                    const char *servname,
                    const struct addrinfo *hints,
                    struct Curl_addrinfo **result)
{
  struct addrinfo *sys_list;
  const struct addrinfo *cur;
  struct Curl_addrinfo *head = NULL;
  struct Curl_addrinfo *tail = NULL;
  int error;

  *result = NULL; /* assume failure */

  error = getaddrinfo(nodename, servname, hints, &sys_list);
  if(error)
    return error;

  /* walk the system list and copy every usable entry */
  for(cur = sys_list; cur; cur = cur->ai_next) {
    struct Curl_addrinfo *node;
    size_t addr_size;
    size_t name_size;

    /* settle the family-specific sockaddr size, skip unknown families */
    if(cur->ai_family == AF_INET)
      addr_size = sizeof(struct sockaddr_in);
#ifdef ENABLE_IPV6
    else if(cur->ai_family == AF_INET6)
      addr_size = sizeof(struct sockaddr_in6);
#endif
    else
      continue;

    /* skip entries lacking address info or carrying a bogus address size */
    if(!cur->ai_addr || !(cur->ai_addrlen > 0) ||
       ((size_t)cur->ai_addrlen < addr_size))
      continue;

    /* canonical name, if any, is stored including its terminating zero */
    name_size = cur->ai_canonname ? strlen(cur->ai_canonname) + 1 : 0;

    /* one allocation holds the node, the sockaddr and the name */
    node = malloc(sizeof(struct Curl_addrinfo) + addr_size + name_size);
    if(!node) {
      error = EAI_MEMORY;
      break;
    }

    /* copy member by member: ordering, size or padding of the system
       struct might be different for each platform */
    node->ai_flags    = cur->ai_flags;
    node->ai_family   = cur->ai_family;
    node->ai_socktype = cur->ai_socktype;
    node->ai_protocol = cur->ai_protocol;
    node->ai_addrlen  = (curl_socklen_t)addr_size;
    node->ai_next     = NULL;

    /* the sockaddr lives right behind the node itself */
    node->ai_addr = (void *)((char *)node + sizeof(struct Curl_addrinfo));
    memcpy(node->ai_addr, cur->ai_addr, addr_size);

    /* ... and the canonical name right behind the sockaddr */
    if(name_size) {
      node->ai_canonname = (void *)((char *)node->ai_addr + addr_size);
      memcpy(node->ai_canonname, cur->ai_canonname, name_size);
    }
    else
      node->ai_canonname = NULL;

    /* append to the output list */
    if(tail)
      tail->ai_next = node;
    else
      head = node;
    tail = node;
  }

  /* the system list is no longer needed */
  if(sys_list)
    freeaddrinfo(sys_list);

  if(error) {
    /* a failure half-way through discards what was built so far */
    Curl_freeaddrinfo(head);
    head = NULL;
  }
  else if(!head) {
    /* the lookup worked but no entry was usable */
#ifdef EAI_NONAME
    /* rfc3493 conformant */
    error = EAI_NONAME;
#else
    /* rfc3493 obsoleted */
    error = EAI_NODATA;
#endif
#ifdef USE_WINSOCK
    SET_SOCKERRNO(error);
#endif
  }

  *result = head;

  /* This is not a CURLcode */
  return error;
}

至此,我们了解了,如何开始异步解析的。

现在讨论一下,curl 是如何根据状态机来完成相关流程的。

我们在这里先讨论 CURLM_STATE_INIT、CURLM_STATE_CONNECT、CURLM_STATE_WAITRESOLVE。

域名解析在这三个状态下即可完成,其余状态留到后续专题中讲解。

multi.c 中

/* Excerpt of multi_runsingle() from multi.c: one step of the per-transfer
   state machine. Only the INIT, CONNECT and WAITRESOLVE cases are shown;
   "..." marks code that the article left out. */
static CURLMcode multi_runsingle(struct Curl_multi *multi,
                                 struct curltime *nowp,
                                 struct Curl_easy *data)
{
  ...
  switch(data->mstate) {								// a breakpoint here lets you follow how the state changes from call to call
    case CURLM_STATE_INIT:								// per the article, the case taken on the first pass
      /* init this transfer. */
      result = Curl_pretransfer(data);

      if(!result) {
        /* after init, go CONNECT */
        multistate(data, CURLM_STATE_CONNECT);			// init succeeded: switch to CURLM_STATE_CONNECT
        *nowp = Curl_pgrsTime(data, TIMER_STARTOP);
        rc = CURLM_CALL_MULTI_PERFORM;
      }
      break;
          
	...
    case CURLM_STATE_CONNECT:								// connect state; name resolution is kicked off from here
      ...
      result = Curl_connect(data, &async, &protocol_connected);			// connect entry point; reports through 'async' whether a lookup is pending
	  ...
      if(!result) {
        if(async)														// true: the name lookup runs asynchronously
          /* We're now waiting for an asynchronous name lookup */
          multistate(data, CURLM_STATE_WAITRESOLVE);					// switch to CURLM_STATE_WAITRESOLVE
        else {
          ...
        }
      }
      break;
          
	case CURLM_STATE_WAITRESOLVE:										// waiting for the name lookup to finish
  	/* awaiting an asynch name resolve to complete */
    {
      struct Curl_dns_entry *dns = NULL;
      struct connectdata *conn = data->conn;
      const char *hostname;

     
      ...
      dns = Curl_fetch_addr(data, hostname, (int)conn->port);			// presumably a lookup in the DNS cache (hash table) described earlier in the article

      if(!dns)					
        // Nothing was returned by Curl_fetch_addr() above.
        // Curl_resolv_check() gets &dns so it can hand back an entry;
        // per the article it checks whether the asynchronous lookup has completed.
        result = Curl_resolv_check(data, &dns);							
		
      /* Update sockets here, because the socket(s) may have been
         closed and the application thus needs to be told, even if it
         is likely that the same socket(s) will again be used further
         down.  If the name has not yet been resolved, it is likely
         that new sockets have been opened in an attempt to contact
         another resolver. */
      singlesocket(multi, data);

      if(dns) {			// a DNS entry is available now
        /* Perform the next step in the connection phase, and then move on to the WAITCONNECT state */
        // The name is resolved, so the connection phase continues.
        // Per the article, Curl_once_resolved() calls Curl_setup_conn(), which starts the socket connect.
        result = Curl_once_resolved(data, &protocol_connected);		

        if(result)
          /* if Curl_once_resolved() returns failure, the connection struct is already freed and gone */
          data->conn = NULL; /* no more connection */
        else {									// connection setup succeeded; the states chosen below are covered later in the article
          /* call again please so that we get the next socket setup */
          rc = CURLM_CALL_MULTI_PERFORM;
          if(protocol_connected)
            multistate(data, CURLM_STATE_DO);
          else {
#ifndef CURL_DISABLE_HTTP
            if(Curl_connect_ongoing(data->conn))
              multistate(data, CURLM_STATE_WAITPROXYCONNECT);
            else
#endif
              multistate(data, CURLM_STATE_WAITCONNECT);
          }
        }
      }
	  ...
    }
    break;   
}

可以看到,不同状态干不同事,C语言的状态机也只能这么实现…

multi_runsingle() 函数,是在 curl_multi_perform() 函数里调用。

multi.c 中

/* curl_multi_perform() (excerpt; the article cuts it off before the end of
   the function): after validating the multi handle it calls
   multi_runsingle() once for every easy handle linked from multi->easyp.
   The last non-zero result of those calls is kept in 'returncode'. */
CURLMcode curl_multi_perform(struct Curl_multi *multi, int *running_handles)
{
  struct Curl_easy *data;
  CURLMcode returncode = CURLM_OK;
  struct Curl_tree *t;
  struct curltime now = Curl_now();

  if(!GOOD_MULTI_HANDLE(multi))
    return CURLM_BAD_HANDLE;

  if(multi->in_callback)
    return CURLM_RECURSIVE_API_CALL;

  data = multi->easyp;
  while(data) {										// walks every Curl_easy of this Curl_multi; this loop does not repeatedly drive the state machine
    CURLMcode result;
    SIGPIPE_VARIABLE(pipe_st);

    sigpipe_ignore(data, &pipe_st);
    result = multi_runsingle(multi, &now, data);
    sigpipe_restore(&pipe_st);

    if(result)
      returncode = result;

    data = data->next; /* operate on next handle */
  }

easy.c 中

/*
 * easy_transfer() drives a transfer to completion on the given multi handle.
 *
 * Each round waits in curl_multi_poll() for up to 1000 ms and then calls
 * curl_multi_perform(); this repetition is what keeps the state machine
 * moving. When no transfer is running any more, the result is taken from the
 * message returned by curl_multi_info_read().
 *
 * Returns the transfer's CURLcode. If a multi call failed instead,
 * CURLE_OUT_OF_MEMORY is returned for CURLM_OUT_OF_MEMORY and
 * CURLE_BAD_FUNCTION_ARGUMENT for every other multi error.
 */
static CURLcode easy_transfer(struct Curl_multi *multi)
{
  CURLcode result = CURLE_OK;
  CURLMcode mcode = CURLM_OK;
  bool done = FALSE;

  while(!done && !mcode) {
    int still_running = 0;
    int queued;
    CURLMsg *msg;

    /* wait for activity, or at most one second */
    mcode = curl_multi_poll(multi, NULL, 0, 1000, NULL);
    if(mcode)
      continue;

    /* let every attached handle advance */
    mcode = curl_multi_perform(multi, &still_running);

    /* only read 'still_running' if curl_multi_perform() return OK */
    if(mcode || still_running)
      continue;

    /* nothing is running any more: pick up the final result */
    msg = curl_multi_info_read(multi, &queued);
    if(msg) {
      result = msg->data.result;
      done = TRUE;
    }
  }

  /* Make sure to return some kind of error if there was a multi problem */
  if(mcode == CURLM_OUT_OF_MEMORY)
    return CURLE_OUT_OF_MEMORY;
  if(mcode)
    /* The other multi errors should never happen, so return
       something suitably generic */
    return CURLE_BAD_FUNCTION_ARGUMENT;

  return result;
}

// easy_perform() (excerpt): attaches the easy handle to a multi handle and
// runs the transfer through it; "..." marks code the article left out.
static CURLcode easy_perform(struct Curl_easy *data, bool events)
{
  struct Curl_multi *multi;
  CURLMcode mcode;
  CURLcode result = CURLE_OK;
  ...
      
  if(data->multi_easy)			// a multi handle is already stored on this easy handle: reuse it
    multi = data->multi_easy;
  else {						// no multi handle stored yet
    /* this multi handle will only ever have a single easy handled attached
       to it, so make it use minimal hashes */
    multi = Curl_multi_handle(1, 3);	// create one and remember it on the easy handle below
    if(!multi)
      return CURLE_OUT_OF_MEMORY;
    data->multi_easy = multi;
  }
  ...
  mcode = curl_multi_add_handle(multi, data);		// either way the easy handle is added to the multi handle, which then drives it
  ...
  /* run the transfer */
  result = events ? easy_events(multi) : easy_transfer(multi);		
  ...
}

这样下来,对 curl 整体流程、工作原理,和 域名解析都有了进一步了解。

在这里插入图片描述

curl 网络连接

域名解析出IP地址以后,正式开始了网络连接。

connect.h 和 connect.c 是处理网络连接的相关文件。让我们看看它是如何实现异步连接的。

connect

上回书我们说到, 如果域名解析成功,便开始了 新的历程,现在继续。

multi.c 中

/* Excerpt of multi_runsingle() from multi.c showing the hand-over from
   WAITRESOLVE to WAITCONNECT and the WAITCONNECT case itself. "..." marks
   code the article left out, so the braces of the excerpt do not balance. */
static CURLMcode multi_runsingle(struct Curl_multi *multi,
                                 struct curltime *nowp,
                                 struct Curl_easy *data)
{
  ...
  switch(data->mstate) {		// a breakpoint here lets you follow how the state changes from call to call
  ...
  case CURLM_STATE_WAITRESOLVE:				// waiting for the name lookup to finish
  {
    ...
	if(dns) {													// a DNS entry is available now
	/* Perform the next step in the connection phase, and then move on to the WAITCONNECT state */
	// The name is resolved, so the connection phase continues.
    // Per the article, Curl_once_resolved() calls Curl_setup_conn(), which starts the socket connect.
    result = Curl_once_resolved(data, &protocol_connected);
    
    if(result)
		/* if Curl_once_resolved() returns failure, the connection struct
             is already freed and gone */
		data->conn = NULL; /* no more connection */
    else {						// connection setup succeeded; the connect itself may still be pending (see WAITCONNECT below)
		/* call again please so that we get the next socket setup */
		rc = CURLM_CALL_MULTI_PERFORM;
		if(protocol_connected)
          multistate(data, CURLM_STATE_DO);
        else {
#ifndef CURL_DISABLE_HTTP
          if(Curl_connect_ongoing(data->conn))
              multistate(data, CURLM_STATE_WAITPROXYCONNECT);
          else
#endif
              multistate(data, CURLM_STATE_WAITCONNECT);	// switch to CURLM_STATE_WAITCONNECT and wait for the connect to finish
       }
    }
  }
  break;
  
  ...
  case CURLM_STATE_WAITCONNECT:				// waiting for the connect to finish
      /* awaiting a completion of an asynch TCP connect */
      DEBUGASSERT(data->conn);
      result = Curl_is_connected(data, data->conn, FIRSTSOCKET, &connected);	// reports through 'connected' whether the first socket is connected
      if(connected && !result) {
#ifndef CURL_DISABLE_HTTP
        if(
#ifndef CURL_DISABLE_PROXY
          (data->conn->http_proxy.proxytype == CURLPROXY_HTTPS &&
           !data->conn->bits.proxy_ssl_connected[FIRSTSOCKET]) ||
#endif
          Curl_connect_ongoing(data->conn)) {
          multistate(data, CURLM_STATE_WAITPROXYCONNECT);
          break;
        }
#endif
        rc = CURLM_CALL_MULTI_PERFORM;
#ifndef CURL_DISABLE_PROXY
        multistate(data,
                   data->conn->bits.tunnel_proxy?
                   CURLM_STATE_WAITPROXYCONNECT:
                   CURLM_STATE_SENDPROTOCONNECT);	// connected: go to CURLM_STATE_SENDPROTOCONNECT unless tunnelling through a proxy
#else
        multistate(data, CURLM_STATE_SENDPROTOCONNECT);
#endif
      }
      else if(result) {
        /* failure detected */
        Curl_posttransfer(data);
        multi_done(data, result, TRUE);
        stream_error = TRUE;
        break;
      }
      break;
}

url.c 中

/* Curl_setup_conn() (excerpt from url.c; ".." marks code the article left
   out, including the declaration of 'conn'). */
CURLcode Curl_setup_conn(struct Curl_easy *data,
                         bool *protocol_done)
{
  CURLcode result = CURLE_OK;
  ..
  /* the first socket slot holds no socket yet: clear its tcpconnect flag
     and start connecting with the connection's stored DNS entry */
  if(CURL_SOCKET_BAD == conn->sock[FIRSTSOCKET]) {
    conn->bits.tcpconnect[FIRSTSOCKET] = FALSE;
    result = Curl_connecthost(data, conn, conn->dns_entry);
    if(result)
      return result;
}

connect.c 中

/* Curl_connecthost() (excerpt; "..." marks code the article left out):
   starts connection attempts towards the addresses of 'remotehost'. */
CURLcode Curl_connecthost(struct Curl_easy *data,
                          struct connectdata *conn,  /* context */
                          const struct Curl_dns_entry *remotehost)
{
  CURLcode result = CURLE_COULDNT_CONNECT;
  ...
  conn->num_addr = Curl_num_addresses(remotehost->addr);				//  presumably the number of addresses the lookup returned
  conn->tempaddr[0] = conn->tempaddr[1] = remotehost->addr;				//  both attempt slots start at the head of the resolved address list
  conn->tempsock[0] = conn->tempsock[1] = CURL_SOCKET_BAD;
  ...
  for(i = 0; (i < 2) && result; i++) {
    while(conn->tempaddr[i]) {
      result = singleipconnect(data, conn, conn->tempaddr[i], i);		 // try the current address of slot i; the loops stop at the first attempt that returns 0
      if(!result)
        break;
      ainext(conn, i, TRUE);
  }
  ...
}
    
// singleipconnect() (excerpt; "..." marks code the article left out):
// makes one connection attempt to the given address. The socket is switched
// to non-blocking mode before connect() is issued, so the function may
// return while the connection is not established yet.
// This is the function to focus on for the asynchronous connect.
static CURLcode singleipconnect(struct Curl_easy *data,
                                struct connectdata *conn,
                                const struct Curl_addrinfo *ai,
                                int tempindex)
{
  struct Curl_sockaddr_ex addr;
  int rc = -1;
  int error = 0;
  bool isconnected = FALSE;
  curl_socket_t sockfd;
  CURLcode result;
  char ipaddress[MAX_IPADR_LEN];
  long port;
  bool is_tcp;
  ...
  curl_socket_t *sockp = &conn->tempsock[tempindex];		// slot that will receive the socket of this attempt
  *sockp = CURL_SOCKET_BAD;   
  
  result = Curl_socket(data, ai, &addr, &sockfd);		   // obtain a socket for this address; it is returned in sockfd
  if(result)
    return result;
 
  /* store remote address and port used in this connection attempt */
  //  convert the socket address into an IP string and a port number
  if(!Curl_addr2string((struct sockaddr*)&addr.sa_addr, addr.addrlen,
                       ipaddress, &port)) {
   /* malformed address or bug in inet_ntop, try next address */
   failf(data, "sa_addr inet_ntop() failed with errno %d: %s",
          errno, Curl_strerror(errno, buffer, sizeof(buffer)));
    Curl_closesocket(data, conn, sockfd);
    return CURLE_OK;				// the error was logged and the socket closed; CURLE_OK is still returned
  }
  ...
  if(is_tcp && data->set.tcp_nodelay)
    tcpnodelay(data, sockfd);									 //  per the article: disables the Nagle algorithm
  ...
  if(is_tcp && data->set.tcp_keepalive)				
    tcpkeepalive(data, sockfd);									// enable TCP keepalive probing on the socket
  
  ...
  (void)curlx_nonblock(sockfd, TRUE);							// put the socket into non-blocking mode (per the article, implemented per platform)
  
  /* Connect TCP and QUIC sockets */
  if(!isconnected && (conn->transport != TRNSPRT_UDP)) {		// not connected yet and not UDP: issue the connect
    if(conn->bits.tcp_fastopen) {
#if defined(CONNECT_DATA_IDEMPOTENT) /* Darwin */
#  if defined(HAVE_BUILTIN_AVAILABLE)
      /* while connectx function is available since macOS 10.11 / iOS 9,
         it did not have the interface declared correctly until
         Xcode 9 / macOS SDK 10.13 */
      if(__builtin_available(macOS 10.11, iOS 9.0, tvOS 9.0, watchOS 2.0, *)) {
        sa_endpoints_t endpoints;
        endpoints.sae_srcif = 0;
        endpoints.sae_srcaddr = NULL;
        endpoints.sae_srcaddrlen = 0;
        endpoints.sae_dstaddr = &addr.sa_addr;
        endpoints.sae_dstaddrlen = addr.addrlen;

        rc = connectx(sockfd, &endpoints, SAE_ASSOCID_ANY,
                      CONNECT_RESUME_ON_READ_WRITE | CONNECT_DATA_IDEMPOTENT,
                      NULL, 0, NULL, NULL);									// Darwin fast-open path: connect with connectx()
      }
      else {
        rc = connect(sockfd, &addr.sa_addr, addr.addrlen);
      }
#  else
      rc = connect(sockfd, &addr.sa_addr, addr.addrlen);
#  endif /* HAVE_BUILTIN_AVAILABLE */
#elif defined(TCP_FASTOPEN_CONNECT) /* Linux >= 4.11 */
      if(setsockopt(sockfd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT,
                    (void *)&optval, sizeof(optval)) < 0)
        infof(data, "Failed to enable TCP Fast Open on fd %d\n", sockfd);

      rc = connect(sockfd, &addr.sa_addr, addr.addrlen);		// newer Linux: connect after requesting TCP_FASTOPEN_CONNECT
#elif defined(MSG_FASTOPEN) /* old Linux */
      if(conn->given->flags & PROTOPT_SSL)
        rc = connect(sockfd, &addr.sa_addr, addr.addrlen);		// older Linux: connect here only for SSL protocols
      else
        rc = 0; /* Do nothing */
#endif
    }
    else {
      rc = connect(sockfd, &addr.sa_addr, addr.addrlen);				// TCP Fast Open not requested: plain connect() (not specific to Windows)
    }
      
    if(-1 == rc)
      error = SOCKERRNO;
#ifdef ENABLE_QUIC
    else if(conn->transport == TRNSPRT_QUIC) {
      /* pass in 'sockfd' separately since it hasn't been put into the
         tempsock array at this point */
      result = Curl_quic_connect(data, conn, sockfd, tempindex,
                                 &addr.sa_addr, addr.addrlen);
      if(result)
        error = SOCKERRNO;
    }
#endif
  }
  else {					// already connected, or UDP transport: store the socket and return without connecting
    *sockp = sockfd;
    return CURLE_OK;
  }
  ...
}

/*
 * tcpkeepalive() switches SO_KEEPALIVE on for 'sockfd' (the option value is
 * 1 when data->set.tcp_keepalive is non-zero, otherwise 0) and, when that
 * succeeded, applies the idle time and probe interval configured in
 * data->set.tcp_keepidle and data->set.tcp_keepintvl using whatever
 * mechanism the platform headers offer. Every failure is only logged with
 * infof(); nothing is returned.
 */
static void
tcpkeepalive(struct Curl_easy *data,
             curl_socket_t sockfd)
{
  int optval = data->set.tcp_keepalive?1:0;

  /* only set IDLE and INTVL if setting KEEPALIVE is successful */
  if(setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE,
        (void *)&optval, sizeof(optval)) < 0) {
    infof(data, "Failed to set SO_KEEPALIVE on fd %d\n", sockfd);
    return;
  }

#if defined(SIO_KEEPALIVE_VALS)
  {
    /* WSAIoctl() flavour: both values are passed in one tcp_keepalive
       struct */
    struct tcp_keepalive vals;
    DWORD dummy;

    vals.onoff = 1;

    optval = curlx_sltosi(data->set.tcp_keepidle);
    KEEPALIVE_FACTOR(optval);
    vals.keepalivetime = optval;

    optval = curlx_sltosi(data->set.tcp_keepintvl);
    KEEPALIVE_FACTOR(optval);
    vals.keepaliveinterval = optval;

    if(WSAIoctl(sockfd, SIO_KEEPALIVE_VALS, (LPVOID) &vals, sizeof(vals),
                NULL, 0, &dummy, NULL, NULL) != 0) {
      infof(data, "Failed to set SIO_KEEPALIVE_VALS on fd %d: %d\n",
            (int)sockfd, WSAGetLastError());
    }
  }
#else
  /* setsockopt() flavour: each value has its own option, and every option
     is only used when the platform headers define it */
#ifdef TCP_KEEPIDLE
  /* idle time before the first probe */
  optval = curlx_sltosi(data->set.tcp_keepidle);
  KEEPALIVE_FACTOR(optval);
  if(setsockopt(sockfd, IPPROTO_TCP, TCP_KEEPIDLE,
        (void *)&optval, sizeof(optval)) < 0) {
    infof(data, "Failed to set TCP_KEEPIDLE on fd %d\n", sockfd);
  }
#endif
#ifdef TCP_KEEPINTVL
  /* interval between two probes */
  optval = curlx_sltosi(data->set.tcp_keepintvl);
  KEEPALIVE_FACTOR(optval);
  if(setsockopt(sockfd, IPPROTO_TCP, TCP_KEEPINTVL,
        (void *)&optval, sizeof(optval)) < 0) {
    infof(data, "Failed to set TCP_KEEPINTVL on fd %d\n", sockfd);
  }
#endif
#ifdef TCP_KEEPALIVE
  /* Mac OS X style: the idle time goes into TCP_KEEPALIVE */
  optval = curlx_sltosi(data->set.tcp_keepidle);
  KEEPALIVE_FACTOR(optval);
  if(setsockopt(sockfd, IPPROTO_TCP, TCP_KEEPALIVE,
        (void *)&optval, sizeof(optval)) < 0) {
    infof(data, "Failed to set TCP_KEEPALIVE on fd %d\n", sockfd);
  }
#endif
#endif
}

这里看一看暴露在 connect.h 中有用的函数。

connect.h 中

...
// Reports through 'connected' whether socket 'sockindex' of the connection is connected
CURLcode Curl_is_connected(struct Curl_easy *data,
                           struct connectdata *conn,
                           int sockindex,
                           bool *connected);										

// Starts connecting the transfer to the host described by the DNS entry
CURLcode Curl_connecthost(struct Curl_easy *data,
                          struct connectdata *conn,
                          const struct Curl_dns_entry *host);

/* generic function that returns how much time there's left to run, according
   to the timeouts set */
// i.e. the remaining run time of the transfer, derived from the configured timeouts
timediff_t Curl_timeleft(struct Curl_easy *data,
                         struct curltime *nowp,
                         bool duringconnect);
// default connect timeout: five minutes, expressed in milliseconds
#define DEFAULT_CONNECT_TIMEOUT 300000 /* milliseconds == five minutes */

/*
 * Used to extract socket and connectdata struct for the most recent
 * transfer on the given Curl_easy.
 *
 * The returned socket will be CURL_SOCKET_BAD in case of failure!
 */ 
// i.e. fetches the connection information that belongs to the easy handle
curl_socket_t Curl_getconnectinfo(struct Curl_easy *data,
                                  struct connectdata **connp);
// Converts a socket address into an IP address string ('addr') and a port number ('port', a long)
bool Curl_addr2string(struct sockaddr *sa, curl_socklen_t salen,
                      char *addr, long *port);

/*
 * Check if a connection seems to be alive.
 */
// i.e. a liveness check based on the connection data
bool Curl_connalive(struct connectdata *conn);
...

curl 通信过程

已经讲到了 网络状态 CURLM_STATE_SENDPROTOCONNECT 发送协议连接过程。

CURLM_STATE_SENDPROTOCONNECT 跳转 CURLM_STATE_DO 开始发送请求状态。

CURLM_STATE_DO 跳转 CURLM_STATE_DO_DONE 完成发送请求状态。

CURLM_STATE_DO_DONE 跳转 CURLM_STATE_PERFORM 传输数据状态。

这里不对 CURLM_STATE_DO 、CURLM_STATE_DO_DONE 做讲解,重点看一下 curl 是如何进行通信传输数据的。

multi.c 中

/* Excerpt of multi_runsingle() from multi.c showing the PERFORM case, where
   data is transferred. "..." marks code the article left out. */
static CURLMcode multi_runsingle(struct Curl_multi *multi,
                                 struct curltime *nowp,
                                 struct Curl_easy *data)
{
  	...
  	switch(data->mstate) {		// a breakpoint here lets you follow how the state changes from call to call
  	...
    case CURLM_STATE_PERFORM:
    {
      char *newurl = NULL;
      bool retry = FALSE;
      bool comeback = FALSE;
      DEBUGASSERT(data->state.buffer);
      /* check if over send speed */
      send_timeout_ms = 0;
      if(data->set.max_send_speed > 0)
        send_timeout_ms = Curl_pgrsLimitWaitTime(data->progress.uploaded,
                                                 data->progress.ul_limit_size,
                                                 data->set.max_send_speed,
                                                 data->progress.ul_limit_start,
                                                 *nowp);									// upload speed limit: the result is used below as a wait time in milliseconds

      /* check if over recv speed */
      recv_timeout_ms = 0;
      if(data->set.max_recv_speed > 0)
        recv_timeout_ms = Curl_pgrsLimitWaitTime(data->progress.downloaded,
                                                 data->progress.dl_limit_size,
                                                 data->set.max_recv_speed,
                                                 data->progress.dl_limit_start,
                                                 *nowp);									// download speed limit, same computation for the receive side

      if(send_timeout_ms || recv_timeout_ms) {
        Curl_ratelimit(data, *nowp);			// per the article: rate-limit bookkeeping based on the timestamp and the amount transferred
		// A non-zero wait time means the transfer is over the configured speed,
		// so the state becomes CURLM_STATE_TOOFAST and a timer is set for the longer of the two waits.
		// Per the article, TOOFAST switches back to CURLM_STATE_PERFORM once the rate is within the limit;
		// alternating between the two states is what enforces the speed limit.
        multistate(data, CURLM_STATE_TOOFAST);						
        if(send_timeout_ms >= recv_timeout_ms)
          Curl_expire(data, send_timeout_ms, EXPIRE_TOOFAST);
        else
          Curl_expire(data, recv_timeout_ms, EXPIRE_TOOFAST);
        break;
      }

      /* read/write data if it is ready to do so */
      result = Curl_readwrite(data->conn, data, &done, &comeback);			// the central step: the actual reading and writing happens in here

      if(done || (result == CURLE_RECV_ERROR)) {
        /* If CURLE_RECV_ERROR happens early enough, we assume it was a race
         * condition and the server closed the re-used connection exactly when
         * we wanted to use it, so figure out if that is indeed the case.
         */
        CURLcode ret = Curl_retry_request(data, &newurl);		// a URL returned in newurl means the request is to be retried
        if(!ret)
          retry = (newurl)?TRUE:FALSE;
        else if(!result)
          result = ret;

        if(retry) {
          /* if we are to retry, set the result to OK and consider the
             request as done */
          result = CURLE_OK;
          done = TRUE;
        }
      }
      ...
      break;
    }  
}
transfer

transfer.c 中

/* Curl_readwrite() (excerpt from transfer.c; "..." marks code the article
   left out): picks the sockets to read from and write to according to the
   keepon flags, determines their readiness and then calls readwrite_data()
   and/or readwrite_upload(). */
CURLcode Curl_readwrite(struct connectdata *conn,
                        struct Curl_easy *data,
                        bool *done,
                        bool *comeback)
{
  struct SingleRequest *k = &data->req;
  CURLcode result;
  int didwhat = 0;

  curl_socket_t fd_read;
  curl_socket_t fd_write;
  int select_res = conn->cselect_bits;

  conn->cselect_bits = 0;

  /* only use the proper socket if the *_HOLD bit is not set simultaneously as
     then we are in rate limiting state in that transfer direction */

  if((k->keepon & KEEP_RECVBITS) == KEEP_RECV)
    fd_read = conn->sockfd;
  else
    fd_read = CURL_SOCKET_BAD;

  if((k->keepon & KEEP_SENDBITS) == KEEP_SEND)
    fd_write = conn->writesockfd;
  else
    fd_write = CURL_SOCKET_BAD;
  
  ...
  if(!select_res) /* Call for select()/poll() only, if read/write/error
                     status is not known. */
	// per the article (covered earlier): Curl_socket_check() goes through Curl_poll(), which uses poll() or select()
    select_res = Curl_socket_check(fd_read, CURL_SOCKET_BAD, fd_write, 0);		
  
  ...
  if((k->keepon & KEEP_RECV) && (select_res & CURL_CSELECT_IN)) {					// receiving is wanted and the socket is readable
    result = readwrite_data(data, conn, k, &didwhat, done, comeback);				// read incoming data
    if(result || *done)
      return result;
  }

  /* If we still have writing to do, we check if we have a writable socket. */
  if((k->keepon & KEEP_SEND) && (select_res & CURL_CSELECT_OUT)) {				// sending is wanted and the socket is writable
    /* write */

    result = readwrite_upload(data, conn, &didwhat);							// write outgoing data
    if(result)
      return result;
  }
  ...
}


/* readwrite_data() (excerpt; "..." marks code the article left out, so the
   braces do not balance): receive loop that calls Curl_read() and repeats
   while data_pending() reports more data, bounded by 'maxloops'. */
static CURLcode readwrite_data(struct Curl_easy *data,
                               struct connectdata *conn,
                               struct SingleRequest *k,
                               int *didwhat, bool *done,
                               bool *comeback)
{
  CURLcode result = CURLE_OK;
  ssize_t nread; /* number of bytes read */
  size_t excess = 0; /* excess bytes read */
  bool readmore = FALSE; /* used by RTP to signal for more data */
  int maxloops = 100;
  char *buf = data->state.buffer;
    
  ...
  do {
    bool is_empty_data = FALSE;
    size_t buffersize = data->set.buffer_size;
    size_t bytestoread = buffersize;
    
    ...
    if(bytestoread) {
      /* receive data from the network! */
      result = Curl_read(data, conn->sockfd, buf, bytestoread, &nread); // the read itself: up to bytestoread bytes go into buf
	
    ...
  } while(data_pending(data) && maxloops--);
  ...
}
    
/* readwrite_upload() (excerpt; "..." marks code the article left out):
   sends the pending upload data with Curl_write(). */
static CURLcode readwrite_upload(struct Curl_easy *data,
                                 struct connectdata *conn,
                                 int *didwhat)
{
  ssize_t i, si;
  ssize_t bytes_written;
  CURLcode result;
  ssize_t nread; /* number of bytes read */
  bool sending_http_headers = FALSE;
  struct SingleRequest *k = &data->req;
  
  ... 
  do {
    ...
    result = Curl_write(data,
                        conn->writesockfd,  /* socket to send to */
                        k->upload_fromhere, /* buffer pointer */
                        k->upload_present,  /* buffer size */
                        &bytes_written);    /* actually sent */      	// the write itself: sends the buffered upload data
   if(result)
      return result;
   ...
  }while(0); /* just to break out from! */
}
sendf

sendf.c 中

/* Curl_read() (excerpt from sendf.c; "..." marks code the article left out,
   including the code that sets buffertofill and bytesfromsocket before the
   receive call is reached in the real function -- TODO confirm against the
   full source). */
CURLcode Curl_read(struct Curl_easy *data,   /* transfer */
                   curl_socket_t sockfd,     /* read from this socket */
                   char *buf,                /* store read data here */
                   size_t sizerequested,     /* max amount to read */
                   ssize_t *n)               /* amount bytes read */
{
  CURLcode result = CURLE_RECV_ERROR;
  ssize_t nread = 0;
  size_t bytesfromsocket = 0;
  char *buffertofill = NULL;
  struct connectdata *conn = data->conn;

  /* Set 'num' to 0 or 1, depending on which socket that has been sent here.
     If it is the second socket, we set num to 1. Otherwise to 0. This lets
     us use the correct ssl handle. */
  int num = (sockfd == conn->sock[SECONDARYSOCKET]);
  
  nread = conn->recv[num](data, num, buffertofill, bytesfromsocket, &result);		// recv[] holds function pointers; the article looks at what they point to next
  if(nread < 0)
    return result;
  ...
}

/* Curl_write() (excerpt from sendf.c; "..." marks code the article left
   out): sends 'len' bytes from 'mem' through the send function registered
   for the socket and stores the number of bytes written in '*written'. */
CURLcode Curl_write(struct Curl_easy *data,
                    curl_socket_t sockfd,
                    const void *mem,
                    size_t len,
                    ssize_t *written)
{
  ssize_t bytes_written;
  CURLcode result = CURLE_OK;
  struct connectdata *conn;
  int num;
  DEBUGASSERT(data);
  DEBUGASSERT(data->conn);
  conn = data->conn;
  num = (sockfd == conn->sock[SECONDARYSOCKET]);

  bytes_written = conn->send[num](data, num, mem, len, &result);			// send[] holds function pointers; the article looks at what they point to next
  *written = bytes_written;
  if(bytes_written >= 0)
    /* we completely ignore the curlcode value when subzero is not returned */
    return CURLE_OK; 
  ...
}

url.c 中

/* create_conn() (excerpt from url.c; "..." marks code the article left
   out): shows where the recv/send function pointers of a connection are
   initialised. */
static CURLcode create_conn(struct Curl_easy *data,
                            struct connectdata **in_connect,
                            bool *async)
{
  ...
  result = setup_connection_internals(data, conn);
  if(result)
    goto out;

  conn->recv[FIRSTSOCKET] = Curl_recv_plain;					// so recv[] is a callback slot, set to Curl_recv_plain here
  conn->send[FIRSTSOCKET] = Curl_send_plain;					// and send[] is a callback slot, set to Curl_send_plain here
  conn->recv[SECONDARYSOCKET] = Curl_recv_plain;
  conn->send[SECONDARYSOCKET] = Curl_send_plain;
  ...
}

再回到 sendf.c

/*
 * Curl_recv_plain() is the receive callback stored in conn->recv[].
 *
 * data - transfer; data->conn must be set
 * num  - index into conn->sock[] selecting the socket
 * buf  - destination buffer
 * len  - size of 'buf'
 * code - receives CURLE_OK, CURLE_AGAIN or CURLE_RECV_ERROR
 *
 * Data already held by get_pre_recved() is returned first. Otherwise the
 * socket is read with sread() (per the article a macro for recv()).
 * Returns the number of bytes received, or -1 on error with '*code'
 * explaining why.
 */
ssize_t Curl_recv_plain(struct Curl_easy *data, int num, char *buf,
                        size_t len, CURLcode *code)
{
  struct connectdata *conn;
  curl_socket_t sockfd;
  ssize_t nread;
  int err;
  int would_block;

  DEBUGASSERT(data);
  DEBUGASSERT(data->conn);
  conn = data->conn;
  sockfd = conn->sock[num];

  /* Check and return data that already received and storied in internal
     intermediate buffer */
  nread = get_pre_recved(conn, num, buf, len);
  if(nread > 0) {
    *code = CURLE_OK;
    return nread;
  }

  /* this is where the data is actually read from the socket */
  nread = sread(sockfd, buf, len);

  *code = CURLE_OK;
  if(-1 != nread)
    return nread;

  err = SOCKERRNO;

#ifdef WSAEWOULDBLOCK
  /* This is how Windows does it */
  would_block = (WSAEWOULDBLOCK == err);
#else
  /* errno may be EWOULDBLOCK or on some systems EAGAIN when the call
     returned because it would have had to block; EINTR is treated the
     same way here */
  would_block = (EWOULDBLOCK == err) || (EAGAIN == err) || (EINTR == err);
#endif

  if(would_block) {
    /* this is just a case of EWOULDBLOCK */
    *code = CURLE_AGAIN;
  }
  else {
    char buffer[STRERROR_LEN];
    failf(data, "Recv failure: %s",
          Curl_strerror(err, buffer, sizeof(buffer)));
    data->state.os_errno = err;
    *code = CURLE_RECV_ERROR;
  }

  return nread;
}

/*
 * Curl_send_plain() is the send callback stored in conn->send[].
 *
 * data - transfer; data->conn must be set
 * num  - index into conn->sock[] selecting the socket
 * mem  - data to send
 * len  - number of bytes in 'mem'
 * code - receives CURLE_OK, CURLE_AGAIN, CURLE_SEND_ERROR or
 *        CURLE_OUT_OF_MEMORY
 *
 * Returns the number of bytes written, 0 together with CURLE_AGAIN when the
 * socket would block, or -1 on error.
 */
ssize_t Curl_send_plain(struct Curl_easy *data, int num,
                        const void *mem, size_t len, CURLcode *code)
{
  struct connectdata *conn;
  curl_socket_t sockfd;
  ssize_t bytes_written;
  int err;
  int would_block;

  DEBUGASSERT(data);
  DEBUGASSERT(data->conn);
  conn = data->conn;
  sockfd = conn->sock[num];

  /* WinSock will destroy unread received data if send() is
     failed.
     To avoid lossage of received data, recv() must be
     performed before every send() if any incoming data is
     available. */
  if(pre_receive_plain(data, conn, num)) {
    *code = CURLE_OUT_OF_MEMORY;
    return -1;
  }

#if defined(MSG_FASTOPEN) && !defined(TCP_FASTOPEN_CONNECT) /* Linux */
  if(conn->bits.tcp_fastopen) {
    /* fast open: the first data goes out with sendto(); the flag is
       cleared so later calls take the normal path */
    bytes_written = sendto(sockfd, mem, len, MSG_FASTOPEN,
                           conn->ip_addr->ai_addr, conn->ip_addr->ai_addrlen);
    conn->bits.tcp_fastopen = FALSE;
  }
  else
#endif
    /* this is where the data is actually written to the socket (per the
       article swrite() is a macro for send()) */
    bytes_written = swrite(sockfd, mem, len);

  *code = CURLE_OK;
  if(-1 != bytes_written)
    return bytes_written;

  err = SOCKERRNO;

#ifdef WSAEWOULDBLOCK
  /* This is how Windows does it */
  would_block = (WSAEWOULDBLOCK == err);
#else
  /* errno may be EWOULDBLOCK or on some systems EAGAIN when it returned
     due to its inability to send off data without blocking. We therefore
     treat both error codes the same here */
  would_block = (EWOULDBLOCK == err) || (EAGAIN == err) || (EINTR == err) ||
                (EINPROGRESS == err);
#endif

  if(would_block) {
    /* this is just a case of EWOULDBLOCK */
    bytes_written = 0;
    *code = CURLE_AGAIN;
  }
  else {
    char buffer[STRERROR_LEN];
    failf(data, "Send failure: %s",
          Curl_strerror(err, buffer, sizeof(buffer)));
    data->state.os_errno = err;
    *code = CURLE_SEND_ERROR;
  }

  return bytes_written;
}

CURLM_STATE_PERFORM 通信过程以后,会跳转到 CURLM_STATE_DONE 数据传输后操作状态

CURLM_STATE_DONE 再跳转到 CURLM_STATE_COMPLETED 操作完成 状态

CURLM_STATE_COMPLETED 已经不做任何事 直接 break 掉了。

等你 调用 curl_easy_cleanup() 时,传入的参数是 curl_easy_init() 时 创建的句柄。

curl_easy_cleanup() 会调用 Curl_close() 函数,来清理资源和网络连接。

到这里,我们已经对 curl 的工作流程做了简单的分析,但并未对具体协议相关的业务逻辑进行分析。

curl 支持众多的协议,包括 DICT, FILE, FTP, FTPS, Gopher, HTTP, HTTPS, IMAP, IMAPS, LDAP, LDAPS, POP3, POP3S, RTMP, RTSP, SCP, SFTP, SMTP, SMTPS, Telnet 和 TFTP 等。如果想了解这些协议的实现,你可以自己继续探索 curl 的源码。

也欢迎你跟我一起交流探索开源库,共同进步。

  • 9
    点赞
  • 31
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
curl是一个命令行工具,用于通过URL访问网络资源。它可以在启动时提供需要访问的互联网地址,并告诉它是否要将数据保存在终端或文件中。与使用浏览器访问网页不同,curl通常在爬虫程序或网页分析程序中使用。它可以用于定期爬取网站、获取源码解析有用的部分,也可以用于企业内部系统的接口调用,附加GET类型的参数,并返回执行成功或失败的响应。在Linux系统中,curl是自带的,可以直接在命令行终端中使用。在Windows系统中,也可以使用curl,但需要单独安装。\[1\]\[2\]\[3\] #### 引用[.reference_title] - *1* [使用 curl 从命令行访问互联网 | Linux 中国](https://blog.csdn.net/weixin_33068055/article/details/111913096)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v91^control_2,239^v3^insert_chatgpt"}} ] [.reference_item] - *2* *3* [windows下的安装与使用curl实现命令行访问Web网站](https://blog.csdn.net/oscar999/article/details/101546092)[target="_blank" data-report-click={"spm":"1018.2226.3001.9630","extra":{"utm_source":"vip_chatgpt_common_search_pc_result","utm_medium":"distribute.pc_search_result.none-task-cask-2~all~insert_cask~default-1-null.142^v91^control_2,239^v3^insert_chatgpt"}} ] [.reference_item] [ .reference_list ]

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值