workflow源码解析:http客户端(一)

1、 示例程序

程序从stdin读取http/https URL,抓取网页并把内容打印到stdout,并将请求和响应的http header打印在stderr。
为了简单起见,程序用Ctrl-C退出,但会保证所有资源先被完全释放。

#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include "workflow/HttpMessage.h"
#include "workflow/HttpUtil.h"
#include "workflow/WFTaskFactory.h"

#ifndef _WIN32
#include <unistd.h>
#endif

#define REDIRECT_MAX    5
#define RETRY_MAX       2

void wget_callback(WFHttpTask *task)
{
	protocol::HttpRequest *req = task->get_req();
	protocol::HttpResponse *resp = task->get_resp();
	int state = task->get_state();
	int error = task->get_error();

	switch (state)
	{
	case WFT_STATE_SYS_ERROR:
		fprintf(stderr, "system error: %s\n", strerror(error));
		break;
	case WFT_STATE_DNS_ERROR:
		fprintf(stderr, "DNS error: %s\n", gai_strerror(error));
		break;
	case WFT_STATE_SSL_ERROR:
		fprintf(stderr, "SSL error: %d\n", error);
		break;
	case WFT_STATE_TASK_ERROR:
		fprintf(stderr, "Task error: %d\n", error);
		break;
	case WFT_STATE_SUCCESS:
		break;
	}

	if (state != WFT_STATE_SUCCESS)
	{
		fprintf(stderr, "Failed. Press Ctrl-C to exit.\n");
		return;
	}

	std::string name;
	std::string value;

	/* Print request. */
	fprintf(stderr, "%s %s %s\r\n", req->get_method(),
									req->get_http_version(),
									req->get_request_uri());

	protocol::HttpHeaderCursor req_cursor(req);

	while (req_cursor.next(name, value))
		fprintf(stderr, "%s: %s\r\n", name.c_str(), value.c_str());
	fprintf(stderr, "\r\n");

	/* Print response header. */
	fprintf(stderr, "%s %s %s\r\n", resp->get_http_version(),
									resp->get_status_code(),
									resp->get_reason_phrase());

	protocol::HttpHeaderCursor resp_cursor(resp);

	while (resp_cursor.next(name, value))
		fprintf(stderr, "%s: %s\r\n", name.c_str(), value.c_str());
	fprintf(stderr, "\r\n");

	/* Print response body. */
	const void *body;
	size_t body_len;

	resp->get_parsed_body(&body, &body_len);
	fwrite(body, 1, body_len, stdout);
	fflush(stdout);

	fprintf(stderr, "\nSuccess. Press Ctrl-C to exit.\n");
}

void sig_handler(int signo) { }

int main(int argc, char *argv[])
{
	WFHttpTask *task;

	if (argc != 2)
	{
		fprintf(stderr, "USAGE: %s <http URL>\n", argv[0]);
		exit(1);
	}

	signal(SIGINT, sig_handler);

	std::string url = argv[1];
	if (strncasecmp(argv[1], "http://", 7) != 0 &&
		strncasecmp(argv[1], "https://", 8) != 0)
	{
		url = "http://" + url;
	}

	task = WFTaskFactory::create_http_task(url, REDIRECT_MAX, RETRY_MAX,
										   wget_callback);
	protocol::HttpRequest *req = task->get_req();
	req->add_header_pair("Accept", "*/*");
	req->add_header_pair("User-Agent", "Wget/1.14 (linux-gnu)");
	req->add_header_pair("Connection", "close");
	task->start();
#ifndef _WIN32
	pause();
#else
	getchar();
#endif
	return 0;
}

2、类继承关系

在这里插入图片描述

3、源码分析

  1. create_http_task
#HttpTaskImpl.cc
WFHttpTask *WFTaskFactory::create_http_task(const std::string& url,
											int redirect_max,
											int retry_max,
											http_callback_t callback)
{
	auto *task = new ComplexHttpTask(redirect_max,
									 retry_max,
									 std::move(callback));
	ParsedURI uri;

	URIParser::parse(url, uri);
	task->init(std::move(uri));
	task->set_keep_alive(HTTP_KEEPALIVE_DEFAULT);
	return task;
}
  1. ComplexHttpTask
#HttpTaskImpl.cc
class ComplexHttpTask : public WFComplexClientTask<HttpRequest, HttpResponse>
{
public:
	ComplexHttpTask(int redirect_max,
					int retry_max,
					http_callback_t&& callback):
		WFComplexClientTask(retry_max, std::move(callback)),
		redirect_max_(redirect_max),
		redirect_count_(0)
	{
		HttpRequest *client_req = this->get_req();

		client_req->set_method(HttpMethodGet);
		client_req->set_http_version("HTTP/1.1");
	}

protected:
	virtual CommMessageOut *message_out();
	virtual CommMessageIn *message_in();
	virtual int keep_alive_timeout();
	virtual bool init_success();
	virtual void init_failed();
	virtual bool finish_once();

protected:
	bool need_redirect(ParsedURI& uri);
	bool redirect_url(HttpResponse *client_resp, ParsedURI& uri);
	void set_empty_request();
	void check_response();

private:
	int redirect_max_;
	int redirect_count_;
};
  1. WFComplexClientTask
#WFTaskFactory.inl
template<class REQ, class RESP, typename CTX = bool>
class WFComplexClientTask : public WFClientTask<REQ, RESP>
{
protected:
	using task_callback_t = std::function<void (WFNetworkTask<REQ, RESP> *)>;

public:
	WFComplexClientTask(int retry_max, task_callback_t&& cb):
		WFClientTask<REQ, RESP>(NULL, WFGlobal::get_scheduler(), std::move(cb))
	{
		type_ = TT_TCP;
		fixed_addr_ = false;
		retry_max_ = retry_max;
		retry_times_ = 0;
		redirect_ = false;
		ns_policy_ = NULL;
		router_task_ = NULL;
	}

protected:
	// new api for children
	virtual bool init_success() { return true; }
	virtual void init_failed() {}
	virtual bool check_request() { return true; }
	virtual WFRouterTask *route();
	virtual bool finish_once() { return true; }

public:
	void init(const ParsedURI& uri)
	{
		uri_ = uri;
		init_with_uri();
	}

	void init(ParsedURI&& uri)
	{
		uri_ = std::move(uri);
		init_with_uri();
	}

	void init(TransportType type,
			  const struct sockaddr *addr,
			  socklen_t addrlen,
			  const std::string& info);

	void set_transport_type(TransportType type)
	{
		type_ = type;
	}

	TransportType get_transport_type() const { return type_; }

	virtual const ParsedURI *get_current_uri() const { return &uri_; }

	void set_redirect(const ParsedURI& uri)
	{
		redirect_ = true;
		init(uri);
	}

	void set_redirect(TransportType type, const struct sockaddr *addr,
					  socklen_t addrlen, const std::string& info)
	{
		redirect_ = true;
		init(type, addr, addrlen, info);
	}

	bool is_fixed_addr() const { return this->fixed_addr_; }

protected:
	void set_fixed_addr(int fixed) { this->fixed_addr_ = fixed; }

	void set_info(const std::string& info)
	{
		info_.assign(info);
	}

	void set_info(const char *info)
	{
		info_.assign(info);
	}

protected:
	virtual void dispatch();
	virtual SubTask *done();

	void clear_resp()
	{
		size_t size = this->resp.get_size_limit();

		this->resp.~RESP();
		new(&this->resp) RESP();
		this->resp.set_size_limit(size);
	}

	void disable_retry()
	{
		retry_times_ = retry_max_;
	}

protected:
	TransportType type_;
	ParsedURI uri_;
	std::string info_;
	bool fixed_addr_;
	bool redirect_;
	CTX ctx_;
	int retry_max_;
	int retry_times_;
	WFNSPolicy *ns_policy_;
	WFRouterTask *router_task_;
	RouteManager::RouteResult route_result_;
	WFNSTracing tracing_;

public:
	CTX *get_mutable_ctx() { return &ctx_; }

private:
	void clear_prev_state();
	void init_with_uri();
	bool set_port();
	void router_callback(void *t);
	void switch_callback(void *t);
};
  1. WFClientTask
#WFTask.inl
template<class REQ, class RESP>
class WFClientTask : public WFNetworkTask<REQ, RESP>
{
protected:
	virtual CommMessageOut *message_out()
	{
		/* By using prepare function, users can modify request after
		 * the connection is established. */
		if (this->prepare)
			this->prepare(this);

		return &this->req;
	}

	virtual CommMessageIn *message_in() { return &this->resp; }

protected:
	virtual WFConnection *get_connection() const
	{
		CommConnection *conn;

		if (this->target)
		{
			conn = this->CommSession::get_connection();
			if (conn)
				return (WFConnection *)conn;
		}

		errno = ENOTCONN;
		return NULL;
	}

protected:
	virtual SubTask *done()
	{
		SeriesWork *series = series_of(this);

		if (this->state == WFT_STATE_SYS_ERROR && this->error < 0)
		{
			this->state = WFT_STATE_SSL_ERROR;
			this->error = -this->error;
		}

		if (this->callback)
			this->callback(this);

		delete this;
		return series->pop();
	}

public:
	void set_prepare(std::function<void (WFNetworkTask<REQ, RESP> *)> prep)
	{
		this->prepare = std::move(prep);
	}

protected:
	std::function<void (WFNetworkTask<REQ, RESP> *)> prepare;

public:
	WFClientTask(CommSchedObject *object, CommScheduler *scheduler,
				 std::function<void (WFNetworkTask<REQ, RESP> *)>&& cb) :
		WFNetworkTask<REQ, RESP>(object, scheduler, std::move(cb))
	{
	}

protected:
	virtual ~WFClientTask() { }
};
  1. WFNetworkTask
#WFTask.h
template<class REQ, class RESP>
class WFNetworkTask : public CommRequest
{
public:
	/* start(), dismiss() are for client tasks only. */
	void start()
	{
		assert(!series_of(this));
		Workflow::start_series_work(this, nullptr);
	}

	void dismiss()
	{
		assert(!series_of(this));
		delete this;
	}

public:
	REQ *get_req() { return &this->req; }
	RESP *get_resp() { return &this->resp; }

public:
	void *user_data;

public:
	int get_state() const { return this->state; }
	int get_error() const { return this->error; }

	/* Call when error is ETIMEDOUT, return values:
	 * TOR_NOT_TIMEOUT, TOR_WAIT_TIMEOUT, TOR_CONNECT_TIMEOUT,
	 * TOR_TRANSMIT_TIMEOUT (send or receive).
	 * SSL connect timeout also returns TOR_CONNECT_TIMEOUT. */
	int get_timeout_reason() const { return this->timeout_reason; }

	/* Call only in callback or server's process. */
	long long get_task_seq() const
	{
		if (!this->target)
		{
			errno = ENOTCONN;
			return -1;
		}

		return this->get_seq();
	}

	int get_peer_addr(struct sockaddr *addr, socklen_t *addrlen) const;

	virtual WFConnection *get_connection() const = 0;

public:
	/* All in milliseconds. timeout == -1 for unlimited. */
	void set_send_timeout(int timeout) { this->send_timeo = timeout; }
	void set_receive_timeout(int timeout) { this->receive_timeo = timeout; }
	void set_keep_alive(int timeout) { this->keep_alive_timeo = timeout; }

public:
	/* Do not reply this request. */
	void noreply()
	{
		if (this->state == WFT_STATE_TOREPLY)
			this->state = WFT_STATE_NOREPLY;
	}

	/* Push reply data synchronously. */
	virtual int push(const void *buf, size_t size)
	{
		return this->scheduler->push(buf, size, this);
	}

	/* To check if the connection was closed before replying.
	   Always returns 'true' in callback. */
	bool closed() const
	{
		if (this->state == WFT_STATE_TOREPLY)
			return !this->get_target()->has_idle_conn();
		else
			return this->state != WFT_STATE_UNDEFINED;
	}

public:
	void set_callback(std::function<void (WFNetworkTask<REQ, RESP> *)> cb)
	{
		this->callback = std::move(cb);
	}

protected:
	virtual int send_timeout() { return this->send_timeo; }
	virtual int receive_timeout() { return this->receive_timeo; }
	virtual int keep_alive_timeout() { return this->keep_alive_timeo; }

protected:
	int send_timeo;
	int receive_timeo;
	int keep_alive_timeo;
	REQ req;
	RESP resp;
	std::function<void (WFNetworkTask<REQ, RESP> *)> callback;

protected:
	WFNetworkTask(CommSchedObject *object, CommScheduler *scheduler,
				  std::function<void (WFNetworkTask<REQ, RESP> *)>&& cb) :
		CommRequest(object, scheduler),
		callback(std::move(cb))
	{
		this->send_timeo = -1;
		this->receive_timeo = -1;
		this->keep_alive_timeo = 0;
		this->target = NULL;
		this->timeout_reason = TOR_NOT_TIMEOUT;
		this->user_data = NULL;
		this->state = WFT_STATE_UNDEFINED;
		this->error = 0;
	}

	virtual ~WFNetworkTask() { }
};
  1. CommRequest
# CommRequest.h
class CommRequest : public SubTask, public CommSession
{
public:
	CommRequest(CommSchedObject *object, CommScheduler *scheduler)
	{
		this->scheduler = scheduler;
		this->object = object;
		this->wait_timeout = 0;
	}

	CommSchedObject *get_request_object() const { return this->object; }
	void set_request_object(CommSchedObject *object) { this->object = object; }
	int get_wait_timeout() const { return this->wait_timeout; }
	void set_wait_timeout(int timeout) { this->wait_timeout = timeout; }

public:
	virtual void dispatch()
	{
		if (this->scheduler->request(this, this->object, this->wait_timeout,
									 &this->target) < 0)
		{
			this->handle(CS_STATE_ERROR, errno);
		}
	}

protected:
	int state;
	int error;

protected:
	CommTarget *target;
#define TOR_NOT_TIMEOUT			0
#define TOR_WAIT_TIMEOUT		1
#define TOR_CONNECT_TIMEOUT		2
#define TOR_TRANSMIT_TIMEOUT	3
	int timeout_reason;

protected:
	int wait_timeout;
	CommSchedObject *object;
	CommScheduler *scheduler;

protected:
	virtual void handle(int state, int error);
};

(1)应用程序调用start()时,调用到 WFNetworkTask类的

`void start()
	{
		assert(!series_of(this));
		Workflow::start_series_work(this, nullptr);
	}`

(2)

inline void
Workflow::start_series_work(SubTask *first, series_callback_t callback)
{
	new SeriesWork(first, std::move(callback));
	first->dispatch();
}

(3)调用 WFComplexClientTask<REQ, RESP, CTX>::dispatch() (这里为什么不是调用 CommRequest 的 dispatch

template<class REQ, class RESP, typename CTX>
void WFComplexClientTask<REQ, RESP, CTX>::dispatch()
{
	switch (this->state)
	{
	case WFT_STATE_UNDEFINED:       // 第一次是这个状态
		if (this->check_request())   // 这里直接return true 
		{
			if (this->route_result_.request_object)    // 第一次走着初始化是空的,直接到下面产生router_task_
			{
	case WFT_STATE_SUCCESS:   // 第二次就直接success了
				this->set_request_object(route_result_.request_object);
				this->WFClientTask<REQ, RESP>::dispatch(); //这里会调用到 CommRequest 的 dispatch
				return;
			}
			// 第一次直接过来了,产生route做dns解析
			// 产生一个router_task_插入到前面去做dns解析
			router_task_ = this->route();
			series_of(this)->push_front(this);
			series_of(this)->push_front(router_task_);
		}

	default:
		break;
	}

	this->subtask_done();
}

(4)CommRequest::dispatch 组成
dns解析完后,

this->WFClientTask<REQ, RESP>::dispatch();

调用CommRequest的dispatch

void CommRequest::dispatch()
{
	// 发送请求
	this->scheduler->request(this, this->object, this->wait_timeout,
								 &this->target);
	...
}

(5)scheduler 的reques执行的是

/* wait_timeout in microseconds, -1 for no timeout. */
	int request(CommSession *session, CommSchedObject *object,
				int wait_timeout, CommTarget **target)
	{
		int ret = -1;

		*target = object->acquire(wait_timeout);  //获取通信target
		if (*target)
		{
			ret = this->comm.request(session, *target); // 调用request去发request请求
			if (ret < 0)
				(*target)->release();
		}

		return ret;
	}

这里CommTarget 才是通讯目标,基本上就是ip+port, 还有两个超时参数。连接池什么的都在target里
(6)request_idle_conn()为复用连接,没有则创建新的连接 this->request_new_conn

int Communicator::request(CommSession *session, CommTarget *target)
{
	int errno_bak;

	if (session->passive)
	{
		errno = EINVAL;
		return -1;
	}

	errno_bak = errno;
	session->target = target;
	session->out = NULL;
	session->in = NULL;
	//有可复用连接
	if (this->request_idle_conn(session, target) < 0)
	{
	    //没有可复用连接
		if (this->request_new_conn(session, target) < 0)
		{
			session->conn = NULL;
			session->seq = 0;
			return -1;
		}
	}

	errno = errno_bak;
	return 0;
}

(7)有可复用的连接

int Communicator::request_idle_conn(CommSession *session, CommTarget *target)
{
	struct CommConnEntry *entry;
	struct list_head *pos;
	int ret = -1;

	while (1)
	{
	    //1. 寻找可以复用的连接
		pthread_mutex_lock(&target->mutex);
		if (!list_empty(&target->idle_list))
		{
			pos = target->idle_list.next;
			entry = list_entry(pos, struct CommConnEntry, list);
			list_del(pos);
			pthread_mutex_lock(&entry->mutex);
		}
		else
			entry = NULL;

		pthread_mutex_unlock(&target->mutex);
		if (!entry)
		{
			errno = ENOENT;
			return -1;
		}

		if (mpoller_set_timeout(entry->sockfd, -1, this->mpoller) >= 0)
			break;

		entry->state = CONN_STATE_CLOSING;
		pthread_mutex_unlock(&entry->mutex);
	}

    //2. 拼凑req请求,添加一些字段
	entry->session = session;
	session->conn = entry->conn;
	session->seq = entry->seq++;
	session->out = session->message_out();
	//3. 发送消息
	if (session->out)
		ret = this->send_message(entry);

	if (ret < 0)
	{
		entry->error = errno;
		mpoller_del(entry->sockfd, this->mpoller);
		entry->state = CONN_STATE_ERROR;
		ret = 1;
	}

	pthread_mutex_unlock(&entry->mutex);
	return ret;
}

(8)没有可复用的连接

int Communicator::request_new_conn(CommSession *session, CommTarget *target)
{
	struct CommConnEntry *entry;
	struct poller_data data;
	int timeout;

	entry = this->launch_conn(session, target);
	if (entry)
	{
		session->conn = entry->conn;
		session->seq = entry->seq++;
		data.operation = PD_OP_CONNECT;
		data.fd = entry->sockfd;
		data.ssl = NULL;
		data.context = entry;
		timeout = session->target->connect_timeout;
		if (mpoller_add(&data, timeout, this->mpoller) >= 0)
			return 0;

		this->release_conn(entry);
	}

	return -1;
}

(a)如果没有可以复用的连接,我们先去建立连接,然后把connect操作挂到epoll上面监听(异步connect)

struct CommConnEntry *Communicator::launch_conn(CommSession *session,
												CommTarget *target)
{
	struct CommConnEntry *entry;
	int sockfd;
	int ret;
    //1. connect 建立连接
	sockfd = this->nonblock_connect(target);
	if (sockfd >= 0)
	{
		entry = (struct CommConnEntry *)malloc(sizeof (struct CommConnEntry));
		if (entry)
		{
			ret = pthread_mutex_init(&entry->mutex, NULL);
			if (ret == 0)
			{
				//2. 创建新的CommConnection
	            // 然后初始化entry
				entry->conn = target->new_connection(sockfd);
				if (entry->conn)
				{
					entry->seq = 0;
					entry->mpoller = this->mpoller;
					entry->service = NULL;
					entry->target = target;
					entry->session = session;
					entry->ssl = NULL;
					entry->sockfd = sockfd;
					entry->state = CONN_STATE_CONNECTING;
					entry->ref = 1;
					return entry;
				}

				pthread_mutex_destroy(&entry->mutex);
			}
			else
				errno = ret;

			free(entry);
		}

		close(sockfd);
	}

	return NULL;
}

1、connect建立连接

int Communicator::nonblock_connect(CommTarget *target)
{
	// 创建cfd
	int sockfd = target->create_connect_fd();
	...
	// 设置非阻塞
	__set_fd_nonblock(sockfd)
	...
	// 然后调用connec连接
	if (connect(sockfd, target->addr, target->addrlen) >= 0 ||
		errno == EINPROGRESS)
	{
		return sockfd;
	}
	...
}

2、创建新的CommConnection

virtual CommConnection *new_connection(int connect_fd)
{
	return new CommConnection;
}

(b)异步connect (这里没懂,为什么会到这里)
然后我们poller检测出这个事件后

// __poller_thread_routines 中调用 __poller_handle_connect(node, poller);

static void __poller_handle_connect(struct __poller_node *node,
									poller_t *poller)
{
	socklen_t len = sizeof (int);
	int error;

	if (getsockopt(node->data.fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0)
		error = errno;

	if (__poller_remove_node(node, poller))
		return;
	...
   //放入消息队列
	poller->cb((struct poller_result *)node, poller->ctx);
}

poller->cb((struct poller_result *)node, poller->ctx);就是把node(res) 放入这个msg queue里

(c)Communicator::handler_thread_routine


void Communicator::handler_thread_routine(void *context)
{
	Communicator *comm = (Communicator *)context;
	struct poller_result *res;

	while ((res = (struct poller_result *)msgqueue_get(comm->queue)) != NULL)
	{
		switch (res->data.operation)
		{
		...
		case PD_OP_CONNECT:
		case PD_OP_SSL_CONNECT:
			comm->handle_connect_result(res);
			break;
		...
	}
}

(d)Communicator::handle_connect_result
于是来处理connect,发送entry,并把read放到epoll上间监听

void Communicator::handle_connect_result(struct poller_result *res)
{
	struct CommConnEntry *entry = (struct CommConnEntry *)res->data.context;
	CommSession *session = entry->session;
	CommTarget *target = entry->target;

	session->out = session->message_out();

	ret = this->send_message(entry);

	res->data.operation = PD_OP_READ;
	res->data.message = NULL;
	timeout = session->first_timeout();
	if (timeout == 0)
		timeout = Communicator::first_timeout_recv(session);
	else
	{
		session->timeout = -1;
		session->begin_time.tv_nsec = -1;
	}
	...
	mpoller_add(&res->data, timeout, this->mpoller);
	...
}

(9)send_message

int Communicator::send_message(struct CommConnEntry *entry)
{
	struct iovec vectors[ENCODE_IOV_MAX];
	struct iovec *end;
	int cnt;
    //消息序列化到vectors的数组
	cnt = entry->session->out->encode(vectors, ENCODE_IOV_MAX);
	...
	end = vectors + cnt;
	if (!entry->ssl)
	{
	    //发送消息
		cnt = this->send_message_sync(vectors, cnt, entry);
		if (cnt <= 0)
			return cnt;
	}

	return this->send_message_async(end - cnt, cnt, entry);
}

4、参考链接

https://github.com/chanchann/workflow_annotation/blob/main/src_analysis/18_http_01.md

  • 9
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值