八、Session的处理

event_accept小节曾经说过,TASK的process成员来自于listener的handler成员,而listener的handler成员是在解析配置文件时被设置为process_session的。那么接下来就看看Haproxy最主要的任务处理过程是怎样的。

[src/session.c]process_session()
/* Processes the client, server, request and response jobs of a session task,
 * then puts it back to the wait queue in a clean state, or cleans up its
 * resources if it must be deleted. Returns in <next> the date the task wants
 * to be woken up, or TICK_ETERNITY. In order not to call all functions for
 * nothing too many times, the request and response buffers flags are monitored
 * and each function is called only if at least another function has changed at
 * least one flag it is interested in.
 */

作者介绍说此函数用于处理会话的客户端、服务端、请求和响应相关的任务。进入此函数之前,TASK会被从runqueue中移除;处理完之后,任务会以干净的状态重新放入等待队列,或者在需要删除时清理掉它的资源。函数返回的超时时间表示任务希望下一次被唤醒的时刻,也可能是TICK_ETERNITY。为了避免无谓地反复调用所有处理函数,请求和响应缓冲区的标志会被监控,只有当某个函数感兴趣的标志被其他函数改变过时,它才会被调用。

[src/session.c]process_session()
struct task *process_session(struct task *t)
{
	struct session *s = t->context;
	unsigned int rqf_last, rpf_last;
	unsigned int req_ana_back;

	//DPRINTF(stderr, "%s:%d: cs=%d ss=%d(%d) rqf=0x%08x rpf=0x%08x\n", __FUNCTION__, __LINE__,
	//        s->si[0].state, s->si[1].state, s->si[1].err_type, s->req->flags, s->rep->flags);

	/* this data may be no longer valid, clear it */
	memset(&s->txn.auth, 0, sizeof(s->txn.auth));

	/* This flag must explicitly be set every time */
	s->req->flags &= ~BF_READ_NOEXP;

	/* Keep a copy of req/rep flags so that we can detect shutdowns */
	rqf_last = s->req->flags;
	rpf_last = s->rep->flags;

	/* we don't want the stream interface functions to recursively wake us up */
	if (s->req->prod->owner == t)
		s->req->prod->flags |= SI_FL_DONT_WAKE;
	if (s->req->cons->owner == t)
		s->req->cons->flags |= SI_FL_DONT_WAKE;

	/* 1a: Check for low level timeouts if needed. We just set a flag on
	 * stream interfaces when their timeouts have expired.
	 */
	if (unlikely(t->state & TASK_WOKEN_TIMER)) {
		stream_int_check_timeouts(&s->si[0]);
		stream_int_check_timeouts(&s->si[1]);

在event_accept()中我们已经知道,TASK的context成员对应着与之关联的SESSION。接着将请求和响应缓冲区的FLAGS保存一份,以便后面检测是否发生了shutdown。如果某个stream interface的owner就是当前TASK,由于现在正处于处理过程之中,因此给它加上SI_FL_DONT_WAKE标志,避免在stream interface相关的函数中递归地唤醒自己(检查owner是否为当前任务,是因为只有属于本任务的stream interface才需要加这个标志)。如果本次任务是由于定时器超时而被调度的(TASK_WOKEN_TIMER),那么需要检查更低层次(stream interface)的超时是否已经触发。

[src/stream_interface.c]process_session()->stream_int_check_timeouts()
/*
 * This function only has to be called once after a wakeup event in case of
 * suspected timeout. It controls the stream interface timeouts and sets
 * si->flags accordingly. It does NOT close anything, as this timeout may
 * be used for any purpose. It returns 1 if the timeout fired, otherwise
 * zero.
 */
int stream_int_check_timeouts(struct stream_interface *si)
{
	if (tick_is_expired(si->exp, now_ms)) {
		si->flags |= SI_FL_EXP;
		return 1;
	}
	return 0;
}

此函数控制stream interface的超时机制:如果stream interface的超时时间已经到了,那么设置SI_FL_EXP标志并返回1,表示超时已经发生;否则直接返回0。

[src/session.c]process_session()
		/* check buffer timeouts, and close the corresponding stream interfaces
		 * for future reads or writes. Note: this will also concern upper layers
		 * but we do not touch any other flag. We must be careful and correctly
		 * detect state changes when calling them.
		 */

		buffer_check_timeouts(s->req);

		if(unlikely((s->req->flags & (BF_SHUTW|BF_WRITE_TIMEOUT)) == BF_WRITE_TIMEOUT)) {
			s->req->cons->flags |= SI_FL_NOLINGER;
			s->req->cons->shutw(s->req->cons);
		}

		if (unlikely((s->req->flags & (BF_SHUTR|BF_READ_TIMEOUT)) == BF_READ_TIMEOUT))
			s->req->prod->shutr(s->req->prod);

		buffer_check_timeouts(s->rep);

		if (unlikely((s->rep->flags & (BF_SHUTW|BF_WRITE_TIMEOUT)) == BF_WRITE_TIMEOUT)) {
			s->rep->cons->flags |= SI_FL_NOLINGER;
			s->rep->cons->shutw(s->rep->cons);
		}

		if (unlikely((s->rep->flags & (BF_SHUTR|BF_READ_TIMEOUT)) == BF_READ_TIMEOUT))
			s->rep->prod->shutr(s->rep->prod);
	}

检查请求和响应缓冲区的读写是否超时。对REQ BUFFER与REP BUFFER的读端和写端分别进行超时标志检查:如果写超时,那么给消费者一侧的stream interface设置SI_FL_NOLINGER标志(这是程序中自定义的、类似于socket上"不等待关闭"的选项),并调用其shutw;如果读超时,那么调用生产者一侧的shutr。缓冲区超时检查对应的函数为buffer_check_timeouts。

[src/buffers.c]process_session()->buffer_check_timeouts()
/* Check buffer timeouts, and set the corresponding flags. The
 * likely/unlikely have been optimized for fastest normal path.
 * The read/write timeouts are not set if there was activity on the buffer.
 * That way, we don't have to update the timeout on every I/O. Note that the
 * analyser timeout is always checked.
 */
static inline void buffer_check_timeouts(struct buffer *b)
{
	if (likely(!(b->flags & (BF_SHUTR|BF_READ_TIMEOUT|BF_READ_ACTIVITY|BF_READ_NOEXP))) &&
	    unlikely(tick_is_expired(b->rex, now_ms)))
		b->flags |= BF_READ_TIMEOUT;

	if (likely(!(b->flags & (BF_SHUTW|BF_WRITE_TIMEOUT|BF_WRITE_ACTIVITY))) &&
	    unlikely(tick_is_expired(b->wex, now_ms)))
		b->flags |= BF_WRITE_TIMEOUT;

	if (likely(!(b->flags & BF_ANA_TIMEOUT)) &&
	    unlikely(tick_is_expired(b->analyse_exp, now_ms)))
		b->flags |= BF_ANA_TIMEOUT;
}

如果BUFFER的读端没有关闭、没有已置位的读超时标志、本轮没有读活动(读到数据、read返回0或出错都算有活动)、并且允许读超时(没有BF_READ_NOEXP),那么检查BUFFER的读超时时间是否已经到达,如果是则设置BF_READ_TIMEOUT标志。对BUFFER写端的处理类似。最后检查分析器(analyser)的超时,并根据情况设置BF_ANA_TIMEOUT。
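
这里用到的tick_is_expired、tick_add等时间比较在后文会反复出现。下面是一段示意代码(按照HAProxy tick的思路假设写成,并非其真实实现,demo_前缀的名字都是假设的):超时用32位毫秒计数表示,0(TICK_ETERNITY)表示永不超时,比较时用有符号差值以容忍计数回绕。

#include <stdio.h>
#include <stdint.h>

#define TICK_ETERNITY 0

/* 到期判断:exp 为 0 表示永不超时;否则用有符号差值比较,容忍 32 位回绕 */
static inline int demo_tick_is_expired(uint32_t exp, uint32_t now)
{
	if (exp == TICK_ETERNITY)
		return 0;
	return (int32_t)(exp - now) <= 0;
}

/* 在 now 的基础上加 delay 毫秒,结果避开 0 以免与 TICK_ETERNITY 混淆 */
static inline uint32_t demo_tick_add(uint32_t now, uint32_t delay)
{
	uint32_t t = now + delay;
	return t ? t : t + 1;
}

int main(void)
{
	uint32_t now = 1000;
	uint32_t exp = demo_tick_add(now, 500);              /* 500ms 后到期 */

	printf("%d\n", demo_tick_is_expired(exp, now));       /* 0:未到期 */
	printf("%d\n", demo_tick_is_expired(exp, now + 600)); /* 1:已到期 */
	return 0;
}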

[src/session.c]process_session()
	/* 1b: check for low-level errors reported at the stream interface.
	 * First we check if it's a retryable error (in which case we don't
	 * want to tell the buffer). Otherwise we report the error one level
	 * upper by setting flags into the buffers. Note that the side towards
	 * the client cannot have connect (hence retryable) errors. Also, the
	 * connection setup code must be able to deal with any type of abort.
	 */
	if (unlikely(s->si[0].flags & SI_FL_ERR)) {
		if (s->si[0].state == SI_ST_EST || s->si[0].state == SI_ST_DIS) {
			s->si[0].shutr(&s->si[0]);
			s->si[0].shutw(&s->si[0]);
			stream_int_report_error(&s->si[0]);
			if (!(s->req->analysers) && !(s->rep->analysers)) {
				s->be->counters.cli_aborts++;
				if (s->srv)
					s->srv->counters.cli_aborts++;
				if (!(s->flags & SN_ERR_MASK))
					s->flags |= SN_ERR_CLICL;
				if (!(s->flags & SN_FINST_MASK))
					s->flags |= SN_FINST_D;
			}
		}
	}

	if (unlikely(s->si[1].flags & SI_FL_ERR)) {
		if (s->si[1].state == SI_ST_EST || s->si[1].state == SI_ST_DIS) {
			s->si[1].shutr(&s->si[1]);
			s->si[1].shutw(&s->si[1]);
			stream_int_report_error(&s->si[1]);
			s->be->counters.failed_resp++;
			if (s->srv)
				s->srv->counters.failed_resp++;
			if (!(s->req->analysers) && !(s->rep->analysers)) {
				s->be->counters.srv_aborts++;
				if (s->srv)
					s->srv->counters.srv_aborts++;
				if (!(s->flags & SN_ERR_MASK))
					s->flags |= SN_ERR_SRVCL;
				if (!(s->flags & SN_FINST_MASK))
					s->flags |= SN_FINST_D;
			}
		}
		/* note: maybe we should process connection errors here ? */
	}

接着检查stream interface是否出错(SI_FL_ERR)。如果出错了,那么对处于已连接(SI_ST_EST)或已断开但尚未释放资源(SI_ST_DIS)状态的stream interface关闭读和写,然后报告错误;报告错误的函数很简单,只是给对应BUFFER的标志位加上读或写错误。接着在请求和响应都已没有分析器的情况下更新一些统计量,并设置SESSION的错误原因以及SESSION结束时所处的阶段。从event_accept函数中可知,shutr和shutw成员分别指向stream_sock_shutr和stream_sock_shutw函数。

[src/stream_sock.c]process_session()->stream_sock_shutr()
void stream_sock_shutr(struct stream_interface *si)
{
	si->ib->flags &= ~BF_SHUTR_NOW;
	if (si->ib->flags & BF_SHUTR)
		return;
	si->ib->flags |= BF_SHUTR;
	si->ib->rex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_ROOM;

	if (si->state != SI_ST_EST && si->state != SI_ST_CON)
		return;

	if (si->ob->flags & BF_SHUTW) {
		fd_delete(si->fd);
		si->state = SI_ST_DIS;
		si->exp = TICK_ETERNITY;
		return;
	}
	EV_FD_CLR(si->fd, DIR_RD);
	return;
}

先去掉BF_SHUTR_NOW标志;如果已经设置了BF_SHUTR标志,那么直接返回。否则设置BF_SHUTR标志,把读超时清为TICK_ETERNITY,并去掉SI_FL_WAIT_ROOM标志,表示不再等待缓冲区空间。如果stream interface的状态既不是SI_ST_EST(已连接)也不是SI_ST_CON(已发出connect并分配了资源),那么直接返回,因为没有需要清理的fd。否则检查写端是否也已关闭(是否设置了BF_SHUTW标志):如果是,那么释放掉对应的fd,将stream interface的状态改为SI_ST_DIS并返回;否则只是清除对应fd的读事件。

[src/stream_sock.c]process_session()->stream_sock_shutw()
void stream_sock_shutw(struct stream_interface *si)
{
	si->ob->flags &= ~BF_SHUTW_NOW;
	if (si->ob->flags & BF_SHUTW)
		return;
	si->ob->flags |= BF_SHUTW;
	si->ob->wex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_DATA;

	switch (si->state) {
	case SI_ST_EST:
		/* we have to shut before closing, otherwise some short messages
		 * may never leave the system, especially when there are remaining
		 * unread data in the socket input buffer, or when nolinger is set.
		 * However, if SI_FL_NOLINGER is explicitly set, we know there is
		 * no risk so we close both sides immediately.
		 */
		if (si->flags & SI_FL_ERR) {
			/* quick close, the socket is already shut. Remove pending flags. */
			si->flags &= ~SI_FL_NOLINGER;
		} else if (si->flags & SI_FL_NOLINGER) {
			si->flags &= ~SI_FL_NOLINGER;
			setsockopt(si->fd, SOL_SOCKET, SO_LINGER,
				   (struct linger *) &nolinger, sizeof(struct linger));
		} else {
			EV_FD_CLR(si->fd, DIR_WR);
			shutdown(si->fd, SHUT_WR);

			if (!(si->ib->flags & (BF_SHUTR|BF_DONT_READ)))
				return;
		}

		/* fall through */
	case SI_ST_CON:
		/* we may have to close a pending connection, and mark the
		 * response buffer as shutr
		 */
		fd_delete(si->fd);
		/* fall through */
	case SI_ST_CER:
	case SI_ST_QUE:
	case SI_ST_TAR:
		si->state = SI_ST_DIS;
	default:
		si->flags &= ~SI_FL_WAIT_ROOM;
		si->ib->flags |= BF_SHUTR;
		si->ib->rex = TICK_ETERNITY;
		si->exp = TICK_ETERNITY;
		return;
	}
}

shutw基本上与shutr类似,不同的是由于涉及到NOLINGER选项,需要对stream interface的各种状态区分处理,但所有情况最终都会落入default分支。对于已连接(SI_ST_EST)状态:如果之前出错了(SI_FL_ERR),socket实际上已经关闭,只需去掉NOLINGER标志并继续往下落到default即可;如果没有出错但设置了SI_FL_NOLINGER标志,那么去掉该标志,并通过setsockopt把"不等待"的语义落实到socket的SO_LINGER选项上;如果既没有出错也没有NOLINGER标志,那么调用shutdown()关闭socket的写端,并且在读端尚未关闭时直接返回。对于这两个stream interface,shut函数的调用会导致其状态发生改变(假设之前已经连接)。

[src/session.c]process_session()
	if (s->si[1].state == SI_ST_CON) {
		/* we were trying to establish a connection on the server side,
		 * maybe it succeeded, maybe it failed, maybe we timed out, ...
		 */
		if (unlikely(!sess_update_st_con_tcp(s, &s->si[1])))
			sess_update_st_cer(s, &s->si[1]);
		else if (s->si[1].state == SI_ST_EST)
			sess_establish(s, &s->si[1]);

		/* state is now one of SI_ST_CON (still in progress), SI_ST_EST
		 * (established), SI_ST_DIS (abort), SI_ST_CLO (last error),
		 * SI_ST_ASS/SI_ST_TAR/SI_ST_REQ for retryable errors.
		 */
	}

如果此时后端stream interface处于SI_ST_CON状态,即已经调用过connect并分配了资源但还没有确认连接结果,那么需要检查连接是成功、失败还是超时,并据此改变stream interface的状态。检查的函数为sess_update_st_con_tcp;若其返回0则调用sess_update_st_cer处理错误,若状态已经变为SI_ST_EST则调用sess_establish。

[src/session.c]process_session()->sess_update_st_con_tcp()
/* This function is called with (si->state == SI_ST_CON) meaning that a
 * connection was attempted and that the file descriptor is already allocated.
 * We must check for establishment, error and abort. Possible output states
 * are SI_ST_EST (established), SI_ST_CER (error), SI_ST_DIS (abort), and
 * SI_ST_CON (no change). The function returns 0 if it switches to SI_ST_CER,
 * otherwise 1.
 */
int sess_update_st_con_tcp(struct session *s, struct stream_interface *si)
{
	struct buffer *req = si->ob;
	struct buffer *rep = si->ib;

	/* If we got an error, or if nothing happened and the connection timed
	 * out, we must give up. The CER state handler will take care of retry
	 * attempts and error reports.
	 */
	if (unlikely(si->flags & (SI_FL_EXP|SI_FL_ERR))) {
		si->exp   = TICK_ETERNITY;
		si->state = SI_ST_CER;
		si->flags &= ~SI_FL_CAP_SPLICE;
		fd_delete(si->fd);

		if (si->err_type)
			return 0;

		si->err_loc = s->srv;
		if (si->flags & SI_FL_ERR)
			si->err_type = SI_ET_CONN_ERR;
		else
			si->err_type = SI_ET_CONN_TO;
		return 0;
	}

如果出现超时或者出错(SI_FL_EXP或SI_FL_ERR),那么将stream interface的状态改成SI_ST_CER,表示connect出错,并释放fd资源。如果之前已经设置了错误类型,那么可以直接返回0;否则根据标志位将错误类型设置为连接错误(SI_ET_CONN_ERR)或连接超时(SI_ET_CONN_TO),并返回0。

[src/session.c]process_session()->sess_update_st_con_tcp()
	/* OK, maybe we want to abort */
	if (unlikely((rep->flags & BF_SHUTW) ||
		     ((req->flags & BF_SHUTW_NOW) && /* FIXME: this should not prevent a connection from establishing */
		      (((req->flags & (BF_OUT_EMPTY|BF_WRITE_ACTIVITY)) == BF_OUT_EMPTY) ||
		       s->be->options & PR_O_ABRT_CLOSE)))) {
		/* give up */
		si->shutw(si);
		si->err_type |= SI_ET_CONN_ABRT;
		si->err_loc  = s->srv;
		si->flags &= ~SI_FL_CAP_SPLICE;
		if (s->srv_error)
			s->srv_error(s, si);
		return 1;
	}

如果响应缓冲区的写端已经关闭(BF_SHUTW),或者请求缓冲区已准备关闭写端(BF_SHUTW_NOW)并且要么缓冲区已空且没有写活动、要么后端配置了PR_O_ABRT_CLOSE(客户端关闭时直接放弃请求),那么放弃本次连接:关闭stream interface的写端,将错误类型设置为SI_ET_CONN_ABRT,表示连接被放弃,并返回1。

[src/session.c]process_session()->sess_update_st_con_tcp()
	/* we need to wait a bit more if there was no activity either */
	if (!(req->flags & BF_WRITE_ACTIVITY))
		return 1;

	/* OK, this means that a connection succeeded. The caller will be
	 * responsible for handling the transition from CON to EST.
	 */
	s->logs.t_connect = tv_ms_elapsed(&s->logs.tv_accept, &now);
	si->exp      = TICK_ETERNITY;
	si->state    = SI_ST_EST;
	si->err_type = SI_ET_NONE;
	si->err_loc  = NULL;
	return 1;
}

如果请求缓冲区上没有任何写活动,那么直接返回1,再等一会儿让触发事件来改变状态。走到最后说明连接已经成功:将状态设置为SI_ST_EST,清除错误,并记录连接延迟(t_connect)。只有转入SI_ST_CER时本函数才返回0,返回0时由sess_update_st_cer进行后续处理。

[src/session.c]process_session()->sess_update_st_cer()
/* This function is called with (si->state == SI_ST_CER) meaning that a
 * previous connection attempt has failed and that the file descriptor
 * has already been released. Possible causes include asynchronous error
 * notification and time out. Possible output states are SI_ST_CLO when
 * retries are exhausted, SI_ST_TAR when a delay is wanted before a new
 * connection attempt, SI_ST_ASS when it's wise to retry on the same server,
 * and SI_ST_REQ when an immediate redispatch is wanted. The buffers are
 * marked as in error state. It returns 0.
 */
int sess_update_st_cer(struct session *s, struct stream_interface *si)
{
	/* we probably have to release last session from the server */
	if (s->srv) {
		health_adjust(s->srv, HANA_STATUS_L4_ERR);

		if (s->flags & SN_CURR_SESS) {
			s->flags &= ~SN_CURR_SESS;
			s->srv->cur_sess--;
		}
	}

如果当前SESSION的SERVER已经设置,那么先通过health_adjust向健康检查机制报告一次L4错误。如果该SERVER已经把当前SESSION计入其正在处理的SESSION统计(SN_CURR_SESS),那么由于连接已经失败,需要将该计数减一。

[src/session.c]process_session()->sess_update_st_cer()
	/* ensure that we have enough retries left */
	s->conn_retries--;
	if (s->conn_retries < 0) {
		if (!si->err_type) {
			si->err_type = SI_ET_CONN_ERR;
			si->err_loc = s->srv;
		}

		if (s->srv)
			s->srv->counters.failed_conns++;
		s->be->counters.failed_conns++;
		sess_change_server(s, NULL);
		if (may_dequeue_tasks(s->srv, s->be))
			process_srv_queue(s->srv);

		/* shutw is enough so stop a connecting socket */
		si->shutw(si);
		si->ob->flags |= BF_WRITE_ERROR;
		si->ib->flags |= BF_READ_ERROR;

		si->state = SI_ST_CLO;
		if (s->srv_error)
			s->srv_error(s, si);
		return 0;
	}

如果允许的重连次数已经用完(conn_retries减为负数),那么设置错误类型并修正统计量。调用sess_change_server把本SESSION占据的连接槽(slot)释放出来,然后视条件调度其他等待中的SESSION来使用这个槽。接着关闭stream interface的写端,给缓冲区打上读写错误标志,stream interface的状态在返回前被设置成SI_ST_CLO,表示已经关闭。

[src/session.c]process_session()->sess_change_server()
void sess_change_server(struct session *sess, struct server *newsrv)
{
	if (sess->srv_conn == newsrv)
		return;

	if (sess->srv_conn) {
		sess->srv_conn->served--;
		if (sess->srv_conn->proxy->lbprm.server_drop_conn)
			sess->srv_conn->proxy->lbprm.server_drop_conn(sess->srv_conn);
		sess->srv_conn = NULL;
	}

	if (newsrv) {
		newsrv->served++;
		if (newsrv->proxy->lbprm.server_take_conn)
			newsrv->proxy->lbprm.server_take_conn(newsrv);
		sess->srv_conn = newsrv;
	}
}

此函数很简单:如果当前关联的SERVER与新SERVER相同,那么直接返回。否则,若当前已经与某个SERVER关联,则把老SERVER的served计数减一并通知其负载均衡回调;如果指定了新SERVER,那么增加新SERVER的served计数并与之关联。

[include/proto/queue.h]process_session()->sess_update_st_cer()->may_dequeue_tasks()
static inline int may_dequeue_tasks(const struct server *s, const struct proxy *p) {
	return (s && (s->nbpend || (p->nbpend && srv_is_usable(s->state, s->eweight))) &&
		(!s->maxconn || s->cur_sess < srv_dynamic_maxconn(s)));
}

首先判断SERVER是否存在;接着查看SERVER自己的等待队列中是否有请求,或者PROXY的等待队列中有请求且当前SERVER状态可用;最后检查当前SERVER的并发连接数是否还没有达到上限(没有配置maxconn,或者当前会话数小于srv_dynamic_maxconn计算出的动态上限)。也就是说,本函数判断的是:本SERVER或对应PROXY中有等待连接的请求,并且当前SERVER还允许新连接。如果返回真,那么接下来就需要调度一个等待中的请求给本SERVER处理。
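
其中srv_dynamic_maxconn的代码这里没有列出。下面是一段按照minconn/maxconn/fullconn配置语义假设写出的示意代码(demo_前缀的名字是假设的,并非HAProxy的真实实现),用来说明"动态最大连接数"的大致含义:后端当前连接数越接近fullconn,允许发往该SERVER的并发连接数越接近maxconn,否则向minconn收缩。

#include <stdio.h>

/* 示意:按 minconn/maxconn/fullconn 的语义(假设)计算某 SERVER 的动态连接上限 */
static unsigned int demo_dynamic_maxconn(unsigned int minconn, unsigned int maxconn,
                                         unsigned int beconn, unsigned int fullconn)
{
	unsigned int max;

	if (!minconn || !fullconn)
		return maxconn;                 /* 未配置动态限制,直接使用 maxconn */

	max = (unsigned int)((unsigned long long)maxconn * beconn / fullconn);
	if (max < minconn)                      /* 随后端负载线性放大,并夹在区间内 */
		max = minconn;
	if (max > maxconn)
		max = maxconn;
	return max;
}

int main(void)
{
	/* 假设 minconn=10, maxconn=100, fullconn=1000 */
	printf("%u\n", demo_dynamic_maxconn(10, 100, 100, 1000));  /* 低负载 -> 10 */
	printf("%u\n", demo_dynamic_maxconn(10, 100, 800, 1000));  /* 高负载 -> 80 */
	return 0;
}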

[src/queue.c]process_session()->sess_update_st_cer()->process_srv_queue()
void process_srv_queue(struct server *s)
{
	struct proxy  *p = s->proxy;
	int maxconn;

	/* First, check if we can handle some connections queued at the proxy. We
	 * will take as many as we can handle.
	 */

	maxconn = srv_dynamic_maxconn(s);
	while (s->served < maxconn) {
		struct session *sess = pendconn_get_next_sess(s, p);
		if (sess == NULL)
			break;
		task_wakeup(sess->task, TASK_WOKEN_RES);
	}
}

本函数也很简单:在SERVER的served数量没有超过动态maxconn的前提下,不断从等待队列中取出等待连接的SESSION,并唤醒其相应的TASK。

[src/queue.c]process_session()->sess_update_st_cer()->process_srv_queue()->pendconn_get_next_sess()
struct session *pendconn_get_next_sess(struct server *srv, struct proxy *px)
{
	struct pendconn *ps, *pp;
	struct session *sess;
	struct server *rsrv;

	rsrv = srv->tracked;
	if (!rsrv)
		rsrv = srv;

	ps = pendconn_from_srv(srv);
	pp = pendconn_from_px(px);
	/* we want to get the definitive pendconn in <ps> */
	if (!pp || !srv_is_usable(rsrv->state, rsrv->eweight)) {
		if (!ps)
			return NULL;
	} else {
		/* pendconn exists in the proxy queue */
		if (!ps || tv_islt(&pp->sess->logs.tv_request, &ps->sess->logs.tv_request)) {
			ps = pp;
			ps->sess->srv = srv;
		}
	}
	sess = ps->sess;
	pendconn_free(ps);

	/* we want to note that the session has now been assigned a server */
	sess->flags |= SN_ASSIGNED;
	sess->srv = srv;
	sess->srv_conn = srv;
	srv->served++;
	if (px->lbprm.server_take_conn)
		px->lbprm.server_take_conn(srv);

	return sess;
}

如果PROXY与SERVER的队列中均有等待连接的SESSION,那么根据请求到来时间的先后决定使用PROXY队列还是SERVER队列中找到的SESSION。找到相应SESSION后,释放掉描述这次排队的pendconn结构。然后设置好SESSION的SERVER,更新SERVER正在服务的SESSION数量,并给SESSION加上SN_ASSIGNED标志,表示对应的SERVER已经分配好了。

[src/session.c]process_session()->sess_update_st_cer()
	/* If the "redispatch" option is set on the backend, we are allowed to
	 * retry on another server for the last retry. In order to achieve this,
	 * we must mark the session unassigned, and eventually clear the DIRECT
	 * bit to ignore any persistence cookie. We won't count a retry nor a
	 * redispatch yet, because this will depend on what server is selected.
	 */
	if (s->srv && s->conn_retries == 0 &&
	    s->be->options & PR_O_REDISP && !(s->flags & SN_FORCE_PRST)) {
		sess_change_server(s, NULL);
		if (may_dequeue_tasks(s->srv, s->be))
			process_srv_queue(s->srv);

		s->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
		s->prev_srv = s->srv;
		si->state = SI_ST_REQ;
	} else {
		if (s->srv)
			s->srv->counters.retries++;
		s->be->counters.retries++;
		si->state = SI_ST_ASS;
	}

如果这是最后一次重连机会,并且后端配置了允许重新分发(redispatch),当前SESSION又没有强制保持(SN_FORCE_PRST),那么将SESSION的SN_DIRECT、SN_ASSIGNED、SN_ADDR_SET标志去掉,并把stream interface的状态设置为SI_ST_REQ,以便重新选择SERVER。在此之前需要先释放当前SERVER的连接槽(因为之前处于ASSIGNED状态,占据着一个slot),并对当前SERVER的等待队列做一次调度。

否则,只是简单地修正重试统计量,并将stream interface的状态设置为SI_ST_ASS,表示SERVER仍然是已分配好的,可以直接再次connect。

[src/session.c]process_session()->sess_update_st_cer()
	if (si->flags & SI_FL_ERR) {
		/* The error was an asynchronous connection error, and we will
		 * likely have to retry connecting to the same server, most
		 * likely leading to the same result. To avoid this, we wait
		 * one second before retrying.
		 */

		if (!si->err_type)
			si->err_type = SI_ET_CONN_ERR;

		si->state = SI_ST_TAR;
		si->exp = tick_add(now_ms, MS_TO_TICKS(1000));
		return 0;
	}
	return 0;
}

如果stream interface上有错误标志,若错误类型还没有设定,那么将其设置为连接错误。将stream interface的状态设置为SI_ST_TAR,表示等一小段时间(这里是1秒)再重连:因为这个错误是异步的连接错误,马上对同一SERVER重连很可能得到同样的结果。

回到之前sess_update_st_con_tcp对连接结果的检查:如果连接正确建立,也就是stream interface的状态从SI_ST_CON转成了SI_ST_EST,那么调用sess_establish进行处理。

[src/session.c]process_session()->sess_establish()
/*
 * This function handles the transition between the SI_ST_CON state and the
 * SI_ST_EST state. It must only be called after switching from SI_ST_CON to
 * SI_ST_EST.
 */
void sess_establish(struct session *s, struct stream_interface *si)
{
	struct buffer *req = si->ob;
	struct buffer *rep = si->ib;

	if (s->srv)
		health_adjust(s->srv, HANA_STATUS_L4_OK);

	if (s->be->mode == PR_MODE_TCP) { /* let's allow immediate data connection in this case */
		/* if the user wants to log as soon as possible, without counting
		 * bytes from the server, then this is the right moment. */
		if (s->fe->to_log && !(s->logs.logwait & LW_BYTES)) {
			s->logs.t_close = s->logs.t_connect; /* to get a valid end date */
			s->do_log(s);
		}
	}
	else {
		s->txn.rsp.msg_state = HTTP_MSG_RPBEFORE;
		/* reset hdr_idx which was already initialized by the request.
		 * right now, the http parser does it.
		 * hdr_idx_init(&s->txn.hdr_idx);
		 */
	}

	rep->analysers |= s->fe->fe_rsp_ana | s->be->be_rsp_ana;
	rep->flags |= BF_READ_ATTACHED; /* producer is now attached */
	req->wex = TICK_ETERNITY;
}

本函数比较重要的内容是倒数第2、3行(其他部分我还不是很清楚,健康状态的调整暂时不管,日志记录也没什么好说的)。这里给响应缓冲区的分析器加上了前端PROXY和后端PROXY的响应分析器,然后给响应缓冲区设置BF_READ_ATTACHED标志,指明生产者(读端)已经附接上了。对于Haproxy,所有READ相关的标志说的都是输入方向,WRITE相关的都是输出方向。

对后端stream interface处于SI_ST_CON状态时的处理总结如下。如果connect出错或者超时,那么返回0,stream interface的状态被设置为SI_ST_CER;如果响应写端已关闭,或者请求写端准备关闭并且要么没有待发数据、要么后端配置了在关闭时放弃请求,那么关闭写端并把错误类型设置为SI_ET_CONN_ABRT,返回1;最后,对于连接成功的情形,将状态设置为SI_ST_EST,返回1。对于SI_ST_CER错误,会进行重连尝试;最后一次重连时根据配置有可能允许重新分发,也就是重新选择SERVER来连接。在重新分发以及重连次数用尽时,都可能触发对该SERVER等待队列中SESSION的一次调度。在此过程中如果发现stream interface产生的是异步连接错误,那么当前stream interface会等待一小段时间之后才重连;等待是通过把TASK放入等待队列实现的,并不会阻塞其他任务的处理。

最后对于链接成功的处理是更新响应缓冲区的分析器,并将响应缓冲区的输入已附接标志设置上,表示响应缓冲区的生产者已经存在了。

上面所说的对SERVER相关等待队列进行调度的流程是:在当前SERVER允许的连接数之内,从PROXY或SERVER的等待队列中找出请求时间最早的SESSION,把它的SERVER设置为当前SERVER并加上SN_ASSIGNED标志,表示它已经分配好SERVER、可以进行connect操作,然后唤醒其TASK。

[src/session.c]process_session()
resync_stream_interface:
	/* Check for connection closure */

	DPRINTF(stderr,
		"[%u] %s:%d: task=%p s=%p, sfl=0x%08x, rq=%p, rp=%p, exp(r,w)=%u,%u rqf=%08x rpf=%08x rql=%d rpl=%d cs=%d ss=%d, cet=0x%x set=0x%x retr=%d\n",
		now_ms, __FUNCTION__, __LINE__,
		t,
		s, s->flags,
		s->req, s->rep,
		s->req->rex, s->rep->wex,
		s->req->flags, s->rep->flags,
		s->req->l, s->rep->l, s->rep->cons->state, s->req->cons->state,
		s->rep->cons->err_type, s->req->cons->err_type,
		s->conn_retries);

	/* nothing special to be done on client side */
	if (unlikely(s->req->prod->state == SI_ST_DIS))
		s->req->prod->state = SI_ST_CLO;

	/* When a server-side connection is released, we have to count it and
	 * check for pending connections on this server.
	 */
	if (unlikely(s->req->cons->state == SI_ST_DIS)) {
		s->req->cons->state = SI_ST_CLO;
		if (s->srv) {
			if (s->flags & SN_CURR_SESS) {
				s->flags &= ~SN_CURR_SESS;
				s->srv->cur_sess--;
			}
			sess_change_server(s, NULL);
			if (may_dequeue_tasks(s->srv, s->be))
				process_srv_queue(s->srv);
		}
	}

如果请求缓冲区的生产者,也就是SESSION的stream interface[0]已经处于SI_ST_DIS状态,那么将其状态设置为SI_ST_CLO,这是对应于客户端的。

如果请求缓冲区的消费者,也就是SESSION的stream interface[1]已经是SI_ST_DIS状态,那么将其状态设置为SI_ST_CLO;若当前SESSION已经统计入SERVER的SESSION数量中,那么需要将其减去,然后释放掉当前SESSION占用的连接槽,并对当前SERVER相关的等待队列进行一次调度。

[src/session.c]process_session()
	/*
	 * Note: of the transient states (REQ, CER, DIS), only REQ may remain
	 * at this point.
	 */

 resync_request:
	/* Analyse request */
	if ((s->req->flags & BF_MASK_ANALYSER) ||
	    (s->req->flags ^ rqf_last) & BF_MASK_STATIC) {
		unsigned int flags = s->req->flags;

		if (s->req->prod->state >= SI_ST_EST) {
			int max_loops = global.tune.maxpollevents;
			unsigned int ana_list;
			unsigned int ana_back;

			/* it's up to the analysers to stop new connections,
			 * disable reading or closing. Note: if an analyser
			 * disables any of these bits, it is responsible for
			 * enabling them again when it disables itself, so
			 * that other analysers are called in similar conditions.
			 */
			buffer_auto_read(s->req);
			buffer_auto_connect(s->req);
			buffer_auto_close(s->req);

			/* We will call all analysers for which a bit is set in
			 * s->req->analysers, following the bit order from LSB
			 * to MSB. The analysers must remove themselves from
			 * the list when not needed. Any analyser may return 0
			 * to break out of the loop, either because of missing
			 * data to take a decision, or because it decides to
			 * kill the session. We loop at least once through each
			 * analyser, and we may loop again if other analysers
			 * are added in the middle.
			 *
			 * We build a list of analysers to run. We evaluate all
			 * of these analysers in the order of the lower bit to
			 * the higher bit. This ordering is very important.
			 * An analyser will often add/remove other analysers,
			 * including itself. Any changes to itself have no effect
			 * on the loop. If it removes any other analysers, we
			 * want those analysers not to be called anymore during
			 * this loop. If it adds an analyser that is located
			 * after itself, we want it to be scheduled for being
			 * processed during the loop. If it adds an analyser
			 * which is located before it, we want it to switch to
			 * it immediately, even if it has already been called
			 * once but removed since.
			 *
			 * In order to achieve this, we compare the analyser
			 * list after the call with a copy of it before the
			 * call. The work list is fed with analyser bits that
			 * appeared during the call. Then we compare previous
			 * work list with the new one, and check the bits that
			 * appeared. If the lowest of these bits is lower than
			 * the current bit, it means we have enabled a previous
			 * analyser and must immediately loop again.
			 */

			ana_list = ana_back = s->req->analysers;
			while (ana_list && max_loops--) {
				/* Warning! ensure that analysers are always placed in ascending order! */

				if (ana_list & AN_REQ_INSPECT) {
					if (!tcp_inspect_request(s, s->req, AN_REQ_INSPECT))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_INSPECT);
				}

				if (ana_list & AN_REQ_WAIT_HTTP) {
					if (!http_wait_for_request(s, s->req, AN_REQ_WAIT_HTTP))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_WAIT_HTTP);
				}

				if (ana_list & AN_REQ_HTTP_PROCESS_FE) {
					if (!http_process_req_common(s, s->req, AN_REQ_HTTP_PROCESS_FE, s->fe))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_HTTP_PROCESS_FE);
				}

				if (ana_list & AN_REQ_SWITCHING_RULES) {
					if (!process_switching_rules(s, s->req, AN_REQ_SWITCHING_RULES))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_SWITCHING_RULES);
				}

				if (ana_list & AN_REQ_HTTP_PROCESS_BE) {
					if (!http_process_req_common(s, s->req, AN_REQ_HTTP_PROCESS_BE, s->be))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_HTTP_PROCESS_BE);
				}

				if (ana_list & AN_REQ_HTTP_TARPIT) {
					if (!http_process_tarpit(s, s->req, AN_REQ_HTTP_TARPIT))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_HTTP_TARPIT);
				}

				if (ana_list & AN_REQ_HTTP_INNER) {
					if (!http_process_request(s, s->req, AN_REQ_HTTP_INNER))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_HTTP_INNER);
				}

				if (ana_list & AN_REQ_HTTP_BODY) {
					if (!http_process_request_body(s, s->req, AN_REQ_HTTP_BODY))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_HTTP_BODY);
				}

				if (ana_list & AN_REQ_PRST_RDP_COOKIE) {
					if (!tcp_persist_rdp_cookie(s, s->req, AN_REQ_PRST_RDP_COOKIE))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_PRST_RDP_COOKIE);
				}

				if (ana_list & AN_REQ_STICKING_RULES) {
					if (!process_sticking_rules(s, s->req, AN_REQ_STICKING_RULES))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_STICKING_RULES);
				}

				if (ana_list & AN_REQ_HTTP_XFER_BODY) {
					if (!http_request_forward_body(s, s->req, AN_REQ_HTTP_XFER_BODY))
						break;
					UPDATE_ANALYSERS(s->req->analysers, ana_list, ana_back, AN_REQ_HTTP_XFER_BODY);
				}
				break;
			}
		}

		if ((s->req->flags ^ flags) & BF_MASK_STATIC) {
			rqf_last = s->req->flags;
			goto resync_request;
		}
	}

	/* we'll monitor the request analysers while parsing the response,
	 * because some response analysers may indirectly enable new request
	 * analysers (eg: HTTP keep-alive).
	 */
	req_ana_back = s->req->analysers;

 resync_response:
	/* Analyse response */

	if (unlikely(s->rep->flags & BF_HIJACK)) {
		/* In inject mode, we wake up everytime something has
		 * happened on the write side of the buffer.
		 */
		unsigned int flags = s->rep->flags;

		if ((s->rep->flags & (BF_WRITE_PARTIAL|BF_WRITE_ERROR|BF_SHUTW)) &&
		    !(s->rep->flags & BF_FULL)) {
			s->rep->hijacker(s, s->rep);
		}

		if ((s->rep->flags ^ flags) & BF_MASK_STATIC) {
			rpf_last = s->rep->flags;
			goto resync_response;
		}
	}
	else if ((s->rep->flags & BF_MASK_ANALYSER) ||
		 (s->rep->flags ^ rpf_last) & BF_MASK_STATIC) {
		unsigned int flags = s->rep->flags;

		if (s->rep->prod->state >= SI_ST_EST) {
			int max_loops = global.tune.maxpollevents;
			unsigned int ana_list;
			unsigned int ana_back;

			/* it's up to the analysers to stop disable reading or
			 * closing. Note: if an analyser disables any of these
			 * bits, it is responsible for enabling them again when
			 * it disables itself, so that other analysers are called
			 * in similar conditions.
			 */
			buffer_auto_read(s->rep);
			buffer_auto_close(s->rep);

			/* We will call all analysers for which a bit is set in
			 * s->rep->analysers, following the bit order from LSB
			 * to MSB. The analysers must remove themselves from
			 * the list when not needed. Any analyser may return 0
			 * to break out of the loop, either because of missing
			 * data to take a decision, or because it decides to
			 * kill the session. We loop at least once through each
			 * analyser, and we may loop again if other analysers
			 * are added in the middle.
			 */

			ana_list = ana_back = s->rep->analysers;
			while (ana_list && max_loops--) {
				/* Warning! ensure that analysers are always placed in ascending order! */

				if (ana_list & AN_RES_WAIT_HTTP) {
					if (!http_wait_for_response(s, s->rep, AN_RES_WAIT_HTTP))
						break;
					UPDATE_ANALYSERS(s->rep->analysers, ana_list, ana_back, AN_RES_WAIT_HTTP);
				}

				if (ana_list & AN_RES_STORE_RULES) {
					if (!process_store_rules(s, s->rep, AN_RES_STORE_RULES))
						break;
					UPDATE_ANALYSERS(s->rep->analysers, ana_list, ana_back, AN_RES_STORE_RULES);
				}

				if (ana_list & AN_RES_HTTP_PROCESS_BE) {
					if (!http_process_res_common(s, s->rep, AN_RES_HTTP_PROCESS_BE, s->be))
						break;
					UPDATE_ANALYSERS(s->rep->analysers, ana_list, ana_back, AN_RES_HTTP_PROCESS_BE);
				}

				if (ana_list & AN_RES_HTTP_XFER_BODY) {
					if (!http_response_forward_body(s, s->rep, AN_RES_HTTP_XFER_BODY))
						break;
					UPDATE_ANALYSERS(s->rep->analysers, ana_list, ana_back, AN_RES_HTTP_XFER_BODY);
				}
				break;
			}
		}

		if ((s->rep->flags ^ flags) & BF_MASK_STATIC) {
			rpf_last = s->rep->flags;
			goto resync_response;
		}
	}

以上对请求和响应缓冲区的分析处理,都是在对应生产者已处于SI_ST_EST(或之后)状态时才进行的。具体的各个分析函数现在先不看,它们不影响我们对整体流程的理解,在介绍完整体流程之后有可能再对其进行分析。提前说一句,概述中提到的使用Cookie保持http session的请求处理就是在这一部分完成的:请求到来时,根据Cookie找到对应的SERVER;而在响应中设置Cookie则是在后面的流程中处理的,因为只有第一次返回响应时才需要设置Cookie,所以与connect之后的处理相关。

分析器的处理规则是:根据设置的标志位调用相应的处理函数,调用顺序从LSB到MSB;当当前处理函数启用了一个位置比自己更低的标志位时,本轮处理完之后必须回到最低位重新开始扫描。这有点类似于Unix下对文件描述符的分配,总是从最小的开始。
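
UPDATE_ANALYSERS宏本身这里没有展开。下面用一段精简的示意代码(假设,不与真实宏逐字对应)演示这条规则:按位从低到高依次调用分析器,分析器处理完后把自己摘掉;一旦有分析器重新启用了位置更低的标志位,就回到最低位重新扫描。

#include <stdio.h>

#define AN_A 0x01
#define AN_B 0x02
#define AN_C 0x04

int main(void)
{
	unsigned int analysers = AN_B | AN_C;   /* 初始登记了 B、C 两个分析器 */
	unsigned int bit;
	int guard = 16;                         /* 防止死循环,相当于 max_loops */

restart:
	for (bit = 1; bit && guard-- > 0; bit <<= 1) {
		if (!(analysers & bit))
			continue;

		printf("run analyser 0x%02x\n", bit);
		analysers &= ~bit;              /* 分析器处理完后把自己摘掉 */

		if (bit == AN_C) {
			/* 假设 C 在运行中又启用了位置更低的 A:
			 * 必须回到最低位重新扫描,保证 A 尽快被调用 */
			analysers |= AN_A;
			goto restart;
		}
	}
	return 0;                               /* 输出顺序:B、C、A */
}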

分析器函数返回0时将会跳出分析处理循环,原因有两种:一是数据不足,无法做出处理决定;二是本SESSION被终止。

分析器如果关闭了自动读、自动关闭之类的机制(BF_AUTO_READ等相关标志),那么在把自己从分析器列表中摘除时,必须负责把这些标志恢复回来,以便其他分析器能在同样的条件下被调用。

[src/session.c]process_session()
	/* maybe someone has added some request analysers, so we must check and loop */
	if (s->req->analysers & ~req_ana_back)
		goto resync_request;

如果在处理响应的过程中有分析器给请求缓冲区新增了分析器(例如HTTP keep-alive),那么需要跳回去对请求再做一轮分析处理。

[src/session.c]process_session()
	/* FIXME: here we should call protocol handlers which rely on
	 * both buffers.
	 */

	/*
	 * Now we propagate unhandled errors to the session. Normally
	 * we're just in a data phase here since it means we have not
	 * seen any analyser who could set an error status.
	 */
	if (!(s->flags & SN_ERR_MASK)) {
		if (s->req->flags & (BF_READ_ERROR|BF_READ_TIMEOUT|BF_WRITE_ERROR|BF_WRITE_TIMEOUT)) {
			/* Report it if the client got an error or a read timeout expired */
			s->req->analysers = 0;
			if (s->req->flags & BF_READ_ERROR) {
				s->be->counters.cli_aborts++;
				if (s->srv)
					s->srv->counters.cli_aborts++;
				s->flags |= SN_ERR_CLICL;
			}
			else if (s->req->flags & BF_READ_TIMEOUT) {
				s->be->counters.cli_aborts++;
				if (s->srv)
					s->srv->counters.cli_aborts++;
				s->flags |= SN_ERR_CLITO;
			}
			else if (s->req->flags & BF_WRITE_ERROR) {
				s->be->counters.srv_aborts++;
				if (s->srv)
					s->srv->counters.srv_aborts++;
				s->flags |= SN_ERR_SRVCL;
			}
			else {
				s->be->counters.srv_aborts++;
				if (s->srv)
					s->srv->counters.srv_aborts++;
				s->flags |= SN_ERR_SRVTO;
			}
			sess_set_term_flags(s);
		}
		else if (s->rep->flags & (BF_READ_ERROR|BF_READ_TIMEOUT|BF_WRITE_ERROR|BF_WRITE_TIMEOUT)) {
			/* Report it if the server got an error or a read timeout expired */
			s->rep->analysers = 0;
			if (s->rep->flags & BF_READ_ERROR) {
				s->be->counters.srv_aborts++;
				if (s->srv)
					s->srv->counters.srv_aborts++;
				s->flags |= SN_ERR_SRVCL;
			}
			else if (s->rep->flags & BF_READ_TIMEOUT) {
				s->be->counters.srv_aborts++;
				if (s->srv)
					s->srv->counters.srv_aborts++;
				s->flags |= SN_ERR_SRVTO;
			}
			else if (s->rep->flags & BF_WRITE_ERROR) {
				s->be->counters.cli_aborts++;
				if (s->srv)
					s->srv->counters.cli_aborts++;
				s->flags |= SN_ERR_CLICL;
			}
			else {
				s->be->counters.cli_aborts++;
				if (s->srv)
					s->srv->counters.cli_aborts++;
				s->flags |= SN_ERR_CLITO;
			}
			sess_set_term_flags(s);
		}
	}

如果当前SESSION还没有设置错误标志,那么检查请求和响应缓冲区是否出现了读写错误或者超时。如果有,那么清空对应缓冲区的分析器,修正统计信息,把对应的错误类型记入SESSION的标志位,最后调用sess_set_term_flags根据stream interface的状态设置SESSION终止时所处的阶段。

[src/session.c]process_session()->sess_set_term_flags()
void sess_set_term_flags(struct session *s)
{
	if (!(s->flags & SN_FINST_MASK)) {
		if (s->si[1].state < SI_ST_REQ) {

			s->fe->counters.failed_req++;
			if (s->listener->counters)
				s->listener->counters->failed_req++;

			s->flags |= SN_FINST_R;
		}
		else if (s->si[1].state == SI_ST_QUE)
			s->flags |= SN_FINST_Q;
		else if (s->si[1].state < SI_ST_EST)
			s->flags |= SN_FINST_C;
		else if (s->si[1].state == SI_ST_EST || s->si[1].prev_state == SI_ST_EST)
			s->flags |= SN_FINST_D;
		else
			s->flags |= SN_FINST_L;
	}
}

如果SESSION终止时所处的阶段已经设置,那么直接返回。否则:如果stream interface[1]的状态还没到SI_ST_REQ,也就是还没到有连接需求的阶段,那么增加前端PROXY的失败请求数,如果LISTENER也有统计量则一并更新,并将SESSION终止阶段设置为SN_FINST_R,表示终止在读取客户端请求时;如果stream interface[1]的状态为SI_ST_QUE,那么设置为SN_FINST_Q,表示终止在等待队列中排队时;如果小于SI_ST_EST,那么设置为SN_FINST_C,表示终止在连接建立过程中;如果为SI_ST_EST(或此前曾到达过SI_ST_EST),那么设置为SN_FINST_D,表示终止在数据传输阶段;其他情况设置为SN_FINST_L,表示在向客户端发送最后的数据时终止。

[src/session.c]process_session()
	/*
	 * Here we take care of forwarding unhandled data. This also includes
	 * connection establishments and shutdown requests.
	 */


	/* If noone is interested in analysing data, it's time to forward
	 * everything. We configure the buffer to forward indefinitely.
	 */
	if (!s->req->analysers &&
	    !(s->req->flags & (BF_HIJACK|BF_SHUTW|BF_SHUTW_NOW)) &&
	    (s->req->prod->state >= SI_ST_EST) &&
	    (s->req->to_forward != BUF_INFINITE_FORWARD)) {
		/* This buffer is freewheeling, there's no analyser nor hijacker
		 * attached to it. If any data are left in, we'll permit them to
		 * move.
		 */
		buffer_auto_read(s->req);
		buffer_auto_connect(s->req);
		buffer_auto_close(s->req);
		buffer_flush(s->req);

		/* If the producer is still connected, we'll enable data to flow
		 * from the producer to the consumer (which might possibly not be
		 * connected yet).
		 */
		if (!(s->req->flags & (BF_SHUTR|BF_SHUTW|BF_SHUTW_NOW)))
			buffer_forward(s->req, BUF_INFINITE_FORWARD);
	}

如果已经没有人对请求缓冲区的数据分析感兴趣(analysers为空),缓冲区没有被劫持、写端没有关闭也没有准备关闭,请求缓冲区生产者的状态不低于SI_ST_EST,并且to_forward(在send_max之外还可以直接转发而不触发唤醒的数据量)还不是无限,那么就该让数据自由地往前推进了:给请求缓冲区设置自动读、自动连接、自动关闭等标志,并把已有数据冲刷到可发送区。然后,如果请求缓冲区的读写都没有关闭、写端也没有准备关闭,那么调用buffer_forward(s->req, BUF_INFINITE_FORWARD)把to_forward设为无限。
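
下面是一段示意代码(假设,demo_前缀的名字都是假设的),只用来直观说明to_forward计数的含义,并非buffer_forward的真实实现:分析器放行N字节之后,这N字节的数据到达时可以直接计入可发送的数据(send_max)而不必再唤醒分析器;BUF_INFINITE_FORWARD则表示之后的数据全部放行。

#include <stdio.h>

#define DEMO_INFINITE_FORWARD (~0U)

struct demo_buffer {
	unsigned int pending;     /* 已收到但还没放行给输出侧的字节数 */
	unsigned int send_max;    /* 已放行、可以直接发送的字节数 */
	unsigned int to_forward;  /* 后续到达的数据中还可自动放行的字节数 */
};

/* 新收到 bytes 字节:在 to_forward 额度内的部分直接转为可发送 */
static void demo_recv(struct demo_buffer *b, unsigned int bytes)
{
	unsigned int fwd = bytes;

	if (b->to_forward != DEMO_INFINITE_FORWARD) {
		if (fwd > b->to_forward)
			fwd = b->to_forward;
		b->to_forward -= fwd;
	}
	b->send_max += fwd;            /* 放行的部分 */
	b->pending  += bytes - fwd;    /* 剩下的要等分析器处理 */
}

int main(void)
{
	struct demo_buffer b = { 0, 0, 100 };  /* 分析器先放行 100 字节 */

	demo_recv(&b, 60);   /* 60 字节全部放行 */
	demo_recv(&b, 80);   /* 只有 40 字节还在额度内 */
	printf("send_max=%u pending=%u to_forward=%u\n",
	       b.send_max, b.pending, b.to_forward);   /* 100 40 0 */
	return 0;
}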

[src/session.c]process_session()
	/* check if it is wise to enable kernel splicing to forward request data */
	if (!(s->req->flags & (BF_KERN_SPLICING|BF_SHUTR)) &&
	    s->req->to_forward &&
	    (global.tune.options & GTUNE_USE_SPLICE) &&
	    (s->si[0].flags & s->si[1].flags & SI_FL_CAP_SPLICE) &&
	    (pipes_used < global.maxpipes) &&
	    (((s->fe->options2|s->be->options2) & PR_O2_SPLIC_REQ) ||
	     (((s->fe->options2|s->be->options2) & PR_O2_SPLIC_AUT) &&
	      (s->req->flags & BF_STREAMER_FAST)))) {
		s->req->flags |= BF_KERN_SPLICING;
	}

	/* reflect what the L7 analysers have seen last */
	rqf_last = s->req->flags;

如果同时满足以下条件,就在请求缓冲区的FLAGS中加上BF_KERN_SPLICING标志,表示用内核splice来转发数据:请求缓冲区还没有启用SPLICE且读端没有关闭;to_forward不为0(有待转发的数据);全局配置启用了SPLICE;请求缓冲区的生产者与消费者都支持SPLICE;程序还有可用的管道;并且前端或后端PROXY配置了转发请求时使用SPLICE,或者配置了自动SPLICE且该数据流是快速的大块数据流(BF_STREAMER_FAST)。
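
splice转发的具体实现(stream_sock中与splice相关的代码)这里没有列出。下面给出一段示意代码(假设,demo_前缀的名字是假设的),只演示Linux上用匿名管道配合splice()做零拷贝转发的基本形态,省略了HAProxy中对管道复用、EAGAIN等情况的处理:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* 示意:把 src_fd 上至多 len 字节经匿名管道零拷贝地搬到 dst_fd,
 * 返回实际搬运的字节数,-1 表示出错(错误处理被大幅简化) */
ssize_t demo_splice_forward(int src_fd, int dst_fd, size_t len)
{
	int pipefd[2];
	ssize_t in, out = 0;

	if (pipe(pipefd) < 0)
		return -1;

	/* 第一步:socket -> pipe,数据只在内核里移动 */
	in = splice(src_fd, NULL, pipefd[1], NULL, len,
	            SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
	if (in > 0)
		/* 第二步:pipe -> socket */
		out = splice(pipefd[0], NULL, dst_fd, NULL, (size_t)in,
		             SPLICE_F_MOVE | SPLICE_F_NONBLOCK);

	close(pipefd[0]);
	close(pipefd[1]);
	return (in < 0) ? -1 : out;
}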

[src/session.c]process_session()
	/*
	 * Now forward all shutdown requests between both sides of the buffer
	 */

	/* first, let's check if the request buffer needs to shutdown(write), which may
	 * happen either because the input is closed or because we want to force a close
	 * once the server has begun to respond.
	 */
	if (unlikely((s->req->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK|BF_AUTO_CLOSE|BF_SHUTR)) ==
		     (BF_AUTO_CLOSE|BF_SHUTR)))
			buffer_shutw_now(s->req);

	/* shutdown(write) pending */
	if (unlikely((s->req->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_OUT_EMPTY)) == (BF_SHUTW_NOW|BF_OUT_EMPTY)))
		s->req->cons->shutw(s->req->cons);

	/* shutdown(write) done on server side, we must stop the client too */
	if (unlikely((s->req->flags & (BF_SHUTW|BF_SHUTR|BF_SHUTR_NOW)) == BF_SHUTW &&
		     !s->req->analysers))
		buffer_shutr_now(s->req);

	/* shutdown(read) pending */
	if (unlikely((s->req->flags & (BF_SHUTR|BF_SHUTR_NOW)) == BF_SHUTR_NOW))
		s->req->prod->shutr(s->req->prod);

在请求缓冲区的两端之间传递关闭请求:读端已关闭且允许自动关闭时,置上写端关闭请求;写端关闭请求待处理且输出已空时,真正关闭消费者的写;写端已关闭且不再有分析器时,置上读端关闭请求;读端关闭请求待处理时,真正关闭生产者的读。

[src/session.c]process_session()
	/* it's possible that an upper layer has requested a connection setup or abort.
	 * There are 2 situations where we decide to establish a new connection :
	 *  - there are data scheduled for emission in the buffer
	 *  - the BF_AUTO_CONNECT flag is set (active connection)
	 */
	if (s->req->cons->state == SI_ST_INI) {
		if (!(s->req->flags & BF_SHUTW)) {
			if ((s->req->flags & (BF_AUTO_CONNECT|BF_OUT_EMPTY)) != BF_OUT_EMPTY) {
				/* If we have a ->connect method, we need to perform a connection request,
				 * otherwise we immediately switch to the connected state.
				 */
				if (s->req->cons->connect)
					s->req->cons->state = SI_ST_REQ; /* new connection requested */
				else
					s->req->cons->state = SI_ST_EST; /* connection established */
			}
		}
		else {
			s->req->cons->state = SI_ST_CLO; /* shutw+ini = abort */
			buffer_shutw_now(s->req);        /* fix buffer flags upon abort */
			buffer_shutr_now(s->rep);
		}
	}

如果请求缓冲区的消费者还处于初始状态(SI_ST_INI):若请求缓冲区的写端没有关闭,并且要么有数据待输出、要么设置了BF_AUTO_CONNECT(主动连接),那么根据消费者是否设置了connect函数来决定状态:有connect函数则设置为SI_ST_REQ,表示提出了连接需求;没有connect函数则直接把状态改为SI_ST_EST,视为已连接。

若请求缓冲区的输出端已经关闭,则将请求缓冲区的消费者状态设置为SI_ST_CLO,并关闭读写。

[src/session.c]process_session()
	/* we may have a pending connection request, or a connection waiting
	 * for completion.
	 */
	if (s->si[1].state >= SI_ST_REQ && s->si[1].state < SI_ST_CON) {
		do {
			/* nb: step 1 might switch from QUE to ASS, but we first want
			 * to give a chance to step 2 to perform a redirect if needed.
			 */
			if (s->si[1].state != SI_ST_REQ)
				sess_update_stream_int(s, &s->si[1]);
			if (s->si[1].state == SI_ST_REQ)
				sess_prepare_conn_req(s, &s->si[1]);

			if (s->si[1].state == SI_ST_ASS && s->srv &&
			    s->srv->rdr_len && (s->flags & SN_REDIRECTABLE))
				perform_http_redirect(s, &s->si[1]);
		} while (s->si[1].state == SI_ST_ASS);

对于已提出连接需求但还没有真正connect的SESSION,现在要做的当然是与后端建立连接。只有SI_ST_ASS状态才能直接调用connect;其他状态必须先找到一个SERVER并获得连接槽,状态变成SI_ST_ASS之后才能连接。作者在注释中说,第一步(sess_update_stream_int)可能把状态从QUE切换到ASS,但这里希望先给第二步一个机会,使它在需要时能够先产生一次重定向;这样重定向会更快发生,连接也能更快建立。

[src/session.c]process_session()->sess_update_stream_int()
void sess_update_stream_int(struct session *s, struct stream_interface *si)
{
	DPRINTF(stderr,"[%u] %s: sess=%p rq=%p, rp=%p, exp(r,w)=%u,%u rqf=%08x rpf=%08x rql=%d rpl=%d cs=%d ss=%d\n",
		now_ms, __FUNCTION__,
		s,
		s->req, s->rep,
		s->req->rex, s->rep->wex,
		s->req->flags, s->rep->flags,
		s->req->l, s->rep->l, s->rep->cons->state, s->req->cons->state);

	if (si->state == SI_ST_ASS) {
		/* Server assigned to connection request, we have to try to connect now */
		int conn_err;

		conn_err = connect_server(s);
		if (conn_err == SN_ERR_NONE) {
			/* state = SI_ST_CON now */
			if (s->srv)
				srv_inc_sess_ctr(s->srv);
			return;
		}

对于SI_ST_ASS状态,直接调用connect_server发起连接。如果连接没有出错,则更新对应SERVER的会话统计量并返回。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()
int connect_server(struct session *s)
{
	int err;

	if (!(s->flags & SN_ADDR_SET)) {
		err = assign_server_address(s);
		if (err != SRV_STATUS_OK)
			return SN_ERR_INTERNAL;
	}

如果还没有设置服务端IP地址,那么调用assign_server_address进行设置。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_server_address()
int assign_server_address(struct session *s)
{
#ifdef DEBUG_FULL
	fprintf(stderr,"assign_server_address : s=%p\n",s);
#endif

	if ((s->flags & SN_DIRECT) || (s->be->lbprm.algo & BE_LB_KIND)) {
		/* A server is necessarily known for this session */
		if (!(s->flags & SN_ASSIGNED))
			return SRV_STATUS_INTERNAL;

		s->srv_addr = s->srv->addr;

SN_DIRECT表示SESSION直接从http Cookie中找到对应的SERVER。如果是从Cookie找到的SERVER或者是负载均衡得到的SERVER,那么将SESSION的服务端地址设置为SESSION对应的SERVER的地址。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_server_address()
		if (!s->srv_addr.sin_addr.s_addr) {
			/* if the server has no address, we use the same address
			 * the client asked, which is handy for remapping ports
			 * locally on multiple addresses at once.
			 */
			if (!(s->be->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
				get_frt_addr(s);

			s->srv_addr.sin_addr = ((struct sockaddr_in *)&s->frt_addr)->sin_addr;
		}

如果SERVER配置中没有IP地址,那么使用客户端原本连接的那个地址:若后端PROXY不是透明代理模式,且还没有取得过前端连接的本地地址(SN_FRT_ADDR_SET),则先调用get_frt_addr获取客户端最初连接的目的地址,然后把它作为服务端地址,这对在多个地址上做本地端口重映射很方便。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_server_address()
		/* if this server remaps proxied ports, we'll use
		 * the port the client connected to with an offset. */
		if (s->srv->state & SRV_MAPPORTS) {
			if (!(s->be->options & PR_O_TRANSP) && !(s->flags & SN_FRT_ADDR_SET))
				get_frt_addr(s);
			if (s->frt_addr.ss_family == AF_INET) {
				s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
							     ntohs(((struct sockaddr_in *)&s->frt_addr)->sin_port));
			} else {
				s->srv_addr.sin_port = htons(ntohs(s->srv_addr.sin_port) +
							     ntohs(((struct sockaddr_in6 *)&s->frt_addr)->sin6_port));
			}
		}
	}

如果SERVER配置了端口映射(SRV_MAPPORTS),那么目的端口等于SERVER配置的端口偏移加上客户端原本连接的端口。此处可能再调用一次get_frt_addr,是因为之前的get_frt_addr是有条件才执行的。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_server_address()
	else if (*(int *)&s->be->dispatch_addr.sin_addr) {
		/* connect to the defined dispatch addr */
		s->srv_addr = s->be->dispatch_addr;
	}

如果SERVER既不是从Cookie也不是由负载均衡得到的,那么若后端配置了dispatch分发地址,就把服务端地址设置为该分发地址。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_server_address()
	else if (s->be->options & PR_O_TRANSP) {
		/* in transparent mode, use the original dest addr if no dispatch specified */
		if (!(s->flags & SN_FRT_ADDR_SET))
			get_frt_addr(s);

		memcpy(&s->srv_addr, &s->frt_addr, MIN(sizeof(s->srv_addr), sizeof(s->frt_addr)));
		/* when we support IPv6 on the backend, we may add other tests */
		//qfprintf(stderr, "Cannot get original server address.\n");
		//return SRV_STATUS_INTERNAL;

如果以上都不满足,而后端PROXY设置了透明代理模式,那么(必要时)调用get_frt_addr获取客户端原始的目的地址,并把它作为服务端地址。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_server_address()
	}
	else if (s->be->options & PR_O_HTTP_PROXY) {
		/* If HTTP PROXY option is set, then server is already assigned
		 * during incoming client request parsing. */

作者说明,如果后端PROXY设置了HTTP代理选项(PR_O_HTTP_PROXY),那么在解析客户端请求时就已经把服务端地址设置好了。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_server_address()
	}
	else {
		/* no server and no LB algorithm ! */
		return SRV_STATUS_INTERNAL;
	}

	s->flags |= SN_ADDR_SET;
	return SRV_STATUS_OK;
}

最后设置SN_ADDR_SET标志,告知其他部分服务端地址已经设置好了。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()
	if (!s->req->cons->connect)
		return SN_ERR_INTERNAL;

	assign_tproxy_address(s);

如果请求缓冲区的消费者没有设置connect函数,那么返回内部错误。否则调用assign_tproxy_address设置向后端SERVER发起连接时要使用的源地址。

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()->assign_tproxy_address()
static void assign_tproxy_address(struct session *s)
{
#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
	if (s->srv != NULL && s->srv->state & SRV_BIND_SRC) {
		switch (s->srv->state & SRV_TPROXY_MASK) {
		case SRV_TPROXY_ADDR:
			s->from_addr = *(struct sockaddr_in *)&s->srv->tproxy_addr;
			break;
		case SRV_TPROXY_CLI:
		case SRV_TPROXY_CIP:
			/* FIXME: what can we do if the client connects in IPv6 ? */
			s->from_addr = *(struct sockaddr_in *)&s->cli_addr;
			break;
		case SRV_TPROXY_DYN:
			if (s->srv->bind_hdr_occ) {
				/* bind to the IP in a header */
				s->from_addr.sin_port = 0;
				s->from_addr.sin_addr.s_addr = htonl(get_ip_from_hdr2(&s->txn.req,
										s->srv->bind_hdr_name,
										s->srv->bind_hdr_len,
										&s->txn.hdr_idx,
										s->srv->bind_hdr_occ));
			}
			break;
		default:
			memset(&s->from_addr, 0, sizeof(s->from_addr));
		}
	}
	else if (s->be->options & PR_O_BIND_SRC) {
		switch (s->be->options & PR_O_TPXY_MASK) {
		case PR_O_TPXY_ADDR:
			s->from_addr = *(struct sockaddr_in *)&s->be->tproxy_addr;
			break;
		case PR_O_TPXY_CLI:
		case PR_O_TPXY_CIP:
			/* FIXME: what can we do if the client connects in IPv6 ? */
			s->from_addr = *(struct sockaddr_in *)&s->cli_addr;
			break;
		case PR_O_TPXY_DYN:
			if (s->be->bind_hdr_occ) {
				/* bind to the IP in a header */
				s->from_addr.sin_port = 0;
				s->from_addr.sin_addr.s_addr = htonl(get_ip_from_hdr2(&s->txn.req,
										s->be->bind_hdr_name,
										s->be->bind_hdr_len,
										&s->txn.hdr_idx,
										s->be->bind_hdr_occ));
			}
			break;
		default:
			memset(&s->from_addr, 0, sizeof(s->from_addr));
		}
	}
#endif
}

如果SERVER要求绑定指定的源地址,那么根据配置(固定地址、客户端地址或从请求头中取出的地址)设置好from_addr,供向后端connect前bind使用;否则若后端PROXY要求绑定源地址,也按同样方式设置;若都没有要求,则什么都不做。
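
以SRV_TPROXY_CLI/SRV_TPROXY_CIP这类"以客户端地址作为源地址"的场景为例,Linux下真正把非本机地址bind到socket上,通常需要内核的TPROXY支持(IP_TRANSPARENT选项,早期则是cttproxy补丁)。下面是一段示意代码(假设,demo_前缀的名字是假设的),只展示该socket选项的基本用法,并非HAProxy的真实连接代码:

#include <netinet/in.h>
#include <sys/socket.h>

#ifndef IP_TRANSPARENT
#define IP_TRANSPARENT 19          /* Linux:允许绑定非本机地址,需要相应权限 */
#endif

/* 在发起 connect 之前,把"外来"的源地址 src 绑定到 fd 上 */
int demo_bind_foreign_source(int fd, const struct sockaddr_in *src)
{
	int one = 1;

	/* IPPROTO_IP 在 Linux 上等价于 SOL_IP */
	if (setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) < 0)
		return -1;
	return bind(fd, (const struct sockaddr *)src, sizeof(*src));
}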

[src/backend.c]process_session()->sess_update_stream_int()->connect_server()
	err = s->req->cons->connect(s->req->cons, s->be, s->srv,
				    (struct sockaddr *)&s->srv_addr,
				    (struct sockaddr *)&s->from_addr);

	if (err != SN_ERR_NONE)
		return err;

	if (s->srv) {
		s->flags |= SN_CURR_SESS;
		s->srv->cur_sess++;
		if (s->srv->cur_sess > s->srv->counters.cur_sess_max)
			s->srv->counters.cur_sess_max = s->srv->cur_sess;
		if (s->be->lbprm.server_take_conn)
			s->be->lbprm.server_take_conn(s->srv);
	}

	return SN_ERR_NONE;  /* connection is OK */
}

接着调用请求缓冲区消费者的connect函数,从event_accept中可知,这就是tcpv4_connect_server函数。如果出错,那么直接返回错误码;否则把当前SESSION计入SERVER的当前SESSION数量(SN_CURR_SESS),并更新相关统计量。
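
tcpv4_connect_server的代码这里没有列出。下面是一段示意代码(假设,demo_前缀的名字是假设的),用来说明这类函数大致要做的事情:建立非阻塞socket、可选地绑定源地址、发起connect,errno为EINPROGRESS表示连接正在进行,之后依靠可写事件确认连接结果;这并非HAProxy的真实实现。

#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

/* 返回 fd(连接已建立或正在进行),-1 表示立即失败 */
int demo_tcp_connect_nonblock(const struct sockaddr_in *dst,
                              const struct sockaddr_in *src)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;

	fcntl(fd, F_SETFL, O_NONBLOCK);            /* 非阻塞,connect 立即返回 */

	/* 可选:绑定指定的源地址(对应 assign_tproxy_address 设置的 from_addr) */
	if (src && bind(fd, (const struct sockaddr *)src, sizeof(*src)) < 0) {
		close(fd);
		return -1;
	}

	if (connect(fd, (const struct sockaddr *)dst, sizeof(*dst)) < 0 &&
	    errno != EINPROGRESS) {
		close(fd);                             /* 同步错误,立即失败 */
		return -1;
	}

	/* 连接成功或处于 EINPROGRESS:由调用方注册可写事件等待最终结果 */
	return fd;
}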

[src/session.c]process_session()->sess_update_stream_int()
		/* We have received a synchronous error. We might have to
		 * abort, retry immediately or redispatch.
		 */
		if (conn_err == SN_ERR_INTERNAL) {
			if (!si->err_type) {
				si->err_type = SI_ET_CONN_OTHER;
				si->err_loc  = s->srv;
			}

			if (s->srv)
				srv_inc_sess_ctr(s->srv);
			if (s->srv)
				s->srv->counters.failed_conns++;
			s->be->counters.failed_conns++;

			/* release other sessions waiting for this server */
			sess_change_server(s, NULL);
			if (may_dequeue_tasks(s->srv, s->be))
				process_srv_queue(s->srv);

			/* Failed and not retryable. */
			si->shutr(si);
			si->shutw(si);
			si->ob->flags |= BF_WRITE_ERROR;

			s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);

			/* no session was ever accounted for this server */
			si->state = SI_ST_CLO;
			if (s->srv_error)
				s->srv_error(s, si);
			return;
		}

如果连接失败且错误为内部错误(SN_ERR_INTERNAL),那么设置错误类型、更新统计量,释放掉当前连接槽,再对该SERVER的等待队列做一次调度;然后关闭该stream interface的读写,给输出缓冲区打上写错误标志,并把状态设置为SI_ST_CLO。

[src/session.c]process_session()->sess_update_stream_int()
		/* We are facing a retryable error, but we don't want to run a
		 * turn-around now, as the problem is likely a source port
		 * allocation problem, so we want to retry now.
		 */
		si->state = SI_ST_CER;
		si->flags &= ~SI_FL_ERR;
		sess_update_st_cer(s, si);
		/* now si->state is one of SI_ST_CLO, SI_ST_TAR, SI_ST_ASS, SI_ST_REQ */
		return;
	}

如果错误不是内部错误,那么这是一个可以重试的错误(多半是本地源端口分配之类的问题),将状态设置为SI_ST_CER并清掉SI_FL_ERR,随后调用sess_update_st_cer决定是重连、重新分发还是放弃,并做相应调度。

[src/session.c]process_session()->sess_update_stream_int()
	else if (si->state == SI_ST_QUE) {
		/* connection request was queued, check for any update */
		if (!s->pend_pos) {
			/* The connection is not in the queue anymore. Either
			 * we have a server connection slot available and we
			 * go directly to the assigned state, or we need to
			 * load-balance first and go to the INI state.
			 */
			si->exp = TICK_ETERNITY;
			if (unlikely(!(s->flags & SN_ASSIGNED)))
				si->state = SI_ST_REQ;
			else {
				s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
				si->state = SI_ST_ASS;
			}
			return;
		}

如果stream interface的状态为SI_ST_QUE,而对应的SESSION已经不在等待队列中了(pend_pos为空),那么有两种可能:一是已经获得了连接槽(SN_ASSIGNED已置位),只是状态还没改变,这时把状态改为SI_ST_ASS即可,并顺便记录排队时间;另一种是还没有分配SERVER,需要先做负载均衡,那么把状态改为SI_ST_REQ(要求连接状态)。

[src/session.c]process_session()->sess_update_stream_int()
		/* Connection request still in queue... */
		if (si->flags & SI_FL_EXP) {
			/* ... and timeout expired */
			si->exp = TICK_ETERNITY;
			s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
			if (s->srv)
				s->srv->counters.failed_conns++;
			s->be->counters.failed_conns++;
			si->shutr(si);
			si->shutw(si);
			si->ob->flags |= BF_WRITE_TIMEOUT;
			if (!si->err_type)
				si->err_type = SI_ET_QUEUE_TO;
			si->state = SI_ST_CLO;
			if (s->srv_error)
				s->srv_error(s, si);
			return;
		}

运行至此,表示SESSION还在队列中等待连接槽。若此时排队超时已经发生,那么记录在队列中等待的时间,更新失败统计量,关闭该stream interface的读写,将状态设置为SI_ST_CLO,错误类型置为SI_ET_QUEUE_TO,表示在等待连接槽时超时。

[src/session.c]process_session()->sess_update_stream_int()
		/* Connection remains in queue, check if we have to abort it */
		if ((si->ob->flags & (BF_READ_ERROR)) ||
		    ((si->ob->flags & BF_SHUTW_NOW) &&   /* empty and client aborted */
		     (si->ob->flags & BF_OUT_EMPTY || s->be->options & PR_O_ABRT_CLOSE))) {
			/* give up */
			si->exp = TICK_ETERNITY;
			s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
			si->shutr(si);
			si->shutw(si);
			si->err_type |= SI_ET_QUEUE_ABRT;
			si->state = SI_ST_CLO;
			if (s->srv_error)
				s->srv_error(s, si);
			return;
		}

		/* Nothing changed */
		return;
	}

运行至此,表示还在等待连接槽,且超时没有发生。这时如果请求缓冲区出现读错误,或者请求缓冲区的写端准备关闭、并且缓冲区已空或者后端PROXY配置了客户端关闭时放弃请求,那么关闭stream interface[1]的读和写,将状态设置为SI_ST_CLO,错误类型设置为SI_ET_QUEUE_ABRT,表示在等待连接槽时请求被放弃。

最后一种情况是仍然在等待,但超时没有发生、也没有满足被放弃的条件,那么不做任何改变。

[src/session.c]process_session()->sess_update_stream_int()
	else if (si->state == SI_ST_TAR) {
		/* Connection request might be aborted */
		if ((si->ob->flags & (BF_READ_ERROR)) ||
		    ((si->ob->flags & BF_SHUTW_NOW) &&  /* empty and client aborted */
		     (si->ob->flags & BF_OUT_EMPTY || s->be->options & PR_O_ABRT_CLOSE))) {
			/* give up */
			si->exp = TICK_ETERNITY;
			si->shutr(si);
			si->shutw(si);
			si->err_type |= SI_ET_CONN_ABRT;
			si->state = SI_ST_CLO;
			if (s->srv_error)
				s->srv_error(s, si);
			return;
		}

		if (!(si->flags & SI_FL_EXP))
			return;  /* still in turn-around */

		si->exp = TICK_ETERNITY;

		/* we keep trying on the same server as long as the session is
		 * marked "assigned".
		 * FIXME: Should we force a redispatch attempt when the server is down ?
		 */
		if (s->flags & SN_ASSIGNED)
			si->state = SI_ST_ASS;
		else
			si->state = SI_ST_REQ;
		return;
	}
}

最后是SI_ST_TAR状态,即出错后等待一小段时间再重连的情况。

若请求缓冲区出现读错误,或者写端准备关闭并且缓冲区已空或后端PROXY配置了客户端关闭时放弃请求,那么关闭stream interface[1]的读和写,将状态设置为SI_ST_CLO,错误类型设置为SI_ET_CONN_ABRT,表示连接请求在建立过程中被放弃。

否则检查等待时间是否已经到达;如果还没到,直接返回,继续等待。

等待时间已到时,如果SESSION仍处于已分配SERVER的状态(SN_ASSIGNED),则把stream interface的状态设置为SI_ST_ASS;否则设置为SI_ST_REQ,要求重新选择SERVER。

[src/session.c]process_session()->sess_prepare_conn_req()
static void sess_prepare_conn_req(struct session *s, struct stream_interface *si) {
	DPRINTF(stderr,"[%u] %s: sess=%p rq=%p, rp=%p, exp(r,w)=%u,%u rqf=%08x rpf=%08x rql=%d rpl=%d cs=%d ss=%d\n",
		now_ms, __FUNCTION__,
		s,
		s->req, s->rep,
		s->req->rex, s->rep->wex,
		s->req->flags, s->rep->flags,
		s->req->l, s->rep->l, s->rep->cons->state, s->req->cons->state);

	if (si->state != SI_ST_REQ)
		return;

	/* Try to assign a server */
	if (srv_redispatch_connect(s) != 0) {
		/* We did not get a server. Either we queued the
		 * connection request, or we encountered an error.
		 */
		if (si->state == SI_ST_QUE)
			return;

		/* we did not get any server, let's check the cause */
		si->shutr(si);
		si->shutw(si);
		si->ob->flags |= BF_WRITE_ERROR;
		if (!si->err_type)
			si->err_type = SI_ET_CONN_OTHER;
		si->state = SI_ST_CLO;
		if (s->srv_error)
			s->srv_error(s, si);
		return;
	}

	/* The server is assigned */
	s->logs.t_queue = tv_ms_elapsed(&s->logs.tv_accept, &now);
	si->state = SI_ST_ASS;
}

如果状态不是SI_ST_REQ,那么直接返回。

调用srv_redispatch_connect选择合适的SERVER。如果没有成功拿到SERVER:若状态已变为SI_ST_QUE(连接请求已入队),则直接返回;否则说明遇到了错误,关闭stream interface[1]的读和写,给输出缓冲区打上写错误标志,并把状态设置为SI_ST_CLO。

最后,对于成功的获取到链接槽的,将其状态设置为ASS,并记录了其在队列中的延迟。

[src/backend.c]process_session()->sess_prepare_conn_req()->srv_redispatch_connect()
int srv_redispatch_connect(struct session *t)
{
	int conn_err;

	/* We know that we don't have any connection pending, so we will
	 * try to get a new one, and wait in this state if it's queued
	 */
 redispatch:
	conn_err = assign_server_and_queue(t);
	switch (conn_err) {
	case SRV_STATUS_OK:
		break;

SESSION选择一个SERVER并获取链接槽,或者由于SERVER的链接已经满了,因此将SESSION入队。如果正确的指定了SERVER并获取到链接槽,那么任务就完成了。对于assign_server_and_queue()不做说明,其内部调用了assign_server(),后者是根据负载均衡算法调用不同的函数来选择正确的SERVER

[src/backend.c]process_session()->sess_prepare_conn_req()->srv_redispatch_connect()
	case SRV_STATUS_FULL:
		/* The server has reached its maxqueue limit. Either PR_O_REDISP is set
		 * and we can redispatch to another server, or it is not and we return
		 * 503. This only makes sense in DIRECT mode however, because normal LB
		 * algorithms would never select such a server, and hash algorithms
		 * would bring us on the same server again. Note that t->srv is set in
		 * this case.
		 */
		if (((t->flags & (SN_DIRECT|SN_FORCE_PRST)) == SN_DIRECT) &&
		    (t->be->options & PR_O_REDISP)) {
			t->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
			t->prev_srv = t->srv;
			goto redispatch;
		}

		if (!t->req->cons->err_type) {
			t->req->cons->err_type = SI_ET_QUEUE_ERR;
			t->req->cons->err_loc = t->srv;
		}

		t->srv->counters.failed_conns++;
		t->be->counters.failed_conns++;
		return 1;

对于SERVER连接数和等待队列都已满(SRV_STATUS_FULL)的情况:如果SERVER是由Cookie直接指定的(SN_DIRECT)且没有强制保持(SN_FORCE_PRST),而且后端PROXY允许重新分发(redispatch),那么去掉SESSION中与Cookie指定SERVER相关的标志位,转回去重新做一次负载均衡。这种重新分发只针对由Cookie指定SERVER的情况;对于本来就是负载均衡选出的SERVER,普通的LB算法不会选中已满的SERVER,而hash类算法重选还会落到同一个SERVER上,所以直接记为失败。

[src/backend.c]process_session()->sess_prepare_conn_req()->srv_redispatch_connect()
	case SRV_STATUS_NOSRV:
		/* note: it is guaranteed that t->srv == NULL here */
		if (!t->req->cons->err_type) {
			t->req->cons->err_type = SI_ET_CONN_ERR;
			t->req->cons->err_loc = NULL;
		}

		t->be->counters.failed_conns++;
		return 1;

如果后端PROXY发现没有可用的SERVER,那么这个请求也是失败了。

[src/backend.c]process_session()->sess_prepare_conn_req()->srv_redispatch_connect()
	case SRV_STATUS_QUEUED:
		t->req->cons->exp = tick_add_ifset(now_ms, t->be->timeout.queue);
		t->req->cons->state = SI_ST_QUE;
		/* do nothing else and do not wake any other session up */
		return 1;

对于SESSION已经被入队的情况,对其设置一个超时时间,然后将stream interface[1]状态修改为SI_ST_QUE。表示SESSION已经在队列中等待了。那么之后此SESSION可能被处理,可能由于客户端关闭而被抛弃,或者被要求重新调度。

[src/backend.c]process_session()->sess_prepare_conn_req()->srv_redispatch_connect()
	case SRV_STATUS_INTERNAL:
	default:
		if (!t->req->cons->err_type) {
			t->req->cons->err_type = SI_ET_CONN_OTHER;
			t->req->cons->err_loc = t->srv;
		}

		if (t->srv)
			srv_inc_sess_ctr(t->srv);
		if (t->srv)
			t->srv->counters.failed_conns++;
		t->be->counters.failed_conns++;

		/* release other sessions waiting for this server */
		if (may_dequeue_tasks(t->srv, t->be))
			process_srv_queue(t->srv);
		return 1;
	}
	/* if we get here, it's because we got SRV_STATUS_OK, which also
	 * means that the connection has not been queued.
	 */
	return 0;
}

对于内部错误和默认情况,也是属于失败,因此需要更新统计量。由于队列没有满(若队列已经满,那么会落入SRV_STATUS_FULL分支),那么其他等待着的SESSION可能能够成功的获得槽位,因此做一次调度。

[src/session.c]process_session()
		/* Now we can add the server name to a header (if requested) */
		/* check for HTTP mode and proxy server_name_hdr_name != NULL */
		if ((s->flags & SN_BE_ASSIGNED) &&
			(s->be->mode == PR_MODE_HTTP) &&
			(s->be->server_id_hdr_name != NULL)) {

			http_send_name_header(&s->txn,
					      &s->txn.req,
					      s->req,
					      s->be,
					      s->srv->id);
		}
	}

如果已经正确分配了后端(SN_BE_ASSIGNED),并且后端代理的模式是HTTP、配置了server_id_hdr_name,那么调用http_send_name_header把所选SERVER的id写入指定的请求头部,告知后端本次请求被派发到了哪台SERVER;至于根据Cookie选取SERVER,则是在前面对请求缓冲区的分析处理过程中完成的。

在此,对上面连接以及选择SERVER的过程做一下总结。首先,要与后端连接的stream interface的状态必须处于[SI_ST_REQ, SI_ST_CON)区间之内。

Only the SI_ST_ASS state actually calls the connect function. When connect() fails with an internal error, the SESSION is terminated, and when its connection slot is released the server queue is processed once so that other SESSIONs can be handled.

A SESSION in the SI_ST_QUE state that is no longer actually sitting in the queue can go one of two ways: either it has already obtained a connection slot and is about to become SI_ST_ASS, or it needs the SERVER reassigned and goes back to SI_ST_REQ. If the SESSION is still queued and its timeout has expired, it is terminated with the error type "timed out while waiting for a connection slot". If it is still queued and no timeout has fired, but the client has closed or errored out, or the conditions for aborting are met, it is dropped, i.e. the SESSION is terminated with the error type "aborted while waiting".

For a SESSION in the SI_ST_TAR (turn-around) state, if the client has closed or errored out, or the abort conditions are met, the SESSION is terminated and the request is dropped, with the error type "aborted during connection establishment". If the SESSION is healthy and the timeout has not yet fired, it keeps waiting. Once the turn-around delay is over, the stream interface's state is set to SI_ST_ASS or SI_ST_REQ depending on whether the SESSION already holds a connection slot.

A SESSION in the SI_ST_REQ state wants a connection to the backend; it asks for a connection slot to be assigned.

When assigning a connection slot, if the designated SERVER's connection count and wait queue are both full, and the current SESSION's SERVER came from a Cookie with redispatching allowed, the load balancing is run again to pick another SERVER; otherwise the error is recorded and the SESSION is terminated. If the backend PROXY finds that no SERVER is running at all, the SESSION is likewise terminated directly. If the SESSION gets queued, a queue-wait timeout is set and the state is changed to SI_ST_QUE. For any other error the SESSION is terminated, and as a side effect the SESSIONs waiting for a connection slot on the corresponding SERVER get dispatched once.

After the SESSION has obtained a connection slot, if the backend proxy runs in HTTP mode and a server-id header name is configured, the chosen SERVER's name is written into the corresponding request header. (The whole state machine described above is recapped in the sketch below.)
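
As a recap, here is a compressed, purely illustrative view of the transitions just described. The state names mirror the real SI_ST_* values, but the function is not HAProxy code; it merely restates the summary in C form.

#include <stdio.h>

enum si_state { ST_REQ, ST_QUE, ST_TAR, ST_ASS, ST_CON, ST_CLO };

/* Illustrative recap of the backend-connection transitions summarized above. */
static enum si_state step(enum si_state st, int have_slot, int client_gone, int expired)
{
	switch (st) {
	case ST_REQ:                       /* needs a server and a connection slot */
		return have_slot ? ST_ASS : ST_QUE;
	case ST_QUE:                       /* waiting in the server's queue */
		if (client_gone || expired)
			return ST_CLO;         /* aborted or timed out while queued */
		return have_slot ? ST_ASS : ST_QUE;
	case ST_TAR:                       /* turn-around delay after a failure */
		if (client_gone)
			return ST_CLO;
		if (!expired)
			return ST_TAR;         /* keep waiting */
		return have_slot ? ST_ASS : ST_REQ;
	case ST_ASS:                       /* slot assigned: try to connect */
		return ST_CON;
	default:
		return st;
	}
}

int main(void)
{
	enum si_state st = step(ST_REQ, 0, 0, 0);  /* no slot yet -> queued */
	printf("%d\n", st == ST_QUE);
	return 0;
}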

[src/session.c]process_session()
	/* Benchmarks have shown that it's optimal to do a full resync now */
	if (s->req->prod->state == SI_ST_DIS || s->req->cons->state == SI_ST_DIS)
		goto resync_stream_interface;

	/* otherwise we want to check if we need to resync the req buffer or not */
	if ((s->req->flags ^ rqf_last) & BF_MASK_STATIC)
		goto resync_request;

	/* perform output updates to the response buffer */

	/* If noone is interested in analysing data, it's time to forward
	 * everything. We configure the buffer to forward indefinitely.
	 */
	if (!s->rep->analysers &&
	    !(s->rep->flags & (BF_HIJACK|BF_SHUTW|BF_SHUTW_NOW)) &&
	    (s->rep->prod->state >= SI_ST_EST) &&
	    (s->rep->to_forward != BUF_INFINITE_FORWARD)) {
		/* This buffer is freewheeling, there's no analyser nor hijacker
		 * attached to it. If any data are left in, we'll permit them to
		 * move.
		 */
		buffer_auto_read(s->rep);
		buffer_auto_close(s->rep);
		buffer_flush(s->rep);
		if (!(s->rep->flags & (BF_SHUTR|BF_SHUTW|BF_SHUTW_NOW)))
			buffer_forward(s->rep, BUF_INFINITE_FORWARD);
	}

	/* check if it is wise to enable kernel splicing to forward response data */
	if (!(s->rep->flags & (BF_KERN_SPLICING|BF_SHUTR)) &&
	    s->rep->to_forward &&
	    (global.tune.options & GTUNE_USE_SPLICE) &&
	    (s->si[0].flags & s->si[1].flags & SI_FL_CAP_SPLICE) &&
	    (pipes_used < global.maxpipes) &&
	    (((s->fe->options2|s->be->options2) & PR_O2_SPLIC_RTR) ||
	     (((s->fe->options2|s->be->options2) & PR_O2_SPLIC_AUT) &&
	      (s->rep->flags & BF_STREAMER_FAST)))) {
		s->rep->flags |= BF_KERN_SPLICING;
	}

	/* reflect what the L7 analysers have seen last */
	rpf_last = s->rep->flags;

	/*
	 * Now forward all shutdown requests between both sides of the buffer
	 */

	/*
	 * FIXME: this is probably where we should produce error responses.
	 */

	/* first, let's check if the response buffer needs to shutdown(write) */
	if (unlikely((s->rep->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK|BF_AUTO_CLOSE|BF_SHUTR)) ==
		     (BF_AUTO_CLOSE|BF_SHUTR)))
		buffer_shutw_now(s->rep);

	/* shutdown(write) pending */
	if (unlikely((s->rep->flags & (BF_SHUTW|BF_OUT_EMPTY|BF_SHUTW_NOW)) == (BF_OUT_EMPTY|BF_SHUTW_NOW)))
		s->rep->cons->shutw(s->rep->cons);

	/* shutdown(write) done on the client side, we must stop the server too */
	if (unlikely((s->rep->flags & (BF_SHUTW|BF_SHUTR|BF_SHUTR_NOW)) == BF_SHUTW) &&
	    !s->rep->analysers)
		buffer_shutr_now(s->rep);

	/* shutdown(read) pending */
	if (unlikely((s->rep->flags & (BF_SHUTR|BF_SHUTR_NOW)) == BF_SHUTR_NOW))
		s->rep->prod->shutr(s->rep->prod);

	if (s->req->prod->state == SI_ST_DIS || s->req->cons->state == SI_ST_DIS)
		goto resync_stream_interface;

	if (s->req->flags != rqf_last)
		goto resync_request;

	if ((s->rep->flags ^ rpf_last) & BF_MASK_STATIC)
		goto resync_response;

This block mirrors the handling of the request buffer. Its main purpose is to let data flow without triggering the wake-ups that I/O would otherwise cause for the task, and, when the conditions are met, to mark the response buffer for kernel splicing, i.e. to push the data along with the kernel's zero-copy splice() system call for better performance. (A generic sketch of that splice pattern follows.)
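
Kernel splicing moves data between two sockets through a pipe without copying it into user space. The fragment below is a generic, minimal sketch of that pattern on Linux; it is not HAProxy's stream_sock splice code, and error handling is reduced to the bare minimum.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Forward up to <len> bytes from <from_fd> to <to_fd> through an
 * anonymous pipe using splice(2), avoiding a copy to user space.
 */
static ssize_t splice_forward(int from_fd, int to_fd, size_t len)
{
	int p[2];
	ssize_t in, out, total = 0;

	if (pipe(p) < 0)
		return -1;

	while (len > 0) {
		in = splice(from_fd, NULL, p[1], NULL, len,
			    SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
		if (in <= 0)
			break;                 /* EOF, EAGAIN or error */
		out = splice(p[0], NULL, to_fd, NULL, in, SPLICE_F_MOVE);
		if (out <= 0)
			break;
		total += out;
		len -= out;
	}
	close(p[0]);
	close(p[1]);
	return total;
}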

[src/session.c]process_session()
	/* we're interested in getting wakeups again */
	s->req->prod->flags &= ~SI_FL_DONT_WAKE;
	s->req->cons->flags &= ~SI_FL_DONT_WAKE;

From this point on, the TASK associated with this SESSION is again allowed to be woken up when the stream interface functions are called.

[src/session.c]process_session()
	/* This is needed only when debugging is enabled, to indicate
	 * client-side or server-side close. Please note that in the unlikely
	 * event where both sides would close at once, the sequence is reported
	 * on the server side first.
	 */
	if (unlikely((global.mode & MODE_DEBUG) &&
		     (!(global.mode & MODE_QUIET) ||
		      (global.mode & MODE_VERBOSE)))) {
		int len;

		if (s->si[1].state == SI_ST_CLO &&
		    s->si[1].prev_state == SI_ST_EST) {
			len = sprintf(trash, "%08x:%s.srvcls[%04x:%04x]\n",
				      s->uniq_id, s->be->id,
				      (unsigned short)s->si[0].fd,
				      (unsigned short)s->si[1].fd);
			if (write(1, trash, len) < 0) /* shut gcc warning */;
		}

		if (s->si[0].state == SI_ST_CLO &&
		    s->si[0].prev_state == SI_ST_EST) {
			len = sprintf(trash, "%08x:%s.clicls[%04x:%04x]\n",
				      s->uniq_id, s->be->id,
				      (unsigned short)s->si[0].fd,
				      (unsigned short)s->si[1].fd);
			if (write(1, trash, len) < 0) /* shut gcc warning */;
		}
	}

	if (likely((s->rep->cons->state != SI_ST_CLO) ||
		   (s->req->cons->state > SI_ST_INI && s->req->cons->state < SI_ST_CLO))) {

		if ((s->fe->options & PR_O_CONTSTATS) && (s->flags & SN_BE_ASSIGNED))
			session_process_counters(s);

		if (s->rep->cons->state == SI_ST_EST && !s->rep->cons->iohandler)
			s->rep->cons->update(s->rep->cons);

		if (s->req->cons->state == SI_ST_EST && !s->req->cons->iohandler)
			s->req->cons->update(s->req->cons);

		s->req->flags &= ~(BF_READ_NULL|BF_READ_PARTIAL|BF_WRITE_NULL|BF_WRITE_PARTIAL);
		s->rep->flags &= ~(BF_READ_NULL|BF_READ_PARTIAL|BF_WRITE_NULL|BF_WRITE_PARTIAL);
		s->si[0].prev_state = s->si[0].state;
		s->si[1].prev_state = s->si[1].state;
		s->si[0].flags &= ~(SI_FL_ERR|SI_FL_EXP);
		s->si[1].flags &= ~(SI_FL_ERR|SI_FL_EXP);

		/* Trick: if a request is being waiting for the server to respond,
		 * and if we know the server can timeout, we don't want the timeout
		 * to expire on the client side first, but we're still interested
		 * in passing data from the client to the server (eg: POST). Thus,
		 * we can cancel the client's request timeout if the server's
		 * request timeout is set and the server has not yet sent a response.
		 */

		if ((s->rep->flags & (BF_AUTO_CLOSE|BF_SHUTR)) == 0 &&
		    (tick_isset(s->req->wex) || tick_isset(s->rep->rex))) {
			s->req->flags |= BF_READ_NOEXP;
			s->req->rex = TICK_ETERNITY;
		}

		/* Call the second stream interface's I/O handler if it's embedded.
		 * Note that this one may wake the task up again.
		 */
		if (s->req->cons->iohandler) {
			s->req->cons->iohandler(s->req->cons);
			if (task_in_rq(t)) {
				/* If we woke up, we don't want to requeue the
				 * task to the wait queue, but rather requeue
				 * it into the runqueue ASAP.
				 */
				t->expire = TICK_ETERNITY;
				return t;
			}
		}

If the consumer of the request buffer has an iohandler set, it is called here. If the task then finds itself back in the runqueue, its expiration date is set to TICK_ETERNITY (0, i.e. no timeout) and the task is returned immediately. After it returns, the next task to be scheduled may well be this same one, in other words it simply runs another round. Task scheduling is discussed in detail in the next section.

[src/session.c]process_session()
		t->expire = tick_first(tick_first(s->req->rex, s->req->wex),
				       tick_first(s->rep->rex, s->rep->wex));
		if (s->req->analysers)
			t->expire = tick_first(t->expire, s->req->analyse_exp);

		if (s->si[0].exp)
			t->expire = tick_first(t->expire, s->si[0].exp);

		if (s->si[1].exp)
			t->expire = tick_first(t->expire, s->si[1].exp);

The task's next expiration date is set to the earliest of: the read and write timeouts of the request and response buffers, the request buffer's analysis timeout, and the timeouts of stream interface[0] and stream interface[1]. tick_first() is what picks "the earlier of two ticks"; a paraphrase is given below.
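
tick_first() returns whichever of two expiration dates comes first, treating TICK_ETERNITY (0) as "no deadline", which therefore never wins over a set date. Roughly paraphrased from include/common/ticks.h:

#include <stdio.h>

#define TICK_ETERNITY 0

static inline int tick_isset(int t)
{
	return t != 0;
}

/* Return whichever of the two dates comes first; an unset tick
 * (TICK_ETERNITY) never wins over a set one.
 */
static inline int tick_first(int t1, int t2)
{
	if (!tick_isset(t1))
		return t2;
	if (!tick_isset(t2))
		return t1;
	return ((t1 - t2) <= 0) ? t1 : t2;   /* signed diff copes with wrap-around */
}

int main(void)
{
	printf("%d\n", tick_first(1500, 1200));          /* 1200 */
	printf("%d\n", tick_first(TICK_ETERNITY, 1200)); /* 1200 */
	return 0;
}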

[src/session.c]process_session()
#ifdef DEBUG_FULL
		fprintf(stderr,
			"[%u] queuing with exp=%u req->rex=%u req->wex=%u req->ana_exp=%u"
			" rep->rex=%u rep->wex=%u, si[0].exp=%u, si[1].exp=%u, cs=%d, ss=%d\n",
			now_ms, t->expire, s->req->rex, s->req->wex, s->req->analyse_exp,
			s->rep->rex, s->rep->wex, s->si[0].exp, s->si[1].exp, s->si[0].state, s->si[1].state);
#endif

#ifdef DEBUG_DEV
		/* this may only happen when no timeout is set or in case of an FSM bug */
		if (!tick_isset(t->expire))
			ABORT_NOW();
#endif
		return t; /* nothing more to do */
	}

Finally the current task is returned. This is the path taken while the session is still alive, i.e. while the client-side interface (s->rep->cons) is not closed, or the server-side interface (s->req->cons) is in a state strictly between SI_ST_INI and SI_ST_CLO. When neither condition holds any more, execution falls through to the code below.

[src/session.c]process_session()
	s->fe->feconn--;
	if (s->flags & SN_BE_ASSIGNED)
		s->be->beconn--;
	actconn--;
	s->listener->nbconn--;
	if (s->listener->state == LI_FULL &&
	    s->listener->nbconn < s->listener->maxconn) {
		/* we should reactivate the listener */
		EV_FD_SET(s->listener->fd, DIR_RD);
		s->listener->state = LI_READY;
	}

	if (unlikely((global.mode & MODE_DEBUG) &&
		     (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
		int len;
		len = sprintf(trash, "%08x:%s.closed[%04x:%04x]\n",
			      s->uniq_id, s->be->id,
			      (unsigned short)s->req->prod->fd, (unsigned short)s->req->cons->fd);
		if (write(1, trash, len) < 0) /* shut gcc warning */;
	}

	s->logs.t_close = tv_ms_elapsed(&s->logs.tv_accept, &now);
	session_process_counters(s);

	if (s->txn.status) {
		int n;

		n = s->txn.status / 100;
		if (n < 1 || n > 5)
			n = 0;

		if (s->fe->mode == PR_MODE_HTTP)
			s->fe->counters.fe.http.rsp[n]++;

		if ((s->flags & SN_BE_ASSIGNED) &&
		    (s->be->mode == PR_MODE_HTTP))
			s->be->counters.be.http.rsp[n]++;
	}

	/* let's do a final log if we need it */
	if (s->logs.logwait &&
	    !(s->flags & SN_MONITOR) &&
	    (!(s->fe->options & PR_O_NULLNOLOG) || s->req->total)) {
		s->do_log(s);
	}

	/* the task MUST not be in the run queue anymore */
	session_free(s);
	task_delete(t);
	task_free(t);
	return NULL;
}

This last part mainly adjusts the relevant counters. Because this SESSION is being terminated, if the LISTENER was previously in the LI_FULL state and its connection count has now dropped below maxconn, its state can be switched back to LI_READY so that it accepts new connection requests again. Finally, all the resources associated with this SESSION are released and the task itself is deleted and freed.
