android dns之dns服务器可用性机制,成功率低于门限则惩罚抑制(二)

1.3.3 rcode(dns response code)

bionic\libc\include\arpa\nameser_compat.h

/*
 * Short compatibility aliases for the DNS response codes.
 * Each macro expands to the ns_r_* enumerator of the same meaning.
 */
#define NOERROR		ns_r_noerror
#define FORMERR		ns_r_formerr
#define SERVFAIL	ns_r_servfail
#define NXDOMAIN	ns_r_nxdomain
#define NOTIMP		ns_r_notimpl
#define REFUSED		ns_r_refused
#define YXDOMAIN	ns_r_yxdomain
#define YXRRSET		ns_r_yxrrset
#define NXRRSET		ns_r_nxrrset
#define NOTAUTH		ns_r_notauth
#define NOTZONE		ns_r_notzone

bionic\libc\include\arpa\nameser.h

/*
 * Currently defined response codes.
 *
 * Values 0-10 are the basic rcodes; ns_r_max (11) is one past the last of
 * them. The entries that follow ns_r_max live in a separate range starting
 * at 16, and ns_r_badvers and ns_r_badsig deliberately share the value 16.
 */
typedef	enum __ns_rcode {
	ns_r_noerror = 0,	/* No error occurred. */
	ns_r_formerr = 1,	/* Format error. */
	ns_r_servfail = 2,	/* Server failure. */
	ns_r_nxdomain = 3,	/* Name error. */
	ns_r_notimpl = 4,	/* Unimplemented. */
	ns_r_refused = 5,	/* Operation refused. */
	/* these are for BIND_UPDATE */
	ns_r_yxdomain = 6,	/* Name exists */
	ns_r_yxrrset = 7,	/* RRset exists */
	ns_r_nxrrset = 8,	/* RRset does not exist */
	ns_r_notauth = 9,	/* Not authoritative for zone */
	ns_r_notzone = 10,	/* Zone of record different from zone section */
	ns_r_max = 11,
	/* The following are EDNS extended rcodes */
	ns_r_badvers = 16,
	/* The following are TSIG errors */
	ns_r_badsig = 16,	/* Same numeric value as ns_r_badvers. */
	ns_r_badkey = 17,
	ns_r_badtime = 18
} ns_rcode;

1.3.4 dns成功率低于75%,则设置该dns服务器为不可用(都不可用时则取消可用性限制)

1.3.4.1 检查1,样本数大于等于最小样本门限min_samples,且dns失败(error)或超时(timeout)次数大于0
1.3.4.2 检查2,dns server上成功率低于目标门限,默认成功率门限75%,success_threshold
1.3.4.3 检查3,dns sample统计样本集没有过期,默认过期时间半小时,sample_validity
/*
 * Decides whether a DNS server may still be used, based on its recorded samples.
 *
 * The server is reported unusable (false) only when all of these hold:
 *   1. successes + errors + timeouts is at least params->min_samples and at
 *      least one error or timeout was seen (internal errors are logged but
 *      not counted in the total);
 *   2. the integer success percentage is below params->success_threshold;
 *   3. the newest sample is not older than params->sample_validity seconds.
 * If the success rate is too low but the samples are stale, the samples are
 * cleared and the server is reported usable so that it is retried.
 */
bool
_res_stats_usable_server(const struct __res_params* params, struct __res_stats* stats) {
    int ok_count = -1;
    int error_count = -1;
    int timeout_count = -1;
    int internal_count = -1;
    int avg_rtt = -1;
    time_t newest_sample = 0;

    android_net_res_stats_aggregate(stats, &ok_count, &error_count, &timeout_count,
            &internal_count, &avg_rtt, &newest_sample);

    // Without a complete set of counters there is nothing to judge the server by.
    if (ok_count < 0 || error_count < 0 || timeout_count < 0) {
        return true;
    }

    int sample_total = ok_count + error_count + timeout_count;
    if (DBG) {
        async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "NS stats: S %d + E %d + T %d + I %d "
             "= %d, rtt = %d, min_samples = %d\n", ok_count, error_count, timeout_count,
             internal_count, sample_total, avg_rtt, params->min_samples);
    }

    // Check 1: enough samples, and at least one of them is a failure or a timeout.
    if (sample_total < params->min_samples || (error_count <= 0 && timeout_count <= 0)) {
        return true;
    }

    int rate_percent = ok_count * 100 / sample_total;
    if (DBG) {
        async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "success rate %d%%\n",
                              rate_percent);
    }

    // Check 2: is the success rate below the configured threshold?
    if (rate_percent >= params->success_threshold) {
        return true;
    }

    // evNowTime() rather than time() keeps this consistent with the rest of the code base.
    time_t current = evNowTime().tv_sec;

    // Check 3: the samples are stale when the time elapsed since the newest
    // sample exceeds the validity window.
    if (current - newest_sample > params->sample_validity) {
        // Note: expiring old servers once their expiry date is reached might be worthwhile,
        // but returning the ring buffer to its previous non-circular state would add
        // complexity, so the samples are simply dropped here.
        if (DBG) {
            async_safe_format_log(ANDROID_LOG_INFO, "libc",
                "samples stale, retrying server\n");
        }
        _res_stats_clear_samples(stats);
        return true;
    }

    if (DBG) {
        async_safe_format_log(ANDROID_LOG_INFO, "libc",
            "too many resolution errors, ignoring server\n");
    }
    return false;
}
1.3.4.4 last_sample_time,表示某网络下某dns server的最新一次dns记录的时间
/*
 * Aggregates the reachability statistics for the given server based on the stored samples.
 *
 * Outputs:
 *   successes        - samples with rcode NOERROR, NOTAUTH or NXDOMAIN
 *   timeouts         - samples with rcode RCODE_TIMEOUT
 *   internal_errors  - samples with rcode RCODE_INTERNAL_ERROR
 *   errors           - every other sample (SERVFAIL, NOTIMP, REFUSED, unknown codes)
 *   rtt_avg          - mean rtt over the successful samples, or -1 if there were none
 *   last_sample_time - timestamp of the most recently stored sample, or 0 if no samples
 */
void
android_net_res_stats_aggregate(struct __res_stats* stats, int* successes, int* errors,
        int* timeouts, int* internal_errors, int* rtt_avg, time_t* last_sample_time)
{
    int n_success = 0;
    int n_error = 0;
    int n_timeout = 0;
    int n_internal = 0;
    long rtt_total = 0;
    int rtt_samples = 0;

    for (int idx = 0; idx < stats->sample_count; idx++) {
        // Everything that send_dg() already considers a rejection by the server (SERVFAIL,
        // NOTIMP, REFUSED) counts as an error. NXDOMAIN and NOTAUTH can legitimately occur
        // for user queries, and NOERROR with an empty answer section is not an error either.
        // FORMERR is sometimes returned by some versions of BIND in response to DNSSEC or
        // EDNS0; whether that indicates a broken server is unclear, so for now it is treated
        // as an error, as are unknown codes.
        switch (stats->samples[idx].rcode) {
        case RCODE_INTERNAL_ERROR:
            n_internal++;
            break;
        case RCODE_TIMEOUT:
            n_timeout++;
            break;
        case NOERROR:
        case NOTAUTH:
        case NXDOMAIN:
            // Only successful samples contribute to the RTT average.
            n_success++;
            rtt_samples++;
            rtt_total += stats->samples[idx].rtt;
            break;
        case SERVFAIL:
        case NOTIMP:
        case REFUSED:
        default:
            n_error++;
            break;
        }
    }

    *successes = n_success;
    *errors = n_error;
    *timeouts = n_timeout;
    *internal_errors = n_internal;

    /* The average RTT is only defined when at least one sample succeeded. */
    *rtt_avg = rtt_samples ? (int)(rtt_total / rtt_samples) : -1;

    /*
     * The newest sample sits just before sample_next. When sample_next is 0
     * while samples exist, the newest sample is the last populated slot
     * (presumably because the write index wrapped around).
     */
    time_t newest = 0;
    if (stats->sample_count > 0) {
        newest = stats->samples[
                (stats->sample_next > 0 ? stats->sample_next : stats->sample_count) - 1].at;
    }
    *last_sample_time = newest;
}

1.3.5 dns首次请求完成后,计算dns可用性统计信息sample,从全局cache链中找到该网络netid的dns cache,将sample刷新到该网络的cache对应的dns server名下。

int
res_nsend(res_state statp,
	  const u_char *buf, int buflen, u_char *ans, int anssiz)
{
		.......
			n = send_dg(statp, buf, buflen, ans, anssiz, &terrno,
				    ns, &v_circuit, &gotsomewhere, &now, &rcode, &delay, timeouttype);
			//AR000AT6PU liuheng 0625 get type &name from buffer end

			/* Only record stats the first time we try a query. See above. */
			if (try == 0) {
				struct __res_sample sample;
				_res_stats_set_sample(&sample, now, rcode, delay);
				_resolv_cache_add_resolver_stats_sample(statp->netid, revision_id,
					ns, &sample, params.max_samples);
			}
1.3.5.1 sample_next自增,用来作为下一个sample存入到数组时的索引
/*
 * Resolver reachability statistics: stores one sample in the slot indexed by
 * stats->sample_next, grows sample_count until it reaches max_samples, and
 * advances sample_next, wrapping it back to 0 once it reaches max_samples.
 */
static void
_res_cache_add_stats_sample_locked(struct __res_stats* stats, const struct __res_sample* sample,
        int max_samples) {
    // Note: max_samples is expected to be > 0; otherwise samples[0], which is allocated but
    // supposedly unused, gets (harmlessly) modified.
    XLOG("%s: adding sample to stats, next = %d, count = %d", __FUNCTION__,
            stats->sample_next, stats->sample_count);

    stats->samples[stats->sample_next] = *sample;

    // The count saturates at max_samples; after that new samples overwrite old ones.
    if (stats->sample_count < max_samples) {
        stats->sample_count++;
    }

    // Move on to the slot the next sample will be stored in, wrapping at the end.
    stats->sample_next++;
    if (stats->sample_next >= max_samples) {
        stats->sample_next = 0;
    }
}
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值