温控daemon(七)ss算法

monitor算法是一种静态算法:温度超过阈值就把频率限制到某个固定值,或者做某种固定的处理;ss算法则不同,它是一种动态算法。

下面我们就从代码层面进行分析。也是在main函数中调用了ss_algo_init函数。

1. ss_algo_init

我们来看下这个函数的前半段代码,这里主要先是过滤配置中的setting,然后创建sensor和device的client,接着获取device信息,最后将device各个档位的信息lvl_info放入每个setting的lvl_arr中。

/*
 * ss_algo_init() - import the SS entries of the thermal configuration.
 *
 * @setting: parsed thermal configuration; its ->list is walked entry by entry.
 *
 * Returns -(EINVAL) when @setting is NULL.  The remaining return paths are in
 * the second half of the function (after the error_handler label).
 *
 * This first half filters the configuration entries and, for every accepted
 * entry, registers a sensor client and a device client, fetches the device
 * info and copies the device level list into algo_clnt[clnt_cnt].lvl_arr.
 * Any failure sets err and jumps to error_handler, which releases whatever
 * was acquired for the current slot.
 */
int ss_algo_init(struct thermal_setting_t *setting)
{
	int ret_val = 0;
	struct device_info dev_info;
	struct setting_info *cfg;
	int err = 0;
	uint8_t i = 0;

	if (setting == NULL) {
		msg("%s: Invalid Argument\n", __func__);
		return -(EINVAL);
	}

	sem_init(&sem_config_set, 0, 1);
	cfg = setting->list;
	/* Import settings */
	while (cfg) {
		err = 0;
		/* Stop importing once every algo_clnt slot is in use. */
		if (clnt_cnt >= MAX_INSTANCES_SUPPORTED) {
			msg("%s: Max SS instances reached.", __func__);
			break;
		}

		/* Only SS entries are handled here. */
		if (cfg->algo_type != SS_ALGO_TYPE) {
			cfg = cfg->next;
			continue;
		}

		/* Skip entries flagged with err_disable. */
		if (cfg->err_disable) {
			info("%s: Entry Disabled %s.", __func__,
			    cfg->desc);
			cfg = cfg->next;
			continue;
		}

		/* An entry without a sampling period cannot be scheduled. */
		if (cfg->data.ss.sampling_period_ms == 0) {
			msg("%s: Sampling is not configured, "
			    "disabling entry %s", __func__,  cfg->desc);
			/*
			 * Advance to the next entry before continuing;
			 * otherwise the loop would re-examine this same
			 * entry forever.
			 */
			cfg = cfg->next;
			continue;
		}

		/* One sensor client per accepted entry. */
		algo_clnt[clnt_cnt].sensor_clnt =
			sensors_manager_reg_clnt(cfg->data.ss.sensor);
		if (algo_clnt[clnt_cnt].sensor_clnt == NULL) {
			msg("%s: Sensor clnt create fail %s\n", __func__,
			    cfg->data.ss.sensor);
			err = EFAULT;
			goto error_handler;
		}

		/* One device client per accepted entry. */
		algo_clnt[clnt_cnt].dev_clnt =
			devices_manager_reg_clnt(cfg->data.ss.device);
		if (algo_clnt[clnt_cnt].dev_clnt == NULL) {
			msg("%s: Device clnt create fail %s\n", __func__,
			    cfg->data.ss.device);
			err = EFAULT;
			goto error_handler;
		}

		/* Fetch the device description (type, level count, ...). */
		err = devices_manager_get_info(cfg->data.ss.device,
					       &dev_info);
		if (err) {
			msg("%s: Can not grab dev info %s\n", __func__,
			    cfg->data.ss.device);
			goto error_handler;
		}

		/* Only DEVICE_OP_VALUE_TYPE devices are accepted. */
		if (dev_info.dev_type != DEVICE_OP_VALUE_TYPE) {
			/* Log the offending device type, not the algo type. */
			msg("%s: Improper dev_type %d\n", __func__,
			    (int)dev_info.dev_type);
			err = EFAULT;
			goto error_handler;
		}

		/* The device must expose at least one level. */
		if (dev_info.num_of_levels == 0) {
			msg("%s: No levels supported for this device.\n",
			    __func__);
			err = EFAULT;
			goto error_handler;
		}

		algo_clnt[clnt_cnt].lvl_arr = (struct device_lvl_info *)
			malloc(sizeof(struct device_lvl_info) *
			       dev_info.num_of_levels);
		/* Do not hand a NULL buffer to the level-list query. */
		if (algo_clnt[clnt_cnt].lvl_arr == NULL) {
			msg("%s: Failed to alloc lvl_arr for %s\n", __func__,
			    cfg->data.ss.device);
			err = ENOMEM;
			goto error_handler;
		}

		/* Copy the device's level list into this entry's lvl_arr. */
		err = devices_manager_get_lvl_list(cfg->data.ss.device,
						   algo_clnt[clnt_cnt].lvl_arr,
						   &dev_info.num_of_levels);
		if (err) {
			msg("%s: Can not grab dev lvl list %s\n", __func__,
			    cfg->data.ss.device);
			goto error_handler;
		}

我们再来看下这个函数的后半段代码,我们主要看没有出错的处理

/*
 * Second half of ss_algo_init(): per-entry completion, then thread start-up.
 *
 * When err is set, everything acquired for algo_clnt[clnt_cnt] is released
 * and the slot is zeroed.  Otherwise the slot is filled in from the
 * configuration entry and dev_info, the entry is attached to a sampling group
 * with the same sampling period (a new group is created if none matches) and
 * clnt_cnt is advanced.
 *
 * After the loop: returns -(EFAULT) when no SS entry was imported, otherwise
 * the result of pthread_create() for the algo_monitor thread.
 */
error_handler:
		if (err) { /* failure path */
			/* Clean up and look for next SS cfg */
			if (algo_clnt[clnt_cnt].sensor_clnt)
				sensors_manager_dereg_clnt(
				   algo_clnt[clnt_cnt].sensor_clnt);
			if (algo_clnt[clnt_cnt].dev_clnt)
				devices_manager_dereg_clnt(
				   algo_clnt[clnt_cnt].dev_clnt);
			if (algo_clnt[clnt_cnt].lvl_arr)
				free(algo_clnt[clnt_cnt].lvl_arr);
			memset(&algo_clnt[clnt_cnt], 0x0,
			       sizeof(struct ss_algo_t));
		} else {
			algo_clnt[clnt_cnt].disabled =
				cfg->disable;
			/* Every instance starts in the STOP_ALGO state. */
			algo_clnt[clnt_cnt].state =
				SS_STATE_STOP_ALGO;
			/* Minimum operating value reported by the device. */
			algo_clnt[clnt_cnt].min_op_lvl =
				dev_info.min_dev_op_value;
			algo_clnt[clnt_cnt].max_op_lvl =
				dev_info.max_dev_op_value;
			algo_clnt[clnt_cnt].dev_op_lvl =
				dev_info.max_dev_op_value;
			algo_clnt[clnt_cnt].setting = &(cfg->data.ss);
			algo_clnt[clnt_cnt].desc = cfg->desc;
			algo_clnt[clnt_cnt].num_of_lvls =
				dev_info.num_of_levels;
			/* Temperature at which the algorithm is triggered. */
			algo_clnt[clnt_cnt].active_set_point =
				cfg->data.ss.set_point;
			/* Temperature at which the algorithm is cleared. */
			algo_clnt[clnt_cnt].active_set_point_clr =
				cfg->data.ss.set_point_clr;
			/*
			 * A configured limit of 1 is stored as 0.
			 * increase_mitigation_lvl() skips its limit check
			 * when the stored value is 0.
			 */
			if (cfg->data.ss.device_mtgn_max_limit == 1)
				algo_clnt[clnt_cnt].dev_mtgn_max_limit = 0;
			else
				algo_clnt[clnt_cnt].dev_mtgn_max_limit =
					cfg->data.ss.device_mtgn_max_limit;
			/* Same convention for the performance floor. */
			if (cfg->data.ss.device_perf_floor == 1)
				algo_clnt[clnt_cnt].device_perf_floor = 0;
			else
				algo_clnt[clnt_cnt].device_perf_floor =
					cfg->data.ss.device_perf_floor;
			/* The sampling period is capped at SAMPLE_PERIOD. */
			cfg->data.ss.sampling_period_ms =
				MIN(SAMPLE_PERIOD,
				    cfg->data.ss.sampling_period_ms);
			strlcpy(algo_clnt[clnt_cnt].device_units,
				&dev_info.device_units_name[0], MAX_UNIT_NAME_SIZE);

			dbgmsg("%s: SS ID %s, Sensor %s\n", __func__, cfg->desc,
			       cfg->data.ss.sensor);
			dbgmsg("%s: Device %s, Number of Levels %d\n",
			       __func__, cfg->data.ss.device,
			       algo_clnt[clnt_cnt].num_of_lvls);
			dbgmsg("%s: Set Point %dmC, Set Point Clr %dmC, "
			       "Override %dmC\n", __func__,
			       cfg->data.ss.set_point,
			       cfg->data.ss.set_point_clr,
			       cfg->data.ss.override);
			dbgmsg("%s: MAXFREQ %dkHz, MINFREQ %dkHz, TC %d", __func__,
			       dev_info.max_dev_op_value,
			       dev_info.min_dev_op_value,
			       cfg->data.ss.time_constant);
			if (!cfg->disable)
				print_setting(cfg);

			/* Create sampling groups for similarly configured
			   intervals and save sampling group id of each client
			   on its own algo info */
			for (i = 0; i < MAX_INSTANCES_SUPPORTED &&
			     sampling_groups[i].sampling != 0; i++) {
				if (cfg->data.ss.sampling_period_ms ==
				    sampling_groups[i].sampling) {
					algo_clnt[clnt_cnt].sampling_group_id = i;
					break;
				}
			}

			/*
			 * i now indexes either a matching group, the first
			 * unused group (sampling == 0), or is out of range.
			 * On overflow, err is set and control jumps back to
			 * error_handler so the slot is released.
			 */
			if (i >= MAX_INSTANCES_SUPPORTED ||
				(sampling_groups[i].sampling == 0 &&
				sampling_group_count >= MAX_INSTANCES_SUPPORTED)) {
				msg("%s: Sampling group count max is reached:%d idx:%d\n",
					__func__, sampling_group_count, i);
				err = -1;
				goto error_handler;
			} else if (sampling_groups[i].sampling == 0) {
				/* Open a new sampling group for this period. */
				sampling_groups[i].sampling =
					cfg->data.ss.sampling_period_ms;
				algo_clnt[clnt_cnt].sampling_group_id = i;
				sampling_group_count++;
			}
			clnt_cnt++;
		}
		cfg = cfg->next;
	}

	if (clnt_cnt == 0) {
		info("%s: No SS's configured.\n", __func__);
		return -(EFAULT);
	}

	/* Start the thread that runs algo_monitor(). */
	ret_val = pthread_create(&thread, NULL, (void *)&algo_monitor,
				 NULL);
	if (ret_val != 0)
		msg("Error initializing SS algo monitor\n");
	return ret_val;
}

2. algo_monitor

下面我们来看下algo_monitor函数,它是线程启动后执行的ss算法主函数。它主要根据事件掩码去执行不同的处理函数,并且会等待condition,直到有condition broadcast之后才会继续执行。

/*
 * algo_monitor() - thread entry point of the SS algorithm.
 *
 * @data: unused.
 *
 * Marks every imported instance as "threshold reached" so that the first
 * handle_thresh_sig() call programs all of them, registers one timer per
 * sampling group, registers the client request handlers and then loops
 * forever: it sleeps on wait_cond until ev_mask is non-zero, takes a private
 * copy of the mask, clears it and dispatches the matching handlers.
 *
 * The loop never terminates, so the trailing return is not reached.
 */
static void *algo_monitor(void *data)
{
	uint32_t        local_ev_mask = 0;
	uint8_t i = 0;

	/* Set mask to all trigger setting all thresholds */
	if (clnt_cnt < MAX_INSTANCES_SUPPORTED)
		thresh_reached_mask = (1U << clnt_cnt) - 1;
	else
		thresh_reached_mask = UINT32_MAX;

	/* Create timer id for each sampling group */
	for(i = 0; i < sampling_group_count; i++) {

		memset(&sampling_groups[i].timer_val, 0,
		       sizeof(struct itimerspec));
		/* Split the period (ms) into seconds and nanoseconds. */
		TIMER_SAMPLING_SET(sampling_groups[i],
				sampling_groups[i].sampling);
		/* The group index is handed back to timer_expired(). */
		sampling_groups[i].timer_id = timer_register_event(i, timer_expired);
		if (sampling_groups[i].timer_id == -1)
			msg("%s, Registration of timer failed\n", __func__);
	}

	/* Set inital active thresholds */
	handle_override_sig();
	/* Set initial thresholds */
	handle_thresh_sig();

	/* Register the callbacks for client requests. */
	thermal_server_register_client_req_handler("override", override_notify, NULL);
	thermal_server_register_client_req_handler(CONFIG_QUERY_CLIENT, config_query_notify, NULL);
	thermal_server_register_client_req_handler(CONFIG_SET_CLIENT, config_parameter_set_notify, NULL);

	while (1) {
		dbgmsg("%s: Wait for EV", __func__);
		pthread_mutex_lock(&wait_mutex);
		/*
		 * Re-check the predicate after every wake-up:
		 * pthread_cond_wait() may return spuriously, and there is
		 * nothing to dispatch while ev_mask is still zero.
		 */
		while (!ev_mask)
			pthread_cond_wait(&wait_cond, &wait_mutex);

		/* Make local copy and clear. */
		local_ev_mask = ev_mask;
		ev_mask = 0;
		pthread_mutex_unlock(&wait_mutex);

		if (local_ev_mask & OVERRIDE_EV) {
			dbgmsg("%s: Override EVT", __func__);
			handle_override_sig();
		}

		if (local_ev_mask & THRESH_EV) {
			dbgmsg("%s: Thresh EVT", __func__);
			handle_thresh_sig();
		}

		if (local_ev_mask & TIMER_EV) {
			dbgmsg("%s: Timer EVT", __func__);
			handle_timer_sig();
		}

		if (local_ev_mask & CONFIG_SET_EV) {
			dbgmsg("%s: Config set EVT", __func__);
			handle_config_set_sig();
			sem_post(&sem_config_set);
		}
	}
	return NULL;
}

我们先来看下设置采样率的宏,就是将以毫秒为单位的sampling拆成秒(tv_sec)和纳秒(tv_nsec)两部分。

/*
 * TIMER_SAMPLING_SET() - load a period given in milliseconds into
 * s_grp.timer_val.it_value: whole seconds go to tv_sec, the remaining
 * milliseconds are converted to nanoseconds and go to tv_nsec.
 *
 * Both arguments are parenthesized so that expressions such as "a + b" are
 * split correctly, and the body is wrapped in do { } while (0) so the macro
 * behaves as a single statement (e.g. inside an unbraced if/else).
 * Note that each argument is evaluated twice; avoid side effects in them.
 */
#define TIMER_SAMPLING_SET(s_grp, sampling) do { \
	(s_grp).timer_val.it_value.tv_sec = ((sampling) / 1000); \
	(s_grp).timer_val.it_value.tv_nsec = (((sampling) % 1000) * 1000000); \
	} while (0)

我们再来看看如何设置的定时器,先是调用了timer_init创建epoll以及创建线程监控epoll,然后起一个定时器放入epoll监控。

/*
 * timer_register_event() - create a timerfd and add it to the shared epoll
 * instance.
 *
 * @id:      value stored in the event data and later passed to @handler.
 * @handler: callback stored in the event data for this timer.
 *
 * Returns the timerfd descriptor on success.
 *
 * The first call runs timer_init() (guarded by a function-local mutex and
 * the timer_inited flag).
 *
 * NOTE(review): this excerpt stops at the success return; the error labels
 * (data_malloc_err, timerfd_create_err, epoll_add_err) and the closing brace
 * are not shown here.
 */
int timer_register_event(int id, timer_expired_callback handler)
{
	struct epoll_event ev;
	int timer_id;
	int ret_val;
	static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
	static int timer_inited = 0;
	struct event_data *data = (struct event_data *)malloc(sizeof(struct event_data));

	if (data == NULL) {
		msg("%s: Failed to alloc event data\n", __func__);
		goto data_malloc_err;
	}

	pthread_mutex_lock(&mutex);

	if (timer_inited == 0) {
		/* One-time setup: epoll instance plus the thread watching it. */
		ret_val = timer_init();
		if (ret_val) {
			pthread_mutex_unlock(&mutex);
			goto timerfd_create_err;
		}else {
			timer_inited = 1;
		}
	}

	pthread_mutex_unlock(&mutex);

	/* Create the timer itself. */
	timer_id = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
	if (timer_id == -1) {
		msg("%s: timerfd_create failed\n", __func__);
		goto timerfd_create_err;
	}

	data->fd = timer_id;
	data->callback = handler;
	data->id = id;
	ev.events = EPOLLIN | EPOLLPRI;
	ev.data.ptr = (void *)(data);

	/*
	 * Add the timer fd to the epoll set.  The attached event data carries
	 * the fd, the callback and the caller-supplied id.
	 */
	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, data->fd, &ev) == -1) {
		msg("%s, epoll_ctl failed\n", __func__);
		goto epoll_add_err;
	}

	return timer_id;

timer_init函数创建了一个epoll,然后创建了一个thread执行timer_thread_start函数

/*
 * timer_init() - create the epoll instance and start the thread that waits
 * on it (timer_thread_start).
 *
 * Returns 0 on success, the negated errno value when epoll_create() fails,
 * or -1 when the callback thread cannot be created.  On thread-creation
 * failure the epoll descriptor is closed again so that a later retry does
 * not leak it.
 */
static int timer_init(void)
{
	int ret_val;

	epollfd = epoll_create(MAX_EPOLL_EVENTS);
	if (epollfd == -1) {
		/* Capture errno first: the logging call may overwrite it. */
		int saved_errno = errno;

		msg("%s: epoll_create failed, errno = %d\n", __func__, saved_errno);
		return -saved_errno;
	}

	ret_val = pthread_create(&callback_thread, NULL, timer_thread_start, NULL);
	if (ret_val) {
		/*
		 * pthread_create() reports failure through its return value
		 * and does not set errno, so log the returned error number.
		 */
		msg("%s: Error cannot create timer callback thread, errno = %d\n", __func__, ret_val);
		close(epollfd);
		epollfd = -1;
		return -1;
	}

	return ret_val;
}

我们来看下监控定时器的函数,有epoll响应了直接调用回调函数。

/*
 * timer_thread_start() - body of the timer callback thread.
 *
 * @arg: unused.
 *
 * Blocks in epoll_wait() on epollfd without a timeout.  For every ready
 * descriptor it reads from the fd stored in the event data and then invokes
 * the stored callback (if any) with the stored id.  The loop has no exit.
 */
static void* timer_thread_start(void* arg)
{
	struct epoll_event ready[MAX_EPOLL_EVENTS];
	int scratch[2];

	for (;;) {
		int slot;
		int ready_cnt = epoll_wait(epollfd, ready, MAX_EPOLL_EVENTS, -1);

		if (ready_cnt == -1)
			msg("%s: epoll wait failed, errno = %d\n", __func__, errno);

		/* With ready_cnt == -1 this loop body is never entered. */
		for (slot = 0; slot < ready_cnt; slot++) {
			struct event_data *evt = ready[slot].data.ptr;
			int rd = read(evt->fd, scratch, sizeof(scratch));

			if (rd < 0)
				msg("epoll callback read error:%d\n", errno);

			/* The callback runs even when the read failed. */
			if (evt->callback)
				(*(void (*)(int))evt->callback)(evt->id);
		}
	}
	return NULL;
}

我们再来看下回调函数timer_expired,它设置了ev_mask中的TIMER_EV掩码,然后broadcast了condition,这样algo_monitor函数就可以从对这个condition的等待中被唤醒。下一步我们就要看是谁设置的定时器。

/*
 * timer_expired() - timer callback.
 *
 * @id: index used as the bit position in sampling_group_mask.
 *
 * Under wait_mutex: flags TIMER_EV in ev_mask, marks the group in
 * sampling_group_mask and broadcasts wait_cond.
 */
static void timer_expired(int id)
{
	const unsigned int group_bit = 0x1U << id;

	pthread_mutex_lock(&wait_mutex);
	sampling_group_mask |= group_bit;
	ev_mask |= TIMER_EV;
	/* Wake the thread waiting on wait_cond. */
	pthread_cond_broadcast(&wait_cond);
	pthread_mutex_unlock(&wait_mutex);
}

我们继续分析algo_monitor函数后面调用的函数,先来看handle_thresh_sig。这里是第一次调用handle_thresh_sig函数。从下面的分析可以看到,第一次进这个函数的时候最后并不会去设置定时器,只有在温度超过设定的触发值时才会起定时器。

/*
 * handle_thresh_sig() - process every SS instance whose bit is set in
 * thresh_reached_mask.
 *
 * For each such instance the trip temperature is read, the instance state is
 * updated from the comparison with active_set_point / active_set_point_clr,
 * a new sensor threshold request is built (high threshold while stopped, low
 * threshold while sampling) and handed to the sensor manager.  The instance's
 * bit in its sampling group's ss_clnt_state tracks whether it is sampling.
 *
 * Afterwards each sampling group's timer is armed when at least one of its
 * instances is sampling and the timer is not yet enabled, or disarmed when
 * none is sampling and the timer is enabled.
 */
static void handle_thresh_sig(void)
{
	uint8_t idx = 0;
	int     temp;
	uint8_t transition = 0;
	struct sensor_thresh_req thresh;
	int ret_val;

	/* Get temp and handle */
	for (idx = 0; idx < clnt_cnt; idx++) {
		uint8_t s_id = 0;
		if ((thresh_reached_mask & (0x1U << idx)) == 0)
			continue;

		/* Disabled instances are only marked as handled. */
		if (algo_clnt[idx].disabled) {
			pthread_mutex_lock(&wait_mutex);
			/* Mark off handled item for disabled ss instance */
			thresh_reached_mask &= (~(0x1U << idx));
			pthread_mutex_unlock(&wait_mutex);
			continue;
		}

		/* Sampling group this instance belongs to. */
		s_id = algo_clnt[idx].sampling_group_id;

		/* Read the temperature through the instance's sensor client. */
		temp = sensors_manager_read_trip_temp(algo_clnt[idx].sensor_clnt);
		dbgmsg("%s: SS Id %s, Read %s %dmC", __func__,
		       algo_clnt[idx].desc,
		       algo_clnt[idx].setting->sensor, temp);
		transition = 0;
		/*
		 * Stopped (the initial state is STOP_ALGO) and at or above
		 * the trigger temperature: start sampling.
		 */
		if (((algo_clnt[idx].state == SS_STATE_STOP_ALGO) ||
		     (algo_clnt[idx].state == SS_STATE_STOP_SAMPLING)) &&
		     (temp >= algo_clnt[idx].active_set_point)) {
			thermalmsg(LOG_LVL_INFO, LOG_TRACE, "%s: %s: %s=%s %s=%d mC",
				   SENSOR_THRESHOLD_HIT, algo_clnt[idx].desc, SENSORS,
				   algo_clnt[idx].setting->sensor, TEMPERATURE, temp);
			transition = 1;
			algo_clnt[idx].state = SS_STATE_START_SAMPLING;
		/* At or below the clear temperature: stop the algorithm. */
		} else if (((algo_clnt[idx].state == SS_STATE_START_SAMPLING) ||
			    (algo_clnt[idx].state == SS_STATE_STOP_SAMPLING)) &&
			    (temp <= algo_clnt[idx].active_set_point_clr)) {
			thermalmsg(LOG_LVL_INFO, LOG_TRACE, "%s: %s: %s=%s %s=%d mC",
				   SENSOR_THRESHOLD_CLR, algo_clnt[idx].desc, SENSORS,
				   algo_clnt[idx].setting->sensor, TEMPERATURE, temp);
			algo_clnt[idx].state = SS_STATE_STOP_ALGO;
			transition = 1;
		/* STOP_SAMPLING between the two temperatures: state is kept. */
		} else if ((algo_clnt[idx].state == SS_STATE_STOP_SAMPLING) &&
			   (temp < algo_clnt[idx].active_set_point)) {
			transition = 1;
		}

		/*
		 * None of the branches above matches an instance that is in
		 * STOP_ALGO and below the trigger temperature, so transition
		 * stays 0 for it.
		 */
		if (transition) {
			thermalmsg(LOG_LVL_DBG, (LOG_LOGCAT | LOG_TRACE),
			       "%s: SS Id %s Transition State %d", __func__,
			       algo_clnt[idx].desc,
			       algo_clnt[idx].state);

			/* Set initial Dev OP value point */
			if (algo_clnt[idx].state == SS_STATE_START_SAMPLING) {
				algo_clnt[idx].dev_op_lvl =
					algo_clnt[idx].max_op_lvl;
				algo_clnt[idx].curr_lvl = 0;
			}
		}

		memset(&thresh, 0x0, sizeof(thresh));
		/* thresh_notify() is called back with this instance's index. */
		thresh.notify_cb_func = thresh_notify;
		thresh.notify_cb_data = (void *)(uintptr_t)idx;
		if ((algo_clnt[idx].state == SS_STATE_STOP_ALGO) ||
		    (algo_clnt[idx].state == SS_STATE_STOP_SAMPLING)) {
			/* Allow CPU to run at max frequency */
			device_clnt_cancel_request(algo_clnt[idx].dev_clnt);
			algo_clnt[idx].tc_delay = 0;
			/* While stopped, wait for the trigger temperature. */
			thresh.thresh.high_valid = 1;
			thresh.thresh.high = algo_clnt[idx].active_set_point;
			/* This instance no longer needs its group's timer. */
			sampling_groups[s_id].ss_clnt_state &= (~(0x1U << idx));
			/* Flush error history */
			set_curr_err(&algo_clnt[idx].err_info, 0, ERR_HISTORY);
		} else {
			/* While sampling, wait for the clear temperature. */
			thresh.thresh.low_valid = 1;
			thresh.thresh.low = algo_clnt[idx].active_set_point_clr;
			sampling_groups[s_id].ss_clnt_state |= (0x1U << idx);
		}

		pthread_mutex_lock(&wait_mutex);
		/* Mark off handled item */
		thresh_reached_mask &= (~(0x1U << idx));
		pthread_mutex_unlock(&wait_mutex);

		/* Set Thresh */
		sensors_manager_set_thresh_lvl(algo_clnt[idx].sensor_clnt, &thresh);
	}

	/* Adjust the sampling interval period. */
	for(idx = 0; idx < sampling_group_count; idx++) {
		if (sampling_groups[idx].ss_clnt_state) {
			/* Timer already running for this group. */
			if (sampling_groups[idx].enabled)
				continue;

			TIMER_SAMPLING_SET(sampling_groups[idx],
					   sampling_groups[idx].sampling);
			ret_val = settimer(&sampling_groups[idx]);
			if (ret_val)
				msg("%s: Error cannot settime.\n", __func__);
			else
				sampling_groups[idx].enabled = 1;
		} else {
			/* No instance is sampling; nothing to do if not armed. */
			if (sampling_groups[idx].enabled == 0)
				continue;

			/* Disarm the timer */
			TIMER_SAMPLING_SET(sampling_groups[idx], 0);
			ret_val = settimer(&sampling_groups[idx]);
			if (ret_val)
				msg("%s: Error cannot settime.\n", __func__);
			else
				sampling_groups[idx].enabled = 0;
		}
	}
}

下面我们再看sensors_manager_set_thresh_lvl函数,把sensor的client的thresh传入了这个函数

/*
 * sensors_manager_set_thresh_lvl() - install or clear a client's threshold
 * request and recompute the sensor's active thresholds.
 *
 * @clnt:        sensor client handle.
 * @thresh_info: request to install, or NULL to clear the client's request.
 *
 * Returns -(EINVAL) for a NULL handle, the non-zero result of
 * validate_clnt() when validation fails, and 0 otherwise.  A request that
 * fails validate_thresh_info() is not stored, yet 0 is still returned.
 */
int sensors_manager_set_thresh_lvl(sensor_clnt_handle clnt,
		       struct sensor_thresh_req *thresh_info)
{
	struct sensor_client_type *sensor_clnt = clnt;
	struct sensors_mgr_sensor_info *mgr;
	int rc;

	if (sensor_clnt == NULL) {
		msg("%s: Invalid args.\n", __func__);
		return -(EINVAL);
	}

	rc = validate_clnt(sensor_clnt);
	if (rc != 0)
		return rc;

	mgr = sensor_clnt->sensor_mgr;

	THERM_MUTEX_LOCK(&ts_clnt_mtx);
	if (thresh_info != NULL) {
		if (validate_thresh_info(thresh_info) == 0) {
			/* Keep a private copy and flag the request as live. */
			memcpy(&sensor_clnt->request, thresh_info,
			       sizeof(struct sensor_thresh_req));
			sensor_clnt->request_active = 1;
			dbgmsg("%s: %s Hi(%d) %d, Lo(%d) %d, desc:%d Interval(%d) %d\n",
			       __func__, mgr->name,
			       thresh_info->thresh.high_valid,
			       thresh_info->thresh.high,
			       thresh_info->thresh.low_valid,
			       thresh_info->thresh.low,
			       thresh_info->thresh.descending_threshold,
			       thresh_info->polling_interval_valid,
			       thresh_info->polling_interval);
		}
	} else {
		/* No request supplied: drop the client's current one. */
		sensor_clnt->request_active = 0;
		dbgmsg("%s: %s clear request.\n", __func__, mgr->name);
	}
	THERM_MUTEX_UNLOCK(&ts_clnt_mtx);

	/* Recompute the sensor-wide thresholds. */
	update_active_thresh(mgr);
	return 0;
}

我们再来看看update_active_thresh函数,主要是从client中获取thresh来更新sensor的数据。

/*
 * update_active_thresh() - rebuild sensor_mgr->active_thresh from the
 * requests of all clients on the sensor's client list.
 *
 * @sensor_mgr: sensor whose active thresholds are recomputed.
 *
 * Returns -(EINVAL) when @sensor_mgr is NULL, 0 otherwise.
 *
 * The merged request uses the lowest requested high threshold, the highest
 * requested low threshold and the shortest requested polling interval.  When
 * the merged thresholds are usable the sensor's update_thresholds hook (if
 * set) is called.  When the sensor goes from "no active request" to "has an
 * active request", req_wait_cond is broadcast.
 */
static int update_active_thresh(struct sensors_mgr_sensor_info *sensor_mgr)
{
	struct sensor_client_type  *client = NULL;
	struct sensor_thresh_req *active = NULL;
	uint8_t                active_req = 0;


	if (sensor_mgr == NULL)
		return -(EINVAL);

	active = &sensor_mgr->active_thresh;

	/* Start from neutral values so MIN()/MAX() below pick client values. */
	memset(active, 0, sizeof(struct sensor_thresh_req));
	active->thresh.low = INT32_MIN;
	active->thresh.high  = INT32_MAX;
	active->polling_interval = UINT32_MAX;

	client = sensor_mgr->client_list;
	THERM_MUTEX_LOCK(&ts_clnt_mtx);
	/* Walk all clients; those without an active request are skipped. */
	while (client != NULL) {
		if (!client->request_active) {
			client = client->next_clnt;
			continue;
		}

		struct sensor_thresh_req *thresh = &client->request;
		if (thresh->thresh.descending_threshold)
			active->thresh.descending_threshold = 1;
		/* Find active high */
		if (thresh->thresh.high_valid) {
			/* Lowest high threshold among the clients. */
			active->thresh.high_valid = 1;
			active->thresh.high = MIN(active->thresh.high,
							thresh->thresh.high);
		}

		/* Find active low */
		if (thresh->thresh.low_valid) {
			/* Highest low threshold among the clients. */
			active->thresh.low_valid = 1;
			active->thresh.low = MAX(active->thresh.low,
						       thresh->thresh.low);
		}

		/* Find min polling interval */
		if (thresh->polling_interval_valid) {
			active->polling_interval_valid = 1;
			active->polling_interval = MIN(active->polling_interval,
							     thresh->polling_interval);
		}

		active_req = 1;
		client = client->next_clnt;
	}

	/* Usable only if high is above low and at least one side is valid. */
	if ((active->thresh.high > active->thresh.low) &&
	    (active->thresh.high_valid || active->thresh.low_valid)) {
		/* We can take advantage of interrupt */
		sensor_mgr->active_thresh_valid = 1;
	} else {
		sensor_mgr->active_thresh_valid = 0;
	}

	/* Room for optimization if thresholds didn't change. */
	/* Push the merged thresholds through the sensor's hook, if set. */
	if (sensor_mgr->active_thresh_valid &&
	    sensor_mgr->update_thresholds) {
		sensor_mgr->update_thresholds(sensor_mgr);
	}

	if (!sensor_mgr->req_active && active_req) {
		/* Advertise there is now an active request available */
		pthread_mutex_lock(&(sensor_mgr->req_wait_mutex));
		sensor_mgr->req_active = 1;
		pthread_cond_broadcast(&(sensor_mgr->req_wait_cond));
		pthread_mutex_unlock(&(sensor_mgr->req_wait_mutex));
	} else {
		sensor_mgr->req_active = active_req;
	}
	THERM_MUTEX_UNLOCK(&ts_clnt_mtx);
	return 0;
}

这样第一次调用handle_thresh_sig分析完了,但是温度没有达到设置的触发值时,我们要如何下次再去调用这个函数。

3. sensor的监控触发

这个时候我们就联想到sensor也有一个监听函数,之前在sensor的那篇博客我们也分析过sensor_monitor函数。每一个sensor都会起一个thread执行sensor_monitor函数来监控。

/*
 * sensor_monitor() - per-sensor monitoring thread body.
 *
 * @vsensor_mgr: the struct sensors_mgr_sensor_info being monitored.
 *
 * Until thread_shutdown becomes 1, each iteration:
 *   1. sleeps on req_wait_cond while the sensor has no active request,
 *   2. calls sensor_wait(),
 *   3. stores a fresh reading in last_reading, taken from
 *      get_trip_temperature when that hook is set, else get_temperature,
 *   4. calls notify_clnts().
 *
 * Always returns NULL.
 */
static void *sensor_monitor(void *vsensor_mgr)
{
	struct sensors_mgr_sensor_info *mgr = vsensor_mgr;

	for (;;) {
		if (mgr->thread_shutdown == 1)
			break;

		/* Wait here until there is actually a request to process */
		if (!mgr->req_active) {
			dbgmsg("%s: %s Wait for client request.\n", __func__, mgr->name);
			pthread_mutex_lock(&(mgr->req_wait_mutex));
			while (!mgr->req_active) {
				pthread_cond_wait(&(mgr->req_wait_cond),
						  &(mgr->req_wait_mutex));
			}
			pthread_mutex_unlock(&(mgr->req_wait_mutex));
		}

		dbgmsg("%s: %s Sensor wait.\n", __func__, mgr->name);
		sensor_wait(mgr);

		/* Fall back to get_temperature without a trip-temperature hook. */
		if (!mgr->get_trip_temperature)
			mgr->last_reading = mgr->get_temperature(mgr);
		else
			mgr->last_reading = mgr->get_trip_temperature(mgr);

		notify_clnts(mgr);
	}

	return NULL;
}

我们再来看notify_clnts函数,这个时候我们还是遍历client,看每个client是否有申请,只有上一次sensor读取的温度大于最高值,或者小于最小值,才会调用client的回调函数。这样其实好处很明显不用核心的算法线程,一直跑,只要超过设定的值才会去开启算法的流程。

static int notify_clnts(struct sensors_mgr_sensor_info *sensor_mgr)
{
	struct sensor_client_type *client = NULL;
	enum sensor_notify_event_type thresh_event;

	if (sensor_mgr == NULL)
		return -(EINVAL);

	client = sensor_mgr->client_list;

	THERM_MUTEX_LOCK(&ts_clnt_mtx);
	while (client != NULL) {
		if (client->request_active) {
			struct thresholds_req_t *thresh = &client->request.thresh;

			/* Notify clients of thresh crossings */
			thresh_event = SENSOR_NOTIFY_NORMAL_THRESH_EVENT;
			if (thresh->high_valid &&
			    (sensor_mgr->last_reading >= thresh->high)) {
				thresh_event = SENSOR_NOTIFY_HIGH_THRESH_EVENT;//温度大于client的最大值
			} else if (thresh->low_valid &&
				   (sensor_mgr->last_reading <= thresh->low)) {
				thresh_event = SENSOR_NOTIFY_LOW_THRESH_EVENT;//温度小于client的最小值
			}

			if (thresh_event != SENSOR_NOTIFY_NORMAL_THRESH_EVENT) {//只有当出现上面两种情况才会调用client的回调
				client->request_active = 0;
				client->request.notify_cb_func(client,
							       thresh_event,
							       sensor_mgr->last_reading,
							       client->request.notify_cb_data);
			}
		}
		client = client->next_clnt;
	}
	THERM_MUTEX_UNLOCK(&ts_clnt_mtx);
	update_active_thresh(sensor_mgr);//还需要更新sensor的thresh值。
	return 0;
}

所以当我们sensor的监控测到的温度超过算法设定的值,才会去开启算法的流程;或者低于point_clr才会去到算法流程中取消算法监控。

4. 触发ss算法

这个时候我们再去看ss算法的thresh_notify,这个时候已经是超过算法设定的温度值了。

/*
 * thresh_notify() - sensor threshold callback of the SS algorithm.
 *
 * @clnt:    sensor client handle; a NULL handle is logged and ignored.
 * @event:   threshold event type (not examined here).
 * @reading: temperature reading, only logged.
 * @data:    index of the SS instance in algo_clnt[], carried as a pointer.
 *
 * Under wait_mutex: marks the instance in thresh_reached_mask, marks its
 * sampling group in sampling_group_mask, raises THRESH_EV and TIMER_EV in
 * ev_mask and broadcasts wait_cond.
 */
static void thresh_notify(sensor_clnt_handle clnt,
			  enum sensor_notify_event_type event,
			  int reading, void *data)
{
	uintptr_t inst;

	if (NULL == clnt) {
		msg("%s: unexpected NULL", __func__);
		return;
	}

	inst = (uintptr_t)data;

	dbgmsg("%s: SS Id %s, Update recieved %s %d", __func__,
	       algo_clnt[inst].desc,
	       algo_clnt[inst].setting->sensor,
	       reading);

	/* Notify the waiting thread */
	pthread_mutex_lock(&wait_mutex);
	ev_mask |= (THRESH_EV | TIMER_EV);
	thresh_reached_mask |= (0x1U << inst);
	sampling_group_mask |= (0x1U << algo_clnt[inst].sampling_group_id);
	pthread_cond_broadcast(&wait_cond);
	pthread_mutex_unlock(&wait_mutex);
}

这个时候condition broadcast了,算法的thread主函数就能继续往下了,algo_monitor的代码如下,这个时候我们就去调用handle_thresh_sig和handle_timer_sig。

	/*
	 * Event loop excerpt from algo_monitor(): sleep until ev_mask is
	 * set, copy and clear it under wait_mutex, then run the handler of
	 * every event bit that was set.
	 */
	while (1) {
		dbgmsg("%s: Wait for EV", __func__);
		pthread_mutex_lock(&wait_mutex);
		/*
		 * Returns once wait_cond is broadcast.
		 * NOTE(review): the predicate is tested with "if", not in a
		 * loop; if the wait returns while ev_mask is still 0, the copy
		 * below is 0 and none of the handlers runs on that pass.
		 */
		if (!ev_mask)
			pthread_cond_wait(&wait_cond, &wait_mutex);

		/* Make local copy and clear. */
		local_ev_mask = ev_mask;
		ev_mask = 0;
		pthread_mutex_unlock(&wait_mutex);

		if (local_ev_mask & OVERRIDE_EV) {
			dbgmsg("%s: Override EVT", __func__);
			handle_override_sig();
		}

		if (local_ev_mask & THRESH_EV) {
			dbgmsg("%s: Thresh EVT", __func__);
			handle_thresh_sig();
		}

		if (local_ev_mask & TIMER_EV) {
			dbgmsg("%s: Timer EVT", __func__);
			handle_timer_sig();
		}

		if (local_ev_mask & CONFIG_SET_EV) {
			dbgmsg("%s: Config set EVT", __func__);
			handle_config_set_sig();
			/* Post sem_config_set after the config handler ran. */
			sem_post(&sem_config_set);
		}
	}

这个时候再进handle_thresh_sig函数,这个时候温度已经超过设定值了,最后就会设置定时器。

/*
 * handle_thresh_sig() - abridged listing (the "......" line marks code left
 * out of this excerpt), annotated for the case where the temperature has
 * reached the trigger value.
 *
 * The instance moves to SS_STATE_START_SAMPLING, a low threshold at
 * active_set_point_clr is requested from the sensor, the instance's bit is
 * set in its sampling group's ss_clnt_state, and the second loop arms that
 * group's timer if it is not enabled yet.
 */
static void handle_thresh_sig(void)
{
	uint8_t idx = 0;
	int     temp;
	uint8_t transition = 0;
	struct sensor_thresh_req thresh;
	int ret_val;

	/* Get temp and handle */
	for (idx = 0; idx < clnt_cnt; idx++) {
		uint8_t s_id = 0;
		......
		s_id = algo_clnt[idx].sampling_group_id;

		/* Read the current temperature. */
		temp = sensors_manager_read_trip_temp(algo_clnt[idx].sensor_clnt);

		transition = 0;
		/* Stopped and at/above the trigger value: start sampling. */
		if (((algo_clnt[idx].state == SS_STATE_STOP_ALGO) ||
		     (algo_clnt[idx].state == SS_STATE_STOP_SAMPLING)) &&
		     (temp >= algo_clnt[idx].active_set_point)) {
			transition = 1;
			algo_clnt[idx].state = SS_STATE_START_SAMPLING;
		} else if (((algo_clnt[idx].state == SS_STATE_START_SAMPLING) ||
			    (algo_clnt[idx].state == SS_STATE_STOP_SAMPLING)) &&
			    (temp <= algo_clnt[idx].active_set_point_clr)) {
			algo_clnt[idx].state = SS_STATE_STOP_ALGO;
			transition = 1;
		} else if ((algo_clnt[idx].state == SS_STATE_STOP_SAMPLING) &&
			   (temp < algo_clnt[idx].active_set_point)) {
			transition = 1;
		}

		if (transition) {
			/* Set initial Dev OP value point */
			if (algo_clnt[idx].state == SS_STATE_START_SAMPLING) {
				/* Start from the maximum operating value ... */
				algo_clnt[idx].dev_op_lvl =
					algo_clnt[idx].max_op_lvl;
				/*
				 * ... i.e. level index 0.
				 * NOTE(review): the article states lvl_arr is
				 * sorted in descending order; not visible here.
				 */
				algo_clnt[idx].curr_lvl = 0;
			}
		}

		memset(&thresh, 0x0, sizeof(thresh));
		thresh.notify_cb_func = thresh_notify;
		thresh.notify_cb_data = (void *)(uintptr_t)idx;
		if ((algo_clnt[idx].state == SS_STATE_STOP_ALGO) ||
		    (algo_clnt[idx].state == SS_STATE_STOP_SAMPLING)) {
			/* Allow CPU to run at max frequency */
			device_clnt_cancel_request(algo_clnt[idx].dev_clnt);
			algo_clnt[idx].tc_delay = 0;
			thresh.thresh.high_valid = 1;
			thresh.thresh.high = algo_clnt[idx].active_set_point;
			sampling_groups[s_id].ss_clnt_state &= (~(0x1U << idx));
			/* Flush error history */
			set_curr_err(&algo_clnt[idx].err_info, 0, ERR_HISTORY);
		} else {
			/* Sampling: now wait for the clear temperature. */
			thresh.thresh.low_valid = 1;
			thresh.thresh.low = algo_clnt[idx].active_set_point_clr;
			sampling_groups[s_id].ss_clnt_state |= (0x1U << idx);
		}

		pthread_mutex_lock(&wait_mutex);
		/* Mark off handled item */
		thresh_reached_mask &= (~(0x1U << idx));
		pthread_mutex_unlock(&wait_mutex);

		/* Set Thresh */
		/* Re-program the sensor with the new threshold request. */
		sensors_manager_set_thresh_lvl(algo_clnt[idx].sensor_clnt, &thresh);
	}

	/* Adjust the sampling interval period. */
	for(idx = 0; idx < sampling_group_count; idx++) {
		if (sampling_groups[idx].ss_clnt_state) {
			if (sampling_groups[idx].enabled)
				continue;

			TIMER_SAMPLING_SET(sampling_groups[idx],
					   sampling_groups[idx].sampling);
			/* Arm the group's timer. */
			ret_val = settimer(&sampling_groups[idx]);
			if (ret_val)
				msg("%s: Error cannot settime.\n", __func__);
			else
				sampling_groups[idx].enabled = 1;
		} else {
			if (sampling_groups[idx].enabled == 0)
				continue;

			/* Disarm the timer */
			TIMER_SAMPLING_SET(sampling_groups[idx], 0);
			ret_val = settimer(&sampling_groups[idx]);
			if (ret_val)
				msg("%s: Error cannot settime.\n", __func__);
			else
				sampling_groups[idx].enabled = 0;
		}
	}
}

定时器的回调函数我们再来看下,会设置TIMER_EV的掩码,然后broadcast condition。因此最后定时器到了之后会执行handle_timer_sig函数。

/*
 * timer_expired() - timer callback.
 *
 * @id: index used as the bit position in sampling_group_mask.
 *
 * Under wait_mutex: flags TIMER_EV in ev_mask, marks the group in
 * sampling_group_mask and broadcasts wait_cond.
 */
static void timer_expired(int id)
{
	const unsigned int group_bit = 0x1U << id;

	pthread_mutex_lock(&wait_mutex);
	sampling_group_mask |= group_bit;
	ev_mask |= TIMER_EV;
	/* Wake the thread waiting on wait_cond. */
	pthread_cond_broadcast(&wait_cond);
	pthread_mutex_unlock(&wait_mutex);
}

但是之前从sensor的监控调用来的函数thresh_notify,会先调一次handle_timer_sig函数。大致的意思是温度超过设定值就降频,并且重新设置定时器监控;温度低于设定值就升频,升到最高频还没有到设定值,就取消这个setting的算法监控。

/*
 * handle_timer_sig() - run one control step for every sampling group flagged
 * in local_sampling_group_mask.
 *
 * NOTE(review): the "......" line marks code left out of this excerpt; the
 * local declarations and the assignment of local_sampling_group_mask are
 * among the omitted lines.
 *
 * For each sampling instance of a flagged group the temperature is read and
 * the error (active_set_point - temp) is recorded.  A negative error, or a
 * zero error while at level 0, increases mitigation; otherwise mitigation is
 * decreased, and once level 0 is reached the instance is put into
 * SS_STATE_STOP_SAMPLING and handle_thresh_sig() is called.  Finally the
 * timers of handled groups that still have sampling instances are restarted.
 */
static void handle_timer_sig(void)
{
	......

	/* Current monotonic time, used to count elapsed sampling periods. */
	ret_val = clock_gettime(CLOCK_MONOTONIC, &curr_time);
	if (ret_val) {
		msg("%s: Error getting time.\n", __func__);
		return;
	}

	/* Get temp and handle */
	for (sample_id = 0; sample_id < sampling_group_count; sample_id++) {
		if ((local_sampling_group_mask & (0x1U << sample_id)) == 0)
			continue;

		/*
		 * Milliseconds elapsed since the group's timer_start, divided
		 * by the group's sampling period.
		 */
		sample_cnt = (uint32_t)(((curr_time.tv_sec -
			       sampling_groups[sample_id].timer_start.tv_sec) *
			       1000) + ((curr_time.tv_nsec -
			       sampling_groups[sample_id].timer_start.tv_nsec) /
			       1000000)) / sampling_groups[sample_id].sampling;

		/* Clamp to ERR_HISTORY. */
		if (sample_cnt > ERR_HISTORY)
			sample_cnt = ERR_HISTORY;

		for (idx = 0; idx < clnt_cnt; idx++) {
			if (algo_clnt[idx].disabled)
				continue;

			/* Instance is not sampling in this group. */
			if ((sampling_groups[sample_id].ss_clnt_state &
			    (0x1U << idx)) == 0)
				continue;

			if ((algo_clnt[idx].state == SS_STATE_STOP_ALGO) ||
			    (algo_clnt[idx].state == SS_STATE_STOP_SAMPLING))
				continue;


			temp = sensors_manager_read(algo_clnt[idx].sensor_clnt);

			/* Converge to set_point temperature when mitigating */
			/* Negative when the temperature is above the set point. */
			error = algo_clnt[idx].active_set_point - temp;

			set_curr_err(&algo_clnt[idx].err_info, error,
				     sample_cnt);


			/*
			 * sample_cnt == 0: less than one full sampling period
			 * has elapsed since timer_start; use the fresh error.
			 */
			if (sample_cnt == 0)
				E0 = error;
			else
				E0 = get_err(&algo_clnt[idx].err_info, 0);
			E1 = get_err(&algo_clnt[idx].err_info, -1);

			/*
			 * With a time constant above 1, an unchanged error
			 * only triggers an adjustment once tc_delay has
			 * reached the time constant.
			 */
			if ((algo_clnt[idx].setting->time_constant > 1) &&
			    (E0 == E1)) {
				algo_clnt[idx].tc_delay++;

				if (algo_clnt[idx].tc_delay <
				    algo_clnt[idx].setting->time_constant)
					continue;
			}

			if (E0 < 0 || (E0 == 0 && algo_clnt[idx].curr_lvl <= 0)) {
				/* Negative error means Hot */
				/* Step to the next mitigation level. */
				increase_mitigation_lvl(&algo_clnt[idx]);
			} else {
				/* Not hot: step mitigation back by one level. */
				decrease_mitigation_lvl(&algo_clnt[idx]);
				/* Stop sampling if the freq is at max */
				if (algo_clnt[idx].curr_lvl <= 0) {
					pthread_mutex_lock(&wait_mutex);
					thresh_reached_mask |= (0x1U << idx);
					pthread_mutex_unlock(&wait_mutex);
					algo_clnt[idx].state = SS_STATE_STOP_SAMPLING;
					/*
					 * Processes this instance right away;
					 * in STOP_SAMPLING its bit is cleared
					 * from the group's ss_clnt_state.
					 */
					handle_thresh_sig();
				}
			}

			algo_clnt[idx].tc_delay = 0;
		}
		pthread_mutex_lock(&wait_mutex);
		/* Mark off handled item */
		sampling_group_mask &= (~(0x1U << sample_id));
		pthread_mutex_unlock(&wait_mutex);
	}

	/* Restart handled timers */
	for (sample_id = 0; sample_id < sampling_group_count; sample_id++) {
		/* Skip groups not handled above or with no sampling instance. */
		if (((local_sampling_group_mask & (0x1U << sample_id)) == 0) ||
		    (sampling_groups[sample_id].ss_clnt_state == 0))
			continue;

		/* Re-arm the group's timer. */
		ret_val = settimer(&sampling_groups[sample_id]);
		if (ret_val)
			msg("%s: Error cannot settime.\n", __func__);
	}
}

我们再来看increase_mitigation_lvl降频的处理。因为我们保存的频率档位是降序的,所以只要将index往后加,对应档位的频率就会降低,然后再向device申请处理。有一点需要注意:当设置了dev_mtgn_max_limit时,频率最低只能限到这个设置的值,如果下一档的频率比它还低,就直接return了。

/*
 * increase_mitigation_lvl() - move an SS instance one step further into
 * mitigation and request the new operating value from the device.
 *
 * @instance: SS instance to adjust.
 *
 * Nothing happens when the instance already sits on the last level, when the
 * next level's value is below a non-zero dev_mtgn_max_limit, or when the next
 * level's perf_lvl is below a non-zero device_perf_floor.  The result of the
 * device request is not examined.
 */
static void increase_mitigation_lvl(struct ss_algo_t *instance)
{
	union device_request req;
	int next_lvl;

	/* Nothing to be done at max level. */
	if (instance->curr_lvl >= (int)(instance->num_of_lvls - 1))
		return;

	next_lvl = instance->curr_lvl + 1;

	/* A non-zero limit bounds how low the requested value may go. */
	if (instance->dev_mtgn_max_limit &&
	    instance->lvl_arr[next_lvl].lvl.value <
			instance->dev_mtgn_max_limit) {
		dbgmsg("Already at device mitigation max limit\n");
		return;
	}

	if (instance->device_perf_floor &&
	    instance->lvl_arr[next_lvl].perf_lvl <
			instance->device_perf_floor) {
		dbgmsg("Already at device mitigation min perf limit\n");
		return;
	}

	/* Commit the step and mirror the level's value into dev_op_lvl. */
	instance->curr_lvl = next_lvl;
	instance->dev_op_lvl = instance->lvl_arr[instance->curr_lvl].lvl.value;

	req.value = instance->dev_op_lvl;
	(void)device_clnt_request(instance->dev_clnt, &req);
}

升频处理decrease_mitigation_lvl,这个和降频的原理一样。

/*
 * decrease_mitigation_lvl() - move an SS instance one step back out of
 * mitigation and request the new operating value from the device.
 *
 * @instance: SS instance to adjust.
 *
 * Does nothing when the instance is already at level 0 (or below).  The
 * result of the device request is not examined.
 */
static void decrease_mitigation_lvl(struct ss_algo_t *instance)
{
	union device_request req;

	if (instance->curr_lvl > 0) {
		/* Step back to the previous entry of lvl_arr. */
		instance->curr_lvl--;
		instance->dev_op_lvl =
			instance->lvl_arr[instance->curr_lvl].lvl.value;

		req.value = instance->dev_op_lvl;
		thermalmsg(LOG_LVL_INFO, LOG_TRACE, "%s: %s: %s=%s %s=%d%s",
			CPU_FREQ_PRE_REQ, instance->desc, DEVICE,
			instance->setting->device, FREQUENCY, req.value,
			instance->device_units);
		(void)device_clnt_request(instance->dev_clnt, &req);
	}
}

5. 总结

这样我们就把ss算法分析完了,大致就是sensor的监控,监控到温度值比某个ss算法的setting的设置的温度值高。这个时候就会开启算法流程,然后开始算法的监控,并且不断的设置定时器对温度监控,小于设置的温度时调高频率,大于设置的温度时调低频率,而当调整到最大频率并且温度低于设置的温度时就停止算法监控。或者当sensor监控的函数,监控到温度低于point_clr时,直接结束ss算法这个setting的监控。

	/* Example SS configuration entry. */
	{
		.desc = "SS-CPU1-3",
		.algo_type = SS_ALGO_TYPE,
		.data.ss =
		{
			/* Sensor to read and device to mitigate. */
			.sensor = "tsens_tz_sensor4",
			.device = "cpu",
			/* Sampling period in milliseconds. */
			.sampling_period_ms = 65,
			/* Trigger temperature (logged with unit mC by ss_algo_init). */
			.set_point = 85000,
			/* Clear temperature, same unit. */
			.set_point_clr = 55000,
		},
	},

 

  • 3
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值