MTK温控简介

        俗话说,不懂温控的性能工程师不是好司机。性能与温控一直都是相爱相杀,既有要求第三方整改时的同仇敌忾,又有因一方修改参数导致另一方指标劣化时的拍案而起。知己知彼方能百战不败,了解温控框架,是性能工程师的必备技能。

        MTK温控框架基于Linux的Thermal机制。Linux的Thermal机制是以Zone为单位的热管理机制,核心包括三个部分:获取区域温度的设备thermal_zone_device,对区域降温的设备thermal_cooling_device,温控策略thermal_governor。thermal_governor从thermal_zone_device获取区域温度,然后根据当前温度,决定调用哪个降温设备来为该区域降温。总体上说,哪个区域发热,就调用影响该区域温度的降温设备来为该区域降温,也就是头痛医头,脚痛医脚。

         MTK温控框架如下图所示。service thermal声明为main类型,init进程在启动main类型的service时会启动thermal进程。thermal进程解析thermal.conf配置文件,根据文件配置的参数设置Linux Thermal Core,设置参数的节点位于/proc/driver/thermal/目录下。其中温控SOC目标功耗会发送给PPM模块,PPM将目标功耗转换为拔核或限频。

 一、Linux Thermal Core

        内核将采集区域温度的设备抽象为结构体struct thermal_zone_device,主要成员包括:char type[]设备名称;int temperature当前温度;int last_temperature上次采集温度;struct thermal_governor *governor对应governor;int polling_delay温度采集时间间隔等等。其中struct thermal_zone_device_ops *ops是采集区域温度设备的操作抽象,包括绑定降温设备、获取设备温度等。

/*
 * Kernel abstraction of a device that samples the temperature of one zone.
 * Excerpt only: members replaced by dotted lines are elided in this listing.
 */
struct thermal_zone_device {
	int id;                             // each thermal_zone_device has its own id
	char type[THERMAL_NAME_LENGTH];     // device name
    .................................
	int polling_delay;                  // temperature sampling interval
	int temperature;                    // current temperature
	int last_temperature;                // temperature from the previous sample
    .................................
	struct thermal_zone_device_ops *ops;   // operation set of this zone device
	struct thermal_governor *governor;     // thermal policy (governor) of this zone
	struct list_head thermal_instances;    // links the associated cooling devices
	................................
};
/*
 * Operations of a temperature-sampling (zone) device.
 * Excerpt only: further callbacks are elided in this listing.
 */
struct thermal_zone_device_ops {
    // bind a cooling device to the zone
	int (*bind) (struct thermal_zone_device *,
		     struct thermal_cooling_device *);
	// unbind a cooling device from the zone
	int (*unbind) (struct thermal_zone_device *,
		       struct thermal_cooling_device *);
	// read the current temperature
	int (*get_temp) (struct thermal_zone_device *, int *);
	// read the temperature that corresponds to a trip level
	int (*get_trip_temp) (struct thermal_zone_device *, int, int *);
    ..................................................................
};

        内核将温控策略抽象为结构体struct thermal_governor,主要成员包括:char name[THERMAL_NAME_LENGTH]策略名称;int (*throttle)()温控决策等等。

/* Kernel abstraction of a thermal policy (governor). */
struct thermal_governor {
	char name[THERMAL_NAME_LENGTH];	/* policy name; looked up by __find_governor() */
	/* NOTE(review): presumably invoked when the governor is attached to a zone - confirm */
	int (*bind_to_tz)(struct thermal_zone_device *tz);
	/* NOTE(review): presumably invoked when the governor is detached from a zone - confirm */
	void (*unbind_from_tz)(struct thermal_zone_device *tz);
	/* thermal decision for the given zone and trip level */
	int (*throttle)(struct thermal_zone_device *tz, int trip);
	struct list_head	governor_list;	/* entry in thermal_governor_list */
};

        执行温控策略的设备称为区域降温设备,内核抽象为结构体struct thermal_cooling_device,struct thermal_cooling_device_ops是区域降温设备的操作集合。

/* Kernel abstraction of a device that cools a zone by executing the thermal policy. */
struct thermal_cooling_device {
	int id;                           // each thermal_cooling_device has its own id
	char type[THERMAL_NAME_LENGTH];   // device name
	struct device device;
	struct device_node *np;           // device node passed in at registration
	void *devdata;                    // private data passed in at registration
	const struct thermal_cooling_device_ops *ops;   // operation set of this cooling device
	bool updated; /* true if the cooling device does not need update */
	struct mutex lock; /* protect thermal_instances list */
	struct list_head thermal_instances;
	struct list_head node;            // entry in thermal_cdev_list
};
/* Operations of a cooling device. */
struct thermal_cooling_device_ops {
	int (*get_max_state) (struct thermal_cooling_device *, unsigned long *);   // get the total number of states, i.e. the cooling levels
	int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *);    // get the current state
	int (*set_cur_state) (struct thermal_cooling_device *, unsigned long);      // set the state
	/* NOTE(review): the three hooks below look like power/state conversions judging by their names; semantics are not shown here - confirm */
	int (*get_requested_power)(struct thermal_cooling_device *,
				   struct thermal_zone_device *, u32 *);
	int (*state2power)(struct thermal_cooling_device *,
			   struct thermal_zone_device *, unsigned long, u32 *);
	int (*power2state)(struct thermal_cooling_device *,
			   struct thermal_zone_device *, u32, unsigned long *);
};

1.1初始化

        thermal_governor注册:调用thermal_register_governor()注册thermal_governor,所有注册的thermal_governor链接到thermal_governor_list。在尚未设置默认governor时,名称与DEFAULT_THERMAL_GOVERNOR相同的thermal_governor会被设置为默认governor,即def_governor。

/*
 * Register a thermal governor (excerpt; the tail of the function is elided).
 *
 * Returns -EINVAL for a NULL governor. err starts as -EBUSY and is cleared to
 * 0 only when __find_governor() finds no governor with the same name.
 */
int thermal_register_governor(struct thermal_governor *governor)
{
	int err;
	const char *name;
	struct thermal_zone_device *pos;

	if (!governor)
		return -EINVAL;

	mutex_lock(&thermal_governor_lock);

	err = -EBUSY;
	if (__find_governor(governor->name) == NULL) {
		err = 0;
		// link the governor into thermal_governor_list
		list_add(&governor->governor_list, &thermal_governor_list);
		if (!def_governor && !strncmp(governor->name,
			DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH))
			def_governor = governor; // no default yet and the name matches DEFAULT_THERMAL_GOVERNOR
	}
    .................................................................
}

         thermal_zone_device注册:调用thermal_zone_device_register()注册thermal_zone_device,所有注册的区域温度采集设备,链接到thermal_tz_list。

/*
 * Register a temperature-sampling (zone) device (excerpt; dotted lines mark
 * elided code, including the error handling).
 *
 * Visible steps: allocate and initialise the zone, pick its governor, append
 * it to thermal_tz_list, try to bind already registered cooling devices, then
 * trigger the first update. Returns the new zone on the visible path.
 */
struct thermal_zone_device *thermal_zone_device_register(const char *type,
	int trips, int mask, void *devdata,
	struct thermal_zone_device_ops *ops,
	struct thermal_zone_params *tzp,
	int passive_delay, int polling_delay)
{
	struct thermal_zone_device *tz;
	enum thermal_trip_type trip_type;
	int trip_temp;
	int result;
	int count;
	int passive = 0;
	struct thermal_governor *governor;
	.........................................................................
    // allocate zero-initialised memory for the zone
	tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL);
    .........................................................................
	// initialise the idr and obtain an id for the zone
	idr_init(&tz->idr);
	mutex_init(&tz->lock);
	result = get_idr(&thermal_tz_idr, &thermal_idr_lock, &tz->id);
	..........................................................................
	strlcpy(tz->type, type ? : "", sizeof(tz->type));  // set the name (empty string when type is NULL)
	tz->ops = ops;    // operation set
	tz->tzp = tzp;    // zone parameters
	tz->device.class = &thermal_class;
	tz->devdata = devdata;
	tz->trips = trips;
	tz->passive_delay = passive_delay;
	tz->polling_delay = polling_delay;   // temperature sampling interval
	/* A new thermal zone needs to be updated anyway. */
	atomic_set(&tz->need_update, 1);
    ........................................................................
	// select the governor: by name from tzp when tzp is given, otherwise the default one
	if (tz->tzp)
		governor = __find_governor(tz->tzp->governor_name);
	else
		governor = def_governor;
    .......................................................................
	// append the zone to thermal_tz_list
	mutex_lock(&thermal_list_lock);
	list_add_tail(&tz->node, &thermal_tz_list);
	mutex_unlock(&thermal_list_lock);

	/* try to bind the cooling devices that are already registered */
	bind_tz(tz);

	thermal_zone_device_reset(tz);
	/* Update the new thermal zone and mark it as already updated. */
	if (atomic_cmpxchg(&tz->need_update, 1, 0))
		thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

	return tz;
    .....................................................................
}

        thermal_cooling_device注册:调用thermal_cooling_device_register()注册thermal_cooling_device,所有注册的区域降温设备,链接到thermal_cdev_list。 

/*
 * Register a cooling device (excerpt; dotted lines mark elided code,
 * including the error handling).
 *
 * Visible steps: allocate and initialise the device, add it to
 * thermal_cdev_list, try to bind it to already registered zones, then update
 * every zone whose need_update flag is still set. Returns the new device on
 * the visible path.
 */
static struct thermal_cooling_device *
__thermal_cooling_device_register(struct device_node *np,
				  char *type, void *devdata,
				  const struct thermal_cooling_device_ops *ops)
{
	struct thermal_cooling_device *cdev;
	struct thermal_zone_device *pos = NULL;
	int result;
    ......................................................................
    // allocate zero-initialised memory for the cooling device
	cdev = kzalloc(sizeof(struct thermal_cooling_device), GFP_KERNEL);
    .....................................................................
	// initialise the members (type falls back to an empty string when NULL)
	strlcpy(cdev->type, type ? : "", sizeof(cdev->type));
	mutex_init(&cdev->lock);
	INIT_LIST_HEAD(&cdev->thermal_instances);
	cdev->np = np;
	cdev->ops = ops;
	cdev->updated = false;
	cdev->device.class = &thermal_class;
	cdev->device.groups = cooling_device_attr_groups;
	cdev->devdata = devdata;
	.....................................................................
	// add the device to thermal_cdev_list
	mutex_lock(&thermal_list_lock);
	list_add(&cdev->node, &thermal_cdev_list);
	mutex_unlock(&thermal_list_lock);

	/* try to bind the temperature-sampling devices that are already registered */
	bind_cdev(cdev);

	/* update each zone whose need_update flag is set, clearing the flag atomically */
	mutex_lock(&thermal_list_lock);
	list_for_each_entry(pos, &thermal_tz_list, node)
		if (atomic_cmpxchg(&pos->need_update, 1, 0))
			thermal_zone_device_update(pos,
						   THERMAL_EVENT_UNSPECIFIED);
	mutex_unlock(&thermal_list_lock);

	return cdev;
}

1.2温度采集设备与降温设备的联系

        同一个温度采集设备可以对应多个降温设备,以mtkcpu为例,有mtktscpu-sysrst、cpu_adaptive_0、cpu_adaptive_1等多个降温设备,这些设备有各自的触发温度。结构体struct thermal_instance用于连接温度采集设备与降温设备,成员struct thermal_zone_device *tz是对应的温度采集设备,struct thermal_cooling_device *cdev是对应的降温设备,int trip是触发等级(对应一个温度)。当温度采集设备采集的温度达到一定值时,调用对应trip等级的降温设备。

/*
 * Connects one temperature-sampling (zone) device with one cooling device at
 * a given trip level. Excerpt only: dotted lines mark elided members.
 */
struct thermal_instance {
    .................................................................
	struct thermal_zone_device *tz;      // the temperature-sampling device
	struct thermal_cooling_device *cdev; // the cooling device
	int trip;                            // trip level
	struct list_head tz_node;            // links into the temperature-sampling device
	struct list_head cdev_node;          // links into the cooling device
    .................................................................
};

         以温度采集设备绑定降温设备为例,当温度采集设备注册时会尝试绑定所有已经注册的降温设备。以CPU为例,bind接口对应的是tscpu_bind(),从代码中可以看出,如果降温设备的名称为g_bind0--g_bind9中的一个,将会绑定CPU温度采集设备和降温设备。tscpu_bind()接口中也定义了各种名称降温设备对应的触发等级。

/*
 * Try to bind a newly registered zone to the cooling devices that are already
 * registered (excerpt; the remainder, including the exit label, is elided).
 *
 * Returns early when the zone has neither parameters nor a bind callback.
 */
static void bind_tz(struct thermal_zone_device *tz)
{
	int i, ret;
	struct thermal_cooling_device *pos = NULL;
	const struct thermal_zone_params *tzp = tz->tzp;

	if (!tzp && !tz->ops->bind)
		return;

	mutex_lock(&thermal_list_lock);
	if (tz->ops->bind) {
	    // offer every registered cooling device to the zone's bind callback
		list_for_each_entry(pos, &thermal_cdev_list, node) {
			ret = tz->ops->bind(tz, pos);
			if (ret)
				print_bind_err_msg(tz, pos, ret);
		}
		goto exit;
	}
    ...........................................................
}
/*
 * bind callback of the CPU zone (excerpt; part of the else-if chain is elided).
 *
 * Maps the cooling device name (g_bind0, g_bind1, ...) to a trip level stored
 * in table_val and binds the device at that level. A cooling device whose name
 * matches none of the entries is ignored and 0 is returned. Returns -EINVAL
 * when mtk_thermal_zone_bind_cooling_device() fails.
 */
static int tscpu_bind(struct thermal_zone_device *thermal, struct thermal_cooling_device *cdev)
{
	int table_val = 0;

	if (!strcmp(cdev->type, g_bind0)) {
		table_val = 0;
		tscpu_config_all_tc_hw_protect(trip_temp[0], tc_mid_trip);
	} else if (!strcmp(cdev->type, g_bind1)) {
		table_val = 1;
		tc_mid_trip = trip_temp[1];
		tscpu_config_all_tc_hw_protect(trip_temp[0], tc_mid_trip);
	} else if (!strcmp(cdev->type, g_bind2)) {
		table_val = 2;
	} else if (!strcmp(cdev->type, g_bind3)) {
		table_val = 3;
	} else if (!strcmp(cdev->type, g_bind4)) {
    .....................................................
	} else {
		return 0;
	}
    // bind the zone and the cooling device using table_val as the trip level
	if (mtk_thermal_zone_bind_cooling_device(thermal, table_val, cdev)) {
		tscpu_warn("tscpu_bind error binding cooling dev\n");
		return -EINVAL;
	}

	tscpu_printk("tscpu_bind binding OK, %d\n", table_val);
	return 0;
}

        温度采集设备知道了触发等级和降温设备,还需要知道触发等级对应的温度。thermal_zone_device_ops的get_trip_temp()用于查询触发等级对应的温度,以mtkcpu为例,所有降温设备的触发温度保存在数组trip_temp中,触发等级就是该数组的下标。

/*
 * get_trip_temp callback of the CPU zone.
 *
 * Stores the trigger temperature of trip level @trip into *@temp. The trip
 * level is used directly as the index into trip_temp[]; no range check is
 * performed here. Always returns 0.
 */
static int tscpu_get_trip_temp(struct thermal_zone_device *thermal, int trip,
			       int *temp)
{
	const int trigger = trip_temp[trip];

	*temp = trigger;
	return 0;
}

        温度采集设备与降温设备的结构体关系如下。

1.3温控循环过程

        Thermal Core使用delayed work循环处理,使整个thermal控制流程运作起来,整体流程如下图所示。注册thermal_zone_device时会调用bind_tz()尝试绑定它的降温设备,当对应的降温设备已经注册时会绑定成功。同理,当注册thermal_cooling_device时也会触发thermal_zone_device来绑定自己。当上述两种情况绑定成功,都会调用到thermal_zone_device_update()进入循环。

1.4thermal_governor决策过程

        绝大多数thermal_zone_device的温控策略都是gov_bang_bang,该策略支持的降温设备只有两种状态----开启和关闭。它的决策过程非常简单:当温度到达触发温度后就启动对应的降温设备,当温度低于触发温度减去迟滞值(trip_hyst)后就关闭对应的降温设备。

static int bang_bang_control(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;

	thermal_zone_trip_update(tz, trip);

	mutex_lock(&tz->lock);

	list_for_each_entry(instance, &tz->thermal_instances, tz_node)
		thermal_cdev_update(instance->cdev);

	mutex_unlock(&tz->lock);

	return 0;
}
/*
 * Update the on/off target of every instance attached to @trip (excerpt;
 * dotted lines mark elided code, which is where trip_temp and trip_hyst are
 * obtained and where tz->lock is presumably taken - the matching unlock is
 * visible at the end).
 */
static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
	int trip_temp, trip_hyst;
	struct thermal_instance *instance;
    ........................................................................
	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		/* only instances bound to this trip level are considered */
		if (instance->trip != trip)
			continue;
        ...................................................................
		// switch on at or above trip_temp; switch off at or below trip_temp - trip_hyst
		if (instance->target == 0 && tz->temperature >= trip_temp)
			instance->target = 1;
		else if (instance->target == 1 &&
				tz->temperature <= trip_temp - trip_hyst)  
			instance->target = 0;

		dev_dbg(&instance->cdev->device, "target=%d\n",
					(int)instance->target);

		mutex_lock(&instance->cdev->lock);
		instance->cdev->updated = false; /* cdev needs update */
		mutex_unlock(&instance->cdev->lock);
	}

	mutex_unlock(&tz->lock);
}

二、MTK温控参数修改

        前面提到thermal进程是通过写目录/proc/driver/thermal/下的节点来配置Thermal Core的,这里我们以电池温控为例跟踪参数设置过程。首先,在模块初始化时会创建节点/proc/driver/thermal/tzbattery。

/*
 * Module init of the battery thermal zone (excerpt; the tail of the function,
 * including the err_unreg label, is elided).
 *
 * Registers the cooler, then the thermal zone, and finally creates the proc
 * node "tzbattery" under the directory returned by
 * mtk_thermal_get_proc_drv_therm_dir_entry().
 */
static int __init mtktsbattery_init(void)
{
	int err = 0;
	struct proc_dir_entry *entry = NULL;
	struct proc_dir_entry *mtktsbattery_dir = NULL;

	mtktsbattery_dprintk("[mtktsbattery_init]\n");

	err = mtktsbattery_register_cooler();
	if (err)
		return err;

	err = mtktsbattery_register_thermal();
	if (err)
		goto err_unreg;

	mtktsbattery_dir = mtk_thermal_get_proc_drv_therm_dir_entry();
	if (!mtktsbattery_dir) {
		/* a missing proc directory is only logged; init continues */
		mtktsbattery_dprintk("%s mkdir /proc/driver/thermal failed\n",
								__func__);
	} else {
		/* create the node with mode 0664 and hand it to uid/gid */
		entry = proc_create("tzbattery", 0664, mtktsbattery_dir,
							&mtkts_battery_fops);
		if (entry)
			proc_set_user(entry, uid, gid);
	}
    .................................................................
}

        thermal进程写节点/proc/driver/thermal/tzbattery的时候,除了保存写入的参数外,还会注销thermal_zone_device,然后再注册thermal_zone_device,再次注册时就用到了新参数。

/*
 * Write handler of the tzbattery proc node (heavily elided excerpt; the
 * parsing code and the enclosing condition of the visible branch are not
 * shown).
 *
 * Visible part: the thermal zone is unregistered, the parameters are updated
 * (elided), the zone is registered again, sem_mutex is released, the parsed
 * data buffer is freed and count is returned.
 */
static ssize_t mtktsbattery_write(struct file *file, const char __user *buffer, size_t count, loff_t *data)
{
	int len = 0, i;
	/* layout of the parsed parameters: 10 trip values and types, 10 bind names, a time value and a description */
	struct mtktsbattery_data {
		int trip[10];
		int t_type[10];
	char bind0[20], bind1[20], bind2[20], bind3[20], bind4[20];
	char bind5[20], bind6[20], bind7[20], bind8[20], bind9[20];
		int time_msec;
	char desc[512];
	};

        .................................................................
        // unregister the thermal zone device
		mtktsbattery_unregister_thermal();
        // update the parameters
		.................................................................
		// register the thermal zone device again
		mtktsbattery_register_thermal();
		up(&sem_mutex);

		kfree(ptr_mtktsbattery_data);
		/* battery_write_flag=1; */
		return count;
	}
    ....................................................................
}

三、MTK常见NTC

名称

描述

mtktscpu

8个核中的最高温度

mtktsap

SOC旁边的NTC温度

mtktsbattery

电池温度

mtktsbtsmdpa

4G PA

mtktsbtsnrpa

5G PA

mtkts_chargerntc

充电IC温度

mtktspmic

PMIC温度

mtktswmt

WiFi温度

mtktspa

射频功放温度

  • 2
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值