关键词:Zone、Cooling、Governor、Step Wise、Fair Share、trip等等。
Linux Thermal的目的是控制系统运行过程中采样点温度,避免温度过高造成器件损坏,确保芯片长期稳定工作。
整个Thermal框架可以分为四部分:
- Thermal Driver负责将获取温度设备,注册成struct thermal_zone_device,比如Temp Sensor、NTC等。
- Thermal Governor则负责如何控制温度,注册成struct thermal_governor,比如Step Wise、Bang Bang等等。
- Thermal Cooling负责将控制温度设备,注册成struct thermal_cooling_device,比如风扇、CPU、DDR、GPU等。
- Thermal Core则是Thermal Driver、Thermal Governor、Thermal Cooling的粘合剂,同时提供了用户空间sysfs节点等通用功能。
所以Thermal的工作流程是通过Thermal Driver获取温度,然后经过Thermal Governor决策,最后通过Thermal Cooling执行温度控制。
下面首先从总体详细分析Thermal框架以及数据结构、API(1. Thermal框架分析),然后分别分析Thermal Driver实例(2. Thermal Driver实例)、Thermal Governor(Step Wise和Fair Share)(3. Thermal Governor分析)、以及Thermal Cooling实例(4. Thermal Cooling实例)。
最后将这些内容串起来,分析Thermal是如何控制温度的。
1. Thermal框架分析
1.1 Thermal数据结构
struct thermal_zone_device是对获取温度设备的抽象,成员ops是对该Thermal Zone操作的抽象;governor是该Thermal Zone所使用的调温策略;thermal_instances是该Thermal Zone下的Cooling Device列表。
struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; struct device device; struct thermal_attr *trip_temp_attrs; struct thermal_attr *trip_type_attrs; struct thermal_attr *trip_hyst_attrs; void *devdata; int trips;---------------------------------------------------------thermal zone支持的trip数目。 unsigned long trips_disabled; /* bitmap for disabled trips */ int passive_delay; int polling_delay;-------------------------------------------------轮询读取温度的间隔,0表示采用中断形式。 int temperature;---------------------------------------------------当前温度。 int last_temperature;----------------------------------------------最近一次温度。 int emul_temperature; int passive; int prev_low_trip; int prev_high_trip; unsigned int forced_passive; atomic_t need_update; struct thermal_zone_device_ops *ops;------------------------------当前thermal zone操作函数集。 struct thermal_zone_params *tzp;----------------------------------当前thermal zone参数。 struct thermal_governor *governor; void *governor_data; struct list_head thermal_instances;-------------------------------当前thermal zone上thermal_instances列表。 struct idr idr; struct mutex lock; struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; }; struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *);------------------------将cooling device绑定到thermal zone中,两者通过struct thermal_instance在thermal_zone_bind_cooling_device()中绑定。 int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*get_temp) (struct thermal_zone_device *, int *); int (*set_trips) (struct thermal_zone_device *, int, int); int (*get_mode) (struct thermal_zone_device *, enum thermal_device_mode *); int (*set_mode) (struct thermal_zone_device *, enum thermal_device_mode); int (*get_trip_type) (struct thermal_zone_device *, int, enum thermal_trip_type *); int (*get_trip_temp) (struct thermal_zone_device *, int, int *); int (*set_trip_temp) (struct thermal_zone_device *, int, 
int); int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, enum thermal_trip_type); }; struct thermal_bind_params { struct thermal_cooling_device *cdev; int weight; int trip_mask; unsigned long *binding_limits; int (*match) (struct thermal_zone_device *tz, struct thermal_cooling_device *cdev); }; struct thermal_zone_params { char governor_name[THERMAL_NAME_LENGTH]; bool no_hwmon; int num_tbps; /* Number of tbp entries */ struct thermal_bind_params *tbp; ... int slope; int offset; }; struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); int (*get_trend)(void *, int, enum thermal_trend *); int (*set_trips)(void *, int, int); int (*set_emul_temp)(void *, int); int (*set_trip_temp)(void *, int, int); };
struct thermal_cooling_device是对降温设备的抽象,对风扇设备就是不同的转速,对CPU、DDR、GPU就是不同的电压或者频率。
struct thermal_cooling_device_ops是Cooling Device操作函数集,其中set_cur_state()是对设备进行温度控制。
struct thermal_cooling_device { int id; char type[THERMAL_NAME_LENGTH]; struct device device; struct device_node *np; void *devdata; const struct thermal_cooling_device_ops *ops; bool updated; /* true if the cooling device does not need update */ struct mutex lock; /* protect thermal_instances list */ struct list_head thermal_instances; struct list_head node; }; struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); ... };
struct thermal_governor是对温控策略的抽象,也就是根据Thermal Zone的trip来选择Thermal Cooling设备的行为。比如,温度越高风扇转速越快;温度越高CPU运行在更低电压和频率上。
struct thermal_governor { char name[THERMAL_NAME_LENGTH]; int (*bind_to_tz)(struct thermal_zone_device *tz);---------------------将一个governor绑定到thermal zone的一个trip上。 void (*unbind_from_tz)(struct thermal_zone_device *tz);----------------将一个governor从thermal zone解绑。 int (*throttle)(struct thermal_zone_device *tz, int trip);-------------根据trip遍历当前thermal zone下所有的cooling device执行温控策略。 struct list_head governor_list;-------------------------------------thermal_governor_list上的一个列表元素。 };
所有的策略选择都是通过throttle()函数进行的,不同的Governor的区别也主要在这里。内核已经实现了Step Wise、User等等,并且还在演进中。
通过struct thermal_instance可以将thermal zone和thermal cooling设备绑定起来。
struct thermal_instance { int id; char name[THERMAL_NAME_LENGTH]; struct thermal_zone_device *tz;-------------------------------------------绑定的thermal zone。 struct thermal_cooling_device *cdev;--------------------------------------绑定的thermal cooling设备。 int trip;-----------------------------------------------------------------对应的thermal zone的trip。 bool initialized; unsigned long upper; /* Highest cooling state for this trip point */---cooling设备的最高降温状态。 unsigned long lower; /* Lowest cooling state for this trip point */----cooling设备最低降温状态。 unsigned long target; /* expected cooling state */---------------------cooling设备的当前状态,也是thermal_cooling_device_ops->set_cur_state()设置后的值。 char attr_name[THERMAL_NAME_LENGTH]; struct device_attribute attr; char weight_attr_name[THERMAL_NAME_LENGTH]; struct device_attribute weight_attr; struct list_head tz_node; /* node in tz->thermal_instances */-------------thermal_zone_device->thermal_instances上的节点。 struct list_head cdev_node; /* node in cdev->thermal_instances */---------thermal_cooling_device->thermal_instances上的节点。 unsigned int weight; /* The weight of the cooling device */ };
thermal_device_mode表示当前的thermal zone是否使能。
thermal_trip_type表示thermal zone的当前trip类型,其中ACTIVE和PASSIVE属于non-critical类型,交由Governor进行处理;HOT和CRITICAL属于critical类型,其中CRITICAL会执行orderly_poweroff()。
thermal_trend表示thermal zone的温度趋势,是平缓、上升、下降还是跳跃式的,这就给Governor选择trip提供依据。
enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, THERMAL_DEVICE_ENABLED, }; enum thermal_trip_type { THERMAL_TRIP_ACTIVE = 0, THERMAL_TRIP_PASSIVE, THERMAL_TRIP_HOT, THERMAL_TRIP_CRITICAL, }; enum thermal_trend { THERMAL_TREND_STABLE, /* temperature is stable */-----------------------表示温度平稳。 THERMAL_TREND_RAISING, /* temperature is raising */---------------------表示当前温度趋势是升高的。 THERMAL_TREND_DROPPING, /* temperature is dropping */-------------------表示当前温度趋势是降低的。 THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */------------直接应用upper对应的trip。 THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */--------------直接应用lower对应的trip。 }; /* Thermal notification reason */ enum thermal_notify_event { THERMAL_EVENT_UNSPECIFIED, /* Unspecified event */ THERMAL_EVENT_TEMP_SAMPLE, /* New Temperature sample */ THERMAL_TRIP_VIOLATED, /* TRIP Point violation */ THERMAL_TRIP_CHANGED, /* TRIP Point temperature changed */ THERMAL_DEVICE_DOWN, /* Thermal device is down */ THERMAL_DEVICE_UP, /* Thermal device is up after a down event */ THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ };
1.2 Thermal Core APIs
Thermal core是Thermal Zone、Thermal Cooling、Thermal Governor的粘合剂。
通过Thermal core提供的API,将这三者相互关联起来;从Thermal Zone设备获取温度,选择对应的Thermal Governor,Thermal Governor设置Thermal Cooling的状态,进而达到控制温度的目的。
通过thermal_zone_device_register()注册thermal zone设备,创建一系列sysfs节点,并且和governor、cooling进行绑定。
struct thermal_zone_device *thermal_zone_device_register(const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, struct thermal_zone_params *tzp, int passive_delay, int polling_delay) { struct thermal_zone_device *tz; enum thermal_trip_type trip_type; int trip_temp; int result; int count; int passive = 0; struct thermal_governor *governor; if (type && strlen(type) >= THERMAL_NAME_LENGTH) return ERR_PTR(-EINVAL); if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) return ERR_PTR(-EINVAL); if (!ops) return ERR_PTR(-EINVAL); if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp)) return ERR_PTR(-EINVAL); tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL); if (!tz) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&tz->thermal_instances);------------------------------初始化thermal_instances链表,放置struct thermal_instances实例。通过thermal_instances可以关联thermal zone和thermal cooling。 idr_init(&tz->idr); mutex_init(&tz->lock); result = get_idr(&thermal_tz_idr, &thermal_idr_lock, &tz->id); if (result) { kfree(tz); return ERR_PTR(result); } strlcpy(tz->type, type ? : "", sizeof(tz->type)); tz->ops = ops; tz->tzp = tzp; tz->device.class = &thermal_class;------------------------------------创建的设备会在/sys/class/thermal下面有个链接。 tz->devdata = d