linux thermal framework(2)_thermal zone

作者:huowj 发布于:2025-4-14 10:15 分类:电源管理子系统

1. 介绍

linux使用thermal zone来描述一个平台各个区域的温度信息,本文详细介绍thermal_zone相关的函数接口和相关调用过程。

2. thermal zone相关的API以及功能分析

2.1 struct thermal_zone_device

一个thermal zone是用struct thermal_zone_device来表示

/*
 * struct thermal_zone_device - in-kernel representation of one thermal zone.
 *
 * Only the members the accompanying text talks about are annotated as facts;
 * notes marked "presumably" are inferred from member names and should be
 * confirmed against the kernel source.
 */
struct thermal_zone_device {
	int id;					/* zone id; the text notes it is allocated through an IDA at registration */
	char type[THERMAL_NAME_LENGTH];
	struct device device;			/* embedded struct device */
	struct completion removal;
	struct completion resume;
	struct attribute_group trips_attribute_group;
	/* List heads for trips; presumably linked via thermal_trip_desc.list_node -- confirm */
	struct list_head trips_high;
	struct list_head trips_reached;
	struct list_head trips_invalid;
	enum thermal_device_mode mode;
	void *devdata;
	int num_trips;				/* number of elements in trips[] (see __counted_by below) */
	/* Delays kept in jiffies; presumably derived from polling-delay / polling-delay-passive -- confirm */
	unsigned long passive_delay_jiffies;
	unsigned long polling_delay_jiffies;
	unsigned long recheck_delay_jiffies;
	int temperature;
	int last_temperature;
	int emul_temperature;
	int passive;
	int prev_low_trip;
	int prev_high_trip;
	struct thermal_zone_device_ops ops;	/* callback table, stored by value (not a pointer) */
	struct thermal_zone_params *tzp;
	struct thermal_governor *governor;
	void *governor_data;
	struct ida ida;
	struct mutex lock;
	struct list_head node;
	struct delayed_work poll_queue;		/* presumably the periodic temperature-check work -- confirm */
	enum thermal_notify_event notify_event;
	u8 state;
	/* debugfs handle only exists when CONFIG_THERMAL_DEBUGFS is enabled */
#ifdef CONFIG_THERMAL_DEBUGFS
	struct thermal_debugfs *debugfs;
#endif
	struct list_head user_thresholds;
	/* Flexible array holding one descriptor per trip of this zone; length is num_trips */
	struct thermal_trip_desc trips[] __counted_by(num_trips);
};

这里主要说下最后一个成员变量 trips,在概述中我们介绍过,一个thermal zone会有多个温度档位,用trip表示,当thermal zone的温度达到一个档位的时候会有策略介入,其中一个trip用结构体struct thermal_trip_desc来描述,thermal_zone的trips就是它包含的所有trips的数组

/*
 * struct thermal_trip_desc - descriptor for a single trip of a thermal zone.
 *
 * A thermal zone stores an array of these, one per trip.
 */
struct thermal_trip_desc {
	struct thermal_trip trip;		/* the trip this descriptor corresponds to */
	struct thermal_trip_attrs trip_attrs;	/* attributes of the trip, exposed as sysfs file nodes */
	struct list_head list_node;		/* presumably links this trip into one of the zone's trip lists -- confirm */
	struct list_head thermal_instances;	/* describes which cooling devices this trip is bound to */
	int threshold;				/* temperature threshold associated with this trip */
};
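1)trip, 这个结构体对应的trip
2)trip_attrs,相关属性,对应为sysfs中的文件节点
3)list_node,链表节点,用来把这个trip挂到thermal zone的trips链表(如trips_high/trips_reached/trips_invalid)上
4)thermal_instances,thermal instance链表,用来描述这个trip和哪些cooling device绑定;每个thermal instance通过自己的cdev成员指向和它相关的cooling device
5)threshold,这个trip对应的温度档位(阈值)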
2.2 thermal_zone的注册

thermal zone是一个被抽象的虚拟设备。一般情况下,手机主板上会有若干个传感器设备, 这些传感器设备可以通过adc模拟转换芯片将主板上的器件温度模拟信号转换为数字信号,传感器 设备是实际存在的物理设备,一般这些物理传感器设备被称为thermal-sensor。在每个 thermal-sensor驱动初始化(调用probe)时会调用thermal zone注册的接口向 thermal core注册thermal zone。

一般有两种方法供thermal sensor注册thermal zone:一种是使能dts的情况,thermal sensor驱动通过调用devm_thermal_of_zone_register接口自动解析dts节点中的属性以及引用关系来注册;另一种是没有使能dts的情况,thermal sensor驱动内部会初始化相关信息,然后调用thermal_zone_device_register_with_trips(有trips)或者thermal_tripless_zone_device_register(没有trips)注册。现在主流终端厂商的平台基本都是arm架构,使用dts来描述设备信息,我们这里主要分析使能dts的情况。

devm_thermal_of_zone_register的核心逻辑是函数thermal_of_zone_register: 通过解析thermal sensor的device node节点来注册thermal zone。这里说到是解析 thermal sensor的device node节点,需要通过一个实际例子来显示thermal sensor 和thermal zone在device tree中的描述,我们这里选用了mtk的8195平台的dts, 以cpu类型的thermal zone为例。

mt8195.dtsi中thermal_zones节点包含了cpu/gpu/vdec等等子节点,其中cpu0-thermal的节点描述如下:

/* Excerpt: container node holding every thermal zone of the platform. */
thermal_zones: thermal-zones {
	cpu0-thermal {
		/* Polling intervals used when monitoring the zone */
		polling-delay = <1000>;
		polling-delay-passive = <250>;
		/* Sensor phandle plus one cell selecting the sensor id */
		thermal-sensors = <&lvts_mcu MT8195_MCU_LITTLE_CPU0>;

		trips {
			/* passive trip; 85000 corresponds to 85 degrees Celsius */
			cpu0_alert: trip-alert {
				temperature = <85000>;
				hysteresis = <2000>;
				type = "passive";
			};

			/* critical trip; 100000 corresponds to 100 degrees Celsius */
			cpu0_crit: trip-crit {
				temperature = <100000>;
				hysteresis = <2000>;
				type = "critical";
			};
		};

		cooling-maps {
			/* Binds the passive trip to the four cooling devices cpu0..cpu3 */
			map0 {
				trip = <&cpu0_alert>;
				cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
						 <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
						 <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
						 <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
			};
		};
	};

	/* ... other thermal zones ... */
};

cpu0-thermal的thermal-sensors节点引用了lvts_mcu,这个节点在device tree中的描述如下。它使用#thermal-sensor-cells来说明应该使用几个u32来指明sensor。在同平台的dts中,会有多个thermal-zone引用同一个thermal-sensor,一般使用一个u32 id来做区分;在thermal-sensor驱动probe的时候,会遍历所有id来注册多个thermal-zone,每个thermal-zone都对应一个dts的node。

/* Thermal sensor node referenced by the thermal-sensors property of the zones. */
lvts_mcu: thermal-sensor@11278000 {
	compatible = "mediatek,mt8195-lvts-mcu";
	reg = <0 0x11278000 0 0x1000>;
	/*
	 * NOTE(review): the cell list was lost in the original text
	 * ("interrupts = ;"); value restored from upstream mt8195.dtsi -- confirm.
	 */
	interrupts = <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH 0>;
	clocks = <&infracfg_ao CLK_INFRA_AO_THERM>;
	resets = <&infracfg_ao MT8195_INFRA_RST4_THERM_CTRL_MCU_SWRST>;
	nvmem-cells = <&lvts_efuse_data1 &lvts_efuse_data2>;
	nvmem-cell-names = "lvts-calib-data-1", "lvts-calib-data-2";
	/* One u32 cell is used to select a sensor when a zone references this node */
	#thermal-sensor-cells = <1>;
};

我们来看看thermal_of_zone_register接口是如何注册thermal zone的:


/*
 * thermal_of_zone_register - register the thermal zone that references a sensor
 * @sensor: device tree node of the thermal sensor
 * @id:     sensor id, used together with @sensor to look up the zone node
 * @data:   opaque pointer passed through to
 *          thermal_zone_device_register_with_trips()
 * @ops:    zone callbacks; copied into a local table before being adjusted,
 *          so the caller's table is never written
 *
 * Return: the registered and enabled thermal zone, or an ERR_PTR() value if
 * lookup, parsing, registration or enabling fails.
 */
static struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
							    const struct thermal_zone_device_ops *ops)
{
	/* Local copy of the callbacks: should_bind/critical are overridden below */
	struct thermal_zone_device_ops of_ops = *ops;
	struct thermal_zone_device *tz;
	struct thermal_trip *trips;
	struct thermal_zone_params tzp = {};
	struct device_node *np;
	const char *action;
	int delay, pdelay;
	int ntrips;
	int ret;

	/* Find the thermal zone node that references this sensor/id pair */
	np = of_thermal_zone_find(sensor, id);
	if (IS_ERR(np)) {
		/* -ENODEV is propagated silently; every other error is logged */
		if (PTR_ERR(np) != -ENODEV)
			pr_err("Failed to find thermal zone for %pOFn id=%d\n", sensor, id);
		return ERR_CAST(np);
	}

	/* Parse the trip points of the zone node */
	trips = thermal_of_trips_init(np, &ntrips);
	if (IS_ERR(trips)) {
		pr_err("Failed to parse trip points for %pOFn id=%d\n", sensor, id);
		ret = PTR_ERR(trips);
		goto out_of_node_put;
	}

	/* A NULL trips pointer is not an error: log it and carry on */
	if (!trips)
		pr_info("No trip points found for %pOFn id=%d\n", sensor, id);

	/* Fetch the polling delay and the passive polling delay */
	ret = thermal_of_monitor_init(np, &delay, &pdelay);
	if (ret) {
		pr_err("Failed to initialize monitoring delays from %pOFn\n", np);
		goto out_kfree_trips;
	}

	thermal_of_parameters_init(np, &tzp);
	of_ops.should_bind = thermal_of_should_bind;

	/*
	 * Install the reboot handler only when the "critical-action" property
	 * exists, equals "reboot" (case-insensitive) and the caller supplied no
	 * critical callback of its own.
	 */
	ret = of_property_read_string(np, "critical-action", &action);
	if (!ret)
		if (!of_ops.critical && !strcasecmp(action, "reboot"))
			of_ops.critical = thermal_zone_device_critical_reboot;

	tz = thermal_zone_device_register_with_trips(np->name, trips, ntrips,
						     data, &of_ops, &tzp,
						     pdelay, delay);
	if (IS_ERR(tz)) {
		ret = PTR_ERR(tz);
		pr_err("Failed to register thermal zone %pOFn: %d\n", np, ret);
		goto out_kfree_trips;
	}

	/*
	 * The node reference and the parsed trips array are released right after
	 * a successful registration (the core presumably keeps its own copy of
	 * the trips -- confirm).
	 */
	of_node_put(np);
	kfree(trips);

	/* np and trips are already released here, so undo only the registration */
	ret = thermal_zone_device_enable(tz);
	if (ret) {
		pr_err("Failed to enabled thermal zone '%s', id=%d: %d\n",
		       tz->type, tz->id, ret);
		thermal_of_zone_unregister(tz);
		return ERR_PTR(ret);
	}

	return tz;

	/* Error unwinding: each label falls through to the next one */
out_kfree_trips:
	kfree(trips);
out_of_node_put:
	of_node_put(np);

	return ERR_PTR(ret);
}
1)函数入参sensor作为thermal-sensor的dts节点, ops为这个thermal-zone的回调函数,包括get_temp来获取温度
2)of_thermal_zone_find会遍历所有dts节点,找到引用该thermal-sensor的thermal-zone节点
3)thermal_of_trips_init会通过读thermal-zone的trips子节点来初始化该thermal-zone的trip信息,对我们上面的cpu0-thermal节点,它有两个trips,一个trip的类型是passive,温度为85摄氏度,另一个trip的类型是critical,温度是100摄氏度
4)thermal_of_monitor_init会读取thermal-zone节点的两个属性polling-delay和polling-delay-passive,这两个参数是为后面thermal zone的monitor做准备的,一般用来设置monitor的轮询间隔时间
5)thermal_of_should_bind是通过读取thermal-zone的cooling-maps节点做thermal-zone和cooling-device的绑定,这个函数会在每个thermal-zone或者cooling-device向thermal core注册的时候被调用,对我们上面例子中的cpu0-thermal节点,passive类型的trip绑定了cpu0到cpu3 4个cooling-device
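6)对于critical的trip,如果dts中定义了critical-action属性且其值为"reboot",并且thermal sensor驱动没有在ops中提供自己的critical回调,则会使用thermal_zone_device_critical_reboot作为critical回调,在温度达到critical档位时reboot系统以防止温度过高损坏硬件;对上面例子中的cpu0-thermal,节点里没有定义critical-action,所以不会走这个分支,当cpu0的温度达到100摄氏度的时候,会使用驱动提供的critical回调或者thermal core的默认处理(默认为关机)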
7)thermal_zone_device_register_with_trips是注册的主函数,这个函数中会将这个thermal-zone注册进thermal core,并通过ida分配一个id给这个thermal-zone,我们之前说过,thermal-zone是一个虚拟的设备,linux用thermal_class来作为thermal-zone和cooling-device这一类的虚拟设备的集合,并设置这个thermal-zone的设备名为thermal_zone+id,通过device_register将这个device注册进linux总线设备驱动模型,通过attr和sysfs节点向上提供用户接口,每个thermal-zone都会有一个work,这个work会每隔一定毫秒数通过get_temp回调检测温度然后判断是否用一定的降温措施来控制温度,在注册函数的最后,会通过遍历已有的cooling-device来bind thermal-zone的trips和cooling-device



发表评论:

Copyright @ 2013-2015 蜗窝科技 All rights reserved. Powered by emlog