万字长文|Thermal框架源码剖析

原创 Linux阅码场 2023-03-09 08:00

作者简介:阅码场用户世至其美,目前就职小米,专注于内核调度,Kite热体验优化工具的作者。

1. 框架结构

thermal core:thermal 主要的程序,驱动初始化程序,维系 thermal zone、governor、cooling device 三者的关系。

thermal zone device:创建thermal zone节点和连接thermal sensor,在/sys/class/thermal/目录下的thermal_zone*,通过dtsi文件进行配置生成。thermal sensor是温度传感器(即热敏电阻NTC),主要是给thermal提供温度感知。

thermal governor:温度控制算法,解决温控发生时(即throttle),cooling device如何选择cooling state的问题。

  • step_wise

  • power_allocator

  • user_space

  • bang_bang

  • fair_share

thermal cooling device:系统温控的执行者,实施冷却措施的驱动(cpufreq_cooling、cpuidle_cooling、devfreq_cooling 等)。cooling device根据governor计算出来的state,实施冷却操作,一般情况下,state越高表示系统的冷却需求越高。cooling device需要与trip point进行绑定,当 trip point 触发后,由相应的cooling device 去实施冷却措施。

2. 代码结构

2.1 thermal core

thermal_core.c源代码 | v5.10.43> 主要是初始化driver,注册governor,解析dtsi文件,创建thermal zone和初始通信。

2.1.1 thermal结构体定义

thermal.h源代码 | v5.10.43> ,公开接口给其他驱动程序调用

/*
 * Callback table a thermal zone driver hands to the thermal core.
 * Every callback receives the zone as its first argument and returns an
 * int status code.
 */
struct thermal_zone_device_ops {
    /* Attach / detach a cooling device to / from this zone. */
    int (*bind)(struct thermal_zone_device *tz,
                struct thermal_cooling_device *cdev);
    int (*unbind)(struct thermal_zone_device *tz,
                  struct thermal_cooling_device *cdev);

    /* Read the zone temperature into *temp. */
    int (*get_temp)(struct thermal_zone_device *tz, int *temp);

    /* Program a (low, high) temperature window. */
    int (*set_trips)(struct thermal_zone_device *tz, int low, int high);

    /* Switch the zone to the given device mode. */
    int (*change_mode)(struct thermal_zone_device *tz,
                       enum thermal_device_mode mode);

    /* Per-trip accessors; `trip` is the trip index. */
    int (*get_trip_type)(struct thermal_zone_device *tz, int trip,
                         enum thermal_trip_type *type);
    int (*get_trip_temp)(struct thermal_zone_device *tz, int trip, int *temp);
    int (*set_trip_temp)(struct thermal_zone_device *tz, int trip, int temp);
    int (*get_trip_hyst)(struct thermal_zone_device *tz, int trip, int *hyst);
    int (*set_trip_hyst)(struct thermal_zone_device *tz, int trip, int hyst);

    /* Read the critical temperature into *temp. */
    int (*get_crit_temp)(struct thermal_zone_device *tz, int *temp);

    /* Set an emulated temperature for the zone. */
    int (*set_emul_temp)(struct thermal_zone_device *tz, int temp);

    /* Report the temperature trend for a trip. */
    int (*get_trend)(struct thermal_zone_device *tz, int trip,
                     enum thermal_trend *trend);

    /* Notification hook, called with the trip index and its type. */
    int (*notify)(struct thermal_zone_device *tz, int trip,
                  enum thermal_trip_type type);
};

/*
 * Callback table a cooling device driver hands to the thermal core.
 * Every callback receives the cooling device first and returns an int
 * status code.
 */
struct thermal_cooling_device_ops {
    /* Cooling-state accessors. */
    int (*get_max_state)(struct thermal_cooling_device *cdev,
                         unsigned long *state);
    int (*get_cur_state)(struct thermal_cooling_device *cdev,
                         unsigned long *state);
    int (*set_cur_state)(struct thermal_cooling_device *cdev,
                         unsigned long state);

    /* Power-related hooks: requested power and state <-> power conversion. */
    int (*get_requested_power)(struct thermal_cooling_device *cdev,
                               u32 *power);
    int (*state2power)(struct thermal_cooling_device *cdev,
                       unsigned long state, u32 *power);
    int (*power2state)(struct thermal_cooling_device *cdev,
                       u32 power, unsigned long *state);
};

thermal_core.c源代码 | v5.10.43>

/*
 * Init entry point of the thermal driver.
 *
 * Brings up netlink, the governors, the thermal class and the
 * device-tree thermal zones, in that order. A failure to register the
 * PM notifier is only logged; the function still returns 0 in that case.
 *
 * Return: 0 on success, otherwise the error code of the step that failed.
 */
static int __init thermal_init(void)
{
    int err;

    /* Generic netlink setup. */
    err = thermal_netlink_init();
    if (err)
        goto out_destroy;

    /* Register the thermal governors. */
    err = thermal_register_governors();
    if (err)
        goto out_destroy;

    /* Register the thermal class. */
    err = class_register(&thermal_class);
    if (err)
        goto out_governors;

    /* Parse the device-tree thermal zones and register them. */
    err = of_parse_thermal_zones();
    if (err)
        goto out_class;

    /* Register the PM notifier; failure here is not fatal. */
    err = register_pm_notifier(&thermal_pm_nb);
    if (err)
        pr_warn("Thermal: Can not register suspend notifier, return %d\n",
                err);

    return 0;

out_class:
    class_unregister(&thermal_class);
out_governors:
    thermal_unregister_governors();
out_destroy:
    ida_destroy(&thermal_tz_ida);
    ida_destroy(&thermal_cdev_ida);
    mutex_destroy(&thermal_list_lock);
    mutex_destroy(&thermal_governor_lock);
    mutex_destroy(&poweroff_lock);
    return err;
}

/*
 * Register every governor found in the governor table.
 *
 * Registration stops at the first failure; the governors registered
 * before the failing one are then unregistered again.
 *
 * Return: 0 on success, otherwise the error from
 * thermal_register_governor().
 */
static int __init thermal_register_governors(void)
{
    int ret = 0;
    struct thermal_governor **governor;

    /* Walk the table and register each governor in turn. */
    for_each_governor_table(governor) {
        ret = thermal_register_governor(*governor);
        if (ret) {
            pr_err("Failed to register governor: '%s'",
                   (*governor)->name);
            break;
        }

        pr_info("Registered thermal governor '%s'",
            (*governor)->name);
    }

    if (ret) {
        struct thermal_governor **gov;

        /* Roll back: `governor` still points at the entry that failed. */
        for_each_governor_table(gov) {
            if (gov == governor)
                break;
            thermal_unregister_governor(*gov);
        }
    }

    return ret;
}

// 将新governor添加到全局governor_list,设置默认的governor

int thermal_register_governor(struct thermal_governor *governor)

{

    int err;

    const char *name;

    struct thermal_zone_device *pos;

    if (!governor)

        return -EINVAL;

    mutex_lock(&thermal_governor_lock);

    err = -EBUSY;

    if (!__find_governor(governor->name)) {

        bool match_default;

        err = 0;

        // 将新governor添加到全局governor_list

        list_add(&governor->governor_list, &thermal_governor_list);

        // 查找匹配默认的governor,并设置默认值

        match_default = !strncmp(governor->name,

                     DEFAULT_THERMAL_GOVERNOR,

                     THERMAL_NAME_LENGTH);

        // Kconfig中配置默认的governor

        if (!def_governor && match_default)

            def_governor = governor;

    }

    mutex_lock(&thermal_list_lock);

    list_for_each_entry(pos, &thermal_tz_list, node) {

        /*

         * only thermal zones with specified tz->tzp->governor_name

         * may run with tz->govenor unset

         */

        if (pos->governor)

            continue;

        name = pos->tzp->governor_name;

        if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) {

            int ret;

            ret = thermal_set_governor(pos, governor);

            if (ret)

                dev_err(&pos->device,

                    "Failed to set governor %s for thermal zone %s: %d\n",

                    governor->name, pos->type, ret);

        }

    }

    mutex_unlock(&thermal_list_lock);

    mutex_unlock(&thermal_governor_lock);

    return err;

}

// 注册一个thermal_zone_device

struct thermal_zone_device *

thermal_zone_device_register(const char *type, int trips, int mask,

                 void *devdata, struct thermal_zone_device_ops *ops,

                 struct thermal_zone_params *tzp, int passive_delay,

                 int polling_delay)

{

    struct thermal_zone_device *tz;

    enum thermal_trip_type trip_type;

    int trip_temp;

    int id;

    int result;

    int count;

    struct thermal_governor *governor;

    if (!type || strlen(type) == 0) {

        pr_err("Error: No thermal zone type defined\n");

        return ERR_PTR(-EINVAL);

    }

    if (type && strlen(type) >= THERMAL_NAME_LENGTH) {

        pr_err("Error: Thermal zone name (%s) too long, should be under %d chars\n",

               type, THERMAL_NAME_LENGTH);

        return ERR_PTR(-EINVAL);

    }

    if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) {

        pr_err("Error: Incorrect number of thermal trips\n");

        return ERR_PTR(-EINVAL);

    }

    if (!ops) {

        pr_err("Error: Thermal zone device ops not defined\n");

        return ERR_PTR(-EINVAL);

    }

    if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))

        return ERR_PTR(-EINVAL);

    tz = kzalloc(sizeof(*tz), GFP_KERNEL);

    if (!tz)

        return ERR_PTR(-ENOMEM);

    // 初始化一个链表thermal_instances

    INIT_LIST_HEAD(&tz->thermal_instances);

    ida_init(&tz->ida);

    mutex_init(&tz->lock);

    // 自动分配id

    id = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL);

    if (id < 0) {

        result = id;

        goto free_tz;

    }

    tz->id = id;

    strlcpy(tz->type, type, sizeof(tz->type));

    tz->ops = ops;

    tz->tzp = tzp;

    tz->device.class = &thermal_class;

    tz->devdata = devdata;

    tz->trips = trips;

    tz->passive_delay = passive_delay;

    tz->polling_delay = polling_delay;

    /* sys I/F */

    /* Add nodes that are always present via .groups */

    result = thermal_zone_create_device_groups(tz, mask);

    if (result)

        goto remove_id;

    /* A new thermal zone needs to be updated anyway. */

    atomic_set(&tz->need_update, 1);

    // 设置thermal_zone节点名称

    dev_set_name(&tz->device, "thermal_zone%d", tz->id);

    result = device_register(&tz->device);

    if (result)

        goto release_device;

    for (count = 0; count < trips; count++) {

        if (tz->ops->get_trip_type(tz, count, &trip_type))

            set_bit(count, &tz->trips_disabled);

        if (tz->ops->get_trip_temp(tz, count, &trip_temp))

            set_bit(count, &tz->trips_disabled);

        /* Check for bogus trip points */

        if (trip_temp == 0)

            set_bit(count, &tz->trips_disabled);

    }

    /* Update 'this' zone's governor information */

    mutex_lock(&thermal_governor_lock);

    // thermal_zone设置governor,否则默认governor

    if (tz->tzp)

        governor = __find_governor(tz->tzp->governor_name);

    else

        governor = def_governor;

    result = thermal_set_governor(tz, governor);

    if (result) {

        mutex_unlock(&thermal_governor_lock);

        goto unregister;

    }

    mutex_unlock(&thermal_governor_lock);

    if (!tz->tzp || !tz->tzp->no_hwmon) {

        result = thermal_add_hwmon_sysfs(tz);

        if (result)

            goto unregister;

    }

    mutex_lock(&thermal_list_lock);

  // thermal zone加入到thermal_tz_list

    list_add_tail(&tz->node, &thermal_tz_list);

    mutex_unlock(&thermal_list_lock);

    // thermal_cdev_list上的cooling设备绑定到thermal_zone_device

    bind_tz(tz);

    // 初始化work queue下半部分,处理中断需要响应的操作,定时去调用thermal_zone_device_update函数

    // 设置polling_delay值为轮询周期

    INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_check);

    // thermal zone的温度等复位。

    thermal_zone_device_reset(tz);

    /* Update the new thermal zone and mark it as already updated. */

    if (atomic_cmpxchg(&tz->need_update, 1, 0))

        thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

    thermal_notify_tz_create(tz->id, tz->type);

    return tz;

unregister:

    device_del(&tz->device);

release_device:

    put_device(&tz->device);

    tz = NULL;

remove_id:

    ida_simple_remove(&thermal_tz_ida, id);

free_tz:

    kfree(tz);

    return ERR_PTR(result);

}

// thermal_cdev_list上的cooling设备绑定到thermal_zone_device

static void bind_tz(struct thermal_zone_device *tz)

{

    int i, ret;

    struct thermal_cooling_device *pos = NULL;

    const struct thermal_zone_params *tzp = tz->tzp;

    if (!tzp && !tz->ops->bind)

        return;

    mutex_lock(&thermal_list_lock);

    /* If there is ops->bind, try to use ops->bind */

    if (tz->ops->bind) {

        // 遍历thermal_cdev_list,绑定cooling设备

        list_for_each_entry(pos, &thermal_cdev_list, node) {

            ret = tz->ops->bind(tz, pos);

            if (ret)

                print_bind_err_msg(tz, pos, ret);

        }

        goto exit;

    }

    if (!tzp || !tzp->tbp)

        goto exit;

    list_for_each_entry(pos, &thermal_cdev_list, node) {

        for (i = 0; i < tzp->num_tbps; i++) {

            if (tzp->tbp[i].cdev || !tzp->tbp[i].match)

                continue;

            if (tzp->tbp[i].match(tz, pos))

                continue;

            tzp->tbp[i].cdev = pos;

            __bind(tz, tzp->tbp[i].trip_mask, pos,

                   tzp->tbp[i].binding_limits,

                   tzp->tbp[i].weight);

        }

    }

exit:

    mutex_unlock(&thermal_list_lock);

}

// 检查thermal_zone_device

static void thermal_zone_device_check(struct work_struct *work)

{

    //通过结构体成员变量地址来获取这个thermal_zone_device结构体的地址

    struct thermal_zone_device *tz = container_of(work, struct

                              thermal_zone_device,

                              poll_queue.work);

    //更新thermal_zone_device

    thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

}

/*
 * Refresh a thermal zone: read its temperature, reprogram the trip
 * window and process every trip point.
 *
 * Nothing is done when polling should stop for this zone, while
 * in_suspend is set, or when the zone has no get_temp callback.
 */
void thermal_zone_device_update(struct thermal_zone_device *tz,
                enum thermal_notify_event event)
{
    int trip = 0;

    if (should_stop_polling(tz) || atomic_read(&in_suspend) ||
        !tz->ops->get_temp)
        return;

    /* Refresh tz->temperature from the sensor. */
    update_temperature(tz);

    /* Recompute the low/high trip window. */
    thermal_zone_set_trips(tz);

    tz->notify_event = event;

    while (trip < tz->trips) {
        handle_thermal_trip(tz, trip);
        trip++;
    }
}

/*
 * Read the zone temperature and store it in tz->temperature, keeping the
 * previous value in tz->last_temperature.
 *
 * A failed read leaves the zone untouched; every error except -EAGAIN is
 * logged.
 */
static void update_temperature(struct thermal_zone_device *tz)
{
    int reading;
    int err = thermal_zone_get_temp(tz, &reading);

    if (err == -EAGAIN)
        return;

    if (err) {
        dev_warn(&tz->device,
             "failed to read out thermal zone (%d)\n",
             err);
        return;
    }

    /* Shift current -> last, then store the new reading. */
    mutex_lock(&tz->lock);
    tz->last_temperature = tz->temperature;
    tz->temperature = reading;
    mutex_unlock(&tz->lock);

    trace_thermal_temperature(tz);
    thermal_genl_sampling_temp(tz->id, reading);
}

// 遍历处理符合条件的trips

static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)

{

    enum thermal_trip_type type;

    int trip_temp, hyst = 0;

    /* Ignore disabled trip points */

    if (test_bit(trip, &tz->trips_disabled))

        return;

    // 获取trip_temptrip_typeget_trip_hyst

    tz->ops->get_trip_temp(tz, trip, &trip_temp);

    tz->ops->get_trip_type(tz, trip, &type);

    if (tz->ops->get_trip_hyst)

        tz->ops->get_trip_hyst(tz, trip, &hyst);

    if (tz->last_temperature != THERMAL_TEMP_INVALID) {

        // 触发trip

        if (tz->last_temperature < trip_temp &&

            tz->temperature >= trip_temp)

            thermal_notify_tz_trip_up(tz->id, trip);

    // 触发hysteresis

        if (tz->last_temperature >= trip_temp &&

            tz->temperature < (trip_temp - hyst))

            thermal_notify_tz_trip_down(tz->id, trip);

    }

    // 处理critical||hottrips

    if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)

        handle_critical_trips(tz, trip, type);

    // 如果设置了governor,调用governor->throttle函数

    else

        handle_non_critical_trips(tz, trip);

    /*

     * Alright, we handled this trip successfully.

     * So, start monitoring again.

     */

    monitor_thermal_zone(tz);

}

// 处理typecritical||hottrips

static void handle_critical_trips(struct thermal_zone_device *tz,

                  int trip, enum thermal_trip_type trip_type)

{

    int trip_temp;

    tz->ops->get_trip_temp(tz, trip, &trip_temp);

    /* If we have not crossed the trip_temp, we do not care. */

    if (trip_temp <= 0 || tz->temperature < trip_temp)

        return;

    trace_thermal_zone_trip(tz, trip, trip_type);

    if (tz->ops->notify)

        tz->ops->notify(tz, trip, trip_type);

    // 如果是critical,准备关机

    if (trip_type == THERMAL_TRIP_CRITICAL) {

        dev_emerg(&tz->device,

              "critical temperature reached (%d C), shutting down\n",

              tz->temperature / 1000);

        mutex_lock(&poweroff_lock);

        if (!power_off_triggered) {

            /*

             * Queue a backup emergency shutdown in the event of

             * orderly_poweroff failure

             */

            // 调用thermal_emergency_poweroff准备关机操作

            thermal_emergency_poweroff();

            orderly_poweroff(true);

            power_off_triggered = true;

        }

        mutex_unlock(&poweroff_lock);

    }

}

/*
 * Handle every trip that is neither critical nor hot: run the throttle
 * callback of the zone's governor, or of the default governor when the
 * zone has none.
 */
static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip)
{
    if (tz->governor)
        tz->governor->throttle(tz, trip);
    else
        def_governor->throttle(tz, trip);
}

/*
 * Pick the polling interval for a zone and (re)arm its poll work.
 *
 * passive_delay is used while tz->passive is set, otherwise
 * polling_delay when it is non-zero. A delay of 0, which cancels
 * polling, is used when polling should stop or neither applies.
 */
static void monitor_thermal_zone(struct thermal_zone_device *tz)
{
    int delay = 0;
    bool stop = should_stop_polling(tz);

    mutex_lock(&tz->lock);

    if (!stop) {
        if (tz->passive)
            delay = tz->passive_delay;
        else if (tz->polling_delay)
            delay = tz->polling_delay;
    }

    thermal_zone_device_set_polling(tz, delay);

    mutex_unlock(&tz->lock);
}

/*
 * (Re)schedule or cancel a zone's poll work.
 *
 * @delay is in milliseconds. 0 cancels the pending work; any other value
 * queues it on system_freezable_power_efficient_wq, with the timeout
 * passed through round_jiffies() when @delay exceeds 1000 ms.
 */
static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
                        int delay)
{
    unsigned long ticks;

    if (!delay) {
        /* Drop the queued work: no polling. */
        cancel_delayed_work(&tz->poll_queue);
        return;
    }

    ticks = msecs_to_jiffies(delay);
    if (delay > 1000)
        ticks = round_jiffies(ticks);

    mod_delayed_work(system_freezable_power_efficient_wq,
             &tz->poll_queue, ticks);
}

thermal_helpers.c 源代码 | v5.10.43> 定义thermal_zone_get_temp函数

int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)

{

    int ret = -EINVAL;

    int count;

    int crit_temp = INT_MAX;

    enum thermal_trip_type type;

    if (!tz || IS_ERR(tz) || !tz->ops->get_temp)

        goto exit;

    mutex_lock(&tz->lock);

    // 获取当前sensor的温度,sensor里面实现

    ret = tz->ops->get_temp(tz, temp);

    // thermal debug开关

    if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {

        for (count = 0; count < tz->trips; count++) {

            ret = tz->ops->get_trip_type(tz, count, &type);

            if (!ret && type == THERMAL_TRIP_CRITICAL) {

                ret = tz->ops->get_trip_temp(tz, count,

                        &crit_temp);

                break;

            }

        }

        /*

         * Only allow emulating a temperature when the real temperature

         * is below the critical temperature so that the emulation code

         * cannot hide critical conditions.

         */

        if (!ret && *temp < crit_temp)

            *temp = tz->emul_temperature;

    }

    mutex_unlock(&tz->lock);

exit:

    return ret;

}

/*
 * Compute the temperature window around the zone's current temperature
 * and hand it to ops->set_trips.
 *
 * The lower bound is the highest (trip temperature - hysteresis) below
 * the current temperature; the upper bound is the lowest trip
 * temperature above it. Nothing is done when the zone lacks set_trips or
 * get_trip_hyst, or when the window is unchanged since the last call.
 */
void thermal_zone_set_trips(struct thermal_zone_device *tz)
{
    int lower = -INT_MAX;
    int upper = INT_MAX;
    int temp, hyst;
    int idx, err;

    mutex_lock(&tz->lock);

    if (!tz->ops->set_trips || !tz->ops->get_trip_hyst)
        goto unlock;

    for (idx = 0; idx < tz->trips; idx++) {
        int release;

        /* Trip temperature and its hysteresis. */
        tz->ops->get_trip_temp(tz, idx, &temp);
        tz->ops->get_trip_hyst(tz, idx, &hyst);

        release = temp - hyst;

        if (release < tz->temperature && release > lower)
            lower = release;

        if (temp > tz->temperature && temp < upper)
            upper = temp;
    }

    /* No need to change trip points */
    if (tz->prev_low_trip == lower && tz->prev_high_trip == upper)
        goto unlock;

    tz->prev_low_trip = lower;
    tz->prev_high_trip = upper;

    dev_dbg(&tz->device,
        "new temperature boundaries: %d < x < %d\n", lower, upper);

    /*
     * Set a temperature window. When this window is left the driver
     * must inform the thermal core via thermal_zone_device_update.
     */
    err = tz->ops->set_trips(tz, lower, upper);
    if (err)
        dev_err(&tz->device, "Failed to set trips: %d\n", err);

unlock:
    mutex_unlock(&tz->lock);
}

2.2 dtsi文件解析

thermal_of.c源代码 | v5.10.43> 解析dtsi文件,主要函数是of_parse_thermal_zones,创建解析生成thermal zone节点

/*
 * Parse the "thermal-zones" device-tree node and register one thermal
 * zone device per available child node.
 *
 * A child whose zone cannot be built or registered is logged and
 * skipped. An allocation failure aborts the whole parse and destroys the
 * zones built so far.
 *
 * Return: 0 on success (including when there is no "thermal-zones"
 * node), -ENOMEM when an allocation fails.
 */
int __init of_parse_thermal_zones(void)
{
    struct device_node *np, *child;
    struct __thermal_zone *tz;
    struct thermal_zone_device_ops *ops;

    /* Locate the "thermal-zones" node. */
    np = of_find_node_by_name(NULL, "thermal-zones");
    if (!np) {
        pr_debug("unable to find thermal zones\n");
        return 0; /* Run successfully on systems without thermal DT */
    }

    for_each_available_child_of_node(np, child) {
        struct thermal_zone_device *zone;
        struct thermal_zone_params *tzp;
        int i, mask = 0;
        u32 prop;

        /* Build the internal zone description from this child node. */
        tz = thermal_of_build_thermal_zone(child);
        if (IS_ERR(tz)) {
            pr_err("failed to build thermal zone %pOFn: %ld\n",
                   child,
                   PTR_ERR(tz));
            continue;
        }

        /* Each zone gets its own copy of of_thermal_ops. */
        ops = kmemdup(&of_thermal_ops, sizeof(*ops), GFP_KERNEL);
        if (!ops)
            goto exit_free;

        tzp = kzalloc(sizeof(*tzp), GFP_KERNEL);
        if (!tzp) {
            kfree(ops);
            goto exit_free;
        }

        /* No hwmon because there might be hwmon drivers registering */
        tzp->no_hwmon = true;

        /* Optional "sustainable-power" property. */
        if (!of_property_read_u32(child, "sustainable-power", &prop))
            tzp->sustainable_power = prop;

        /* Set one mask bit per trip point. */
        for (i = 0; i < tz->ntrips; i++)
            mask |= 1 << i;

        /* these two are left for temperature drivers to use */
        tzp->slope = tz->slope;
        tzp->offset = tz->offset;

        /* Register the zone with the thermal core. */
        zone = thermal_zone_device_register(child->name, tz->ntrips,
                            mask, tz,
                            ops, tzp,
                            tz->passive_delay,
                            tz->polling_delay);
        if (IS_ERR(zone)) {
            pr_err("Failed to build %pOFn zone %ld\n", child,
                   PTR_ERR(zone));
            kfree(tzp);
            kfree(ops);
            of_thermal_free_zone(tz);
            /* attempting to build remaining zones still */
        }
    }
    of_node_put(np);

    return 0;

exit_free:
    /* Leaving the iterator early: drop the reference held on child. */
    of_node_put(child);
    of_node_put(np);
    of_thermal_free_zone(tz);

    /* no memory available, so free what we have built */
    of_thermal_destroy_zones();

    return -ENOMEM;
}

/*
 * Build a struct __thermal_zone from one child of "thermal-zones".
 *
 * Reads the mandatory "polling-delay-passive" and "polling-delay"
 * properties and the optional "coefficients" property, then the optional
 * "trips" and "cooling-maps" sub-nodes.
 *
 * Return: the new zone description, or ERR_PTR(-errno) on failure.
 */
static struct __thermal_zone
__init *thermal_of_build_thermal_zone(struct device_node *np)
{
    struct device_node *child = NULL, *gchild;
    struct __thermal_zone *tz;
    int ret, i;
    u32 prop, coef[2];

    if (!np) {
        pr_err("no thermal zone np\n");
        return ERR_PTR(-EINVAL);
    }

    tz = kzalloc(sizeof(*tz), GFP_KERNEL);
    if (!tz)
        return ERR_PTR(-ENOMEM);

    /* Mandatory "polling-delay-passive", stored in tz->passive_delay. */
    ret = of_property_read_u32(np, "polling-delay-passive", &prop);
    if (ret < 0) {
        pr_err("%pOFn: missing polling-delay-passive property\n", np);
        goto free_tz;
    }
    tz->passive_delay = prop;

    /* Mandatory "polling-delay", stored in tz->polling_delay. */
    ret = of_property_read_u32(np, "polling-delay", &prop);
    if (ret < 0) {
        pr_err("%pOFn: missing polling-delay property\n", np);
        goto free_tz;
    }
    tz->polling_delay = prop;

    /*
     * REVISIT: for now, the thermal framework supports only
     * one sensor per thermal zone. Thus, we are considering
     * only the first two values as slope and offset.
     */
    ret = of_property_read_u32_array(np, "coefficients", coef, 2);
    if (ret == 0) {
        tz->slope = coef[0];
        tz->offset = coef[1];
    } else {
        /* Defaults when "coefficients" is absent or unreadable. */
        tz->slope = 1;
        tz->offset = 0;
    }

    /* trips */
    child = of_get_child_by_name(np, "trips");

    /* No trips provided */
    if (!child)
        goto finish;

    /* One trip per child of the "trips" node. */
    tz->ntrips = of_get_child_count(child);
    if (tz->ntrips == 0) /* must have at least one child */
        goto finish;

    tz->trips = kcalloc(tz->ntrips, sizeof(*tz->trips), GFP_KERNEL);
    if (!tz->trips) {
        ret = -ENOMEM;
        goto free_tz;
    }

    i = 0;
    for_each_child_of_node(child, gchild) {
        /* Fill in one trip from each child of "trips". */
        ret = thermal_of_populate_trip(gchild, &tz->trips[i++]);
        if (ret)
            goto free_trips;
    }

    /* Done with the "trips" node: drop its reference. */
    of_node_put(child);

    /* cooling-maps */
    child = of_get_child_by_name(np, "cooling-maps");

    /* cooling-maps not provided */
    if (!child)
        goto finish;

    tz->num_tbps = of_get_child_count(child);
    if (tz->num_tbps == 0)
        goto finish;

    tz->tbps = kcalloc(tz->num_tbps, sizeof(*tz->tbps), GFP_KERNEL);
    if (!tz->tbps) {
        ret = -ENOMEM;
        goto free_trips;
    }

    i = 0;
    for_each_child_of_node(child, gchild) {
        /* Fill in one bind-parameter entry per "cooling-maps" child. */
        ret = thermal_of_populate_bind_params(gchild, &tz->tbps[i++],
                              tz->trips, tz->ntrips);
        if (ret)
            goto free_tbps;
    }

finish:
    of_node_put(child);

    return tz;

free_tbps:
    /* Undo the bind-parameter entries, starting from index i - 1. */
    for (i = i - 1; i >= 0; i--) {
        struct __thermal_bind_params *tbp = tz->tbps + i;
        int j;

        for (j = 0; j < tbp->count; j++)
            of_node_put(tbp->tcbp[j].cooling_device);
        kfree(tbp->tcbp);
    }

    kfree(tz->tbps);
free_trips:
    /* Drop the node reference recorded in each trip. */
    for (i = 0; i < tz->ntrips; i++)
        of_node_put(tz->trips[i].np);
    kfree(tz->trips);
    of_node_put(gchild);
free_tz:
    kfree(tz);
    of_node_put(child);

    return ERR_PTR(ret);
}

/*
 * Fill one thermal_trip from a child node of "trips".
 *
 * The "temperature" and "hysteresis" properties and a valid trip type
 * are all required. On success the node is recorded in trip->np and a
 * reference is taken on it.
 *
 * Return: 0 on success, otherwise the negative error from the failing
 * property read or type lookup.
 */
static int thermal_of_populate_trip(struct device_node *np,
                    struct thermal_trip *trip)
{
    int value;
    int err;

    /* "temperature": the trip temperature. */
    err = of_property_read_u32(np, "temperature", &value);
    if (err < 0) {
        pr_err("missing temperature property\n");
        return err;
    }
    trip->temperature = value;

    /* "hysteresis": the hysteresis of the trip. */
    err = of_property_read_u32(np, "hysteresis", &value);
    if (err < 0) {
        pr_err("missing hysteresis property\n");
        return err;
    }
    trip->hysteresis = value;

    /* Trip type. */
    err = thermal_of_get_trip_type(np, &trip->type);
    if (err < 0) {
        pr_err("wrong trip type property\n");
        return err;
    }

    /* Required for cooling map matching */
    of_node_get(np);
    trip->np = np;

    return 0;
}

/*
 * Fill one __thermal_bind_params entry from a child of "cooling-maps".
 *
 * Resolves the "trip" phandle against @trips, then collects every
 * "cooling-device" phandle, together with its min/max arguments, into a
 * newly allocated array stored in __tbp->tcbp.
 *
 * Return: 0 on success, -ENODEV when the trip is missing or not found in
 * @trips, -ENOENT when there is no cooling device, -ENOMEM on allocation
 * failure, or the error from of_parse_phandle_with_args().
 */
static int thermal_of_populate_bind_params(struct device_node *np,
                       struct __thermal_bind_params *__tbp,
                       struct thermal_trip *trips,
                       int ntrips)
{
    struct of_phandle_args cooling_spec;
    struct __thermal_cooling_bind_param *__tcbp;
    struct device_node *trip;
    int ret, i, count;
    u32 prop;

    /* "contribution" is optional; default to THERMAL_WEIGHT_DEFAULT. */
    __tbp->usage = THERMAL_WEIGHT_DEFAULT;
    ret = of_property_read_u32(np, "contribution", &prop);
    if (ret == 0)
        __tbp->usage = prop;

    /* Resolve the "trip" phandle. */
    trip = of_parse_phandle(np, "trip", 0);
    if (!trip) {
        pr_err("missing trip property\n");
        return -ENODEV;
    }

    /* Find the index of the referenced trip in the trips array. */
    for (i = 0; i < ntrips; i++)
        if (trip == trips[i].np) {
            __tbp->trip_id = i;
            break;
        }

    if (i == ntrips) {
        ret = -ENODEV;
        goto end;
    }

    /* Number of "cooling-device" phandle entries. */
    count = of_count_phandle_with_args(np, "cooling-device",
                       "#cooling-cells");
    if (count <= 0) {
        pr_err("Add a cooling_device property with at least one device\n");
        ret = -ENOENT;
        goto end;
    }

    __tcbp = kcalloc(count, sizeof(*__tcbp), GFP_KERNEL);
    if (!__tcbp) {
        ret = -ENOMEM;
        goto end;
    }

    for (i = 0; i < count; i++) {
        /* Parse the i-th "cooling-device" phandle and its arguments. */
        ret = of_parse_phandle_with_args(np, "cooling-device",
                "#cooling-cells", i, &cooling_spec);
        if (ret < 0) {
            pr_err("Invalid cooling-device entry\n");
            goto free_tcbp;
        }

        __tcbp[i].cooling_device = cooling_spec.np;

        /* The first two arguments are taken as the min and max limits. */
        if (cooling_spec.args_count >= 2) { /* at least min and max */
            __tcbp[i].min = cooling_spec.args[0];
            __tcbp[i].max = cooling_spec.args[1];
        } else {
            /* Only logged: parsing continues with min/max left unset. */
            pr_err("wrong reference to cooling device, missing limits\n");
        }
    }

    __tbp->tcbp = __tcbp;
    __tbp->count = count;

    goto end;

free_tcbp:
    /* Release the cooling-device nodes at indexes 0..i-1. */
    for (i = i - 1; i >= 0; i--)
        of_node_put(__tcbp[i].cooling_device);

    kfree(__tcbp);
end:
    of_node_put(trip);

    return ret;
}

thermal_extra.drawio.svg

2.3 thermal governor

目前可配置默认的thermal governor策略

/*
 * Default Thermal Governor
 *
 * DEFAULT_THERMAL_GOVERNOR expands to the name string of the governor
 * selected by whichever CONFIG_THERMAL_DEFAULT_GOV_* option is defined.
 * If none of them is defined, this block leaves the macro undefined.
 */
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
#define DEFAULT_THERMAL_GOVERNOR       "step_wise"
#elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE)
#define DEFAULT_THERMAL_GOVERNOR       "fair_share"
#elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE)
#define DEFAULT_THERMAL_GOVERNOR       "user_space"
#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR)
#define DEFAULT_THERMAL_GOVERNOR       "power_allocator"
#endif


Kconfig中配置thermal governor,默认为step_wise

# Boolean option: whether trip temperatures may be changed from userspace.
config THERMAL_WRITABLE_TRIPS

    bool "Enable writable trip points"

    help

      This option allows the system integrator to choose whether

      trip temperatures can be changed from userspace. The

      writable trips need to be specified when setting up the

      thermal zone but the choice here takes precedence.

      Say 'Y' here if you would like to allow userspace tools to

      change trip temperatures.

# Choice of the default thermal governor; step_wise is the default entry.
# The step_wise, fair_share and user_space entries "select" their governor
# option, while power_allocator "depends on" THERMAL_GOV_POWER_ALLOCATOR.
choice

    prompt "Default Thermal governor"

    default THERMAL_DEFAULT_GOV_STEP_WISE

    help

      This option sets which thermal governor shall be loaded at

      startup. If in doubt, select 'step_wise'.

config THERMAL_DEFAULT_GOV_STEP_WISE

    bool "step_wise"

    select THERMAL_GOV_STEP_WISE

    help

      Use the step_wise governor as default. This throttles the

      devices one step at a time.

config THERMAL_DEFAULT_GOV_FAIR_SHARE

    bool "fair_share"

    select THERMAL_GOV_FAIR_SHARE

    help

      Use the fair_share governor as default. This throttles the

      devices based on their 'contribution' to a zone. The

      contribution should be provided through platform data.

config THERMAL_DEFAULT_GOV_USER_SPACE

    bool "user_space"

    select THERMAL_GOV_USER_SPACE

    help

      Select this if you want to let the user space manage the

      platform thermals.

config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR

    bool "power_allocator"

    depends on THERMAL_GOV_POWER_ALLOCATOR

    help

      Select this if you want to control temperature based on

      system and device power allocation. This governor can only

      operate on cooling devices that implement the power API.

endchoice

2.3.1 step_wise governor

step_wise governor 是每个轮询周期逐级提高冷却状态,是一种相对温和的温控策略。根据cur_state、温升趋势trend、是否throttle去计算cooling_device的target_state,从而达到控制cooling_device来控制温升。

对于cooling state的计算策略:

  1. 当温升趋势为上升且发生throttle,使用更高一级的cooling state

  2. 当温升趋势为下降:

     • 若发生throttle,不改变cooling state

     • 若解除throttle,使用更低一级的cooling state

  3. 当达到最高温线且发生throttle,使用最高级的cooling state

  4. 当达到最低温线且发生throttle,使用最低级的cooling state

注意: cooling state 取值范围在[instance->lower,instance->upper],若cur_state < instance->lower,target_state则取值为THERMAL_NO_TARGET

代码框架图

thermal.h源代码 | v5.10.43> 定义了温升趋势trend

/* Temperature trend of a thermal zone. */
enum thermal_trend {
    THERMAL_TREND_STABLE,       /* temperature is stable */
    THERMAL_TREND_RAISING,      /* temperature is raising */
    THERMAL_TREND_DROPPING,     /* temperature is dropping */
    THERMAL_TREND_RAISE_FULL,   /* apply highest cooling action */
    THERMAL_TREND_DROP_FULL,    /* apply lowest cooling action */
};

gov_step_wise.c源代码 | v5.10.43>

static int step_wise_throttle(struct thermal_zone_device *tz, int trip){  struct thermal_instance*instance; // 更新trip、trend和计算cooling_device的target_statethermal_zone_trip_update(tz, trip); if (tz->forced_passive)thermal_zone_trip_update(tz, THERMAL_TRIPS_NONE);mutex_lock(&tz->lock); // 遍历更新cooling_device的statelist_for_each_entry(instance, &tz->thermal_instances, tz_node)thermal_cdev_update(instance->cdev);mutex_unlock(&tz->lock); return 0;}// 更新trip、trend和计算cooling_device的target_statestatic void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip){ int trip_temp; enum thermal_trip_type trip_type; enum thermal_trend trend; struct thermal_instance *instance;  bool throttle = false; int old_target; // 获取trip的类型和温度 if (trip == THERMAL_TRIPS_NONE) {trip_temp = tz->forced_passive;trip_type = THERMAL_TRIPS_NONE; } else {tz->ops->get_trip_temp(tz, trip, &trip_temp);tz->ops->get_trip_type(tz, trip, &trip_type); } // 获取温升趋势,稳定(THERMAL_TREND_STABLE),上升(THERMAL_TREND_RAISING),下降(THERMAL_TREND_DROPPING) trend =get_tz_trend(tz, trip); // 当zone温度大于trip_temp,则需要进行触发 if (tz->temperature >= trip_temp) {throttle = true;trace_thermal_zone_trip(tz, trip, trip_type); }dev_dbg(&tz->device, "Trip%d[type=%d,temp=%d]:trend=%d,throttle=%d\n",trip, trip_type, trip_temp, trend, throttle);mutex_lock(&tz->lock);list_for_each_entry(instance, &tz->thermal_instances, tz_node) { if (instance->trip != trip)continue;old_target = instance->target;     // 计算cooling_device的target_stateinstance->target = get_target_state(instance, trend, throttle);dev_dbg(&instance->cdev->device, "old_target=%d,target=%d\n", old_target, (int)instance->target); if (instance->initialized && old_target== instance->target)continue; /* Activate a passive thermal instance */ if (old_target == THERMAL_NO_TARGET &&instance->target != THERMAL_NO_TARGET)update_passive_instance(tz, trip_type, 1); /* Deactivate a passive thermal instance */ else if (old_target != THERMAL_NO_TARGET &&instance->target == 
THERMAL_NO_TARGET)update_passive_instance(tz, trip_type, -1);instance->initialized = true;mutex_lock(&instance->cdev->lock);instance->cdev->updated = false; /* cdev needs update */mutex_unlock(&instance->cdev->lock); } mutex_unlock(&tz->lock);}// 计算cooling_device的target_statestatic unsigned long get_target_state(structthermal_instance *instance,enum thermal_trend trend,bool throttle){ struct thermal_cooling_device *cdev = instance->cdev; unsigned long cur_state; unsigned long next_target; /* * Wekeep this instance the way it is by default. *Otherwise, we use the current state of the * cdevin use to determine the next_target. */ //获取coolingdevice的当前statecdev->ops->get_cur_state(cdev, &cur_state);next_target = instance->target;dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state); //如果没有初始化 if (!instance->initialized) { if (throttle) {// next_target初始值为(cur_state + 1),取值范围在[instance->lower,instance->upper]next_target = (cur_state + 1) >= instance->upper ? instance->upper : ((cur_state + 1) < instance->lower ? instance->lower :(cur_state + 1)); } else {next_target = THERMAL_NO_TARGET; } return next_target; } switch (trend) { // 当温升趋势为上升且发生throttle,使用更高一级的cooling state // 取值范围在[instance->lower,instance->upper]   case THERMAL_TREND_RAISING: if (throttle) {next_target = cur_state < instance->upper ? 
(cur_state + 1) : instance->upper;if (next_target <instance->lower)next_target = instance->lower; } break; // 当达到最高温线且发生throttle,使用最高级的cooling state,将温度快速降下来 case THERMAL_TREND_RAISE_FULL: if (throttle)next_target = instance->upper; break; // 当温升趋势为下降 // 发生throttle,不改变coolingstate // 解除throttle,使用低一级的coolingstate case THERMAL_TREND_DROPPING: if (cur_state <= instance->lower) {if (!throttle)next_target = THERMAL_NO_TARGET; } else {   if (!throttle) {next_target = cur_state - 1;if (next_target >instance->upper) next_target =instance->upper;} } break; // 当达到最低温线且发生throttle,使用最低级的coolingstate case THERMAL_TREND_DROP_FULL: if (cur_state == instance->lower) {if (!throttle)next_target = THERMAL_NO_TARGET; } elsenext_target = instance->lower; break; default: break; } return next_target;}

thermal_cdev_update函数 源代码 | v5.10.43>

// 更新cooling device的statevoid thermal_cdev_update(structthermal_cooling_device *cdev){ struct thermal_instance *instance; unsigned long target = 0;mutex_lock(&cdev->lock); /* cooling device is updated*/ if (cdev->updated) {mutex_unlock(&cdev->lock); return; } /* Make sure cdev enters the deepest cooling state */list_for_each_entry(instance, &cdev->thermal_instances,cdev_node) {dev_dbg(&cdev->device, "zone%d->target=%lu\n",           instance->tz->id,instance->target); if (instance->target == THERMAL_NO_TARGET)continue; if (instance->target > target)target = instance->target; } // 设置coolingdevice的statethermal_cdev_set_cur_state(cdev, target);cdev->updated = true;mutex_unlock(&cdev->lock);trace_cdev_update(cdev, target);dev_dbg(&cdev->device, "set to state%lu\n", target);}


2.3.2 power_allocator governor

IPA(Intelligent Power Allocation)源代码 | v5.10.43>是由ARM开发的符合linux内核 thermal framework 的 governor,代码中的名字为power_allocator,旨在满足温控效果的条件下最大化性能。IPA(Intelligent Power Allocator)模型的核心是利用 PID 控制器,Thermal Zone 的温度作为输入,可分配功耗值作为输出,调节 Allocator 的频率和电压值。

代码框架图

功耗均衡原理图

gov_power_allocator.c源代码 | v5.10.43>

/*
 * power_allocator_throttle() - power_allocator governor handler.
 * @tz:   thermal zone being evaluated
 * @trip: trip point the core is currently handling
 *
 * The handler runs for every trip point, but the calculation is done only
 * for the "maximum desired temperature" trip. Below the switch-on
 * temperature the PID state is reset and all actors get maximum power;
 * otherwise the zone is marked passive and power is allocated against the
 * control temperature.
 *
 * Return: 0 on success or when nothing had to be done, the error returned by
 * get_trip_temp() for the control trip, or the result of allocate_power().
 */
static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
{
	struct power_allocator_params *pa = tz->governor_data;
	int on_temp, target_temp;
	int err;

	/* Called once per trip point; only the control trip does the work. */
	if (trip != pa->trip_max_desired_temperature)
		return 0;

	/* The switch-on trip temperature gates the whole algorithm. */
	err = tz->ops->get_trip_temp(tz, pa->trip_switch_on, &on_temp);
	if (!err && (tz->temperature < on_temp)) {
		tz->passive = 0;
		reset_pid_controller(pa);
		allow_maximum_power(tz);
		return 0;
	}

	tz->passive = 1;

	/* The control trip temperature is the regulation target. */
	err = tz->ops->get_trip_temp(tz, pa->trip_max_desired_temperature,
				     &target_temp);
	if (err) {
		dev_warn(&tz->device,
			 "Failed to get the maximum desired temperature: %d\n",
			 err);
		return err;
	}

	/* Core of the IPA algorithm. */
	return allocate_power(tz, target_temp);
}

// IPA主要的算法逻辑

static int allocate_power(struct thermal_zone_device *tz,

              int control_temp)

{

    struct thermal_instance *instance;

    struct power_allocator_params *params = tz->governor_data;

    u32 *req_power, *max_power, *granted_power, *extra_actor_power;

    u32 *weighted_req_power;

    u32 total_req_power, max_allocatable_power, total_weighted_req_power;

    u32 total_granted_power, power_range;

    int i, num_actors, total_weight, ret = 0;

    int trip_max_desired_temperature = params->trip_max_desired_temperature;

    mutex_lock(&tz->lock);

    num_actors = 0;

    total_weight = 0;

    list_for_each_entry(instance, &tz->thermal_instances, tz_node) {

        if ((instance->trip == trip_max_desired_temperature) &&

            cdev_is_power_actor(instance->cdev)) {

            num_actors++;

            total_weight += instance->weight;

        }

    }

    if (!num_actors) {

        ret = -ENODEV;

        goto unlock;

    }

    /*

     * We need to allocate five arrays of the same size:

     * req_power, max_power, granted_power, extra_actor_power and

     * weighted_req_power.  They are going to be needed until this

     * function returns.  Allocate them all in one go to simplify

     * the allocation and deallocation logic.

     */

    BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power));

    BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power));

    BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power));

    BUILD_BUG_ON(sizeof(*req_power) != sizeof(*weighted_req_power));

    req_power = kcalloc(num_actors * 5, sizeof(*req_power), GFP_KERNEL);

    if (!req_power) {

        ret = -ENOMEM;

        goto unlock;

    }

    max_power = &req_power[num_actors];

    granted_power = &req_power[2 * num_actors];

    extra_actor_power = &req_power[3 * num_actors];

    weighted_req_power = &req_power[4 * num_actors];

    i = 0;

    total_weighted_req_power = 0;

    total_req_power = 0;

    max_allocatable_power = 0;

    // 遍历所有的cooling device

    list_for_each_entry(instance, &tz->thermal_instances, tz_node) {

        int weight;

        struct thermal_cooling_device *cdev = instance->cdev;

        if (instance->trip != trip_max_desired_temperature)

            continue;

       

        // cooling deviceops的函数指针get_requested_powerstate2powerpower2state是否存在

        if (!cdev_is_power_actor(cdev))

            continue;

        // 获取cooling device的功耗需求requested power

        if (cdev->ops->get_requested_power(cdev, &req_power[i]))

            continue;

        if (!total_weight)

            weight = 1 << FRAC_BITS;

        else

            weight = instance->weight;

        //获取cooling device的权重功耗,weight*requested_power

        weighted_req_power[i] = frac_to_int(weight * req_power[i]);

        // 获取cooling device可以消耗的最大功率

        if (power_actor_get_max_power(cdev, &max_power[i]))

            continue;

       

    // 总的cdev需要的功耗

        total_req_power += req_power[i];

        // 总的最大可分配的功耗

        max_allocatable_power += max_power[i];

        // 总的cdev需要的权重功耗

        total_weighted_req_power += weighted_req_power[i];

        i++;

    }

    // PID控制算法,power_range是当前温度下可配置的最大功耗值

    power_range = pid_controller(tz, control_temp, max_allocatable_power);

    // 分摊计算出当前温度下每个cooling device的最终的total granted_power

    // 公式:total granted_power = granted_power + extra_granted_power

    divvy_up_power(weighted_req_power, max_power, num_actors,

               total_weighted_req_power, power_range, granted_power,

               extra_actor_power);

    total_granted_power = 0;

    i = 0;

    list_for_each_entry(instance, &tz->thermal_instances, tz_node) {

        if (instance->trip != trip_max_desired_temperature)

            continue;

        if (!cdev_is_power_actor(instance->cdev))

            continue;

       

    // cooling device设置granted_power

        power_actor_set_power(instance->cdev, instance,

                      granted_power[i]);

        total_granted_power += granted_power[i];

        i++;

    }

    trace_thermal_power_allocator(tz, req_power, total_req_power,

                      granted_power, total_granted_power,

                      num_actors, power_range,

                      max_allocatable_power, tz->temperature,

                      control_temp - tz->temperature);

    kfree(req_power);

unlock:

    mutex_unlock(&tz->lock);

    return ret;

}

// 分摊计算出当前温度下cooling device的最终的total_granted_power

/*
 * divvy_up_power() - split the power budget between the actors.
 * @req_power:         per-actor (weighted) requested power
 * @max_power:         per-actor maximum power
 * @num_actors:        number of entries in every array
 * @total_req_power:   sum of @req_power (treated as 1 when it is 0)
 * @power_range:       total budget to distribute
 * @granted_power:     output, power granted to each actor
 * @extra_actor_power: output/scratch, headroom of each actor below its max
 *
 * First pass: each actor gets power_range * req_power[i] / total_req_power
 * (rounded to nearest), capped at max_power[i]; whatever was cut off by the
 * cap is accumulated as extra power. Second pass: that extra power is handed
 * to the actors in proportion to their remaining headroom.
 */
static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors,
			   u32 total_req_power, u32 power_range,
			   u32 *granted_power, u32 *extra_actor_power)
{
	u32 extra_power, capped_extra_power;
	int i;

	/*
	 * Prevent division by 0 if none of the actors request power.
	 */
	if (!total_req_power)
		total_req_power = 1;

	capped_extra_power = 0;
	extra_power = 0;
	for (i = 0; i < num_actors; i++) {
		/* 64-bit product: req_power * power_range may exceed 32 bits. */
		u64 req_range = (u64)req_power[i] * power_range;

		granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range,
							 total_req_power);

		/* An actor can never be granted more than its maximum. */
		if (granted_power[i] > max_power[i]) {
			extra_power += granted_power[i] - max_power[i];
			granted_power[i] = max_power[i];
		}

		/* Headroom left for this actor, and the total headroom. */
		extra_actor_power[i] = max_power[i] - granted_power[i];
		capped_extra_power += extra_actor_power[i];
	}

	if (!extra_power)
		return;

	/*
	 * Re-divvy the reclaimed extra among actors based on
	 * how far they are from the max
	 */
	extra_power = min(extra_power, capped_extra_power);
	if (capped_extra_power > 0) {
		for (i = 0; i < num_actors; i++) {
			/*
			 * Widen before multiplying: the u32 * u32 product
			 * can wrap and hand an actor far less than its share.
			 * Since extra_power <= capped_extra_power, the
			 * rounded quotient never exceeds the actor's headroom.
			 */
			u64 extra_range = (u64)extra_actor_power[i] * extra_power;

			granted_power[i] += DIV_ROUND_CLOSEST_ULL(extra_range,
							 capped_extra_power);
		}
	}
}

// pid控制算法

/*
 * pid_controller() - PID control loop of the power_allocator governor.
 * @tz:                    thermal zone being controlled
 * @control_temp:          target temperature
 * @max_allocatable_power: upper bound for the returned budget
 *
 * Computes sustainable_power + frac_to_int(P + I + D) from the error between
 * @control_temp and the current zone temperature, clamped to
 * [0, @max_allocatable_power]. When the zone does not provide a sustainable
 * power it is estimated and the PID constants are re-estimated as well.
 *
 * Return: the power budget available at the current temperature.
 */
static u32 pid_controller(struct thermal_zone_device *tz,
			  int control_temp,
			  u32 max_allocatable_power)
{
	struct power_allocator_params *pa = tz->governor_data;
	s64 p, i, d, power_range;
	s32 err, max_power_frac;
	u32 sustainable_power;

	max_power_frac = int_to_frac(max_allocatable_power);

	if (!tz->tzp->sustainable_power) {
		/* Nothing configured: estimate it and the PID constants. */
		sustainable_power = estimate_sustainable_power(tz);
		estimate_pid_constants(tz, sustainable_power,
				       pa->trip_switch_on, control_temp,
				       true);
	} else {
		/* Use the configured value. */
		sustainable_power = tz->tzp->sustainable_power;
	}

	/* Error between the target and the current temperature. */
	err = control_temp - tz->temperature;
	err = int_to_frac(err);

	/*
	 * Proportional term: K_p * err, where K_p is k_po when the zone is
	 * above the target (err < 0) and k_pu otherwise.
	 */
	if (err < 0)
		p = mul_frac(tz->tzp->k_po, err);
	else
		p = mul_frac(tz->tzp->k_pu, err);

	/*
	 * Integral term: K_i * err_integral (accumulated error).
	 *
	 * if the error is less than cut off allow integration (but
	 * the integral is limited to max power)
	 */
	i = mul_frac(tz->tzp->k_i, pa->err_integral);

	if (err < int_to_frac(tz->tzp->integral_cutoff)) {
		s64 i_next = i + mul_frac(tz->tzp->k_i, err);

		/* Accumulate only while the term stays below max power. */
		if (abs(i_next) < max_power_frac) {
			i = i_next;
			pa->err_integral += err;
		}
	}

	/*
	 * Derivative term: K_d * (err - prev_err) / passive_delay.
	 *
	 * We do err - prev_err, so with a positive k_d, a decreasing
	 * error (i.e. driving closer to the line) results in less
	 * power being applied, slowing down the controller)
	 */
	d = mul_frac(tz->tzp->k_d, err - pa->prev_err);
	d = div_frac(d, tz->passive_delay);
	pa->prev_err = err;

	/* Budget = sustainable_power + (P + I + D), kept in [0, max]. */
	power_range = sustainable_power + frac_to_int(p + i + d);
	power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power);

	trace_thermal_power_allocator_pid(tz, frac_to_int(err),
					  frac_to_int(pa->err_integral),
					  frac_to_int(p), frac_to_int(i),
					  frac_to_int(d), power_range);

	return power_range;
}

// 所有cooling device在最大state下的最小功耗值进行累加

/*
 * estimate_sustainable_power() - default sustainable power of a zone.
 * @tz: thermal zone
 *
 * Return: the sum of the minimum power reported by
 * power_actor_get_min_power() for every instance bound to the control trip;
 * instances for which that call fails contribute nothing.
 */
static u32 estimate_sustainable_power(struct thermal_zone_device *tz)
{
	struct power_allocator_params *pa = tz->governor_data;
	struct thermal_instance *inst;
	u32 total = 0;

	list_for_each_entry(inst, &tz->thermal_instances, tz_node) {
		u32 min_power;

		/* Only successful reads on the control trip are accumulated. */
		if (inst->trip == pa->trip_max_desired_temperature &&
		    !power_actor_get_min_power(inst->cdev, &min_power))
			total += min_power;
	}

	return total;
}

// 默认pid的参数值

/*
 * estimate_pid_constants() - fill in default PID constants for a zone.
 * @tz:                thermal zone whose tzp constants are updated
 * @sustainable_power: sustainable power used to scale k_po / k_pu
 * @trip_switch_on:    trip index providing the switch-on temperature
 * @control_temp:      target temperature
 * @force:             overwrite constants even when they are already non-zero
 *
 * Defaults written (each only when currently 0, or when @force is set):
 *   k_po = int_to_frac(sustainable_power)     / (control_temp - switch_on_temp)
 *   k_pu = int_to_frac(2 * sustainable_power) / (control_temp - switch_on_temp)
 *   k_i  = int_to_frac(10) / 1000
 * k_d and integral_cutoff are left untouched.
 */
static void estimate_pid_constants(struct thermal_zone_device *tz,
				   u32 sustainable_power, int trip_switch_on,
				   int control_temp, bool force)
{
	u32 temperature_span;
	int on_temp;
	int err;

	/* Switch-on temperature; falls back to 0 when it cannot be read. */
	err = tz->ops->get_trip_temp(tz, trip_switch_on, &on_temp);
	if (err)
		on_temp = 0;

	/* Distance between the target and the switch-on temperature. */
	temperature_span = control_temp - on_temp;

	/*
	 * This function only provides defaults for zones that did not
	 * configure their own. Two passive trip points at the same
	 * temperature mean the zone was not set up properly, so there is
	 * nothing sensible to estimate: give up.
	 */
	if (!temperature_span)
		return;

	if (force || !tz->tzp->k_po)
		tz->tzp->k_po = int_to_frac(sustainable_power) /
			temperature_span;

	if (force || !tz->tzp->k_pu)
		tz->tzp->k_pu = int_to_frac(2 * sustainable_power) /
			temperature_span;

	if (force || !tz->tzp->k_i)
		tz->tzp->k_i = int_to_frac(10) / 1000;

	/*
	 * The default for k_d and integral_cutoff is 0, so we can
	 * leave them as they are.
	 */
}

power_actor_get_max_power函数power_actor_get_max_power | v5.10.43> ,获取cooling device最大功耗值

/*
 * power_actor_get_max_power() - maximum power a cooling device can consume.
 * @cdev:      cooling device
 * @max_power: output, power reported by state2power() for state 0
 *
 * Return: -EINVAL when @cdev is not a power actor, otherwise the result of
 * the device's state2power() callback.
 */
int power_actor_get_max_power(struct thermal_cooling_device *cdev,
			      u32 *max_power)
{
	/* State 0 is the state queried for the maximum power. */
	if (cdev_is_power_actor(cdev))
		return cdev->ops->state2power(cdev, 0, max_power);

	return -EINVAL;
}

例如,cooling device 为 cpu 时,冷却措施是调节 cpu frequency。cpufreq_cooling.c 函数cpufreq_state2power | v5.10.43>

//  cpu cdev state转换为功耗

/*
 * cpufreq_state2power() - translate a cooling state into power.
 * @cdev:  cpufreq cooling device
 * @state: cooling state to translate
 * @power: output, power for that state across the CPUs of the policy
 *
 * Return: 0 on success, -EINVAL when @state is above max_level.
 */
static int cpufreq_state2power(struct thermal_cooling_device *cdev,
			       unsigned long state, u32 *power)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	unsigned int khz, ncpus, entry;

	/* Request state should be less than max_level */
	if (state > cpufreq_cdev->max_level)
		return -EINVAL;

	/* Number of CPUs in policy->cpus. */
	ncpus = cpumask_weight(cpufreq_cdev->policy->cpus);

	/* State 0 maps to the last table entry, max_level to entry 0. */
	entry = cpufreq_cdev->max_level - state;
	khz = cpufreq_cdev->em->table[entry].frequency;

	/* Power at that frequency, multiplied by the CPU count. */
	*power = cpu_freq_to_power(cpufreq_cdev, khz) * ncpus;

	return 0;
}

// 获取CPU freqrequested_power(当前cpu load需要的功耗值)

/*
 * cpufreq_get_requested_power() - power currently requested by the CPUs.
 * @cdev:  cpufreq cooling device
 * @power: output, dynamic power at the current frequency
 *
 * Sums the load of every CPU in policy->related_cpus (offline CPUs count as
 * 0), stores it in last_load and derives the dynamic power from the current
 * frequency. Per-CPU loads are recorded only when the tracepoint is enabled
 * and the scratch buffer could be allocated.
 *
 * Return: always 0.
 */
static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
				       u32 *power)
{
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
	struct cpufreq_policy *policy = cpufreq_cdev->policy;
	u32 *load_cpu = NULL;
	u32 total_load = 0;
	unsigned long freq;
	int cpu, i = 0;

	/* Current CPU frequency. */
	freq = cpufreq_quick_get(policy->cpu);

	if (trace_thermal_power_cpu_get_power_enabled()) {
		u32 ncpus = cpumask_weight(policy->related_cpus);

		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
	}

	/* Walk the CPUs and collect their load. */
	for_each_cpu(cpu, policy->related_cpus) {
		u32 load = 0;

		if (cpu_online(cpu))
			load = get_load(cpufreq_cdev, cpu, i);

		total_load += load;
		if (load_cpu)
			load_cpu[i] = load;

		i++;
	}

	/* Total load of the CPUs in this policy. */
	cpufreq_cdev->last_load = total_load;

	/* Dynamic power at the current frequency. */
	*power = get_dynamic_power(cpufreq_cdev, freq);

	if (load_cpu) {
		trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
						  load_cpu, i, *power);

		kfree(load_cpu);
	}

	return 0;
}


2.3.3 bang_bang governor

  • throttle发生,打开风扇

  • throttle解除,关闭风扇。

2.3.4 user_space governor

user_space governor 是通过 uevent 将温区当前温度,温控触发点等信息上报到用户空间,由用户空间软件制定温控的策略。

2.4 绑定sensor

以 bcl_soc 为例,这里是创建一个 platform_driver,platform_driver 必须实现 probe 和 remove 函数。bcl_soc 不需要通过 polling(轮询)的方式去检查是否触发(polling-delay 是轮询的周期),而是通过监听系统电量的变化,回调 battery_supply_callback 函数,去唤醒队列中的 bcl_evaluate_soc 函数,再通过 bcl_evaluate_soc 函数获取当前的"温度"(此处即电量值)并处理符合触发条件的 trips。

bcl_soc: bcl-soc {
	compatible = "qcom,msm-bcl-soc";
	#thermal-sensor-cells = <0>;
};

bcl_soc.c源代码 | android-12.1.0_r0.24>

#define pr_fmt(fmt) "%s:%s " fmt,KBUILD_MODNAME, __func__#include #include #include #include #include #include #include #include #include #include #include "../thermal_core.h"#define BCL_DRIVER_NAME "bcl_soc_peripheral"struct bcl_device { struct notifier_block            psy_nb; struct work_struct            soc_eval_work; long int                trip_temp; int                    trip_val; struct mutex                state_trans_lock; bool                    irq_enabled; struct thermal_zone_device        *tz_dev; struct thermal_zone_of_device_ops    ops;};static struct bcl_device *bcl_perph;// 绑定trip_temp接口,设置触发值trip_tempstatic int bcl_set_soc(void *data, int low, int high){ if (low == bcl_perph->trip_temp) return 0;mutex_lock(&bcl_perph->state_trans_lock);pr_debug("low soc threshold:%d\n", low); // 设置trip_tempbcl_perph->trip_temp = low; if (low == INT_MIN) {bcl_perph->irq_enabled = false; goto unlock_and_exit; }bcl_perph->irq_enabled = true;schedule_work(&bcl_perph->soc_eval_work);unlock_and_exit:mutex_unlock(&bcl_perph->state_trans_lock); return 0;}// 绑定get_temp接口,获取电量值static int bcl_read_soc(void *data, int *val){ static struct power_supply*batt_psy; union power_supply_propval ret = {0,}; int err = 0; *val = 100; if (!batt_psy) batt_psy= power_supply_get_by_name("battery"); if (batt_psy) { // 获取电量 err= power_supply_get_property(batt_psy,POWER_SUPPLY_PROP_CAPACITY, &ret); if (err) {pr_err("battery percentage read error:%d\n",err);return err; }*val = ret.intval; }pr_debug("soc:%d\n", *val); return err;}// 获取当前温度和处理thermal zone tripstatic void bcl_evaluate_soc(struct work_struct *work){ int battery_percentage; // 获取电量 if (bcl_read_soc(NULL, &battery_percentage)) return;mutex_lock(&bcl_perph->state_trans_lock); if (!bcl_perph->irq_enabled) goto eval_exit; if (battery_percentage >bcl_perph->trip_temp) goto eval_exit; // 当前电量值bcl_perph->trip_val = battery_percentage;mutex_unlock(&bcl_perph->state_trans_lock); // 处理thermalzone 
trip,调用的是thermalcore中的handle_thermal_tripof_thermal_handle_trip(bcl_perph->tz_dev); return;eval_exit:mutex_unlock(&bcl_perph->state_trans_lock);}// 电量变化回调battery_supply_callback函数,去唤醒队列中的bcl_evaluate_soc函数static int battery_supply_callback(struct notifier_block *nb,unsigned long event, void *data){ struct power_supply *psy = data; if (strcmp(psy->desc->name, "battery")) return NOTIFY_OK;schedule_work(&bcl_perph->soc_eval_work); return NOTIFY_OK;}static int bcl_soc_remove(struct platform_device *pdev){power_supply_unreg_notifier(&bcl_perph->psy_nb);flush_work(&bcl_perph->soc_eval_work); if (bcl_perph->tz_dev)thermal_zone_of_sensor_unregister(&pdev->dev,bcl_perph->tz_dev); return 0;}static int bcl_soc_probe(struct platform_device *pdev){ int ret = 0; //申请内存空间, 当设备被拆卸或者驱动程序卸载时,内存会被自动释放bcl_perph = devm_kzalloc(&pdev->dev, sizeof(*bcl_perph), GFP_KERNEL); if (!bcl_perph) return -ENOMEM;mutex_init(&bcl_perph->state_trans_lock); // 指向get_temp、set_trips函数bcl_perph->ops.get_temp = bcl_read_soc;bcl_perph->ops.set_trips = bcl_set_soc; // 定义初始化工作队列INIT_WORK(&bcl_perph->soc_eval_work, bcl_evaluate_soc); // 回调函数 bcl_perph->psy_nb.notifier_call= battery_supply_callback; //注册监听接口,系统任何PSY设备的状态发生改变,并调用了power_supply_changed接口,powersupply core就通知notifier的监听者。 ret =power_supply_reg_notifier(&bcl_perph->psy_nb); if (ret < 0) {pr_err("soc notifier registration error. defer.err:%d\n",ret); ret= -EPROBE_DEFER; goto bcl_soc_probe_exit; } // 向thermalzone注册sensorbcl_perph->tz_dev =thermal_zone_of_sensor_register(&pdev->dev,0, bcl_perph,&bcl_perph->ops); if (IS_ERR(bcl_perph->tz_dev)) {pr_err("soc TZ register failed. 
err:%ld\n",PTR_ERR(bcl_perph->tz_dev)); ret= PTR_ERR(bcl_perph->tz_dev);bcl_perph->tz_dev = NULL; goto bcl_soc_probe_exit; }thermal_zone_device_update(bcl_perph->tz_dev, THERMAL_DEVICE_UP); // 将soc_eval_work添加到默认的工作队列schedule_work(&bcl_perph->soc_eval_work); // 设置driverdata的结构体是bcl_perphdev_set_drvdata(&pdev->dev, bcl_perph); return 0;bcl_soc_probe_exit:bcl_soc_remove(pdev); return ret;}//在dtsi中匹配.compatible= "qcom,msm-bcl-soc"的sensor,可以多个static const struct of_device_idbcl_match[]= { {.compatible = "qcom,msm-bcl-soc", }, {},};static struct platform_driver bcl_driver= {.probe  = bcl_soc_probe, .remove= bcl_soc_remove, .driver= {.name           = BCL_DRIVER_NAME,.owner          = THIS_MODULE,.of_match_table = bcl_match, },};builtin_platform_driver(bcl_driver);提供给sensor driver去调用的API接口 .10.43>// 向thermal zone注册sensor,通过data传入sensor_datastruct thermal_zone_device *thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void*data,const structthermal_zone_of_device_ops *ops){ struct device_node *np, *child, *sensor_np; struct thermal_zone_device *tzd = ERR_PTR(-ENODEV); np =of_find_node_by_name(NULL, "thermal-zones"); if (!np) return ERR_PTR(-ENODEV); if (!dev || !dev->of_node) {of_node_put(np); return ERR_PTR(-ENODEV); } sensor_np= of_node_get(dev->of_node);for_each_available_child_of_node(np, child) { int ret, id; // //解析dtsi中thermal-sensors节点 ret= thermal_zone_of_get_sensor_id(child, sensor_np, &id); if (ret)continue; if (id == sensor_id) {// 在thermal zone中绑定sensortzd = thermal_zone_of_add_sensor(child, sensor_np, data, ops);if (!IS_ERR(tzd))thermal_zone_device_enable(tzd);of_node_put(child);goto exit; } }exit:of_node_put(sensor_np);of_node_put(np); return tzd;}/***sensor API   ***/// 在thermal zone中绑定sensorstatic structthermal_zone_device *thermal_zone_of_add_sensor(struct device_node *zone,struct device_node *sensor, void *data,const structthermal_zone_of_device_ops *ops){ struct thermal_zone_device *tzd; struct __thermal_zone *tz; // 获取当前的thermalzone tzd 
=thermal_zone_get_zone_by_name(zone->name); if (IS_ERR(tzd)) return ERR_PTR(-EPROBE_DEFER); tz =tzd->devdata; if (!ops) return ERR_PTR(-EINVAL);mutex_lock(&tzd->lock); // 绑定ops tz->ops= ops; // 绑定sensor_datatz->sensor_data = data; // 绑定sensor中实现的get_temp、get_trendtzd->ops->get_temp = of_thermal_get_temp;tzd->ops->get_trend = of_thermal_get_trend; /* * Thethermal zone core will calculate the window if they have set the *optional set_trips pointer. */ if (ops->set_trips)tzd->ops->set_trips = of_thermal_set_trips; if (ops->set_emul_temp)tzd->ops->set_emul_temp = of_thermal_set_emul_temp;mutex_unlock(&tzd->lock); return tzd;}

加@小马微信进群与作者交流

Linux阅码场 专业的Linux技术社区和Linux操作系统学习平台,内容涉及Linux内核,Linux内存管理,Linux进程管理,Linux文件系统和IO,Linux性能调优,Linux设备驱动以及Linux虚拟化和云计算等各方各面.
评论
  • 铁氧体芯片是一种基于铁氧体磁性材料制成的芯片,在通信、传感器、储能等领域有着广泛的应用。铁氧体磁性材料能够通过外加磁场调控其导电性质和反射性质,因此在信号处理和传感器技术方面有着独特的优势。以下是对半导体划片机在铁氧体划切领域应用的详细阐述: 一、半导体划片机的工作原理与特点半导体划片机是一种使用刀片或通过激光等方式高精度切割被加工物的装置,是半导体后道封测中晶圆切割和WLP切割环节的关键设备。它结合了水气电、空气静压高速主轴、精密机械传动、传感器及自动化控制等先进技术,具有高精度、高
    博捷芯划片机 2024-12-12 09:16 87浏览
  • 一、SAE J1939协议概述SAE J1939协议是由美国汽车工程师协会(SAE,Society of Automotive Engineers)定义的一种用于重型车辆和工业设备中的通信协议,主要应用于车辆和设备之间的实时数据交换。J1939基于CAN(Controller Area Network)总线技术,使用29bit的扩展标识符和扩展数据帧,CAN通信速率为250Kbps,用于车载电子控制单元(ECU)之间的通信和控制。小北同学在之前也对J1939协议做过扫盲科普【科普系列】SAE J
    北汇信息 2024-12-11 15:45 115浏览
  • 天问Block和Mixly是两个不同的编程工具,分别在单片机开发和教育编程领域有各自的应用。以下是对它们的详细比较: 基本定义 天问Block:天问Block是一个基于区块链技术的数字身份验证和数据交换平台。它的目标是为用户提供一个安全、去中心化、可信任的数字身份验证和数据交换解决方案。 Mixly:Mixly是一款由北京师范大学教育学部创客教育实验室开发的图形化编程软件,旨在为初学者提供一个易于学习和使用的Arduino编程环境。 主要功能 天问Block:支持STC全系列8位单片机,32位
    丙丁先生 2024-12-11 13:15 71浏览
  • 应用环境与极具挑战性的测试需求在服务器制造领域里,系统整合测试(System Integration Test;SIT)是确保产品质量和性能的关键步骤。随着服务器系统的复杂性不断提升,包括:多种硬件组件、操作系统、虚拟化平台以及各种应用程序和服务的整合,服务器制造商面临着更有挑战性的测试需求。这些挑战主要体现在以下五个方面:1. 硬件和软件的高度整合:现代服务器通常包括多个处理器、内存模块、储存设备和网络接口。这些硬件组件必须与操作系统及应用软件无缝整合。SIT测试可以帮助制造商确保这些不同组件
    百佳泰测试实验室 2024-12-12 17:45 74浏览
  • 在智能化技术快速发展当下,图像数据的采集与处理逐渐成为自动驾驶、工业等领域的一项关键技术。高质量的图像数据采集与算法集成测试都是确保系统性能和可靠性的关键。随着技术的不断进步,对于图像数据的采集、处理和分析的需求日益增长,这不仅要求我们拥有高性能的相机硬件,还要求我们能够高效地集成和测试各种算法。我们探索了一种多源相机数据采集与算法集成测试方案,能够满足不同应用场景下对图像采集和算法测试的多样化需求,确保数据的准确性和算法的有效性。一、相机组成相机一般由镜头(Lens),图像传感器(Image
    康谋 2024-12-12 09:45 83浏览
  • 习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习笔记&记录学习习笔记&记学习学习笔记&记录学习学习笔记&记录学习习笔记&记录学习学习笔记&记录学习学习笔记记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记
    youyeye 2024-12-11 17:58 88浏览
  • RK3506 是瑞芯微推出的MPU产品,芯片制程为22nm,定位于轻量级、低成本解决方案。该MPU具有低功耗、外设接口丰富、实时性高的特点,适合用多种工商业场景。本文将基于RK3506的设计特点,为大家分析其应用场景。RK3506核心板主要分为三个型号,各型号间的区别如下图:​图 1  RK3506核心板处理器型号场景1:显示HMIRK3506核心板显示接口支持RGB、MIPI、QSPI输出,且支持2D图形加速,轻松运行QT、LVGL等GUI,最快3S内开
    万象奥科 2024-12-11 15:42 88浏览
  • 本文介绍瑞芯微RK3588主板/开发板Android12系统下,APK签名文件生成方法。触觉智能EVB3588开发板演示,搭载了瑞芯微RK3588芯片,该开发板是核心板加底板设计,音视频接口、通信接口等各类接口一应俱全,可帮助企业提高产品开发效率,缩短上市时间,降低成本和设计风险。工具准备下载Keytool-ImportKeyPair工具在源码:build/target/product/security/系统初始签名文件目录中,将以下三个文件拷贝出来:platform.pem;platform.
    Industio_触觉智能 2024-12-12 10:27 81浏览
  • 时源芯微——RE超标整机定位与解决详细流程一、 初步测量与问题确认使用专业的电磁辐射测量设备,对整机的辐射发射进行精确测量。确认是否存在RE超标问题,并记录超标频段和幅度。二、电缆检查与处理若存在信号电缆:步骤一:拔掉所有信号电缆,仅保留电源线,再次测量整机的辐射发射。若测量合格:判定问题出在信号电缆上,可能是电缆的共模电流导致。逐一连接信号电缆,每次连接后测量,定位具体哪根电缆或接口导致超标。对问题电缆进行处理,如加共模扼流圈、滤波器,或优化电缆布局和屏蔽。重新连接所有电缆,再次测量
    时源芯微 2024-12-11 17:11 117浏览
  • 首先在gitee上打个广告:ad5d2f3b647444a88b6f7f9555fd681f.mp4 · 丙丁先生/香河英茂工作室中国 - Gitee.com丙丁先生 (mr-bingding) - Gitee.com2024年对我来说是充满挑战和机遇的一年。在这一年里,我不仅进行了多个开发板的测评,还尝试了多种不同的项目和技术。今天,我想分享一下这一年的故事,希望能给大家带来一些启发和乐趣。 年初的时候,我开始对各种开发板进行测评。从STM32WBA55CG到瑞萨、平头哥和平海的开发板,我都
    丙丁先生 2024-12-11 20:14 78浏览
  • 习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习笔记&记录学习习笔记&记学习学习笔记&记录学习学习笔记&记录学习习笔记&记录学习学习笔记&记录学习学习笔记记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记录学习学习笔记&记
    youyeye 2024-12-12 10:13 50浏览
  • 全球智能电视时代来临这年头若是消费者想随意地从各个通路中选购电视时,不难发现目前市场上的产品都已是具有智能联网功能的智能电视了,可以宣告智能电视的普及时代已到临!Google从2021年开始大力推广Google TV(即原Android TV的升级版),其他各大品牌商也都跟进推出搭载Google TV操作系统的机种,除了Google TV外,LG、Samsung、Panasonic等大厂牌也开发出自家的智能电视平台,可以看出各家业者都一致地看好这块大饼。智能电视的Wi-Fi连线怎么消失了?智能电
    百佳泰测试实验室 2024-12-12 17:33 67浏览
我要评论
0
点击右上角,分享到朋友圈 我知道啦
请使用浏览器分享功能 我知道啦