参考文章:

https://www.jb51.net/article/133579.htm

https://blog.csdn.net/flaoter/article/details/77509553

https://www.cnblogs.com/arnoldlu/p/7078204.html 中时间子系统相关系列blog,讲的比较详细。

主要文件所在目录:

kernel/msm-4.9/kernel/time/tick-common.c、tick-sched.c、timer.c、hrtimer.c、clockevents.c等相关源文件

kernel/msm-4.9/kernel/time/clocksource.c

kernel/msm-4.9/drivers/clocksource文件夹下的一些源文件

kernel/sched/core.c

简述

scheduler_tick()是进程调度中最关键的函数之一,也是大多数调度函数的源头。那么它自身又是被谁调用的呢?

scheduler_tick()是所有调度子函数的父函数,而其是由Linux时间子系统的tick_device调用。tick_device是一个周期性定时器,定时时间为1个tick,当触发中断后,会在中断处理函数中,调用scheduler_tick()。

而打开了tickless(即动态tick)后,tick_device就会切换至oneshot模式,并仍由它负责调用scheduler_tick()。

这篇文章会简要地解释这个原理。

NO_HZ动态时钟 & hrtimer高精度时钟

因为tick是由tick device周期性触发的,所以当系统处于idle时,为了减少系统功耗,应该关闭周期性tick。于是,NO_HZ动态时钟应运而生:它会在系统空闲、仅有idle进程时,关闭周期性tick;而当跳出idle进程时,再重新开启周期性tick。

关于NO_HZ的详细资料,可以参考:https://www.kernel.org/doc/Documentation/timers/NO_HZ.txt

SDM845平台的时间子系统就是基于NO_HZ动态时钟, 以及高精度定时时钟的,对应.config配置如下:

#
# Timers subsystem
#
CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ_COMMON=y
# CONFIG_HZ_PERIODIC is not set
CONFIG_NO_HZ_IDLE=y
# CONFIG_NO_HZ_FULL is not set
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y

---CONFIG_NO_HZ_IDLE(处于idle状态没有tick,非idle状态正常tick)------------当前平台处于这个config

---CONFIG_NO_HZ_FULL(处于idle或者cpu仅有一个进程运行,停止tick;其他情况正常)

NO_HZ的情况下,有3种模式:

  • 系统动态时钟尚未激活模式
  • 高精度工作模式
  • 低精度工作模式
/*
 * Operating modes of the NO_HZ (dynamic tick) machinery: dynamic tick not
 * activated yet, low resolution mode, and high resolution mode.
 */
enum tick_nohz_mode {
	NOHZ_MODE_INACTIVE,
	NOHZ_MODE_LOWRES,
	NOHZ_MODE_HIGHRES,
};

/**
 * struct tick_sched - sched tick emulation and no idle tick control/stats
 * @sched_timer:	hrtimer to schedule the periodic tick in high
 *			resolution mode
 * @nohz_mode:		current mode, one of enum tick_nohz_mode
 * @last_tick:		Store the last tick expiry time when the tick
 *			timer is modified for nohz sleeps. This is necessary
 *			to resume the tick timer operation in the timeline
 *			when the CPU returns from nohz sleep.
 * @tick_stopped:	Indicator that the idle tick has been stopped
 * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
 * @idle_calls:		Total number of idle calls
 * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
 * @idle_entrytime:	Time when the idle call was entered
 * @idle_waketime:	Time when the idle was interrupted
 * @idle_exittime:	Time when the idle state was left
 * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
 * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
 * @sleep_length:	Duration of the current idle sleep
 * @do_timer_last:	CPU was the last one doing do_timer before going idle
 *
 * NOTE(review): check_clocks, inidle, idle_active, last_jiffies, next_timer,
 * idle_expires and tick_dep_mask are not described by this excerpt.
 */
struct tick_sched {
	struct hrtimer sched_timer;
	unsigned long check_clocks;
	enum tick_nohz_mode nohz_mode;
	ktime_t last_tick;
	int inidle;
	int tick_stopped;
	unsigned long idle_jiffies;
	unsigned long idle_calls;
	unsigned long idle_sleeps;
	int idle_active;
	ktime_t idle_entrytime;
	ktime_t idle_waketime;
	ktime_t idle_exittime;
	ktime_t idle_sleeptime;
	ktime_t iowait_sleeptime;
	ktime_t sleep_length;
	unsigned long last_jiffies;
	u64 next_timer;
	ktime_t idle_expires;
	int do_timer_last;
	atomic_t tick_dep_mask;
};

tick_device

tick_device相关数据结构如下:

1、tick_device的工作模式,支持两种:一种是周期性periodic,另一种是一次性oneshot。

/* Working mode of a tick device: periodic, or one-shot. */
enum tick_device_mode {
	TICKDEV_MODE_PERIODIC,
	TICKDEV_MODE_ONESHOT,
};

/*
 * A tick device: the clock event device currently backing the tick, plus
 * the mode the tick is operated in.
 */
struct tick_device {
	struct clock_event_device *evtdev;	/* set by tick_setup_device() */
	enum tick_device_mode mode;
};

tick device是通过tick_check_new_device函数进行创建

/*
 * Decide whether a newly registered clock event device should take over as
 * the tick device of the current CPU. Called with clockevents_lock held and
 * interrupts disabled.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
	int cpu = smp_processor_id();
	/* Tick device of the current CPU and the clock event device behind it. */
	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
	struct clock_event_device *curdev = td->evtdev;

	/*
	 * The new device has to pass the "cpu local device" check and then
	 * the preference decision against the current device. If either
	 * check fails, see whether it can serve as a broadcast device.
	 */
	if (!tick_check_percpu(curdev, newdev, cpu) ||
	    !tick_check_preferred(curdev, newdev)) {
		tick_install_broadcast_device(newdev);
		return;
	}

	if (!try_module_get(newdev->owner))
		return;

	/*
	 * Replace the eventually existing device by the new device. A
	 * current device that is the broadcast device is shut down here and
	 * not given back to the clockevents layer.
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);

	/* Attach the new device to this CPU's tick device. */
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
}

而在tick_setup_device函数,会进一步初始化tick device。如果是第一次setup,那么模式只能是periodic周期性的tick device。

/*
* Setup the tick device
*/
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
const struct cpumask *cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL; /*
* First device setup ?
*/
if (!td->evtdev) { //如果是当前CPU第一个注册的tick device
/*
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { //此tick device将会让其管理全局jiffies等时间信息
if (!tick_nohz_full_cpu(cpu))        
tick_do_timer_cpu = cpu;
else
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
tick_next_period = ktime_get();
tick_period = ktime_set(, NSEC_PER_SEC / HZ); //HZ为1秒内需要有多少的脉冲,基于此来设定定时时间
} /*
* Startup in periodic mode first.
*/
td->mode = TICKDEV_MODE_PERIODIC; //当前cpu第一次设定tick device的时候,缺省设定为周期性的tick
} else {
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
td->evtdev->event_handler = clockevents_handle_noop;
} td->evtdev = newdev;      //将系统clock_event_device赋值给对应tick device的evtdev指针。这是比较关键的一步,代表了tick device找到合适挂载 /*
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask); /*
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
* way. This function also returns !=0 when we keep the
* current active broadcast state for this CPU.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return; if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, );
else
tick_setup_oneshot(newdev, handler, next_event);
}

在tick_setup_periodic中,

先设置中断handler,再开启定时器。

/*
 * Setup the device for a periodic tick
 *
 * Installs the periodic event handler first. A device that has
 * CLOCK_EVT_FEAT_PERIODIC (and no active oneshot broadcast) is switched to
 * periodic state; any other device is switched to oneshot state and
 * programmed for the next tick period, i.e. the periodic tick is emulated.
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
	unsigned long seq;
	ktime_t next;

	tick_set_periodic_handler(dev, broadcast);

	/* Broadcast setup ? */
	if (!tick_device_is_functional(dev))
		return;

	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
	    !tick_broadcast_oneshot_active()) {
		clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
		return;
	}

	/* Take a consistent snapshot of tick_next_period under jiffies_lock. */
	do {
		seq = read_seqbegin(&jiffies_lock);
		next = tick_next_period;
	} while (read_seqretry(&jiffies_lock, seq));

	clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);

	/* Advance by one tick period until programming the event succeeds. */
	while (clockevents_program_event(dev, next, false))
		next = ktime_add(next, tick_period);
}

中断handler,使用的是不支持broadcast的。

/*
 * Set the periodic handler depending on broadcast on/off:
 * tick_handle_periodic_broadcast when @broadcast is non-zero,
 * tick_handle_periodic otherwise.
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	dev->event_handler = broadcast ? tick_handle_periodic_broadcast
				       : tick_handle_periodic;
}
/**
 * clockevents_switch_state - set the operating state of a clock event device
 * @dev:	device to modify
 * @state:	new state
 *
 * Does nothing when the device is already in @state, or when the
 * state-specific callback reports an error.
 *
 * Must be called with interrupts disabled !
 */
void clockevents_switch_state(struct clock_event_device *dev,
			      enum clock_event_state state)
{
	if (clockevent_get_state(dev) != state) {
		/* Run the state-specific callback; keep the old state on failure. */
		if (__clockevents_switch_state(dev, state))
			return;

		clockevent_set_state(dev, state);

		/*
		 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
		 * on it, so fix it up and emit a warning:
		 */
		if (clockevent_state_oneshot(dev)) {
			if (unlikely(!dev->mult)) {
				dev->mult = 1;
				WARN_ON(1);
			}
		}
	}
}

最后会调用device特定的periodic工作函数

/*
 * Invoke the device's state-specific callback for @state.
 *
 * Returns the callback's result, 0 when the device has no callback for a
 * shutdown/periodic/oneshot transition or is a dummy device, -ENOSYS when
 * the device lacks the required feature (or the oneshot_stopped callback),
 * and -EINVAL when ONESHOT_STOPPED is requested outside oneshot state.
 */
static int __clockevents_switch_state(struct clock_event_device *dev,
				      enum clock_event_state state)
{
	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	/* Transition with new state-specific callbacks */
	switch (state) {
	case CLOCK_EVT_STATE_DETACHED:
		/* The clockevent device is getting replaced. Shut it down. */

	case CLOCK_EVT_STATE_SHUTDOWN:
		if (dev->set_state_shutdown)
			return dev->set_state_shutdown(dev);
		return 0;

	case CLOCK_EVT_STATE_PERIODIC:
		/* Core internal bug */
		if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
			return -ENOSYS;
		/*
		 * A device without a set_state_periodic callback falls
		 * through to "return 0" (the article notes that this is the
		 * case on its platform).
		 */
		if (dev->set_state_periodic)
			return dev->set_state_periodic(dev);
		return 0;

	case CLOCK_EVT_STATE_ONESHOT:
		/* Core internal bug */
		if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
			return -ENOSYS;
		if (dev->set_state_oneshot)
			return dev->set_state_oneshot(dev);
		return 0;

	case CLOCK_EVT_STATE_ONESHOT_STOPPED:
		/* Core internal bug */
		if (WARN_ONCE(!clockevent_state_oneshot(dev),
			      "Current state: %d\n",
			      clockevent_get_state(dev)))
			return -EINVAL;

		if (dev->set_state_oneshot_stopped)
			return dev->set_state_oneshot_stopped(dev);
		else
			return -ENOSYS;

	default:
		return -ENOSYS;
	}
}

上面提到set_state_periodic并未定义。那么应该走到哪里呢?我们看前面在 tick_setup_periodic 函数中,会判断:

    if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!tick_broadcast_oneshot_active())

实际当前平台的clock event device不支持CLOCK_EVT_FEAT_PERIODIC模式,所以代码会走到else中,模拟周期tick:

    if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!tick_broadcast_oneshot_active()) {
clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
} else { //走到else中,模拟周期tick
unsigned long seq;
ktime_t next; do {
seq = read_seqbegin(&jiffies_lock);
next = tick_next_period;
} while (read_seqretry(&jiffies_lock, seq)); clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);  //这里同样因为没有set_state_onshot的接口函数,直接return 0。 for (;;) {
if (!clockevents_program_event(dev, next, false))   //这里是一个无限循环,但是正常情况下,由于return 0,直接跳出循环
return;
next = ktime_add(next, tick_period);
}
}
/**
 * clockevents_program_event - Reprogram the clock event device.
 * @dev:	device to program
 * @expires:	absolute expiry time (monotonic clock)
 * @force:	program minimum delay if expires can not be set
 *
 * Returns 0 on success, -ETIME when the event is in the past.
 */
int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
			      bool force)
{
	unsigned long long clc;
	int64_t delta;
	int rc;

	/* A negative expiry time is rejected with a one-time warning. */
	if (unlikely(expires.tv64 < 0)) {
		WARN_ON_ONCE(1);
		return -ETIME;
	}

	dev->next_event = expires;

	/* A shut down device only records the expiry; nothing is programmed. */
	if (clockevent_state_shutdown(dev))
		return 0;

	/* We must be in ONESHOT state here */
	WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n",
		  clockevent_get_state(dev));

	/* Shortcut for clockevent devices that can deal with ktime. */
	if (dev->features & CLOCK_EVT_FEAT_KTIME)
		return dev->set_next_ktime(expires, dev);

	delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
	if (delta <= 0)
		return force ? clockevents_program_min_delta(dev) : -ETIME;

	/* Clamp the delay to the range supported by the device. */
	delta = min(delta, (int64_t) dev->max_delta_ns);
	delta = max(delta, (int64_t) dev->min_delta_ns);

	/* Convert nanoseconds to device cycles via mult/shift. */
	clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
	rc = dev->set_next_event((unsigned long) clc, dev);

	return (rc && force) ? clockevents_program_min_delta(dev) : rc;
}
/**
 * clockevents_program_min_delta - Set clock event device to the minimum delay.
 * @dev:	device to program
 *
 * Returns 0 on success, -ETIME when the retry loop failed.
 */
static int clockevents_program_min_delta(struct clock_event_device *dev)
{
	unsigned long long clc;
	int64_t delta;
	int i;

	for (i = 0;;) {
		delta = dev->min_delta_ns;
		dev->next_event = ktime_add_ns(ktime_get(), delta);

		if (clockevent_state_shutdown(dev))
			return 0;

		dev->retries++;
		clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
		if (dev->set_next_event((unsigned long) clc, dev) == 0)
			return 0;

		if (++i > 2) {
			/*
			 * We tried 3 times to program the device with the
			 * given min_delta_ns. Try to increase the minimum
			 * delta, if that fails as well get out of here.
			 */
			if (clockevents_increase_min_delta(dev))
				return -ETIME;
			i = 0;
		}
	}
}

当触发了定时器之后,会调用中断handler函数:tick_handle_periodic,函数下半部分会重新设置timer触发的时间。下次触发,仍然进入中断handler函数。如此往复,模拟周期tick。

/*
 * Event handler for periodic ticks
 *
 * Runs the periodic tick work for this CPU and, for a device in oneshot
 * state, programs the next tick one tick_period after the previous event.
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
	int cpu = smp_processor_id();
	ktime_t next = dev->next_event;

	/* Per-tick work; ends up in update_process_times(). */
	tick_periodic(cpu);

#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
	/*
	 * The cpu might have transitioned to HIGHRES or NOHZ mode via
	 * update_process_times() -> run_local_timers() ->
	 * hrtimer_run_queues().
	 */
	if (dev->event_handler != tick_handle_periodic)
		return;
#endif

	if (!clockevent_state_oneshot(dev))
		return;
	for (;;) {
		/*
		 * Setup the next period for devices, which do not have
		 * periodic mode:
		 */
		next = ktime_add(next, tick_period);

		if (!clockevents_program_event(dev, next, false))
			return;
		/*
		 * Have to be careful here. If we're in oneshot mode,
		 * before we call tick_periodic() in a loop, we need
		 * to be sure we're using a real hardware clocksource.
		 * Otherwise we could get trapped in an infinite
		 * loop, as the tick_periodic() increments jiffies,
		 * which then will increment time, possibly causing
		 * the loop to trigger again and again.
		 */
		if (timekeeping_valid_for_hres())
			tick_periodic(cpu);
	}
}

在tick_periodic中,

/*
 * Periodic tick
 *
 * On the CPU that owns the do_timer duty this advances tick_next_period,
 * accounts one tick via do_timer() under jiffies_lock and updates the wall
 * time. Every CPU then runs update_process_times() and the profiling tick.
 */
static void tick_periodic(int cpu)
{
	if (tick_do_timer_cpu == cpu) {
		write_seqlock(&jiffies_lock);

		/* Keep track of the next tick event */
		tick_next_period = ktime_add(tick_next_period, tick_period);

		do_timer(1);	/* account exactly one tick */
		write_sequnlock(&jiffies_lock);
		update_wall_time();
	}

	update_process_times(user_mode(get_irq_regs()));
	profile_tick(CPU_PROFILING);	/* feed the kernel profiler */
}
/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process. user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_check_callbacks(user_tick);
#ifdef CONFIG_IRQ_WORK
	if (in_irq())
		irq_work_tick();
#endif
	scheduler_tick();	/* this is where scheduler_tick() gets called */
	run_posix_cpu_timers(p);
}

关于代码采集器profile_tick的简要知识:

        profile_tick()函数为代码监管器采集数据。这个函数在单处理器系统上是由do_timer_interrupt()调用的(即全局时钟中断处理程序调用的),在多处理器系统上是由smp_local_timer_interrupt()函数调用的(即本地时钟中断处理程序调用的)

        为了激活代码监管器,在Linux内核启动时必须传递字符串参数"profile=N" ,这里2的N次方,表示要监管的代码段的大小。采集的数据可以从/proc/profile文件中读取。可以通过修改这个文件来重置计数器;在多处理器系统上,修改这个文件还可以改变抽样频率。不过,内核开发者并不直接访问/proc/profile文件,而是用readprofile系统命令

        Linux2.6内核还包含了另一个监管器,叫做oprofile .比起readprofile,oprofile除了更灵活、更可定制外,还能用于发现内核代码、用户态应用程序以及系统库中的热点。当使用oprofile时,profile_tick()调用timer_notify()函数来收集这个新监管器所使用的数据。

回归原题,scheduler_tick()具体被调用流程:tick中断->tick_periodic()->update_process_times()->scheduler_tick()或者tick中断->tick_sched_handle()->update_process_times()->scheduler_tick()。本文分析了前者,后者有兴趣可以自行读代码了解。

下面补充linux时间子系统的相关知识:dev这个结构体是如何初始化并填充的(包括clock_event_device->features在哪里被设置为CLOCK_EVT_FEAT_ONESHOT)。

clock source & clock event device

我们可以看到是在DTS中有2个timer配置。

DTS配置:

    timer {
compatible = "arm,armv8-timer";
interrupts = < 0xf08>,
< 0xf08>,
< 0xf08>,
< 0xf08>;
clock-frequency = <>;
}; timer@0x17C90000{
#address-cells = <>;
#size-cells = <>;
ranges;
compatible = "arm,armv7-timer-mem";
reg = <0x17C90000 0x1000>;
clock-frequency = <>;
.....
};

./kernel/msm-4.9/drivers/clocksource/arm_arch_timer.c中,会根据这2个timer进行clock source初始化。

CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);

/*
 * Probe routine for the CP15 architected timer DT node: maps the timer PPIs,
 * detects the timer rate, reads the DT quirk properties and continues with
 * arch_timer_init().
 *
 * Returns 0 when a CP15 timer node was already handled, otherwise the
 * result of arch_timer_init().
 */
static int __init arch_timer_of_init(struct device_node *np)
{
	int i;

	if (arch_timers_present & ARCH_CP15_TIMER) {
		pr_warn("arch_timer: multiple nodes in dt, skipping\n");
		return 0;
	}

	arch_timers_present |= ARCH_CP15_TIMER;
	for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
		arch_timer_ppi[i] = irq_of_parse_and_map(np, i);

	/* Timer frequency; the article reports 19.2 MHz taken from the DTS. */
	arch_timer_detect_rate(NULL, np);

	arch_timer_c3stop = !of_property_read_bool(np, "always-on");

#ifdef CONFIG_FSL_ERRATUM_A008585
	if (fsl_a008585_enable < 0)
		fsl_a008585_enable = of_property_read_bool(np, "fsl,erratum-a008585");
	if (fsl_a008585_enable) {
		static_branch_enable(&arch_timer_read_ool_enabled);
		pr_info("Enabling workaround for FSL erratum A-008585\n");
	}
#endif

	/*
	 * If we cannot rely on firmware initializing the timer registers then
	 * we should use the physical timers instead.
	 */
	if (IS_ENABLED(CONFIG_ARM) &&
	    of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
		arch_timer_uses_ppi = PHYS_SECURE_PPI;

	/* On some systems, the counter stops ticking when in suspend. */
	arch_counter_suspend_stop = of_property_read_bool(np,
							 "arm,no-tick-in-suspend");

	return arch_timer_init();
}
/*
 * Select the PPI to use, then (1) register the timer and (2) run the common
 * timer initialisation.
 *
 * Returns 0 on success, -EINVAL when no usable interrupt is available, or
 * the error of the failing step.
 */
static int __init arch_timer_init(void)
{
	int ret;
	/*
	 * If HYP mode is available, we know that the physical timer
	 * has been configured to be accessible from PL1. Use it, so
	 * that a guest can use the virtual timer instead.
	 *
	 * If no interrupt provided for virtual timer, we'll have to
	 * stick to the physical timer. It'd better be accessible...
	 *
	 * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE
	 * accesses to CNTP_*_EL1 registers are silently redirected to
	 * their CNTHP_*_EL2 counterparts, and use a different PPI
	 * number.
	 */
	if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {
		bool has_ppi;

		if (is_kernel_in_hyp_mode()) {
			arch_timer_uses_ppi = HYP_PPI;
			has_ppi = !!arch_timer_ppi[HYP_PPI];
		} else {
			arch_timer_uses_ppi = PHYS_SECURE_PPI;
			has_ppi = (!!arch_timer_ppi[PHYS_SECURE_PPI] ||
				   !!arch_timer_ppi[PHYS_NONSECURE_PPI]);
		}

		if (!has_ppi) {
			pr_warn("arch_timer: No interrupt available, giving up\n");
			return -EINVAL;
		}
	}

	/* (1) register the timer */
	ret = arch_timer_register();
	if (ret)
		return ret;

	/* (2) common timer initialisation */
	ret = arch_timer_common_init();
	if (ret)
		return ret;

	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI];

	return 0;
}

(1)注册timer:

/*
 * Allocate the per-CPU clock event devices, request the per-CPU timer
 * interrupt(s), set up the CPU PM hooks and install the CPU hotplug
 * callbacks.
 *
 * Returns 0 on success or a negative error code; everything acquired before
 * the failing step is released again.
 */
static int __init arch_timer_register(void)
{
	int err;
	int ppi;

	arch_timer_evt = alloc_percpu(struct clock_event_device);
	if (!arch_timer_evt) {
		err = -ENOMEM;
		goto out;
	}

	ppi = arch_timer_ppi[arch_timer_uses_ppi];
	switch (arch_timer_uses_ppi) {
	case VIRT_PPI:
		/*
		 * Only requests the per-CPU interrupt with
		 * arch_timer_handler_virt as handler; it is enabled later,
		 * in the startup callback (arch_timer_starting_cpu()).
		 */
		err = request_percpu_irq(ppi, arch_timer_handler_virt,
					 "arch_timer", arch_timer_evt);
		break;
	case PHYS_SECURE_PPI:
	case PHYS_NONSECURE_PPI:
		err = request_percpu_irq(ppi, arch_timer_handler_phys,
					 "arch_timer", arch_timer_evt);
		if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) {
			ppi = arch_timer_ppi[PHYS_NONSECURE_PPI];
			err = request_percpu_irq(ppi, arch_timer_handler_phys,
						 "arch_timer", arch_timer_evt);
			/* Second request failed: give the secure PPI back. */
			if (err)
				free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
						arch_timer_evt);
		}
		break;
	case HYP_PPI:
		err = request_percpu_irq(ppi, arch_timer_handler_phys,
					 "arch_timer", arch_timer_evt);
		break;
	default:
		BUG();
	}

	if (err) {
		pr_err("arch_timer: can't register interrupt %d (%d)\n",
		       ppi, err);
		goto out_free;
	}

	/* Per the article: notifiers for CPU / cluster low-power entry and exit. */
	err = arch_timer_cpu_pm_init();
	if (err)
		goto out_unreg_notify;

	/* Register and immediately configure the timer on the boot CPU */
	err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
				"AP_ARM_ARCH_TIMER_STARTING",
				arch_timer_starting_cpu, arch_timer_dying_cpu);
	if (err)
		goto out_unreg_cpupm;
	return 0;

out_unreg_cpupm:
	arch_timer_cpu_pm_deinit();

out_unreg_notify:
	free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt);
	if (arch_timer_has_nonsecure_ppi())
		free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
				arch_timer_evt);

out_free:
	free_percpu(arch_timer_evt);
out:
	return err;
}

(1.1)通过__cpuhp_setup_state,注册并调用arch_timer_starting_cpu

/**
 * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
 * @state:	The state to setup
 * @name:	Name of the state; must not be NULL
 * @invoke:	If true, the startup function is invoked for cpus where
 *		cpu state >= @state
 * @startup:	startup callback function
 * @teardown:	teardown callback function
 * @multi_instance:	passed through to cpuhp_store_callbacks()
 *
 * Returns 0 if successful, otherwise a proper error code. For
 * CPUHP_AP_ONLINE_DYN the reserved state number is returned on success.
 */
int __cpuhp_setup_state(enum cpuhp_state state,
			const char *name, bool invoke,
			int (*startup)(unsigned int cpu),
			int (*teardown)(unsigned int cpu),
			bool multi_instance)
{
	int cpu, ret = 0;
	int dyn_state = 0;

	if (cpuhp_cb_check(state) || !name)
		return -EINVAL;

	get_online_cpus();
	mutex_lock(&cpuhp_state_mutex);

	/* currently assignments for the ONLINE state are possible */
	if (state == CPUHP_AP_ONLINE_DYN) {
		dyn_state = 1;
		ret = cpuhp_reserve_state(state);
		if (ret < 0)
			goto out;
		state = ret;
	}

	/* Store the startup/teardown callbacks for this state. */
	cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);

	if (!invoke || !startup)
		goto out;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		/* Skip CPUs that have not reached @state yet. */
		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, NULL);
		if (ret) {
			/* Roll back the CPUs done so far and drop the callbacks. */
			if (teardown)
				cpuhp_rollback_install(cpu, state, NULL);
			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
			goto out;
		}
	}
out:
	mutex_unlock(&cpuhp_state_mutex);

	put_online_cpus();
	if (!ret && dyn_state)
		return state;
	return ret;
}

(1.1.1)调用startup接口,配置clock event device

...
cb = bringup ? step->startup.single : step->teardown.single;
if (!cb)
return ;
ret = cb(cpu);
...
static int arch_timer_starting_cpu(unsigned int cpu)
{
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
u32 flags; __arch_timer_setup(ARCH_CP15_TIMER, clk);                      //(1.1.1.1)setup和配置clock event device flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);          //这里真正enable timer的per cpu irq if (arch_timer_has_nonsecure_ppi()) {
flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]);
enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags);
} arch_counter_set_user_access();                            //设置user上层无法access timer和 physical counter,只能access virtual counter
if (evtstrm_enable)
arch_timer_configure_evtstream(); return ;
}

1.1.1.1 配置clock event device

其中arch_sys_timer为tick_device,arch_mem_timer为broadcast device。

当前这里是arch_sys_timer

static void __arch_timer_setup(unsigned type,
struct clock_event_device *clk)
{
clk->features = CLOCK_EVT_FEAT_ONESHOT; if (type == ARCH_CP15_TIMER) {
if (arch_timer_c3stop)
clk->features |= CLOCK_EVT_FEAT_C3STOP;
clk->name = "arch_sys_timer";
clk->rating = ;
clk->cpumask = cpumask_of(smp_processor_id());
clk->irq = arch_timer_ppi[arch_timer_uses_ppi];
switch (arch_timer_uses_ppi) {
case VIRT_PPI:
clk->set_state_shutdown = arch_timer_shutdown_virt;        //这里就是配置clock event device的api,确实并没有set_state_periodic
clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
clk->set_next_event = arch_timer_set_next_event_virt;
break;
case PHYS_SECURE_PPI:
case PHYS_NONSECURE_PPI:
case HYP_PPI:
clk->set_state_shutdown = arch_timer_shutdown_phys;
clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
clk->set_next_event = arch_timer_set_next_event_phys;
break;
default:
BUG();
} fsl_a008585_set_sne(clk);
} else {
clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
clk->name = "arch_mem_timer";
clk->rating = ;
clk->cpumask = cpu_all_mask;
if (arch_timer_mem_use_virtual) {
clk->set_state_shutdown = arch_timer_shutdown_virt_mem;
clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem;
clk->set_next_event =
arch_timer_set_next_event_virt_mem;
} else {
clk->set_state_shutdown = arch_timer_shutdown_phys_mem;
clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem;
clk->set_next_event =
arch_timer_set_next_event_phys_mem;
}
} clk->set_state_shutdown(clk);                          //先关闭该clock event device clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff); //将配置好的clock event device进一步配置并注册到系统中
}
/**
 * clockevents_config_and_register - Configure and register a clock event device
 * @dev:	device to register
 * @freq:	The clock frequency
 * @min_delta:	The minimum clock ticks to program in oneshot mode
 * @max_delta:	The maximum clock ticks to program in oneshot mode
 *
 * min/max_delta can be 0 for devices which do not support oneshot mode.
 */
void clockevents_config_and_register(struct clock_event_device *dev,
				     u32 freq, unsigned long min_delta,
				     unsigned long max_delta)
{
	/* Record the programmable tick range before configuring the device. */
	dev->max_delta_ticks = max_delta;
	dev->min_delta_ticks = min_delta;

	/* Configure for @freq (19.2 MHz on the article's platform) ... */
	clockevents_config(dev, freq);
	/* ... and hand the device over to the clockevents core. */
	clockevents_register_device(dev);
}
EXPORT_SYMBOL_GPL(clockevents_config_and_register);
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
*/
void clockevents_register_device(struct clock_event_device *dev)
{
unsigned long flags; /* Initialize state to DETACHED */
clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);  //初始化state if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > );
dev->cpumask = cpumask_of(smp_processor_id());
} raw_spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices);         //加入链表
tick_check_new_device(dev);                  //回到最开始分析的tick device创建
clockevents_notify_released(); raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);

(2)timer相关初始化:

/*
 * Common initialisation shared by the CP15 and the memory-mapped timer
 * probe paths: print the banner, register the counter, select the clock
 * source and run the architecture specific initialisation.
 *
 * Returns 0 without doing anything while the other timer node still needs
 * probing; otherwise the result of arch_timer_arch_init().
 */
static int __init arch_timer_common_init(void)
{
	unsigned mask = ARCH_CP15_TIMER | ARCH_MEM_TIMER;

	/* Wait until both nodes are probed if we have two timers */
	if ((arch_timers_present & mask) != mask) {
		if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match))
			return 0;
		if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match))
			return 0;
	}

	/*
	 * Prints the timer summary; log from the article's platform:
	 * "arm_arch_timer: Architected cp15 and mmio timer(s) running at
	 * 19.20MHz (virt/virt)."
	 */
	arch_timer_banner(arch_timers_present);
	/* (2.1) register and initialise the counter */
	arch_counter_register(arch_timers_present);
	/* Per the article: selects arch_sys_counter, registered in the previous step. */
	clocksource_select_force();
	/* Per the article: configures and registers the delay timer. */
	return arch_timer_arch_init();
}

(2.1)计时器注册和初始化

/* Clock source backed by the architected counter. */
static struct clocksource clocksource_counter = {
	.name = "arch_sys_counter",
	.rating = 400,
	.read = arch_counter_read,
	.mask = CLOCKSOURCE_MASK(56),
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
 * Pick the counter read accessor for @type, register clocksource_counter,
 * initialise the KVM timecounter and register the counter as sched_clock.
 *
 * @type:	bitmask of the timers present (ARCH_CP15_TIMER, ...)
 */
static void __init arch_counter_register(unsigned type)
{
	u64 start_count;

	/* Register the CP15 based counter if we have one */
	if (type & ARCH_CP15_TIMER) {
		/* Choose the accessor used to read the counter. */
		if (IS_ENABLED(CONFIG_ARM64) || arch_timer_uses_ppi == VIRT_PPI)
			arch_timer_read_counter = arch_counter_get_cntvct;
		else
			arch_timer_read_counter = arch_counter_get_cntpct;

		clocksource_counter.archdata.vdso_direct = true;

#ifdef CONFIG_FSL_ERRATUM_A008585
		/*
		 * Don't use the vdso fastpath if errata require using
		 * the out-of-line counter accessor.
		 */
		if (static_branch_unlikely(&arch_timer_read_ool_enabled))
			clocksource_counter.archdata.vdso_direct = false;
#endif
	} else {
		arch_timer_read_counter = arch_counter_get_cntvct_mem;
	}

	if (!arch_counter_suspend_stop)
		clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
	start_count = arch_timer_read_counter();
	/*
	 * Install the clock source at arch_timer_rate (19.2 MHz on the
	 * article's platform). Log from that platform:
	 * "clocksource: arch_sys_counter: mask: 0xffffffffffffff max_cycles:
	 * 0x46d987e47, max_idle_ns: 440795202767 ns"
	 */
	clocksource_register_hz(&clocksource_counter, arch_timer_rate);
	/* Reuse the mult/shift of the clock source for the timecounter. */
	cyclecounter.mult = clocksource_counter.mult;
	cyclecounter.shift = clocksource_counter.shift;
	timecounter_init(&arch_timer_kvm_info.timecounter,
			 &cyclecounter, start_count);

	/* 56 bits minimum, so we assume worst case rollover */
	sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
}

(2.1.1)注册sched clock source

/*
 * Register @read as the sched_clock source.
 *
 * @read:	function returning the current counter value
 * @bits:	number of valid bits in the counter value
 * @rate:	counter frequency in Hz
 *
 * Does nothing when the currently registered source has a higher rate.
 */
void __init
sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
	u64 res, wrap, new_mask, new_epoch, cyc, ns;
	u32 new_mult, new_shift;
	unsigned long r;
	char r_unit;
	struct clock_read_data rd;

	if (cd.rate > rate)
		return;

	WARN_ON(!irqs_disabled());

	/* Calculate the mult/shift to convert counter ticks to ns. */
	clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);

	new_mask = CLOCKSOURCE_MASK(bits);
	cd.rate = rate;

	/* Calculate how many nanosecs until we risk wrapping */
	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
	cd.wrap_kt = ns_to_ktime(wrap);

	rd = cd.read_data[0];

	/* Update epoch for new counter and update 'epoch_ns' from old counter*/
	new_epoch = read();
	cyc = cd.actual_read_sched_clock();
	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
	cd.actual_read_sched_clock = read;

	rd.read_sched_clock = read;
	rd.sched_clock_mask = new_mask;
	rd.mult = new_mult;
	rd.shift = new_shift;
	rd.epoch_cyc = new_epoch;
	rd.epoch_ns = ns;

	/* Publish the new read data. */
	update_clock_read_data(&rd);

	if (sched_clock_timer.function != NULL) {
		/* update timeout for clock wrap */
		hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
	}

	/* Scale the rate to MHz / kHz for the log message below. */
	r = rate;
	if (r >= 4000000) {
		r /= 1000000;
		r_unit = 'M';
	} else {
		if (r >= 1000) {
			r /= 1000;
			r_unit = 'k';
		} else {
			r_unit = ' ';
		}
	}

	/* Calculate the ns resolution of this counter */
	res = cyc_to_ns(1ULL, new_mult, new_shift);

	/*
	 * Log from the article's platform: "sched_clock: 56 bits at 19MHz,
	 * resolution 52ns, wraps every 4398046511078ns"
	 */
	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
		bits, r, r_unit, res, wrap);

	/* Enable IRQ time accounting if we have a fast enough sched_clock() */
	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
		enable_sched_clock_irqtime();

	pr_debug("Registered %pF as sched_clock source\n", read);
}

下面为mem_timer的部分相关流程,读者有兴趣可以自行跟踪代码。

CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
		       arch_timer_mem_init);

/*
 * Probe routine for the memory-mapped architected timer DT node: picks the
 * best timer frame (virtual capable if possible, physical otherwise), maps
 * it, resolves its interrupt, registers the timer and runs the common
 * initialisation.
 *
 * Returns the result of arch_timer_common_init() on success, otherwise a
 * negative error code.
 */
static int __init arch_timer_mem_init(struct device_node *np)
{
	struct device_node *frame, *best_frame = NULL;
	void __iomem *cntctlbase, *base;
	unsigned int irq, ret = -EINVAL;
	u32 cnttidr;

	arch_timers_present |= ARCH_MEM_TIMER;
	/* Register region 0 of the node is the CNTCTLBase frame. */
	cntctlbase = of_iomap(np, 0);
	if (!cntctlbase) {
		pr_err("arch_timer: Can't find CNTCTLBase\n");
		return -ENXIO;
	}

	cnttidr = readl_relaxed_no_log(cntctlbase + CNTTIDR);

	/*
	 * Try to find a virtual capable frame. Otherwise fall back to a
	 * physical capable frame.
	 */
	for_each_available_child_of_node(np, frame) {
		int n;
		u32 cntacr;

		if (of_property_read_u32(frame, "frame-number", &n)) {
			pr_err("arch_timer: Missing frame-number\n");
			of_node_put(frame);
			goto out;
		}

		/* Try enabling everything, and see what sticks */
		cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT |
			 CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT;
		writel_relaxed(cntacr, cntctlbase + CNTACR(n));
		cntacr = readl_relaxed(cntctlbase + CNTACR(n));

		/* Virtual capable frame found: take it and stop searching. */
		if ((cnttidr & CNTTIDR_VIRT(n)) &&
		    !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) {
			of_node_put(best_frame);
			best_frame = frame;
			arch_timer_mem_use_virtual = true;
			break;
		}

		/* Frame lacks physical timer/counter access: not usable. */
		if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT))
			continue;

		of_node_put(best_frame);
		best_frame = of_node_get(frame);
	}

	ret= -ENXIO;
	base = arch_counter_base = of_iomap(best_frame, 0);
	if (!base) {
		pr_err("arch_timer: Can't map frame's registers\n");
		goto out;
	}

	/* Interrupt index 1 is used for the virtual timer, index 0 for the physical one. */
	if (arch_timer_mem_use_virtual)
		irq = irq_of_parse_and_map(best_frame, 1);
	else
		irq = irq_of_parse_and_map(best_frame, 0);

	ret = -EINVAL;
	if (!irq) {
		pr_err("arch_timer: Frame missing %s irq",
		       arch_timer_mem_use_virtual ? "virt" : "phys");
		goto out;
	}

	arch_timer_detect_rate(base, np);
	ret = arch_timer_mem_register(base, irq);
	if (ret)
		goto out;

	return arch_timer_common_init();
out:
	iounmap(cntctlbase);
	of_node_put(best_frame);
	return ret;
}

adb下Debug 信息

通过adb可以确认,其中arch_sys_timer为tick_device,arch_mem_timer为broadcast device:

cat /sys/devices/system/clocksource/clocksource0/available_clocksource 
arch_sys_counter
cat /sys/devices/system/clockevents/clockevent*/current_device
arch_sys_timer
arch_sys_timer
arch_sys_timer
arch_sys_timer
arch_sys_timer
arch_sys_timer
arch_sys_timer
arch_sys_timer

Tick Device list也可以通过adb确认:

tc_ocla1_sprout:/ # cat /proc/timer_list
Timer List Version: v0.
HRTIMER_MAX_CLOCK_BASES:
now at nsecs cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in - to nsecs]
#: <>, tick_sched_timer, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, timerfd_tmrproc, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, sched_clock_poll, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, it_real_fn, S:
# expires at - nsecs [in to nsecs]
#: <>, timerfd_tmrproc, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
#: <>, alarmtimer_fired, S:
# expires at - nsecs [in to nsecs]
#: <>, timerfd_tmrproc, S:
# expires at - nsecs [in to nsecs]
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
#: <>, tick_sched_timer, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
#: <>, tick_sched_timer, S:
# expires at - nsecs [in to nsecs]
#: <>, sched_rt_period_timer, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, posix_timer_fn, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
#: <>, alarmtimer_fired, S:
# expires at - nsecs [in to nsecs]
#: <>, alarmtimer_fired, S:
# expires at - nsecs [in to nsecs]
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
#: <>, tick_sched_timer, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
#: <>, hrtimer_wakeup, S:
# expires at - nsecs [in to nsecs]
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: cpu:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_real
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_boottime
.offset: nsecs
active timers:
clock :
.base:
.index:
.resolution: nsecs
.get_time: ktime_get_clocktai
.offset: nsecs
active timers:
.expires_next : nsecs
.hres_active :
.nr_events :
.nr_retries :
.nr_hangs :
.max_hang_time :
.nohz_mode :
.last_tick : nsecs
.tick_stopped :
.idle_jiffies :
.idle_calls :
.idle_sleeps :
.idle_entrytime : nsecs
.idle_waketime : nsecs
.idle_exittime : nsecs
.idle_sleeptime : nsecs
.iowait_sleeptime: nsecs
.last_jiffies :
.next_timer :
.idle_expires : nsecs
jiffies: Tick Device: mode:
Broadcast device
Clock Event Device: arch_mem_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt_mem
shutdown: arch_timer_shutdown_virt_mem
event_handler: tick_handle_oneshot_broadcast
retries: tick_broadcast_mask:
tick_broadcast_oneshot_mask: fc Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries: Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries: Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries: Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries: Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries: Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries: Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries: Tick Device: mode:
Per CPU device:
Clock Event Device: arch_sys_timer
max_delta_ns:
min_delta_ns:
mult:
shift:
mode:
next_event: nsecs
set_next_event: arch_timer_set_next_event_virt
shutdown: arch_timer_shutdown_virt
event_handler: hrtimer_interrupt
retries:

Ftrace抓取的call stack:

          <idle>-     [] d.h2 98348.520507: scheduler_tick <-update_process_times
<idle>- [] d.h2 98348.520534: <stack trace>
=> tick_sched_timer
=> __hrtimer_run_queues
=> hrtimer_interrupt
=> tick_handle_oneshot_broadcast
=> arch_timer_handler_virt_mem
=> handle_irq_event_percpu
=> handle_irq_event
=> handle_fasteoi_irq
=> generic_handle_irq
=> __handle_domain_irq
=> gic_handle_irq
=> el1_irq
=> lpm_cpuidle_enter
=> cpuidle_enter_state
=> cpuidle_enter
=> cpu_startup_entry
=> secondary_start_kernel
=>
<idle>- [] d.h2 98348.540237: scheduler_tick <-update_process_times
<idle>- [] d.h2 98348.540267: <stack trace>
=> tick_sched_timer
=> __hrtimer_run_queues
=> hrtimer_interrupt
=> arch_timer_handler_virt
=> handle_percpu_devid_irq
=> generic_handle_irq
=> __handle_domain_irq
=> gic_handle_irq
=> el1_irq
=> lpm_cpuidle_enter
=> cpuidle_enter_state
=> cpuidle_enter
=> cpu_startup_entry
=> secondary_start_kernel
=>
05-23 11:31