/*
 * Copyright (c) 2021 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */
#include <zephyr/device.h>
#include <zephyr/drivers/timer/system_timer.h>
#include <zephyr/sys_clock.h>
#include <zephyr/spinlock.h>
#include <zephyr/drivers/interrupt_controller/loapic.h>

#define IA32_TSC_DEADLINE_MSR 0x6e0
#define IA32_TSC_ADJUST_MSR   0x03b

#define CYC_PER_TICK (CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC \
		      / (uint64_t) CONFIG_SYS_CLOCK_TICKS_PER_SEC)
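
/* Illustrative arithmetic (the config values are assumptions, not from
 * this file): with CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC = 1000000000 (a
 * 1 GHz TSC) and CONFIG_SYS_CLOCK_TICKS_PER_SEC = 10000, CYC_PER_TICK
 * evaluates to 100000 TSC cycles per kernel tick. The uint64_t cast
 * keeps every expansion of the macro in 64-bit arithmetic.
 */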

struct apic_timer_lvt {
	uint8_t vector : 8;
	uint8_t unused0 : 8;
	uint8_t masked : 1;
	enum { ONE_SHOT, PERIODIC, TSC_DEADLINE } mode : 2;
	uint32_t unused2 : 13;
};
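
/* The bitfields above mirror the local APIC LVT timer register as laid
 * out in the Intel SDM: vector in bits 7:0, mask in bit 16, and timer
 * mode in bits 18:17 (00b one-shot, 01b periodic, 10b TSC deadline).
 * The unused fields pad the reserved and delivery-status bits so the
 * struct overlays the 32-bit register via the union below.
 */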

static struct k_spinlock lock;
static uint64_t last_announce;
static union { uint32_t val; struct apic_timer_lvt lvt; } lvt_reg;

/* Read the 64-bit timestamp counter; RDTSC returns it split across
 * EDX:EAX.
 */
static ALWAYS_INLINE uint64_t rdtsc(void)
{
	uint32_t hi, lo;

	__asm__ volatile("rdtsc" : "=d"(hi), "=a"(lo));
	return lo + (((uint64_t)hi) << 32);
}

static void isr(const void *arg)
{
	ARG_UNUSED(arg);
	k_spinlock_key_t key = k_spin_lock(&lock);
	uint32_t ticks = (rdtsc() - last_announce) / CYC_PER_TICK;

	last_announce += ticks * CYC_PER_TICK;
	k_spin_unlock(&lock, key);
	sys_clock_announce(ticks);

	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
		sys_clock_set_timeout(1, false);
	}
}
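
/* Worked example of the announce bookkeeping above (numbers assumed
 * for illustration): if CYC_PER_TICK is 100000 and the ISR runs when
 * rdtsc() - last_announce == 250000, then ticks == 2 and last_announce
 * advances by exactly 200000 cycles. The 50000-cycle remainder stays
 * accounted against the next tick, so integer truncation never loses
 * time across announcements.
 */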

static inline void wrmsr(int32_t msr, uint64_t val)
{
	uint32_t hi = (uint32_t) (val >> 32);
	uint32_t lo = (uint32_t) val;

	__asm__ volatile("wrmsr" :: "d"(hi), "a"(lo), "c"(msr));
}
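
/* A hypothetical companion to wrmsr(), shown only as a sketch: this
 * driver never needs to read an MSR, so no such helper exists in the
 * real file. RDMSR selects the register via ECX and returns the value
 * in EDX:EAX, mirroring the WRMSR operand convention.
 */
static inline uint64_t rdmsr(uint32_t msr)
{
	uint32_t hi, lo;

	__asm__ volatile("rdmsr" : "=d"(hi), "=a"(lo) : "c"(msr));
	return lo + (((uint64_t)hi) << 32);
}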

void sys_clock_set_timeout(int32_t ticks, bool idle)
{
	ARG_UNUSED(idle);

	uint64_t now = rdtsc();
	k_spinlock_key_t key = k_spin_lock(&lock);
	uint64_t expires = now + MAX(ticks - 1, 0) * CYC_PER_TICK;

	expires = last_announce + (((expires - last_announce + CYC_PER_TICK - 1)
				    / CYC_PER_TICK) * CYC_PER_TICK);

	/* The second condition is to catch the wraparound.
	 * Interpreted strictly, the IA SDM description of the
	 * TSC_DEADLINE MSR implies that it will trigger an immediate
	 * interrupt if we try to set an expiration across the 64 bit
	 * rollover. Unfortunately there's no way to test that, since
	 * on real hardware it would require more than a century of
	 * uptime, but this check is cheap and safe.
	 */
	if (ticks == K_TICKS_FOREVER || expires < last_announce) {
		expires = UINT64_MAX;
	}

	wrmsr(IA32_TSC_DEADLINE_MSR, expires);
	k_spin_unlock(&lock, key);
}
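
/* Worked example of the rounding above (values assumed for
 * illustration): with CYC_PER_TICK = 100000, last_announce = 1000000
 * and now = 1050000, a request for ticks = 2 gives a raw expiry of
 * 1050000 + 1 * 100000 = 1150000. Rounding up relative to
 * last_announce yields 1200000, so the deadline always lands on a
 * tick boundary and the elapsed time seen by the ISR divides into
 * whole ticks.
 */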

uint32_t sys_clock_elapsed(void)
{
	k_spinlock_key_t key = k_spin_lock(&lock);
	uint32_t ret = (rdtsc() - last_announce) / CYC_PER_TICK;

	k_spin_unlock(&lock, key);
	return ret;
}

uint32_t sys_clock_cycle_get_32(void)
{
	return (uint32_t) rdtsc();
}

uint64_t sys_clock_cycle_get_64(void)
{
	return rdtsc();
}

static inline uint32_t timer_irq(void)
{
	/* The Zephyr APIC API is... idiosyncratic. The timer is a
	 * "local vector table" interrupt. These aren't system IRQs
	 * presented to the IO-APIC; they're indices into a register
	 * array in the local APIC. By Zephyr convention they come
	 * after all the external IO-APIC interrupts, but that number
	 * changes depending on device configuration, so we have to
	 * fetch it at runtime. The timer happens to be the first
	 * entry in the table.
	 */
	return z_loapic_irq_base();
}
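
/* Example of the convention described above (the input count is an
 * assumption): on a system whose IO-APIC exposes 24 external inputs,
 * z_loapic_irq_base() would return 24, so the LVT timer occupies
 * Zephyr IRQ 24 and the remaining local-vector entries follow it.
 */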

/* The TSC_ADJUST MSR implements a synchronized offset such that
 * multiple CPUs (within a socket, anyway) can synchronize exactly, or
 * implement managed timing spaces for guests in a recoverable way,
 * etc... We set it to zero on all cores for simplicity, because
 * firmware often leaves it in an inconsistent state between cores.
 */
static void clear_tsc_adjust(void)
{
	/* But don't touch it on ACRN, where a hypervisor bug
	 * confuses the APIC emulation and deadline interrupts don't
	 * arrive.
	 */
#ifndef CONFIG_BOARD_ACRN
	wrmsr(IA32_TSC_ADJUST_MSR, 0);
#endif
}

void smp_timer_init(void)
{
	/* Copy the LVT configuration from CPU0, because IRQ_CONNECT()
	 * doesn't know how to manage LVT interrupts for anything
	 * other than the calling/initial CPU. The same fence as in
	 * sys_clock_driver_init() is needed here to prevent later MSR
	 * writes from reordering before the APIC configuration write.
	 */
	x86_write_loapic(LOAPIC_TIMER, lvt_reg.val);
	__asm__ volatile("mfence" ::: "memory");
	clear_tsc_adjust();
	irq_enable(timer_irq());
}

/* CPUID writes all four of EAX/EBX/ECX/EDX, so all four must appear as
 * outputs in the constraint list; EAX and ECX also carry the leaf and
 * subleaf inputs, hence the "+" read-write constraints.
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	__asm__ volatile("cpuid"
			 : "+a"(*eax), "=b"(*ebx), "+c"(*ecx), "=d"(*edx));
}

static int sys_clock_driver_init(const struct device *dev)
{
	ARG_UNUSED(dev);

#ifdef CONFIG_ASSERT
	uint32_t eax, ebx, ecx, edx;

	eax = 1; ecx = 0;
	cpuid(&eax, &ebx, &ecx, &edx);
	__ASSERT((ecx & BIT(24)) != 0, "No TSC Deadline support");

	eax = 0x80000007; ecx = 0;
	cpuid(&eax, &ebx, &ecx, &edx);
	__ASSERT((edx & BIT(8)) != 0, "No Invariant TSC support");

	eax = 7; ecx = 0;
	cpuid(&eax, &ebx, &ecx, &edx);
	__ASSERT((ebx & BIT(1)) != 0, "No TSC_ADJUST MSR support");
#endif

	clear_tsc_adjust();

	/* The timer interrupt number is fetched at runtime, so we
	 * can't use static IRQ_CONNECT().
	 */
	irq_connect_dynamic(timer_irq(), CONFIG_APIC_TIMER_IRQ_PRIORITY, isr, NULL, 0);

	lvt_reg.val = x86_read_loapic(LOAPIC_TIMER);
	lvt_reg.lvt.mode = TSC_DEADLINE;
	lvt_reg.lvt.masked = 0;
	x86_write_loapic(LOAPIC_TIMER, lvt_reg.val);

	/* Per the SDM, the TSC_DEADLINE MSR is not serializing, so
	 * this fence is needed to be sure that an upcoming MSR write
	 * (i.e. a timeout we're about to set) cannot possibly reorder
	 * around the initialization we just did.
	 */
	__asm__ volatile("mfence" ::: "memory");

	last_announce = rdtsc();
	irq_enable(timer_irq());

	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
		sys_clock_set_timeout(1, false);
	}

	return 0;
}

SYS_INIT(sys_clock_driver_init, PRE_KERNEL_2,
	 CONFIG_SYSTEM_CLOCK_INIT_PRIORITY);