kernel: mmu: collect more demand paging statistics
This adds more code to gather demand paging statistics,
e.g. clean vs. dirty pages selected for eviction, and the
number of page faults serviced with IRQs locked vs. unlocked.
Also extends this to gather per-thread demand paging
statistics.
Signed-off-by: Daniel Leung <daniel.leung@intel.com>
diff --git a/arch/Kconfig b/arch/Kconfig
index cafae92..9021730 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -648,7 +648,7 @@
implement a notion of "high" memory in Zephyr to work around physical
RAM size larger than the defined bounds of the virtual address space.
-config DEMAND_PAGING
+menuconfig DEMAND_PAGING
bool "Enable demand paging [EXPERIMENTAL]"
depends on ARCH_HAS_DEMAND_PAGING
help
@@ -671,6 +671,25 @@
If this option is disabled, the page fault servicing logic
runs with interrupts disabled for the entire operation. However,
ISRs may also page fault.
+
+config DEMAND_PAGING_STATS
+ bool "Gather Demand Paging Statistics"
+ help
+ This enables gathering various statistics related to demand paging,
+	  e.g. the number of page faults. This is useful for tuning eviction
+ algorithms and optimizing backing store.
+
+	  You should say N in a production system, as gathering these
+
+config DEMAND_PAGING_THREAD_STATS
+ bool "Gather per Thread Demand Paging Statistics"
+ depends on DEMAND_PAGING_STATS
+ help
+ This enables gathering per thread statistics related to demand
+ paging.
+
+	  You should say N in a production system, as gathering these
+
endif # DEMAND_PAGING
endif # MMU
diff --git a/include/kernel/thread.h b/include/kernel/thread.h
index 6cdef33..b615fda 100644
--- a/include/kernel/thread.h
+++ b/include/kernel/thread.h
@@ -7,6 +7,10 @@
#ifndef ZEPHYR_INCLUDE_KERNEL_THREAD_H_
#define ZEPHYR_INCLUDE_KERNEL_THREAD_H_
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+#include <sys/mem_manage.h>
+#endif
+
/**
* @typedef k_thread_entry_t
* @brief Thread entry point function type.
@@ -279,6 +283,11 @@
struct _thread_runtime_stats rt_stats;
#endif
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+ /** Paging statistics */
+ struct k_mem_paging_stats_t paging_stats;
+#endif
+
/** arch-specifics: must always be at the end */
struct _thread_arch arch;
};
diff --git a/include/sys/mem_manage.h b/include/sys/mem_manage.h
index 2d70cc2..27a7523 100644
--- a/include/sys/mem_manage.h
+++ b/include/sys/mem_manage.h
@@ -79,6 +79,34 @@
#include <inttypes.h>
#include <sys/__assert.h>
+struct k_mem_paging_stats_t {
+#ifdef CONFIG_DEMAND_PAGING_STATS
+ struct {
+ /** Number of page faults */
+ unsigned long cnt;
+
+ /** Number of page faults with IRQ locked */
+ unsigned long irq_locked;
+
+ /** Number of page faults with IRQ unlocked */
+ unsigned long irq_unlocked;
+
+#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+ /** Number of page faults while in ISR */
+ unsigned long in_isr;
+#endif
+ } pagefaults;
+
+ struct {
+ /** Number of clean pages selected for eviction */
+ unsigned long clean;
+
+ /** Number of dirty pages selected for eviction */
+ unsigned long dirty;
+ } eviction;
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+};
+
/* Just like Z_MEM_PHYS_ADDR() but with type safety and assertions */
static inline uintptr_t z_mem_phys_addr(void *virt)
{
@@ -349,6 +377,36 @@
void k_mem_unpin(void *addr, size_t size);
#endif /* CONFIG_DEMAND_PAGING */
+#ifdef CONFIG_DEMAND_PAGING_STATS
+/**
+ * Get the paging statistics since system startup
+ *
+ * This populates the paging statistics struct being passed in
+ * as argument.
+ *
+ * @param[in,out] stats Paging statistics struct to be filled.
+ */
+__syscall void k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats);
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+/**
+ * Get the paging statistics since system startup for a thread
+ *
+ * This populates the paging statistics struct being passed in
+ * as argument for a particular thread.
+ *
+ * @param[in] tid Thread ID
+ * @param[in,out] stats Paging statistics struct to be filled.
+ */
+__syscall
+void k_mem_paging_thread_stats_get(k_tid_t tid,
+ struct k_mem_paging_stats_t *stats);
+#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
+
+#include <syscalls/mem_manage.h>
+
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+
#ifdef __cplusplus
}
#endif
diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt
index 46a0be3..107d1f2 100644
--- a/kernel/CMakeLists.txt
+++ b/kernel/CMakeLists.txt
@@ -33,6 +33,11 @@
xip.c)
endif()
+if(CONFIG_DEMAND_PAGING_STATS)
+list(APPEND kernel_files
+ paging/statistics.c)
+endif()
+
add_library(kernel ${kernel_files})
# Kernel files has the macro __ZEPHYR_SUPERVISOR__ set so that it
diff --git a/kernel/mmu.c b/kernel/mmu.c
index 6e936b1..99f347d 100644
--- a/kernel/mmu.c
+++ b/kernel/mmu.c
@@ -12,6 +12,7 @@
#include <mmu.h>
#include <init.h>
#include <kernel_internal.h>
+#include <syscall_handler.h>
#include <linker/linker-defs.h>
#include <logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
@@ -563,7 +564,10 @@
}
#ifdef CONFIG_DEMAND_PAGING
-static unsigned long z_num_pagefaults;
+
+#ifdef CONFIG_DEMAND_PAGING_STATS
+struct k_mem_paging_stats_t paging_stats;
+#endif
/* Current implementation relies on interrupt locking to any prevent page table
* access, which falls over if other CPUs are active. Addressing this is not
@@ -786,6 +790,65 @@
return ret;
}
+static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
+ int key)
+{
+#ifdef CONFIG_DEMAND_PAGING_STATS
+ bool is_irq_unlocked = arch_irq_unlocked(key);
+
+ paging_stats.pagefaults.cnt++;
+
+ if (is_irq_unlocked) {
+ paging_stats.pagefaults.irq_unlocked++;
+ } else {
+ paging_stats.pagefaults.irq_locked++;
+ }
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+ faulting_thread->paging_stats.pagefaults.cnt++;
+
+ if (is_irq_unlocked) {
+ faulting_thread->paging_stats.pagefaults.irq_unlocked++;
+ } else {
+ faulting_thread->paging_stats.pagefaults.irq_locked++;
+ }
+#else
+ ARG_UNUSED(faulting_thread);
+#endif
+
+#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+ if (k_is_in_isr()) {
+ paging_stats.pagefaults.in_isr++;
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+ faulting_thread->paging_stats.pagefaults.in_isr++;
+#endif
+ }
+#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+}
+
+static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
+ bool dirty)
+{
+#ifdef CONFIG_DEMAND_PAGING_STATS
+ if (dirty) {
+ paging_stats.eviction.dirty++;
+ } else {
+ paging_stats.eviction.clean++;
+ }
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+ if (dirty) {
+ faulting_thread->paging_stats.eviction.dirty++;
+ } else {
+ faulting_thread->paging_stats.eviction.clean++;
+ }
+#else
+ ARG_UNUSED(faulting_thread);
+#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+}
+
static bool do_page_fault(void *addr, bool pin)
{
struct z_page_frame *pf;
@@ -794,6 +857,7 @@
enum arch_page_location status;
bool result;
bool dirty = false;
+ struct k_thread *faulting_thread = _current_cpu->current;
__ASSERT(page_frames_initialized, "page fault at %p happened too early",
addr);
@@ -802,13 +866,7 @@
/*
* TODO: Add performance accounting:
- * - Number of pagefaults
- * * gathered on a per-thread basis:
- * . Pagefaults with IRQs locked in faulting thread (bad)
- * . Pagefaults with IRQs unlocked in faulting thread
- * * Pagefaults in ISRs (if allowed)
* - z_eviction_select() metrics
- * * Clean vs dirty page eviction counts
* * execution time histogram
* * periodic timer execution time histogram (if implemented)
* - z_backing_store_page_out() execution time histogram
@@ -853,6 +911,9 @@
goto out;
}
result = true;
+
+ paging_stats_faults_inc(faulting_thread, key);
+
if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
if (pin) {
/* It's a physical memory address */
@@ -874,6 +935,8 @@
__ASSERT(pf != NULL, "failed to get a page frame");
LOG_DBG("evicting %p at 0x%lx", pf->addr,
z_page_frame_to_phys(pf));
+
+ paging_stats_eviction_inc(faulting_thread, dirty);
}
ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
__ASSERT(ret == 0, "failed to prepare page frame");
@@ -946,30 +1009,7 @@
bool z_page_fault(void *addr)
{
- bool ret;
-
- ret = do_page_fault(addr, false);
- if (ret) {
- /* Wasn't an error, increment page fault count */
- int key;
-
- key = irq_lock();
- z_num_pagefaults++;
- irq_unlock(key);
- }
- return ret;
-}
-
-unsigned long z_num_pagefaults_get(void)
-{
- unsigned long ret;
- int key;
-
- key = irq_lock();
- ret = z_num_pagefaults;
- irq_unlock(key);
-
- return ret;
+ return do_page_fault(addr, false);
}
static void do_mem_unpin(void *addr)
@@ -995,4 +1035,5 @@
addr);
virt_region_foreach(addr, size, do_mem_unpin);
}
+
#endif /* CONFIG_DEMAND_PAGING */
diff --git a/kernel/paging/statistics.c b/kernel/paging/statistics.c
new file mode 100644
index 0000000..cc0a909
--- /dev/null
+++ b/kernel/paging/statistics.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <kernel.h>
+#include <kernel_internal.h>
+#include <syscall_handler.h>
+#include <toolchain.h>
+#include <sys/mem_manage.h>
+
+extern struct k_mem_paging_stats_t paging_stats;
+
+unsigned long z_num_pagefaults_get(void)
+{
+ unsigned long ret;
+ int key;
+
+ key = irq_lock();
+ ret = paging_stats.pagefaults.cnt;
+ irq_unlock(key);
+
+ return ret;
+}
+
+void z_impl_k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats)
+{
+ if (stats == NULL) {
+ return;
+ }
+
+ /* Copy statistics */
+ memcpy(stats, &paging_stats, sizeof(paging_stats));
+}
+
+#ifdef CONFIG_USERSPACE
+static inline
+void z_vrfy_k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats)
+{
+ Z_OOPS(Z_SYSCALL_MEMORY_WRITE(stats, sizeof(*stats)));
+ z_impl_k_mem_paging_stats_get(stats);
+}
+#include <syscalls/k_mem_paging_stats_get_mrsh.c>
+#endif /* CONFIG_USERSPACE */
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+void z_impl_k_mem_paging_thread_stats_get(k_tid_t tid,
+ struct k_mem_paging_stats_t *stats)
+{
+ if ((tid == NULL) || (stats == NULL)) {
+ return;
+ }
+
+ /* Copy statistics */
+ memcpy(stats, &tid->paging_stats, sizeof(tid->paging_stats));
+}
+
+#ifdef CONFIG_USERSPACE
+static inline
+void z_vrfy_k_mem_paging_thread_stats_get(k_tid_t tid,
+ struct k_mem_paging_stats_t *stats)
+{
+ Z_OOPS(Z_SYSCALL_OBJ(tid, K_OBJ_THREAD));
+ Z_OOPS(Z_SYSCALL_MEMORY_WRITE(stats, sizeof(*stats)));
+ z_impl_k_mem_paging_thread_stats_get(tid, stats);
+}
+#include <syscalls/k_mem_paging_thread_stats_get_mrsh.c>
+#endif /* CONFIG_USERSPACE */
+
+#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
diff --git a/tests/kernel/mem_protect/demand_paging/prj.conf b/tests/kernel/mem_protect/demand_paging/prj.conf
index 9467c29..06c2fde 100644
--- a/tests/kernel/mem_protect/demand_paging/prj.conf
+++ b/tests/kernel/mem_protect/demand_paging/prj.conf
@@ -1 +1,2 @@
CONFIG_ZTEST=y
+CONFIG_DEMAND_PAGING_STATS=y