kernel: mmu: collect more demand paging statistics

This adds infrastructure to gather more statistics on demand
paging, e.g. the number of clean vs. dirty pages evicted, and
the number of page faults taken with IRQs locked vs. unlocked.

Also extends this to gather per-thread demand paging
statistics.
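
The existing z_num_pagefaults_get() interface is kept, now backed
by paging_stats.pagefaults.cnt and moved into the new
kernel/paging/statistics.c.

For example, a minimal sketch of reading the new statistics from
application code (struct fields as defined in the
include/sys/mem_manage.h changes below):

    struct k_mem_paging_stats_t stats;

    /* System-wide paging statistics since boot */
    k_mem_paging_stats_get(&stats);
    printk("page faults: %lu (%lu clean / %lu dirty evictions)\n",
           stats.pagefaults.cnt, stats.eviction.clean,
           stats.eviction.dirty);

    /* Per-thread statistics, with CONFIG_DEMAND_PAGING_THREAD_STATS=y */
    k_mem_paging_thread_stats_get(k_current_get(), &stats);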

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
diff --git a/arch/Kconfig b/arch/Kconfig
index cafae92..9021730 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -648,7 +648,7 @@
 	  implement a notion of "high" memory in Zephyr to work around physical
 	  RAM size larger than the defined bounds of the virtual address space.
 
-config DEMAND_PAGING
+menuconfig DEMAND_PAGING
 	bool "Enable demand paging [EXPERIMENTAL]"
 	depends on ARCH_HAS_DEMAND_PAGING
 	help
@@ -671,6 +671,25 @@
 	  If this option is disabled, the page fault servicing logic
 	  runs with interrupts disabled for the entire operation. However,
 	  ISRs may also page fault.
+
+config DEMAND_PAGING_STATS
+	bool "Gather Demand Paging Statistics"
+	help
+	  This enables gathering various statistics related to demand paging,
+	  e.g. the number of page faults. This is useful for tuning eviction
+	  algorithms and optimizing the backing store.
+
+	  Say N in a production system as statistics gathering has a cost.
+
+config DEMAND_PAGING_THREAD_STATS
+	bool "Gather Per-Thread Demand Paging Statistics"
+	depends on DEMAND_PAGING_STATS
+	help
+	  This enables gathering per-thread statistics related to demand
+	  paging.
+
+	  Say N in a production system as statistics gathering has a cost.
+
 endif	# DEMAND_PAGING
 endif   # MMU
 
diff --git a/include/kernel/thread.h b/include/kernel/thread.h
index 6cdef33..b615fda 100644
--- a/include/kernel/thread.h
+++ b/include/kernel/thread.h
@@ -7,6 +7,10 @@
 #ifndef ZEPHYR_INCLUDE_KERNEL_THREAD_H_
 #define ZEPHYR_INCLUDE_KERNEL_THREAD_H_
 
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+#include <sys/mem_manage.h>
+#endif
+
 /**
  * @typedef k_thread_entry_t
  * @brief Thread entry point function type.
@@ -279,6 +283,11 @@
 	struct _thread_runtime_stats rt_stats;
 #endif
 
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+	/** Paging statistics */
+	struct k_mem_paging_stats_t paging_stats;
+#endif
+
 	/** arch-specifics: must always be at the end */
 	struct _thread_arch arch;
 };
diff --git a/include/sys/mem_manage.h b/include/sys/mem_manage.h
index 2d70cc2..27a7523 100644
--- a/include/sys/mem_manage.h
+++ b/include/sys/mem_manage.h
@@ -79,6 +79,34 @@
 #include <inttypes.h>
 #include <sys/__assert.h>
 
+struct k_mem_paging_stats_t {
+#ifdef CONFIG_DEMAND_PAGING_STATS
+	struct {
+		/** Number of page faults */
+		unsigned long			cnt;
+
+		/** Number of page faults with IRQ locked */
+		unsigned long			irq_locked;
+
+		/** Number of page faults with IRQ unlocked */
+		unsigned long			irq_unlocked;
+
+#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+		/** Number of page faults while in ISR */
+		unsigned long			in_isr;
+#endif
+	} pagefaults;
+
+	struct {
+		/** Number of clean pages selected for eviction */
+		unsigned long			clean;
+
+		/** Number of dirty pages selected for eviction */
+		unsigned long			dirty;
+	} eviction;
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+};
+
 /* Just like Z_MEM_PHYS_ADDR() but with type safety and assertions */
 static inline uintptr_t z_mem_phys_addr(void *virt)
 {
@@ -349,6 +377,36 @@
 void k_mem_unpin(void *addr, size_t size);
 #endif /* CONFIG_DEMAND_PAGING */
 
+#ifdef CONFIG_DEMAND_PAGING_STATS
+/**
+ * Get the paging statistics since system startup
+ *
+ * This populates the paging statistics struct passed in
+ * as an argument.
+ *
+ * @param[in,out] stats Paging statistics struct to be filled.
+ */
+__syscall void k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats);
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+/**
+ * Get the paging statistics since system startup for a thread
+ *
+ * This populates the paging statistics struct being passed in
+ * as argument for a particular thread.
+ *
+ * @param[in] tid Thread ID
+ * @param[in,out] stats Paging statistics struct to be filled.
+ */
+__syscall
+void k_mem_paging_thread_stats_get(k_tid_t tid,
+				   struct k_mem_paging_stats_t *stats);
+#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
+
+#include <syscalls/mem_manage.h>
+
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt
index 46a0be3..107d1f2 100644
--- a/kernel/CMakeLists.txt
+++ b/kernel/CMakeLists.txt
@@ -33,6 +33,11 @@
      xip.c)
 endif()
 
+if(CONFIG_DEMAND_PAGING_STATS)
+list(APPEND kernel_files
+     paging/statistics.c)
+endif()
+
 add_library(kernel ${kernel_files})
 
 # Kernel files has the macro __ZEPHYR_SUPERVISOR__ set so that it
diff --git a/kernel/mmu.c b/kernel/mmu.c
index 6e936b1..99f347d 100644
--- a/kernel/mmu.c
+++ b/kernel/mmu.c
@@ -12,6 +12,7 @@
 #include <mmu.h>
 #include <init.h>
 #include <kernel_internal.h>
+#include <syscall_handler.h>
 #include <linker/linker-defs.h>
 #include <logging/log.h>
 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
@@ -563,7 +564,10 @@
 }
 
 #ifdef CONFIG_DEMAND_PAGING
-static unsigned long z_num_pagefaults;
+
+#ifdef CONFIG_DEMAND_PAGING_STATS
+struct k_mem_paging_stats_t paging_stats;
+#endif
 
 /* Current implementation relies on interrupt locking to prevent any page table
  * access, which falls over if other CPUs are active. Addressing this is not
@@ -786,6 +790,65 @@
 	return ret;
 }
 
+static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
+					   int key)
+{
+#ifdef CONFIG_DEMAND_PAGING_STATS
+	bool is_irq_unlocked = arch_irq_unlocked(key);
+
+	paging_stats.pagefaults.cnt++;
+
+	if (is_irq_unlocked) {
+		paging_stats.pagefaults.irq_unlocked++;
+	} else {
+		paging_stats.pagefaults.irq_locked++;
+	}
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+	faulting_thread->paging_stats.pagefaults.cnt++;
+
+	if (is_irq_unlocked) {
+		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
+	} else {
+		faulting_thread->paging_stats.pagefaults.irq_locked++;
+	}
+#else
+	ARG_UNUSED(faulting_thread);
+#endif
+
+#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
+	if (k_is_in_isr()) {
+		paging_stats.pagefaults.in_isr++;
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+		faulting_thread->paging_stats.pagefaults.in_isr++;
+#endif
+	}
+#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+}
+
+static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
+					     bool dirty)
+{
+#ifdef CONFIG_DEMAND_PAGING_STATS
+	if (dirty) {
+		paging_stats.eviction.dirty++;
+	} else {
+		paging_stats.eviction.clean++;
+	}
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+	if (dirty) {
+		faulting_thread->paging_stats.eviction.dirty++;
+	} else {
+		faulting_thread->paging_stats.eviction.clean++;
+	}
+#else
+	ARG_UNUSED(faulting_thread);
+#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
+#endif /* CONFIG_DEMAND_PAGING_STATS */
+}
+
 static bool do_page_fault(void *addr, bool pin)
 {
 	struct z_page_frame *pf;
@@ -794,6 +857,7 @@
 	enum arch_page_location status;
 	bool result;
 	bool dirty = false;
+	struct k_thread *faulting_thread = _current_cpu->current;
 
 	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
 		 addr);
@@ -802,13 +866,7 @@
 
 	/*
 	 * TODO: Add performance accounting:
-	 * - Number of pagefaults
-	 *   * gathered on a per-thread basis:
-	 *     . Pagefaults with IRQs locked in faulting thread (bad)
-	 *     . Pagefaults with IRQs unlocked in faulting thread
-	 *   * Pagefaults in ISRs (if allowed)
 	 * - z_eviction_select() metrics
-	 *   * Clean vs dirty page eviction counts
 	 *   * execution time histogram
 	 *   * periodic timer execution time histogram (if implemented)
 	 * - z_backing_store_page_out() execution time histogram
@@ -853,6 +911,9 @@
 		goto out;
 	}
 	result = true;
+
+	paging_stats_faults_inc(faulting_thread, key);
+
 	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
 		if (pin) {
 			/* It's a physical memory address */
@@ -874,6 +935,8 @@
 		__ASSERT(pf != NULL, "failed to get a page frame");
 		LOG_DBG("evicting %p at 0x%lx", pf->addr,
 			z_page_frame_to_phys(pf));
+
+		paging_stats_eviction_inc(faulting_thread, dirty);
 	}
 	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
 	__ASSERT(ret == 0, "failed to prepare page frame");
@@ -946,30 +1009,7 @@
 
 bool z_page_fault(void *addr)
 {
-	bool ret;
-
-	ret = do_page_fault(addr, false);
-	if (ret) {
-		/* Wasn't an error, increment page fault count */
-		int key;
-
-		key = irq_lock();
-		z_num_pagefaults++;
-		irq_unlock(key);
-	}
-	return ret;
-}
-
-unsigned long z_num_pagefaults_get(void)
-{
-	unsigned long ret;
-	int key;
-
-	key = irq_lock();
-	ret = z_num_pagefaults;
-	irq_unlock(key);
-
-	return ret;
+	return do_page_fault(addr, false);
 }
 
 static void do_mem_unpin(void *addr)
@@ -995,4 +1035,5 @@
 		 addr);
 	virt_region_foreach(addr, size, do_mem_unpin);
 }
+
 #endif /* CONFIG_DEMAND_PAGING */
diff --git a/kernel/paging/statistics.c b/kernel/paging/statistics.c
new file mode 100644
index 0000000..cc0a909
--- /dev/null
+++ b/kernel/paging/statistics.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <kernel.h>
+#include <kernel_internal.h>
+#include <syscall_handler.h>
+#include <toolchain.h>
+#include <string.h>
+#include <sys/mem_manage.h>
+
+extern struct k_mem_paging_stats_t paging_stats;
+
+unsigned long z_num_pagefaults_get(void)
+{
+	unsigned long ret;
+	int key;
+
+	key = irq_lock();
+	ret = paging_stats.pagefaults.cnt;
+	irq_unlock(key);
+
+	return ret;
+}
+
+void z_impl_k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats)
+{
+	if (stats == NULL) {
+		return;
+	}
+
+	/* Copy statistics */
+	memcpy(stats, &paging_stats, sizeof(paging_stats));
+}
+
+#ifdef CONFIG_USERSPACE
+static inline
+void z_vrfy_k_mem_paging_stats_get(struct k_mem_paging_stats_t *stats)
+{
+	Z_OOPS(Z_SYSCALL_MEMORY_WRITE(stats, sizeof(*stats)));
+	z_impl_k_mem_paging_stats_get(stats);
+}
+#include <syscalls/k_mem_paging_stats_get_mrsh.c>
+#endif /* CONFIG_USERSPACE */
+
+#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
+void z_impl_k_mem_paging_thread_stats_get(k_tid_t tid,
+					  struct k_mem_paging_stats_t *stats)
+{
+	if ((tid == NULL) || (stats == NULL)) {
+		return;
+	}
+
+	/* Copy statistics */
+	memcpy(stats, &tid->paging_stats, sizeof(tid->paging_stats));
+}
+
+#ifdef CONFIG_USERSPACE
+static inline
+void z_vrfy_k_mem_paging_thread_stats_get(k_tid_t tid,
+					  struct k_mem_paging_stats_t *stats)
+{
+	Z_OOPS(Z_SYSCALL_OBJ(tid, K_OBJ_THREAD));
+	Z_OOPS(Z_SYSCALL_MEMORY_WRITE(stats, sizeof(*stats)));
+	z_impl_k_mem_paging_thread_stats_get(tid, stats);
+}
+#include <syscalls/k_mem_paging_thread_stats_get_mrsh.c>
+#endif /* CONFIG_USERSPACE */
+
+#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
diff --git a/tests/kernel/mem_protect/demand_paging/prj.conf b/tests/kernel/mem_protect/demand_paging/prj.conf
index 9467c29..06c2fde 100644
--- a/tests/kernel/mem_protect/demand_paging/prj.conf
+++ b/tests/kernel/mem_protect/demand_paging/prj.conf
@@ -1 +1,2 @@
 CONFIG_ZTEST=y
+CONFIG_DEMAND_PAGING_STATS=y