riscv: improve contended FPU switching
We can leverage the FPU dirty state as an indicator for preemptively
reloading the FPU content when a thread that used the FPU before being
scheduled out is scheduled back in. This avoids the FPU access trap
overhead when switching between multiple threads with heavy FPU usage.
Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
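For reference, the "dirty state" above is the mstatus.FS field from the
RISC-V privileged spec (mstatus bits 14:13). Below is a quick sketch of
the encoding in plain C; the enum and its FS_* names are illustrative
only, while Zephyr's actual MSTATUS_FS_* macros carry these values
already shifted into the FS bit position:

	/* mstatus.FS encoding per the RISC-V privileged spec */
	enum fs_state {
		FS_OFF     = 0, /* FPU access disabled: FP instructions trap */
		FS_INITIAL = 1, /* enabled, registers in their reset state */
		FS_CLEAN   = 2, /* enabled, registers match the in-memory copy */
		FS_DIRTY   = 3, /* enabled, registers modified since last save */
	};

A thread observed "dirty" at flush time was actively using the FPU,
which is what z_riscv_flush_local_fpu() below latches into
fpu_recently_used.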
diff --git a/arch/riscv/core/fpu.c b/arch/riscv/core/fpu.c
index 3d87538..8d38209 100644
--- a/arch/riscv/core/fpu.c
+++ b/arch/riscv/core/fpu.c
@@ -74,6 +74,22 @@
	}
}

+static void z_riscv_fpu_load(void)
+{
+	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
+		 "must be called with IRQs disabled");
+	__ASSERT((csr_read(mstatus) & MSTATUS_FS) == 0,
+		 "must be called with FPU access disabled");
+
+	/* become new owner */
+	atomic_ptr_set(&_current_cpu->arch.fpu_owner, _current);
+
+	/* restore our content */
+	csr_set(mstatus, MSTATUS_FS_INIT);
+	z_riscv_fpu_restore(&_current->arch.saved_fp_context);
+	DBG("restore", _current);
+}
+
/*
 * Flush FPU content and clear ownership. If the saved FPU state is "clean"
 * then we know the in-memory copy is up to date and skip the FPU content
@@ -101,6 +117,9 @@
			z_riscv_fpu_save(&owner->arch.saved_fp_context);
		}

+		/* dirty means active use */
+		owner->arch.fpu_recently_used = dirty;
+
		/* disable FPU access */
		csr_clear(mstatus, MSTATUS_FS);
@@ -217,16 +236,11 @@
	flush_owned_fpu(_current);
#endif

-	/* become new owner */
-	atomic_ptr_set(&_current_cpu->arch.fpu_owner, _current);
-
	/* make it accessible and clean to the returning context */
	esf->mstatus |= MSTATUS_FS_CLEAN;

-	/* restore our content */
-	csr_set(mstatus, MSTATUS_FS_INIT);
-	z_riscv_fpu_restore(&_current->arch.saved_fp_context);
-	DBG("restore", _current);
+	/* and load it with the corresponding content */
+	z_riscv_fpu_load();
}

/*
@@ -244,7 +258,28 @@
	if (_current->arch.exception_depth == exc_update_level) {
		/* We're about to execute non-exception code */
-		return (_current_cpu->arch.fpu_owner == _current);
+		if (_current_cpu->arch.fpu_owner == _current) {
+			/* everything is already in place */
+			return true;
+		}
+		if (_current->arch.fpu_recently_used) {
+			/*
+			 * Before this thread was context-switched out,
+			 * it made active use of the FPU, but someone else
+			 * took it away in the meantime. Let's preemptively
+			 * claim it back to avoid the exception trap that
+			 * would otherwise likely occur.
+			 */
+			z_riscv_fpu_disable();
+			z_riscv_flush_local_fpu();
+#ifdef CONFIG_SMP
+			flush_owned_fpu(_current);
+#endif
+			z_riscv_fpu_load();
+			_current_cpu->arch.fpu_state = MSTATUS_FS_CLEAN;
+			return true;
+		}
+		return false;
	}

	/*
	 * Any new exception level should always trap on FPU
diff --git a/include/zephyr/arch/riscv/thread.h b/include/zephyr/arch/riscv/thread.h
index 6ad4880..08f14b6 100644
--- a/include/zephyr/arch/riscv/thread.h
+++ b/include/zephyr/arch/riscv/thread.h
@@ -68,6 +68,7 @@
struct _thread_arch {
#ifdef CONFIG_FPU_SHARING
	struct z_riscv_fp_context saved_fp_context;
+	bool fpu_recently_used;
	uint8_t exception_depth;
#endif
#ifdef CONFIG_USERSPACE
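To see the effect of the heuristic in isolation, here is a minimal
host-side model in plain C. It is illustrative only: flush_fpu(),
switch_in() and fpu_use() are stand-ins for the scheduler hooks and the
FPU access trap, not Zephyr APIs, and the model flushes eagerly at
switch-out where the kernel does so lazily. Two FPU-heavy threads
ping-pong, as in the contended case the patch targets:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	struct thread {
		bool fpu_recently_used;	/* was the FPU dirty when last flushed? */
	};

	static struct thread *fpu_owner;	/* thread whose state is live in the FPU */
	static unsigned int traps;		/* lazy-restore traps taken */

	/* Flush the owner's state to memory, remembering whether it was dirty. */
	static void flush_fpu(bool dirty)
	{
		if (fpu_owner != NULL) {
			fpu_owner->fpu_recently_used = dirty;
			fpu_owner = NULL;
		}
	}

	/* The scheduler switches thread t in. */
	static void switch_in(struct thread *t)
	{
		if (fpu_owner == t) {
			return;			/* state already live: nothing to do */
		}
		if (t->fpu_recently_used) {
			flush_fpu(false);	/* model: live state was already saved */
			fpu_owner = t;		/* eager reload: no trap on next use */
		}
		/* otherwise leave the FPU disabled; first use traps and reloads */
	}

	/* An FP instruction executed by t: traps if t does not own the FPU. */
	static void fpu_use(struct thread *t)
	{
		if (fpu_owner != t) {
			traps++;		/* access trap, then reload */
			flush_fpu(false);
			fpu_owner = t;
		}
	}

	int main(void)
	{
		struct thread a = { false }, b = { false };

		for (int i = 0; i < 4; i++) {
			switch_in(&a);
			fpu_use(&a);
			flush_fpu(true);	/* a dirtied the FPU before switching out */
			switch_in(&b);
			fpu_use(&b);
			flush_fpu(true);
		}
		printf("traps taken: %u\n", traps);
		return 0;
	}

With the preload in switch_in() this prints "traps taken: 2" (one cold
trap per thread) over eight scheduling rounds; without it, every round
traps and the count becomes 8. That per-switch trap is the overhead the
patch removes.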