riscv: improve contended FPU switching
We can leverage the FPU dirty state as an indicator for preemptively
reloading the FPU content when a thread that used the FPU before being
scheduled out is scheduled back in. This avoids the FPU access trap
overhead when switching between multiple threads with heavy FPU usage.
Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
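For reference, the "dirty state" above is the mstatus.FS field from the
RISC-V privileged spec (mstatus bits 14:13). Below is a quick sketch of
the encoding in plain C; the enum and its FS_* names are illustrative
only, while Zephyr's actual MSTATUS_FS_* macros carry these values
already shifted into the FS bit position:

	/* mstatus.FS encoding per the RISC-V privileged spec */
	enum fs_state {
		FS_OFF     = 0, /* FPU access disabled: FP instructions trap */
		FS_INITIAL = 1, /* enabled, registers in their reset state */
		FS_CLEAN   = 2, /* enabled, registers match the in-memory copy */
		FS_DIRTY   = 3, /* enabled, registers modified since last save */
	};

A thread observed "dirty" at flush time was actively using the FPU,
which is what z_riscv_flush_local_fpu() below latches into
fpu_recently_used.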
diff --git a/arch/riscv/core/fpu.c b/arch/riscv/core/fpu.c
index 3d87538..8d38209 100644
--- a/arch/riscv/core/fpu.c
+++ b/arch/riscv/core/fpu.c
@@ -74,6 +74,22 @@
	}
}

+static void z_riscv_fpu_load(void)
+{
+	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
+		 "must be called with IRQs disabled");
+	__ASSERT((csr_read(mstatus) & MSTATUS_FS) == 0,
+		 "must be called with FPU access disabled");
+
+	/* become new owner */
+	atomic_ptr_set(&_current_cpu->arch.fpu_owner, _current);
+
+	/* restore our content */
+	csr_set(mstatus, MSTATUS_FS_INIT);
+	z_riscv_fpu_restore(&_current->arch.saved_fp_context);
+	DBG("restore", _current);
+}
+
/*
 * Flush FPU content and clear ownership. If the saved FPU state is "clean"
 * then we know the in-memory copy is up to date and skip the FPU content
@@ -101,6 +117,9 @@
			z_riscv_fpu_save(&owner->arch.saved_fp_context);
		}

+		/* dirty means active use */
+		owner->arch.fpu_recently_used = dirty;
+
		/* disable FPU access */
		csr_clear(mstatus, MSTATUS_FS);
@@ -217,16 +236,11 @@
	flush_owned_fpu(_current);
#endif

-	/* become new owner */
-	atomic_ptr_set(&_current_cpu->arch.fpu_owner, _current);
-
	/* make it accessible and clean to the returning context */
	esf->mstatus |= MSTATUS_FS_CLEAN;

-	/* restore our content */
-	csr_set(mstatus, MSTATUS_FS_INIT);
-	z_riscv_fpu_restore(&_current->arch.saved_fp_context);
-	DBG("restore", _current);
+	/* and load it with the corresponding content */
+	z_riscv_fpu_load();
}

/*
@@ -244,7 +258,28 @@
	if (_current->arch.exception_depth == exc_update_level) {
		/* We're about to execute non-exception code */
-		return (_current_cpu->arch.fpu_owner == _current);
+		if (_current_cpu->arch.fpu_owner == _current) {
+			/* everything is already in place */
+			return true;
+		}
+		if (_current->arch.fpu_recently_used) {
+			/*
+			 * Before this thread was context-switched out,
+			 * it made active use of the FPU, but someone else
+			 * took it away in the meantime. Let's preemptively
+			 * claim it back to avoid the exception trap that
+			 * would otherwise likely occur.
+			 */
+			z_riscv_fpu_disable();
+			z_riscv_flush_local_fpu();
+#ifdef CONFIG_SMP
+			flush_owned_fpu(_current);
+#endif
+			z_riscv_fpu_load();
+			_current_cpu->arch.fpu_state = MSTATUS_FS_CLEAN;
+			return true;
+		}
+		return false;
	}

	/*
	 * Any new exception level should always trap on FPU
diff --git a/include/zephyr/arch/riscv/thread.h b/include/zephyr/arch/riscv/thread.h
index 6ad4880..08f14b6 100644
--- a/include/zephyr/arch/riscv/thread.h
+++ b/include/zephyr/arch/riscv/thread.h
@@ -68,6 +68,7 @@
struct _thread_arch {
#ifdef CONFIG_FPU_SHARING
	struct z_riscv_fp_context saved_fp_context;
+	bool fpu_recently_used;
	uint8_t exception_depth;
#endif
#ifdef CONFIG_USERSPACE
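To see the effect of the heuristic in isolation, here is a minimal
host-side model in plain C. It is illustrative only: flush_fpu(),
switch_in() and fpu_use() are stand-ins for the scheduler hooks and the
FPU access trap, not Zephyr APIs, and the model flushes eagerly at
switch-out where the kernel does so lazily. Two FPU-heavy threads
ping-pong, as in the contended case the patch targets:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	struct thread {
		bool fpu_recently_used;	/* was the FPU dirty when last flushed? */
	};

	static struct thread *fpu_owner;	/* thread whose state is live in the FPU */
	static unsigned int traps;		/* lazy-restore traps taken */

	/* Flush the owner's state to memory, remembering whether it was dirty. */
	static void flush_fpu(bool dirty)
	{
		if (fpu_owner != NULL) {
			fpu_owner->fpu_recently_used = dirty;
			fpu_owner = NULL;
		}
	}

	/* The scheduler switches thread t in. */
	static void switch_in(struct thread *t)
	{
		if (fpu_owner == t) {
			return;			/* state already live: nothing to do */
		}
		if (t->fpu_recently_used) {
			flush_fpu(false);	/* model: live state was already saved */
			fpu_owner = t;		/* eager reload: no trap on next use */
		}
		/* otherwise leave the FPU disabled; first use traps and reloads */
	}

	/* An FP instruction executed by t: traps if t does not own the FPU. */
	static void fpu_use(struct thread *t)
	{
		if (fpu_owner != t) {
			traps++;		/* access trap, then reload */
			flush_fpu(false);
			fpu_owner = t;
		}
	}

	int main(void)
	{
		struct thread a = { false }, b = { false };

		for (int i = 0; i < 4; i++) {
			switch_in(&a);
			fpu_use(&a);
			flush_fpu(true);	/* a dirtied the FPU before switching out */
			switch_in(&b);
			fpu_use(&b);
			flush_fpu(true);
		}
		printf("traps taken: %u\n", traps);
		return 0;
	}

With the preload in switch_in() this prints "traps taken: 2" (one cold
trap per thread) over eight scheduling rounds; without it, every round
traps and the count becomes 8. That per-switch trap is the overhead the
patch removes.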