[src/trunk]: src/sys/arch x86: Add kthread_fpu_enter/exit support, take two.
details: https://anonhg.NetBSD.org/src/rev/b5cb4674fcae
branches: trunk
changeset: 373749:b5cb4674fcae
user: riastradh <riastradh@NetBSD.org>
date: Sat Feb 25 18:04:42 2023 +0000
description:
x86: Add kthread_fpu_enter/exit support, take two.
This time, make sure to restore the FPU state when switching to a
kthread in the middle of kthread_fpu_enter/exit.
This adds a single predicted-taken branch for kthreads that are not
in kthread_fpu_enter/exit, so it incurs a penalty only for threads
that actually use the facility. Since it avoids FPU state switching
in kthreads that do use the FPU, namely cgd worker threads, this
should be a net performance win on systems using it and should have
negligible impact otherwise.
XXX pullup-10
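For context, a kernel thread opts into this facility by bracketing its
FPU-using work with the MI kthread_fpu_enter/exit calls. The sketch
below shows a hypothetical worker loop in that style; the softc, the
sc_dying stop condition, and the assumed splraise/splx-style signatures
(int kthread_fpu_enter(void), void kthread_fpu_exit(int)) are
illustrative assumptions, not code from the tree:

#include <sys/kthread.h>

/*
 * Hypothetical FPU-using worker kthread.  Not NetBSD source; a
 * minimal sketch of the intended usage pattern.
 */
static void
example_worker(void *arg)
{
	struct example_softc *sc = arg;		/* hypothetical softc */
	int s;

	/*
	 * Sets LW_SYSTEM_FPU: from here on, cpu_switchto restores
	 * this thread's FPU state on every switch to it, and
	 * fpu_kern_enter/leave become no-ops in thread context.
	 */
	s = kthread_fpu_enter();

	while (!sc->sc_dying) {
		/* ... FPU-using work, e.g. vectorized crypto ... */
	}

	/* Scrubs and disables the FPU again (see kthread_fpu_exit_md). */
	kthread_fpu_exit(s);

	kthread_exit(0);
}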
diffstat:
sys/arch/amd64/amd64/genassym.cf | 3 +-
sys/arch/amd64/amd64/locore.S | 19 ++++++++-
sys/arch/x86/x86/fpu.c | 82 +++++++++++++++++++++++++++++++++------
3 files changed, 87 insertions(+), 17 deletions(-)
diffs (231 lines):
diff -r 1a218c656fd8 -r b5cb4674fcae sys/arch/amd64/amd64/genassym.cf
--- a/sys/arch/amd64/amd64/genassym.cf Sat Feb 25 18:04:25 2023 +0000
+++ b/sys/arch/amd64/amd64/genassym.cf Sat Feb 25 18:04:42 2023 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.93 2022/12/27 08:40:40 msaitoh Exp $
+# $NetBSD: genassym.cf,v 1.94 2023/02/25 18:04:42 riastradh Exp $
#
# Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -166,6 +166,7 @@
define L_MD_ASTPENDING offsetof(struct lwp, l_md.md_astpending)
define LW_SYSTEM LW_SYSTEM
+define LW_SYSTEM_FPU LW_SYSTEM_FPU
define MDL_IRET MDL_IRET
define MDL_COMPAT32 MDL_COMPAT32
define MDL_FPU_IN_CPU MDL_FPU_IN_CPU
diff -r 1a218c656fd8 -r b5cb4674fcae sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S Sat Feb 25 18:04:25 2023 +0000
+++ b/sys/arch/amd64/amd64/locore.S Sat Feb 25 18:04:42 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: locore.S,v 1.215 2022/12/26 17:46:00 christos Exp $ */
+/* $NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $ */
/*
* Copyright-o-rama!
@@ -1247,7 +1247,7 @@
/* Don't bother with the rest if switching to a system process. */
testl $LW_SYSTEM,L_FLAG(%r12)
- jnz .Lswitch_return
+ jnz .Lswitch_system
/* Is this process using RAS (restartable atomic sequences)? */
movq L_PROC(%r12),%rdi
@@ -1336,6 +1336,21 @@
popq %r12
popq %rbx
ret
+
+.Lswitch_system:
+ /*
+ * If it has LW_SYSTEM_FPU set, meaning it's running in
+ * kthread_fpu_enter/exit, we need to restore the FPU state
+ * and enable FPU instructions with fpu_handle_deferred.
+ *
+ * No need to test MDL_FPU_IN_CPU via HANDLE_DEFERRED_FPU --
+ * fpu_switch guarantees it is clear, so we can just call
+ * fpu_handle_deferred unconditionally.
+ */
+ testl $LW_SYSTEM_FPU,L_FLAG(%r12)
+ jz .Lswitch_return
+ callq _C_LABEL(fpu_handle_deferred)
+ jmp .Lswitch_return
END(cpu_switchto)
/*
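In C terms, the new .Lswitch_system path amounts to the following
hypothetical rendering (the assembly above is authoritative):

/*
 * Hypothetical C equivalent of the .Lswitch_system branch in
 * cpu_switchto, reached only for incoming system lwps.
 */
static void
switch_system_sketch(struct lwp *l)
{

	/*
	 * fpu_switch has already saved the outgoing lwp's FPU state
	 * and cleared MDL_FPU_IN_CPU, so if the incoming kthread is
	 * inside kthread_fpu_enter/exit its private FPU state can be
	 * reloaded unconditionally.
	 */
	if (l->l_flag & LW_SYSTEM_FPU)
		fpu_handle_deferred();
}

For an ordinary kthread, LW_SYSTEM_FPU is clear, so the jz to
.Lswitch_return is taken and fpu_handle_deferred is skipped -- this is
the single predicted-taken branch mentioned in the commit message.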
diff -r 1a218c656fd8 -r b5cb4674fcae sys/arch/x86/x86/fpu.c
--- a/sys/arch/x86/x86/fpu.c Sat Feb 25 18:04:25 2023 +0000
+++ b/sys/arch/x86/x86/fpu.c Sat Feb 25 18:04:42 2023 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $ */
+/* $NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $ */
/*
* Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. All
@@ -96,7 +96,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $");
#include "opt_multiprocessor.h"
@@ -107,6 +107,7 @@
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>
@@ -131,13 +132,35 @@
uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
+/*
+ * True if this is a thread that is allowed to use the FPU -- either a
+ * user thread, or a system thread with LW_SYSTEM_FPU enabled.
+ */
+static inline bool
+lwp_can_haz_fpu(struct lwp *l)
+{
+
+ return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) != LW_SYSTEM;
+}
+
+/*
+ * True if this is a system thread with its own private FPU state.
+ */
+static inline bool
+lwp_system_fpu_p(struct lwp *l)
+{
+
+ return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
+ (LW_SYSTEM|LW_SYSTEM_FPU);
+}
+
static inline union savefpu *
fpu_lwp_area(struct lwp *l)
{
struct pcb *pcb = lwp_getpcb(l);
union savefpu *area = &pcb->pcb_savefpu;
- KASSERT((l->l_flag & LW_SYSTEM) == 0);
+ KASSERT(lwp_can_haz_fpu(l));
if (l == curlwp) {
fpu_save();
}
@@ -155,7 +178,7 @@
s = splvm();
if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
- KASSERT((l->l_flag & LW_SYSTEM) == 0);
+ KASSERT(lwp_can_haz_fpu(l));
fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
}
@@ -314,7 +337,7 @@
cpu_index(ci), ci->ci_ilevel);
if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
- KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
+ KASSERT(lwp_can_haz_fpu(oldlwp));
pcb = lwp_getpcb(oldlwp);
fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
!(oldlwp->l_proc->p_flag & PK_32));
@@ -330,11 +353,11 @@
union savefpu *fpu_save;
/* Kernel threads have no FPU. */
- if (__predict_false(l2->l_flag & LW_SYSTEM)) {
+ if (__predict_false(!lwp_can_haz_fpu(l2))) {
return;
}
/* For init(8). */
- if (__predict_false(l1->l_flag & LW_SYSTEM)) {
+ if (__predict_false(!lwp_can_haz_fpu(l1))) {
memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
return;
}
@@ -358,6 +381,13 @@
/* -------------------------------------------------------------------------- */
+static const union savefpu safe_fpu __aligned(64) = {
+ .sv_xmm = {
+ .fx_mxcsr = __SAFE_MXCSR__,
+ },
+};
+static const union savefpu zero_fpu __aligned(64);
+
/*
* fpu_kern_enter()
*
@@ -373,15 +403,15 @@
void
fpu_kern_enter(void)
{
- static const union savefpu safe_fpu __aligned(64) = {
- .sv_xmm = {
- .fx_mxcsr = __SAFE_MXCSR__,
- },
- };
struct lwp *l = curlwp;
struct cpu_info *ci;
int s;
+ if (lwp_system_fpu_p(l) && !cpu_intr_p()) {
+ KASSERT(!cpu_softintr_p());
+ return;
+ }
+
s = splvm();
ci = curcpu();
@@ -427,10 +457,16 @@
void
fpu_kern_leave(void)
{
- static const union savefpu zero_fpu __aligned(64);
- struct cpu_info *ci = curcpu();
+ struct cpu_info *ci;
int s;
+ if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+ KASSERT(!cpu_softintr_p());
+ return;
+ }
+
+ ci = curcpu();
+
#if 0
/*
* Can't assert this because if the caller holds a spin lock at
@@ -459,6 +495,24 @@
splx(s);
}
+void
+kthread_fpu_enter_md(void)
+{
+
+ /* Enable the FPU by clearing CR0_TS, and enter a safe FPU state. */
+ clts();
+ fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
+}
+
+void
+kthread_fpu_exit_md(void)
+{
+
+ /* Zero the FPU state and disable the FPU by setting CR0_TS. */
+ fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);
+ stts();
+}
+
/* -------------------------------------------------------------------------- */
/*
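As a quick check of the mask arithmetic in lwp_can_haz_fpu and
lwp_system_fpu_p above, the two predicates partition lwps as follows:

	l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)	can_haz_fpu	system_fpu_p
	0 (user lwp)				true		false
	LW_SYSTEM (ordinary kthread)		false		false
	LW_SYSTEM|LW_SYSTEM_FPU			true		true
	LW_SYSTEM_FPU alone			true		false	(not expected to occur)

Hoisting safe_fpu and zero_fpu out of fpu_kern_enter/leave lets the new
kthread_fpu_enter_md/exit_md hooks share them: enter clears CR0_TS and
loads the safe state (__SAFE_MXCSR__), and exit scrubs any secrets by
loading the all-zero state before setting CR0_TS again.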