Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/usr.sbin/kgmon Improved the performance of kernel profiling ...
details: https://anonhg.NetBSD.org/src/rev/015fca9e3ff8
branches: trunk
changeset: 985232:015fca9e3ff8
user: ryo <ryo%NetBSD.org@localhost>
date: Sat Aug 14 17:51:18 2021 +0000
description:
Improved the performance of kernel profiling on MULTIPROCESSOR, and made it possible to get profiling data for each CPU.
In the current implementation, a lock is acquired at the entrance of the mcount
internal function, so the higher the number of cores, the more lock contention
occurs, making profiling in a MULTIPROCESSOR environment unusably slow.
Profiling buffers have been changed to be reserved for each CPU,
improving profiling performance on MP by several to several dozen times.
- Eliminated cpu_simple_lock in mcount internal function, using per-CPU buffers.
- Add ci_gmon member to struct cpu_info of each MP arch.
- Add kern.profiling.percpu node in sysctl tree.
- Add new -c <cpuid> option to kgmon(8) to specify the cpuid, like OpenBSD.
For compatibility, if the -c option is not specified, the entire system can be
operated as before, and the -p option will get the total profiling data for
all CPUs.
diffstat:
common/lib/libc/gmon/mcount.c | 126 +++++++++++++++--
sys/arch/aarch64/include/cpu.h | 6 +-
sys/arch/alpha/include/cpu.h | 6 +-
sys/arch/arm/include/cpu.h | 7 +-
sys/arch/hppa/include/cpu.h | 7 +-
sys/arch/mips/include/cpu.h | 6 +-
sys/arch/or1k/include/cpu.h | 5 +-
sys/arch/powerpc/include/cpu.h | 6 +-
sys/arch/riscv/include/cpu.h | 5 +-
sys/arch/sparc/include/cpu.h | 7 +-
sys/arch/sparc64/include/cpu.h | 7 +-
sys/arch/vax/include/cpu.h | 6 +-
sys/arch/x86/include/cpu.h | 6 +-
sys/kern/kern_clock.c | 11 +-
sys/kern/subr_prof.c | 284 +++++++++++++++++++++++++++++++++++++++-
sys/sys/gmon.h | 3 +-
usr.sbin/kgmon/kgmon.8 | 23 +++-
usr.sbin/kgmon/kgmon.c | 188 ++++++++++++++++++--------
18 files changed, 608 insertions(+), 101 deletions(-)
diffs (truncated from 1322 to 300 lines):
diff -r 923a130d0126 -r 015fca9e3ff8 common/lib/libc/gmon/mcount.c
--- a/common/lib/libc/gmon/mcount.c Sat Aug 14 17:38:44 2021 +0000
+++ b/common/lib/libc/gmon/mcount.c Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: mcount.c,v 1.15 2021/08/14 17:38:44 ryo Exp $ */
+/* $NetBSD: mcount.c,v 1.16 2021/08/14 17:51:18 ryo Exp $ */
/*
* Copyright (c) 2003, 2004 Wasabi Systems, Inc.
@@ -76,13 +76,14 @@
#if 0
static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#else
-__RCSID("$NetBSD: mcount.c,v 1.15 2021/08/14 17:38:44 ryo Exp $");
+__RCSID("$NetBSD: mcount.c,v 1.16 2021/08/14 17:51:18 ryo Exp $");
#endif
#endif
#include <sys/param.h>
#include <sys/gmon.h>
#include <sys/lock.h>
+#include <sys/proc.h>
#ifndef _KERNEL
#include "reentrant.h"
@@ -94,10 +95,6 @@
struct gmonparam *_m_gmon_alloc(void);
#endif
-#if defined(_KERNEL) && !defined(_RUMPKERNEL) && defined(MULTIPROCESSOR)
-__cpu_simple_lock_t __mcount_lock;
-#endif
-
#ifndef __LINT__
_MCOUNT_DECL(u_long, u_long)
#ifdef _KERNEL
@@ -168,8 +165,11 @@
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
MCOUNT_ENTER;
#ifdef MULTIPROCESSOR
- __cpu_simple_lock(&__mcount_lock);
- __insn_barrier();
+ p = curcpu()->ci_gmon;
+ if (p == NULL || p->state != GMON_PROF_ON) {
+ MCOUNT_EXIT;
+ return;
+ }
#endif
#endif
p->state = GMON_PROF_BUSY;
@@ -264,10 +264,6 @@
done:
p->state = GMON_PROF_ON;
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
-#ifdef MULTIPROCESSOR
- __insn_barrier();
- __cpu_simple_unlock(&__mcount_lock);
-#endif
MCOUNT_EXIT;
#endif
return;
@@ -275,10 +271,6 @@
overflow:
p->state = GMON_PROF_ERROR;
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
-#ifdef MULTIPROCESSOR
- __insn_barrier();
- __cpu_simple_unlock(&__mcount_lock);
-#endif
MCOUNT_EXIT;
#endif
return;
@@ -293,4 +285,106 @@
MCOUNT
#endif
+#if defined(_KERNEL) && !defined(_RUMPKERNEL) && defined(MULTIPROCESSOR)
+void _gmonparam_merge(struct gmonparam *, struct gmonparam *);
+
+void
+_gmonparam_merge(struct gmonparam *p, struct gmonparam *q)
+{
+ u_long fromindex;
+ u_short *frompcindex, qtoindex, toindex;
+ u_long selfpc;
+ u_long endfrom;
+ long count;
+ struct tostruct *top;
+ int i;
+
+ count = q->kcountsize / sizeof(*q->kcount);
+ for (i = 0; i < count; i++)
+ p->kcount[i] += q->kcount[i];
+
+ endfrom = (q->fromssize / sizeof(*q->froms));
+ for (fromindex = 0; fromindex < endfrom; fromindex++) {
+ if (q->froms[fromindex] == 0)
+ continue;
+ for (qtoindex = q->froms[fromindex]; qtoindex != 0;
+ qtoindex = q->tos[qtoindex].link) {
+ selfpc = q->tos[qtoindex].selfpc;
+ count = q->tos[qtoindex].count;
+ /* cribbed from mcount */
+ frompcindex = &p->froms[fromindex];
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ /* halt further profiling */
+ goto overflow;
+
+ *frompcindex = (u_short)toindex;
+ top = &p->tos[(size_t)toindex];
+ top->selfpc = selfpc;
+ top->count = count;
+ top->link = 0;
+ goto done;
+ }
+ top = &p->tos[(size_t)toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count+= count;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and
+ * none of the chain had
+ * top->selfpc == selfpc. so
+ * we allocate a new tostruct
+ * and link it to the head of
+ * the chain.
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ goto overflow;
+
+ top = &p->tos[(size_t)toindex];
+ top->selfpc = selfpc;
+ top->count = count;
+ top->link = *frompcindex;
+ *frompcindex = (u_short)toindex;
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ top = &p->tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * add to its count.
+ */
+ top->count += count;
+ goto done;
+ }
+ }
+
+ done: ;
+ }
+
+ }
+ overflow: ;
+
+}
+#endif
+
#endif /* (!_KERNEL || GPROF) && !_STANDALONE */
diff -r 923a130d0126 -r 015fca9e3ff8 sys/arch/aarch64/include/cpu.h
--- a/sys/arch/aarch64/include/cpu.h Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/aarch64/include/cpu.h Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.37 2021/08/08 19:28:08 skrll Exp $ */
+/* $NetBSD: cpu.h,v 1.38 2021/08/14 17:51:18 ryo Exp $ */
/*-
* Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
@@ -37,6 +37,7 @@
#ifdef __aarch64__
#ifdef _KERNEL_OPT
+#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif
@@ -133,6 +134,9 @@
struct aarch64_cache_info *ci_cacheinfo;
struct aarch64_cpufuncs ci_cpufuncs;
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+ struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
+#endif
} __aligned(COHERENCY_UNIT);
#ifdef _KERNEL
diff -r 923a130d0126 -r 015fca9e3ff8 sys/arch/alpha/include/cpu.h
--- a/sys/arch/alpha/include/cpu.h Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/alpha/include/cpu.h Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.103 2021/07/22 01:39:18 thorpej Exp $ */
+/* $NetBSD: cpu.h,v 1.104 2021/08/14 17:51:18 ryo Exp $ */
/*-
* Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
@@ -72,6 +72,7 @@
#define _ALPHA_CPU_H_
#if defined(_KERNEL_OPT)
+#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"
#endif
@@ -140,6 +141,9 @@
uint64_t ci_pcc_freq; /* cpu cycles/second */
struct trapframe *ci_db_regs; /* registers for debuggers */
u_int ci_nintrhand; /* # of interrupt handlers */
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+ struct gmonparam *ci_gmon; /* [MI] per-cpu GPROF */
+#endif
};
/* Ensure some cpu_info fields are within the signed 16-bit displacement. */
diff -r 923a130d0126 -r 015fca9e3ff8 sys/arch/arm/include/cpu.h
--- a/sys/arch/arm/include/cpu.h Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/arm/include/cpu.h Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.118 2021/08/08 19:28:08 skrll Exp $ */
+/* $NetBSD: cpu.h,v 1.119 2021/08/14 17:51:18 ryo Exp $ */
/*
* Copyright (c) 1994-1996 Mark Brinicombe.
@@ -92,6 +92,7 @@
*/
#if !defined(_MODULE) && defined(_KERNEL_OPT)
+#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#include "opt_cpuoptions.h"
#include "opt_lockdebug.h"
@@ -223,6 +224,10 @@
struct arm_cache_info *
ci_cacheinfo;
+
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+ struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
+#endif
};
extern struct cpu_info cpu_info_store[];
diff -r 923a130d0126 -r 015fca9e3ff8 sys/arch/hppa/include/cpu.h
--- a/sys/arch/hppa/include/cpu.h Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/hppa/include/cpu.h Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.10 2020/04/16 09:28:52 skrll Exp $ */
+/* $NetBSD: cpu.h,v 1.11 2021/08/14 17:51:19 ryo Exp $ */
/* $OpenBSD: cpu.h,v 1.55 2008/07/23 17:39:35 kettenis Exp $ */
@@ -55,6 +55,7 @@
#ifdef _KERNEL_OPT
#include "opt_cputype.h"
+#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif
@@ -300,7 +301,9 @@
struct cpu_softc *ci_softc;
#endif
-
+#if defined(GPROF) && defined(MULTIPROCESSOR)
+ struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
+#endif
#endif /* !_KMEMUSER */
} __aligned(64);
diff -r 923a130d0126 -r 015fca9e3ff8 sys/arch/mips/include/cpu.h
--- a/sys/arch/mips/include/cpu.h Sat Aug 14 17:38:44 2021 +0000
+++ b/sys/arch/mips/include/cpu.h Sat Aug 14 17:51:18 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.132 2021/03/29 01:47:45 simonb Exp $ */
+/* $NetBSD: cpu.h,v 1.133 2021/08/14 17:51:19 ryo Exp $ */
/*-
* Copyright (c) 1992, 1993
@@ -49,6 +49,7 @@
Home |
Main Index |
Thread Index |
Old Index