src/sys/kern/subr_prof.c

388 lines
9.9 KiB
C

/* $OpenBSD: subr_prof.c,v 1.41 2024/01/24 19:23:38 cheloha Exp $ */
/* $NetBSD: subr_prof.c,v 1.12 1996/04/22 01:38:50 christos Exp $ */
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/clockintr.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/user.h>
/*
 * Period handed to the clock interrupt API by both profiling clocks in
 * this file: prof_init() staggers with it, prof_state_toggle() advances
 * by it, and gmonclock()/profclock() re-arm their requests with it.
 * It is not assigned in the code visible here.
 * NOTE(review): units are presumably nanoseconds -- confirm against the
 * clockintr API.
 */
uint64_t profclock_period;
#if defined(GPROF) || defined(DDBPROF)
#include <sys/malloc.h>
#include <sys/gmon.h>
#include <uvm/uvm_extern.h>
#include <machine/db_machdep.h>
#include <ddb/db_extern.h>
/*
 * Flag to prevent CPUs from executing the mcount() monitor function
 * until we're sure they are in a sane state.  It starts out clear and
 * is set to 1 by sysctl_doprof() once a valid per-CPU buffer is found.
 */
int gmoninit = 0;
/*
 * Incremented and decremented by prof_state_toggle(), which asserts the
 * kernel lock; the first enable starts and the last disable stops the
 * profiling clock on process0.
 */
u_int gmon_cpu_count; /* [K] number of CPUs with profiling enabled */
/* End of kernel text; prof_init() rounds it up to get the high pc bound. */
extern char etext[];
/* Per-CPU clock interrupt callback, bound to ci_gmonclock in prof_init(). */
void gmonclock(struct clockrequest *, void *, void *);
void
prof_init(void)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
struct gmonparam *p;
u_long lowpc, highpc, textsize;
u_long kcountsize, fromssize, tossize;
long tolimit;
char *cp;
int size;
/*
* Round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
*/
lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
textsize = highpc - lowpc;
#ifdef GPROF
printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
textsize, lowpc, highpc);
#endif
kcountsize = textsize / HISTFRACTION;
fromssize = textsize / HASHFRACTION;
tolimit = textsize * ARCDENSITY / 100;
if (tolimit < MINARCS)
tolimit = MINARCS;
else if (tolimit > MAXARCS)
tolimit = MAXARCS;
tossize = tolimit * sizeof(struct tostruct);
size = sizeof(*p) + kcountsize + fromssize + tossize;
/* Allocate and initialize one profiling buffer per CPU. */
CPU_INFO_FOREACH(cii, ci) {
cp = km_alloc(round_page(size), &kv_any, &kp_zero, &kd_nowait);
if (cp == NULL) {
printf("No memory for profiling.\n");
return;
}
clockintr_bind(&ci->ci_gmonclock, ci, gmonclock, NULL);
clockintr_stagger(&ci->ci_gmonclock, profclock_period,
CPU_INFO_UNIT(ci), MAXCPUS);
p = (struct gmonparam *)cp;
cp += sizeof(*p);
p->tos = (struct tostruct *)cp;
cp += tossize;
p->kcount = (u_short *)cp;
cp += kcountsize;
p->froms = (u_short *)cp;
p->state = GMON_PROF_OFF;
p->lowpc = lowpc;
p->highpc = highpc;
p->textsize = textsize;
p->hashfraction = HASHFRACTION;
p->kcountsize = kcountsize;
p->fromssize = fromssize;
p->tolimit = tolimit;
p->tossize = tossize;
ci->ci_gmon = p;
}
}
/*
 * Act on a change of a CPU's profiling state.
 *
 * The caller has already stored the requested state in ci->ci_gmon->state
 * and passes the previous value in oldstate.  Must be called with the
 * kernel lock held.
 *
 * gmon_cpu_count is kept balanced: it is incremented only when profiling
 * was actually started on this CPU and decremented only when the CPU was
 * previously on.
 *
 * Returns 0 on success or when the state did not change, the error from
 * db_prof_enable() when enabling failed (the state is put back to off),
 * or EINVAL when the requested state is neither GMON_PROF_ON nor
 * GMON_PROF_OFF (the state is forced off).
 */
int
prof_state_toggle(struct cpu_info *ci, int oldstate)
{
	struct gmonparam *gp = ci->ci_gmon;
	int error = 0;

	KERNEL_ASSERT_LOCKED();

	if (gp->state == oldstate)
		return (0);

	switch (gp->state) {
	case GMON_PROF_ON:
#if !defined(GPROF)
		/*
		 * If this is not a profiling kernel, we need to patch
		 * all symbols that can be instrumented.
		 */
		error = db_prof_enable();
#endif
		if (error != 0) {
			/*
			 * Nothing was started, so do not leave the state
			 * claiming otherwise: a later switch to off would
			 * decrement a count that was never incremented.
			 */
			gp->state = GMON_PROF_OFF;
			break;
		}
		if (++gmon_cpu_count == 1)
			startprofclock(&process0);
		clockintr_advance(&ci->ci_gmonclock, profclock_period);
		break;
	default:
		error = EINVAL;
		gp->state = GMON_PROF_OFF;
		/* FALLTHROUGH */
	case GMON_PROF_OFF:
		clockintr_cancel(&ci->ci_gmonclock);
		/*
		 * Only a CPU that was on contributed to the count.  An
		 * invalid request made while off must not decrement it,
		 * or the unsigned counter wraps and the profiling clock
		 * is never started again.
		 */
		if (oldstate == GMON_PROF_ON && --gmon_cpu_count == 0)
			stopprofclock(&process0);
#if !defined(GPROF)
		db_prof_disable();
#endif
		break;
	}

	return (error);
}
/*
 * sysctl handler for kernel profiling data.
 *
 * The name has exactly two components: name[0] selects the operation
 * (GPROF_*) and name[1] the CPU unit whose buffer is addressed.
 * Returns ENOTDIR for any other name length and EOPNOTSUPP when no CPU
 * with a profiling buffer matches or the operation is unknown; otherwise
 * the result of the underlying sysctl helper or of prof_state_toggle().
 */
int
sysctl_doprof(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *gp = NULL;
	int error, prevstate;

	/* all sysctl names at this level are name and field */
	if (namelen != 2)
		return (ENOTDIR);		/* overloaded */

	/* Look up the buffer of the requested CPU; ci stays on the match. */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_INFO_UNIT(ci) != name[1])
			continue;
		gp = ci->ci_gmon;
		break;
	}
	if (gp == NULL)
		return (EOPNOTSUPP);

	/* Assume that if we're here it is safe to execute profiling. */
	gmoninit = 1;

	switch (name[0]) {
	case GPROF_STATE:
		/* Remember the old state so the toggle can see the change. */
		prevstate = gp->state;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
		if (error == 0)
			error = prof_state_toggle(ci, prevstate);
		break;
	case GPROF_COUNT:
		error = sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->kcount, gp->kcountsize);
		break;
	case GPROF_FROMS:
		error = sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->froms, gp->fromssize);
		break;
	case GPROF_TOS:
		error = sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->tos, gp->tossize);
		break;
	case GPROF_GMONPARAM:
		error = sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}
void
gmonclock(struct clockrequest *cr, void *cf, void *arg)
{
uint64_t count;
struct clockframe *frame = cf;
struct gmonparam *g = curcpu()->ci_gmon;
u_long i;
count = clockrequest_advance(cr, profclock_period);
if (count > ULONG_MAX)
count = ULONG_MAX;
/*
* Kernel statistics are just like addupc_intr(), only easier.
*/
if (!CLKF_USERMODE(frame) && g != NULL && g->state == GMON_PROF_ON) {
i = CLKF_PC(frame) - g->lowpc;
if (i < g->textsize) {
i /= HISTFRACTION * sizeof(*g->kcount);
g->kcount[i] += (u_long)count;
}
}
}
#endif /* GPROF || DDBPROF */
/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 *
 * Returns the error from pledge_profil() if it refuses the request,
 * EINVAL for a scale above 1.0, and 0 otherwise.
 */
int
sys_profil(struct proc *p, void *v, register_t *retval)
{
	struct sys_profil_args /* {
		syscallarg(caddr_t) samples;
		syscallarg(size_t) size;
		syscallarg(u_long) offset;
		syscallarg(u_int) scale;
	} */ *uap = v;
	struct process *pr = p->p_p;
	struct uprof *prof;
	u_int scale = SCARG(uap, scale);
	int error, s;

	if ((error = pledge_profil(p, scale)) != 0)
		return error;

	if (scale > (1 << 16))
		return (EINVAL);

	if (scale != 0) {
		prof = &pr->ps_prof;

		/* Block profile interrupts while changing state. */
		s = splstatclock();
		prof->pr_off = SCARG(uap, offset);
		prof->pr_scale = scale;
		prof->pr_base = (caddr_t)SCARG(uap, samples);
		prof->pr_size = SCARG(uap, size);
		startprofclock(pr);
		splx(s);
	} else {
		stopprofclock(pr);
	}

	/* Either way, request a reschedule of the current CPU. */
	need_resched(curcpu());
	return (0);
}
void
profclock(struct clockrequest *cr, void *cf, void *arg)
{
uint64_t count;
struct clockframe *frame = cf;
struct proc *p = curproc;
count = clockrequest_advance(cr, profclock_period);
if (count > ULONG_MAX)
count = ULONG_MAX;
if (CLKF_USERMODE(frame)) {
if (ISSET(p->p_p->ps_flags, PS_PROFIL))
addupc_intr(p, CLKF_PC(frame), (u_long)count);
} else {
if (p != NULL && ISSET(p->p_p->ps_flags, PS_PROFIL))
addupc_intr(p, PROC_PC(p), (u_long)count);
}
}
/*
 * Map a pc to a byte offset into the user's profiling buffer.
 *
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0 (sys_profil() rejects anything larger).
 * The low bit is cleared because addupc_task() uses the result to
 * address a u_short counter.
 *
 * The result is a u_quad_t and is deliberately not narrowed: where pc
 * is wider than 32 bits, truncating to int would let an address far
 * outside the buffer wrap around to a small index and pass the
 * "index < pr_size" range check.  Callers must already have checked
 * pc >= pr_off.  The intermediate product fits in 64 bits as long as
 * pc - pr_off is below 2^48.
 */
#define PC_TO_INDEX(pc, prof) \
	((((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~(u_quad_t)1)
/*
 * Record user-level profiling ticks from a profiling clock interrupt.
 *
 * This may run in interrupt context, so nothing is copied to user memory
 * here.  The pc and tick count are stashed in the proc, P_OWEUPC is set
 * and need_proftick() is called so that the update can be finished later
 * via addupc_task(), in a context where copyin and copyout will work.
 * A pc outside the profiled range is silently ignored.
 */
void
addupc_intr(struct proc *p, u_long pc, u_long nticks)
{
	struct uprof *up = &p->p_p->ps_prof;

	/* out of range; ignore */
	if (pc < up->pr_off)
		return;
	if (PC_TO_INDEX(pc, up) >= up->pr_size)
		return;

	p->p_prof_addr = pc;
	p->p_prof_ticks += nticks;
	atomic_setbits_int(&p->p_flag, P_OWEUPC);
	need_proftick(p);
}
/*
 * Add nticks to the profiling counter for pc in the process's sample
 * buffer.  Unlike addupc_intr() we can afford to take faults here.  If
 * reading or writing the counter in user memory fails, profiling is
 * simply turned off for the process.
 */
void
addupc_task(struct proc *p, u_long pc, u_int nticks)
{
	struct process *pr = p->p_p;
	struct uprof *up = &pr->ps_prof;
	caddr_t uaddr;
	u_int idx;
	u_short cnt;

	/* Testing PS_PROFIL may be unnecessary, but is certainly safe. */
	if (!ISSET(pr->ps_flags, PS_PROFIL) || nticks == 0)
		return;

	/* Ignore a pc outside the profiled range. */
	if (pc < up->pr_off)
		return;
	idx = PC_TO_INDEX(pc, up);
	if (idx >= up->pr_size)
		return;

	/* Read-modify-write the 16-bit counter in user memory. */
	uaddr = up->pr_base + idx;
	if (copyin(uaddr, (caddr_t)&cnt, sizeof(cnt)) != 0) {
		stopprofclock(pr);
		return;
	}
	cnt += nticks;
	if (copyout((caddr_t)&cnt, uaddr, sizeof(cnt)) != 0)
		stopprofclock(pr);
}