305 lines
6.8 KiB
C
305 lines
6.8 KiB
C
/* $OpenBSD: kern_smr.c,v 1.16 2022/08/14 01:58:27 jsg Exp $ */

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kthread.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/percpu.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/smr.h>
|
|
#include <sys/time.h>
|
|
#include <sys/tracepoint.h>
|
|
#include <sys/witness.h>
|
|
|
|
#include <machine/cpu.h>
|
|
|
|
/* Pause between dispatch rounds, in milliseconds. */
#define SMR_PAUSE	100

/* Prototypes for routines defined further down in this file. */
void	smr_dispatch(struct schedstate_percpu *spc);
void	smr_grace_wait(void);
void	smr_thread(void *arg);
void	smr_wakeup(void *arg);
|
|
|
|
struct mutex smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
|
|
struct smr_entry_list smr_deferred;
|
|
struct timeout smr_wakeup_tmo;
|
|
unsigned int smr_expedite;
|
|
unsigned int smr_ndeferred;
|
|
unsigned char smr_grace_period;
|
|
|
|
#ifdef WITNESS
/*
 * Lock object and lock type passed to the WITNESS_* macros in this file.
 * smr_thread() marks the object as locked while it runs callbacks, and
 * smr_barrier_impl() checks lock ordering against it.
 */
static const char smr_lock_name[] = "smr";

struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};

struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = (LO_CLASS_RWLOCK << LO_CLASSSHIFT) |
	    LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE
};
#endif
|
|
|
|
static inline int
|
|
smr_cpu_is_idle(struct cpu_info *ci)
|
|
{
|
|
return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
|
|
}
|
|
|
|
void
|
|
smr_startup(void)
|
|
{
|
|
SIMPLEQ_INIT(&smr_deferred);
|
|
WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
|
|
timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
|
|
}
|
|
|
|
void
|
|
smr_startup_thread(void)
|
|
{
|
|
if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
|
|
panic("could not create smr thread");
|
|
}
|
|
|
|
/*
 * Interval handed to ratecheck() by smr_thread() to limit how often the
 * "dispatch took" message is printed.
 */
struct timeval smr_logintvl = { .tv_sec = 300, .tv_usec = 0 };
|
|
|
|
void
|
|
smr_thread(void *arg)
|
|
{
|
|
struct timeval elapsed, end, loglast, start;
|
|
struct smr_entry_list deferred;
|
|
struct smr_entry *smr;
|
|
unsigned long count;
|
|
|
|
KERNEL_ASSERT_LOCKED();
|
|
KERNEL_UNLOCK();
|
|
|
|
memset(&loglast, 0, sizeof(loglast));
|
|
SIMPLEQ_INIT(&deferred);
|
|
|
|
for (;;) {
|
|
mtx_enter(&smr_lock);
|
|
if (smr_ndeferred == 0) {
|
|
while (smr_ndeferred == 0)
|
|
msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
|
|
"bored", INFSLP);
|
|
} else {
|
|
if (smr_expedite == 0)
|
|
msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
|
|
"pause", MSEC_TO_NSEC(SMR_PAUSE));
|
|
}
|
|
|
|
SIMPLEQ_CONCAT(&deferred, &smr_deferred);
|
|
smr_ndeferred = 0;
|
|
smr_expedite = 0;
|
|
mtx_leave(&smr_lock);
|
|
|
|
getmicrouptime(&start);
|
|
|
|
smr_grace_wait();
|
|
|
|
WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
|
|
WITNESS_LOCK(&smr_lock_obj, 0);
|
|
|
|
count = 0;
|
|
while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
|
|
SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
|
|
TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
|
|
smr->smr_func(smr->smr_arg);
|
|
count++;
|
|
}
|
|
|
|
WITNESS_UNLOCK(&smr_lock_obj, 0);
|
|
|
|
getmicrouptime(&end);
|
|
timersub(&end, &start, &elapsed);
|
|
if (elapsed.tv_sec >= 2 &&
|
|
ratecheck(&loglast, &smr_logintvl)) {
|
|
printf("smr: dispatch took %ld.%06lds\n",
|
|
(long)elapsed.tv_sec,
|
|
(long)elapsed.tv_usec);
|
|
}
|
|
TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
|
|
}
|
|
}
|
|
|
|
/*
 * Announce the next grace period and wait until all CPUs have entered
 * it by crossing a quiescent state.
 *
 * The global period number is advanced and recorded for the current
 * CPU.  For every other running CPU whose recorded period still differs,
 * sched_peg_curproc() is called on it; afterwards that CPU is asserted
 * to have recorded the new period.  P_CPUPEG is cleared from the current
 * process at the end.  Without MULTIPROCESSOR this function does nothing.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *cpu;
	CPU_INFO_ITERATOR iter;
	unsigned char gp;

	gp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, gp);

	curcpu()->ci_schedstate.spc_smrgp = gp;

	CPU_INFO_FOREACH(iter, cpu) {
		/* Skip CPUs that are not running or are already up to date. */
		if (CPU_IS_RUNNING(cpu) &&
		    READ_ONCE(cpu->ci_schedstate.spc_smrgp) != gp) {
			sched_peg_curproc(cpu);
			KASSERT(cpu->ci_schedstate.spc_smrgp == gp);
		}
	}
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
#endif /* MULTIPROCESSOR */
}
|
|
|
|
void
|
|
smr_wakeup(void *arg)
|
|
{
|
|
TRACEPOINT(smr, wakeup, NULL);
|
|
wakeup(&smr_ndeferred);
|
|
}
|
|
|
|
/*
 * Note entry into an SMR read section.  With DIAGNOSTIC this increments
 * the current CPU's spc_smrdepth counter; otherwise it does nothing.
 */
void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	curcpu()->ci_schedstate.spc_smrdepth++;
#endif
}
|
|
|
|
/*
 * Note exit from an SMR read section.  With DIAGNOSTIC this asserts that
 * the current CPU's spc_smrdepth counter is positive and decrements it;
 * otherwise it does nothing.
 */
void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *sched = &curcpu()->ci_schedstate;

	KASSERT(sched->spc_smrdepth > 0);
	sched->spc_smrdepth--;
#endif
}
|
|
|
|
/*
|
|
* Move SMR entries from the local queue to the system-wide queue.
|
|
*/
|
|
void
|
|
smr_dispatch(struct schedstate_percpu *spc)
|
|
{
|
|
int expedite = 0, wake = 0;
|
|
|
|
mtx_enter(&smr_lock);
|
|
if (smr_ndeferred == 0)
|
|
wake = 1;
|
|
SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
|
|
smr_ndeferred += spc->spc_ndeferred;
|
|
spc->spc_ndeferred = 0;
|
|
smr_expedite |= spc->spc_smrexpedite;
|
|
spc->spc_smrexpedite = 0;
|
|
expedite = smr_expedite;
|
|
mtx_leave(&smr_lock);
|
|
|
|
if (expedite)
|
|
smr_wakeup(NULL);
|
|
else if (wake)
|
|
timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
|
|
}
|
|
|
|
/*
|
|
* Signal that the current CPU is in quiescent state.
|
|
*/
|
|
void
|
|
smr_idle(void)
|
|
{
|
|
struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
|
|
unsigned char smrgp;
|
|
|
|
SMR_ASSERT_NONCRITICAL();
|
|
|
|
if (spc->spc_ndeferred > 0)
|
|
smr_dispatch(spc);
|
|
|
|
/*
|
|
* Update this CPU's view of the system's grace period.
|
|
* The update must become visible after any preceding reads
|
|
* of SMR-protected data.
|
|
*/
|
|
smrgp = READ_ONCE(smr_grace_period);
|
|
if (__predict_false(spc->spc_smrgp != smrgp)) {
|
|
membar_exit();
|
|
WRITE_ONCE(spc->spc_smrgp, smrgp);
|
|
}
|
|
}
|
|
|
|
void
|
|
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
|
|
int expedite)
|
|
{
|
|
struct cpu_info *ci = curcpu();
|
|
struct schedstate_percpu *spc = &ci->ci_schedstate;
|
|
int s;
|
|
|
|
KASSERT(smr->smr_func == NULL);
|
|
|
|
smr->smr_func = func;
|
|
smr->smr_arg = arg;
|
|
|
|
s = splhigh();
|
|
SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
|
|
spc->spc_ndeferred++;
|
|
spc->spc_smrexpedite |= expedite;
|
|
splx(s);
|
|
TRACEPOINT(smr, call, func, arg, expedite);
|
|
|
|
/*
|
|
* If this call was made from an interrupt context that
|
|
* preempted idle state, dispatch the local queue to the shared
|
|
* queue immediately.
|
|
* The entries would linger in the local queue long if the CPU
|
|
* went to sleep without calling smr_idle().
|
|
*/
|
|
if (smr_cpu_is_idle(ci))
|
|
smr_dispatch(spc);
|
|
}
|
|
|
|
/*
 * SMR callback used by smr_barrier_impl(): signal the condition that
 * the waiting caller passed in.
 *
 * arg: pointer to the struct cond to signal.
 */
void
smr_barrier_func(void *arg)
{
	struct cond *done = arg;

	cond_signal(done);
}
|
|
|
|
void
|
|
smr_barrier_impl(int expedite)
|
|
{
|
|
struct cond c = COND_INITIALIZER();
|
|
struct smr_entry smr;
|
|
|
|
if (panicstr != NULL || db_active)
|
|
return;
|
|
|
|
WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
|
|
|
|
TRACEPOINT(smr, barrier_enter, expedite);
|
|
smr_init(&smr);
|
|
smr_call_impl(&smr, smr_barrier_func, &c, expedite);
|
|
cond_wait(&c, "smrbar");
|
|
TRACEPOINT(smr, barrier_exit, expedite);
|
|
}
|