HardenedBSD/sys/kern/kern_thr.c

271 lines
6.0 KiB
C
Raw Normal View History

/*
* Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/ucontext.h>
#include <sys/thr.h>
#include <machine/frame.h>
/*
* Back end support functions.
*/
/*
 * Tear down the calling thread (curthread) and switch away from it.
 *
 * Locking: the caller must hold sched_lock and the process lock, and must
 * not hold Giant; all three conditions are asserted on entry.  The process
 * lock is dropped on every path.  sched_lock is never released here.
 *
 * If the calling thread is the only thread left in the process, the
 * function just drops the process lock and returns to the caller.
 * Otherwise the thread and its KSE are unlinked from the process and
 * ksegrp, the thread is handed to thread_stash(), and cpu_throw() is
 * called to switch to another thread.
 */
void
thr_exit1(void)
{
	struct ksegrp *kg;
	struct thread *td;
	struct kse *ke;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	/*
	 * Shutting down last thread in the proc.  This will actually
	 * call exit() in the trampoline when it returns.
	 */
	if (p->p_numthreads == 1) {
		PROC_UNLOCK(p);
		return;
	}

	/*
	 * XXX Undelivered process wide signals should be reposted to the
	 * proc.
	 */

	/* Clean up cpu resources. */
	cpu_thread_exit(td);

	/* XXX make thread_unlink() */
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
	kg->kg_numthreads--;

	/* Detach the KSE from this thread and take it off its ksegrp. */
	ke->ke_state = KES_UNQUEUED;
	ke->ke_thread = NULL;
	kse_unlink(ke);
	/*
	 * NOTE(review): TAILQ_NEXT() is evaluated on ke after kse_unlink();
	 * presumably that call already removed ke from the ke_kglist list,
	 * so this reads a stale link -- confirm this is intended.
	 */
	sched_exit_kse(TAILQ_NEXT(ke, ke_kglist), ke);

	/*
	 * If we were stopped while waiting for all threads to exit and this
	 * is the last thread wakeup the exiting thread.
	 */
	if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE)
		if (p->p_numthreads == 1)
			thread_unsuspend_one(p->p_singlethread);

	PROC_UNLOCK(p);

	/* Scrub the thread's scheduling linkage before it is stashed. */
	td->td_kse = NULL;
	td->td_state = TDS_INACTIVE;
#if 0
	td->td_proc = NULL;
#endif
	td->td_ksegrp = NULL;
	td->td_last_kse = NULL;
	/*
	 * NOTE(review): td was removed from kg_threads (td_kglist) above, so
	 * TAILQ_NEXT() here reads the link of an already-removed element and
	 * may be NULL when td was last in the list -- confirm that
	 * sched_exit_thread() tolerates this.
	 */
	sched_exit_thread(TAILQ_NEXT(td, td_kglist), td);
	thread_stash(td);

	/* cpu_throw() takes different arguments on alpha and powerpc. */
#if !defined(__alpha__) && !defined(__powerpc__)
	cpu_throw(td, choosethread());
#else
	cpu_throw();
#endif
}
/*
 * Number of bytes between the start of member `first' and the start of
 * member `last' within `type'.
 */
#define	RANGEOF(type, first, last)					\
	(offsetof(type, last) - offsetof(type, first))
/*
* System call interface.
*/
/*
 * thr_create system call: create a new thread in the calling process.
 *
 * uap->ctx	user pointer to the ucontext_t whose machine context the
 *		new thread is given
 * uap->id	user pointer that receives the new thread's identifier
 * uap->flags	if THR_SUSPENDED is set the thread is not put on the
 *		run queue
 *
 * Returns 0 on success, otherwise the error reported by copyin(),
 * copyout() or set_mcontext().
 */
int
thr_create(struct thread *td, struct thr_create_args *uap)
    /* ucontext_t *ctx, thr_id_t *id, int flags */
{
	struct kse *ke0;
	struct thread *td0;
	ucontext_t ctx;
	int error;

	if ((error = copyin(uap->ctx, &ctx, sizeof(ctx))))
		return (error);

	/* Initialize our td. */
	td0 = thread_alloc();

	/*
	 * Try the copyout as soon as we allocate the td so we don't have to
	 * tear things down in a failure case below.
	 */
	if ((error = copyout(&td0, uap->id, sizeof(thr_id_t)))) {
		thread_free(td0);
		return (error);
	}

	/* Zero the "startzero" section and inherit the "startcopy" section. */
	bzero(&td0->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td0->td_startcopy,
	    (unsigned)RANGEOF(struct thread, td_startcopy, td_endcopy));

	td0->td_proc = td->td_proc;
	/* The creator's signal mask is copied under the process lock. */
	PROC_LOCK(td->td_proc);
	td0->td_sigmask = td->td_sigmask;
	PROC_UNLOCK(td->td_proc);
	td0->td_ucred = crhold(td->td_ucred);

	/* Initialize our kse structure. */
	ke0 = kse_alloc();
	bzero(&ke0->ke_startzero,
	    RANGEOF(struct kse, ke_startzero, ke_endzero));

	/* Set up our machine context. */
	cpu_set_upcall(td0, td);
	error = set_mcontext(td0, &ctx.uc_mcontext);
	if (error != 0) {
		/*
		 * Undo everything acquired so far, including the credential
		 * reference taken by crhold() above; without the crfree()
		 * that reference would be leaked on this path.
		 */
		kse_free(ke0);
		crfree(td0->td_ucred);
		thread_free(td0);
		return (error);
	}

	/* Link the thread and kse into the ksegrp and make it runnable. */
	mtx_lock_spin(&sched_lock);

	thread_link(td0, td->td_ksegrp);
	kse_link(ke0, td->td_ksegrp);

	/* Bind this thread and kse together. */
	td0->td_kse = ke0;
	ke0->ke_thread = td0;

	sched_fork_kse(td->td_kse, ke0);
	sched_fork_thread(td, td0);

	TD_SET_CAN_RUN(td0);
	if ((uap->flags & THR_SUSPENDED) == 0)
		setrunqueue(td0);

	mtx_unlock_spin(&sched_lock);

	return (error);
}
/*
 * thr_self system call: copy the calling thread's identifier (the value of
 * the td pointer) out to the user address uap->id.
 *
 * Returns 0 on success or the error reported by copyout().
 */
int
thr_self(struct thread *td, struct thr_self_args *uap)
    /* thr_id_t *id */
{

	return (copyout(&td, uap->id, sizeof(thr_id_t)));
}
/*
 * thr_exit system call: terminate the calling thread.
 *
 * Takes the process lock and sched_lock and hands off to thr_exit1(),
 * which drops the process lock.  Control only comes back here when the
 * caller is the last thread in the process; in that case sched_lock is
 * released and 0 is returned.
 */
int
thr_exit(struct thread *td, struct thr_exit_args *uap)
    /* NULL */
{

	PROC_LOCK(td->td_proc);
	mtx_lock_spin(&sched_lock);

	/* Does not return unless this is the last thread. */
	thr_exit1();

	mtx_unlock_spin(&sched_lock);
	return (0);
}
int
thr_kill(struct thread *td, struct thr_kill_args *uap)
/* thr_id_t id, int sig */
{
struct thread *ttd;
struct proc *p;
int error;
p = td->td_proc;
error = 0;
PROC_LOCK(p);
FOREACH_THREAD_IN_PROC(p, ttd)
if (ttd == uap->id)
break;
if (ttd == NULL) {
error = ESRCH;
goto out;
}
if (uap->sig == 0)
goto out;
if (!_SIG_VALID(uap->sig)) {
error = EINVAL;
goto out;
}
/*
* We need a way to force this to go into this thread's siglist.
* Until then blocked signals will go to the proc.
*/
tdsignal(ttd, uap->sig);
out:
PROC_UNLOCK(p);
return (error);
}