mirror of
https://git.hardenedbsd.org/hardenedbsd/HardenedBSD.git
synced 2024-11-18 17:00:49 +01:00
04f1835605
Eliminates vm_fault overhead on process startup and mmap referenced data for in-memory pages. (process startup time using in-memory segments *much* faster) 2) Even more efficient pmap code. Code partially cleaned up. More comments yet to follow. (generally more efficient pte management) 3) Pageout clustering ( in addition to the FreeBSD V1.1 pagein clustering.) (much faster paging performance on non-write behind disk subsystems, slightly faster performance on other systems.) 4) Slightly changed vm_pageout code for more efficiency and better statistics. Also, resist swapout a little more. (less likely to pageout a recently used page) 5) Slight improvement to the page table page trap efficiency. (generally faster system VM fault performance) 6) Defer creation of unnamed anonymous regions pager until needed. (speeds up shared memory bss creation) 7) Remove possible deadlock from swap_pager initialization. 8) Enhanced procfs to provide "vminfo" about vm objects and user pmaps. 9) Increased MCLSHIFT/MCLBYTES from 2K to 4K to improve net & socket performance and to prepare for things to come. John Dyson dyson@implode.root.com David Greenman davidg@root.com
700 lines
17 KiB
C
700 lines
17 KiB
C
/*-
|
|
* Copyright (c) 1990 The Regents of the University of California.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* the University of Utah, and William Jolitz.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* from: @(#)trap.c 7.4 (Berkeley) 5/13/91
|
|
* $Id: trap.c,v 1.17 1994/02/08 09:26:01 davidg Exp $
|
|
*/
|
|
|
|
/*
|
|
 * 386 Trap and System call handling
|
|
*/
|
|
|
|
#include "isa.h"
|
|
#include "npx.h"
|
|
#include "ddb.h"
|
|
#include "machine/cpu.h"
|
|
#include "machine/psl.h"
|
|
#include "machine/reg.h"
|
|
#include "machine/eflags.h"
|
|
|
|
#include "param.h"
|
|
#include "systm.h"
|
|
#include "proc.h"
|
|
#include "user.h"
|
|
#include "acct.h"
|
|
#include "kernel.h"
|
|
#ifdef KTRACE
|
|
#include "ktrace.h"
|
|
#endif
|
|
|
|
#include "vm/vm_param.h"
|
|
#include "vm/pmap.h"
|
|
#include "vm/vm_map.h"
|
|
#include "vm/vm_user.h"
|
|
#include "vm/vm_page.h"
|
|
#include "sys/vmmeter.h"
|
|
|
|
#include "machine/trap.h"
|
|
|
|
#ifdef __GNUC__
|
|
|
|
/*
|
|
 * The "r" constraint could be "rm" except for fatal bugs in gas. As usual,
|
|
* we omit the size from the mov instruction to avoid nonfatal bugs in gas.
|
|
*/
|
|
/* Evaluates to the current %gs selector, read into a u_short temporary. */
#define read_gs() ({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
|
|
/*
 * Load %gs from newgs (converted to u_short).  The argument is
 * parenthesized so that an expression argument is cast as a whole.
 */
#define write_gs(newgs) __asm("mov %0,%%gs" : : "r" ((u_short) (newgs)))
|
|
|
|
#else /* not __GNUC__ */
|
|
|
|
u_short read_gs __P((void));
|
|
void write_gs __P((/* promoted u_short */ int gs));
|
|
|
|
#endif /* __GNUC__ */
|
|
|
|
extern int grow(struct proc *,int);
|
|
|
|
struct sysent sysent[];
|
|
int nsysent;
|
|
extern short cpl;
|
|
extern short netmask, ttymask, biomask;
|
|
|
|
/*
 * Human-readable descriptions of the trap types, indexed by trap number
 * (the T_* value with T_USER masked off).  Unused slots hold "".
 */
char *trap_msg[] = {
	"reserved addressing fault",		/*  0 T_RESADFLT */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"reserved operand fault",		/*  2 T_RESOPFLT */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"system call trap",			/*  5 T_SYSCALL */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"segmentation (limit) fault",		/*  8 T_SEGFLT */
	"protection fault",			/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"page table fault",			/* 13 T_TABLEFLT */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"kernel stack pointer not valid",	/* 15 T_KSPNOTVAL */
	"bus error",				/* 16 T_BUSERR */
	"kernel debugger fault",		/* 17 T_KDBTRAP */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};

/*
 * Highest valid index into trap_msg[].  Derived from the table itself so
 * the bound cannot drift when entries are added; kept as an int so that
 * comparisons against a signed trap number stay signed.
 */
#define MAX_TRAP_MSG ((int)(sizeof(trap_msg) / sizeof(trap_msg[0])) - 1)
|
|
|
|
/*
 * Yields the pd_v field of the PTD entry selected by v: v is shifted
 * right by PD_SHIFT and masked to a 10-bit index.
 * NOTE(review): presumably the "valid" bit of the page directory entry
 * covering virtual address v -- confirm against the pmap headers.
 */
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)
|
|
|
|
/*
|
|
* trap(frame):
|
|
* Exception, fault, and trap interface to BSD kernel. This
|
|
* common code is called from assembly language IDT gate entry
|
|
* routines that prepare a suitable stack frame, and restore this
|
|
* frame after the exception has been processed. Note that the
|
|
* effect is as if the arguments were passed call by reference.
|
|
*/
|
|
|
|
/*ARGSUSED*/
|
|
void
|
|
trap(frame)
|
|
struct trapframe frame;
|
|
{
|
|
register int i;
|
|
register struct proc *p = curproc;
|
|
struct timeval syst;
|
|
int ucode, type, code, eva, fault_type;
|
|
|
|
frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */
|
|
type = frame.tf_trapno;
|
|
#if NDDB > 0
|
|
if (curpcb && curpcb->pcb_onfault) {
|
|
if (frame.tf_trapno == T_BPTFLT
|
|
|| frame.tf_trapno == T_TRCTRAP)
|
|
if (kdb_trap (type, 0, &frame))
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
if (curpcb == 0 || curproc == 0)
|
|
goto skiptoswitch;
|
|
if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
|
|
extern int _udatasel;
|
|
|
|
if (read_gs() != (u_short) _udatasel)
|
|
/*
|
|
* Some user has corrupted %gs but we depend on it in
|
|
* copyout() etc. Fix it up and retry.
|
|
*
|
|
* (We don't preserve %fs or %gs, so users can change
|
|
* them to either _ucodesel, _udatasel or a not-present
|
|
* selector, possibly ORed with 0 to 3, making them
|
|
* volatile for other users. Not preserving them saves
|
|
* time and doesn't lose functionality or open security
|
|
* holes.)
|
|
*/
|
|
write_gs(_udatasel);
|
|
else
|
|
copyfault:
|
|
frame.tf_eip = (int)curpcb->pcb_onfault;
|
|
return;
|
|
}
|
|
|
|
syst = p->p_stime;
|
|
if (ISPL(frame.tf_cs) == SEL_UPL) {
|
|
type |= T_USER;
|
|
p->p_regs = (int *)&frame;
|
|
}
|
|
|
|
skiptoswitch:
|
|
ucode=0;
|
|
eva = rcr2();
|
|
code = frame.tf_err;
|
|
|
|
if ((type & ~T_USER) == T_PAGEFLT)
|
|
goto pfault;
|
|
|
|
switch (type) {
|
|
case T_SEGNPFLT|T_USER:
|
|
case T_STKFLT|T_USER:
|
|
case T_PROTFLT|T_USER: /* protection fault */
|
|
ucode = code + BUS_SEGM_FAULT ;
|
|
i = SIGBUS;
|
|
break;
|
|
|
|
case T_PRIVINFLT|T_USER: /* privileged instruction fault */
|
|
case T_RESADFLT|T_USER: /* reserved addressing fault */
|
|
case T_RESOPFLT|T_USER: /* reserved operand fault */
|
|
case T_FPOPFLT|T_USER: /* coprocessor operand fault */
|
|
ucode = type &~ T_USER;
|
|
i = SIGILL;
|
|
break;
|
|
|
|
case T_ASTFLT|T_USER: /* Allow process switch */
|
|
astoff();
|
|
cnt.v_soft++;
|
|
if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) {
|
|
addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
|
|
p->p_flag &= ~SOWEUPC;
|
|
}
|
|
goto out;
|
|
|
|
case T_DNA|T_USER:
|
|
#if NNPX > 0
|
|
/* if a transparent fault (due to context switch "late") */
|
|
if (npxdna()) return;
|
|
#endif /* NNPX > 0 */
|
|
#ifdef MATH_EMULATE
|
|
i = math_emulate(&frame);
|
|
if (i == 0) return;
|
|
#else /* MATH_EMULTATE */
|
|
panic("trap: math emulation necessary!");
|
|
#endif /* MATH_EMULTATE */
|
|
ucode = FPE_FPU_NP_TRAP;
|
|
break;
|
|
|
|
case T_BOUND|T_USER:
|
|
ucode = FPE_SUBRNG_TRAP;
|
|
i = SIGFPE;
|
|
break;
|
|
|
|
case T_OFLOW|T_USER:
|
|
ucode = FPE_INTOVF_TRAP;
|
|
i = SIGFPE;
|
|
break;
|
|
|
|
case T_DIVIDE|T_USER:
|
|
ucode = FPE_INTDIV_TRAP;
|
|
i = SIGFPE;
|
|
break;
|
|
|
|
case T_ARITHTRAP|T_USER:
|
|
ucode = code;
|
|
i = SIGFPE;
|
|
break;
|
|
|
|
pfault:
|
|
case T_PAGEFLT: /* allow page faults in kernel mode */
|
|
case T_PAGEFLT|T_USER: /* page fault */
|
|
{
|
|
vm_offset_t va;
|
|
struct vmspace *vm;
|
|
vm_map_t map = 0;
|
|
int rv = 0, oldflags;
|
|
vm_prot_t ftype;
|
|
unsigned nss, v;
|
|
extern vm_map_t kernel_map;
|
|
|
|
va = trunc_page((vm_offset_t)eva);
|
|
|
|
/*
|
|
* Don't allow user-mode faults in kernel address space
|
|
*/
|
|
if ((type == (T_PAGEFLT|T_USER)) && (va >= KERNBASE)) {
|
|
goto nogo;
|
|
}
|
|
|
|
if ((p == 0) || (type == T_PAGEFLT && va >= KERNBASE)) {
|
|
vm = 0;
|
|
map = kernel_map;
|
|
} else {
|
|
vm = p->p_vmspace;
|
|
map = &vm->vm_map;
|
|
}
|
|
|
|
if (code & PGEX_W)
|
|
ftype = VM_PROT_READ | VM_PROT_WRITE;
|
|
else
|
|
ftype = VM_PROT_READ;
|
|
|
|
oldflags = p->p_flag;
|
|
if (map != kernel_map) {
|
|
vm_offset_t pa;
|
|
vm_offset_t v = (vm_offset_t) vtopte(va);
|
|
|
|
/*
|
|
* Keep swapout from messing with us during this
|
|
* critical time.
|
|
*/
|
|
p->p_flag |= SLOCK;
|
|
|
|
/*
|
|
* Grow the stack if necessary
|
|
*/
|
|
if ((caddr_t)va > vm->vm_maxsaddr
|
|
&& (caddr_t)va < (caddr_t)USRSTACK) {
|
|
if (!grow(p, va)) {
|
|
rv = KERN_FAILURE;
|
|
p->p_flag &= ~SLOCK;
|
|
p->p_flag |= (oldflags & SLOCK);
|
|
goto nogo;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check if page table is mapped, if not,
|
|
* fault it first
|
|
*/
|
|
|
|
/* Fault the pte only if needed: */
|
|
*(volatile char *)v += 0;
|
|
|
|
vm_page_wire(pmap_pte_vm_page(vm_map_pmap(map),v));
|
|
|
|
/* Fault in the user page: */
|
|
rv = vm_fault(map, va, ftype, FALSE);
|
|
|
|
vm_page_unwire(pmap_pte_vm_page(vm_map_pmap(map),v));
|
|
|
|
p->p_flag &= ~SLOCK;
|
|
p->p_flag |= (oldflags & SLOCK);
|
|
} else {
|
|
/*
|
|
* Since we know that kernel virtual address addresses
|
|
* always have pte pages mapped, we just have to fault
|
|
* the page.
|
|
*/
|
|
rv = vm_fault(map, va, ftype, FALSE);
|
|
}
|
|
|
|
if (rv == KERN_SUCCESS) {
|
|
if (type == T_PAGEFLT)
|
|
return;
|
|
goto out;
|
|
}
|
|
nogo:
|
|
if (type == T_PAGEFLT) {
|
|
if (curpcb->pcb_onfault)
|
|
goto copyfault;
|
|
|
|
goto we_re_toast;
|
|
}
|
|
i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
|
|
|
|
/* kludge to pass faulting virtual address to sendsig */
|
|
ucode = type &~ T_USER;
|
|
frame.tf_err = eva;
|
|
|
|
break;
|
|
}
|
|
|
|
#if NDDB == 0
|
|
case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */
|
|
frame.tf_eflags &= ~PSL_T;
|
|
|
|
/* Q: how do we turn it on again? */
|
|
return;
|
|
#endif
|
|
|
|
case T_BPTFLT|T_USER: /* bpt instruction fault */
|
|
case T_TRCTRAP|T_USER: /* trace trap */
|
|
frame.tf_eflags &= ~PSL_T;
|
|
i = SIGTRAP;
|
|
break;
|
|
|
|
#if NISA > 0
|
|
case T_NMI:
|
|
case T_NMI|T_USER:
|
|
#if NDDB > 0
|
|
/* NMI can be hooked up to a pushbutton for debugging */
|
|
printf ("NMI ... going to debugger\n");
|
|
if (kdb_trap (type, 0, &frame))
|
|
return;
|
|
#endif
|
|
/* machine/parity/power fail/"kitchen sink" faults */
|
|
if (isa_nmi(code) == 0) return;
|
|
/* FALL THROUGH */
|
|
#endif
|
|
default:
|
|
we_re_toast:
|
|
|
|
fault_type = type & ~T_USER;
|
|
if (fault_type <= MAX_TRAP_MSG)
|
|
printf("\n\nFatal trap %d: %s while in %s mode\n",
|
|
fault_type, trap_msg[fault_type],
|
|
ISPL(frame.tf_cs) == SEL_UPL ? "user" : "kernel");
|
|
if (fault_type == T_PAGEFLT) {
|
|
printf("fault virtual address = 0x%x\n", eva);
|
|
printf("fault code = %s %s, %s\n",
|
|
code & PGEX_U ? "user" : "supervisor",
|
|
code & PGEX_W ? "write" : "read",
|
|
code & PGEX_P ? "protection violation" : "page not present");
|
|
}
|
|
printf("instruction pointer = 0x%x\n", frame.tf_eip);
|
|
printf("processor eflags = ");
|
|
if (frame.tf_eflags & EFL_TF)
|
|
printf("trace/trap, ");
|
|
if (frame.tf_eflags & EFL_IF)
|
|
printf("interrupt enabled, ");
|
|
if (frame.tf_eflags & EFL_NT)
|
|
printf("nested task, ");
|
|
if (frame.tf_eflags & EFL_RF)
|
|
printf("resume, ");
|
|
if (frame.tf_eflags & EFL_VM)
|
|
printf("vm86, ");
|
|
printf("IOPL = %d\n", (frame.tf_eflags & EFL_IOPL) >> 12);
|
|
printf("current process = ");
|
|
if (curproc) {
|
|
printf("%d (%s)\n",
|
|
curproc->p_pid, curproc->p_comm ?
|
|
curproc->p_comm : "");
|
|
} else {
|
|
printf("Idle\n");
|
|
}
|
|
printf("interrupt mask = ");
|
|
if ((cpl & netmask) == netmask)
|
|
printf("net ");
|
|
if ((cpl & ttymask) == ttymask)
|
|
printf("tty ");
|
|
if ((cpl & biomask) == biomask)
|
|
printf("bio ");
|
|
if (cpl == 0)
|
|
printf("none");
|
|
printf("\n");
|
|
|
|
#ifdef KDB
|
|
if (kdb_trap(&psl))
|
|
return;
|
|
#endif
|
|
#if NDDB > 0
|
|
if (kdb_trap (type, 0, &frame))
|
|
return;
|
|
#endif
|
|
if (fault_type <= MAX_TRAP_MSG)
|
|
panic(trap_msg[fault_type]);
|
|
else
|
|
panic("unknown/reserved trap");
|
|
|
|
/* NOTREACHED */
|
|
}
|
|
|
|
trapsignal(p, i, ucode);
|
|
if ((type & T_USER) == 0)
|
|
return;
|
|
out:
|
|
while (i = CURSIG(p))
|
|
psig(i);
|
|
p->p_pri = p->p_usrpri;
|
|
if (want_resched) {
|
|
int s;
|
|
/*
|
|
* Since we are curproc, clock will normally just change
|
|
* our priority without moving us from one queue to another
|
|
* (since the running process is not on a queue.)
|
|
* If that happened after we setrq ourselves but before we
|
|
* swtch()'ed, we might not be on the queue indicated by
|
|
* our priority.
|
|
*/
|
|
s = splclock();
|
|
setrq(p);
|
|
p->p_stats->p_ru.ru_nivcsw++;
|
|
swtch();
|
|
splx(s);
|
|
while (i = CURSIG(p))
|
|
psig(i);
|
|
}
|
|
if (p->p_stats->p_prof.pr_scale) {
|
|
int ticks;
|
|
struct timeval *tv = &p->p_stime;
|
|
|
|
ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
|
|
(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
|
|
if (ticks) {
|
|
#ifdef PROFTIMER
|
|
extern int profscale;
|
|
addupc(frame.tf_eip, &p->p_stats->p_prof,
|
|
ticks * profscale);
|
|
#else
|
|
addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
|
|
#endif
|
|
}
|
|
}
|
|
curpri = p->p_pri;
|
|
}
|
|
|
|
/*
|
|
* Compensate for 386 brain damage (missing URKR).
|
|
* This is a little simpler than the pagefault handler in trap() because
|
|
* it the page tables have already been faulted in and high addresses
|
|
* are thrown out early for other reasons.
|
|
*/
|
|
int trapwrite(addr)
|
|
unsigned addr;
|
|
{
|
|
unsigned nss;
|
|
struct proc *p;
|
|
vm_offset_t va, v;
|
|
struct vmspace *vm;
|
|
int oldflags;
|
|
int rv;
|
|
|
|
va = trunc_page((vm_offset_t)addr);
|
|
/*
|
|
* XXX - MAX is END. Changed > to >= for temp. fix.
|
|
*/
|
|
if (va >= VM_MAXUSER_ADDRESS)
|
|
return (1);
|
|
|
|
p = curproc;
|
|
vm = p->p_vmspace;
|
|
|
|
oldflags = p->p_flag;
|
|
p->p_flag |= SLOCK;
|
|
|
|
if ((caddr_t)va >= vm->vm_maxsaddr
|
|
&& (caddr_t)va < (caddr_t)USRSTACK) {
|
|
if (!grow(p, va)) {
|
|
p->p_flag &= ~SLOCK;
|
|
p->p_flag |= (oldflags & SLOCK);
|
|
return (1);
|
|
}
|
|
}
|
|
|
|
v = trunc_page(vtopte(va));
|
|
|
|
/*
|
|
* wire the pte page
|
|
*/
|
|
if (va < USRSTACK) {
|
|
vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
|
|
}
|
|
|
|
/*
|
|
* fault the data page
|
|
*/
|
|
rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);
|
|
|
|
/*
|
|
* unwire the pte page
|
|
*/
|
|
if (va < USRSTACK) {
|
|
vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
|
|
}
|
|
|
|
p->p_flag &= ~SLOCK;
|
|
p->p_flag |= (oldflags & SLOCK);
|
|
|
|
if (rv != KERN_SUCCESS)
|
|
return 1;
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* syscall(frame):
|
|
* System call request from POSIX system call gate interface to kernel.
|
|
* Like trap(), argument is call by reference.
|
|
*/
|
|
/*ARGSUSED*/
|
|
void
|
|
syscall(frame)
|
|
volatile struct trapframe frame;
|
|
{
|
|
register int *locr0 = ((int *)&frame);
|
|
register caddr_t params;
|
|
register int i;
|
|
register struct sysent *callp;
|
|
register struct proc *p = curproc;
|
|
struct timeval syst;
|
|
int error, opc;
|
|
int args[8], rval[2];
|
|
int code;
|
|
|
|
#ifdef lint
|
|
r0 = 0; r0 = r0; r1 = 0; r1 = r1;
|
|
#endif
|
|
syst = p->p_stime;
|
|
if (ISPL(frame.tf_cs) != SEL_UPL)
|
|
panic("syscall");
|
|
|
|
code = frame.tf_eax;
|
|
p->p_regs = (int *)&frame;
|
|
params = (caddr_t)frame.tf_esp + sizeof (int) ;
|
|
|
|
/*
|
|
* Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
|
|
*/
|
|
opc = frame.tf_eip - 7;
|
|
if (code == 0) {
|
|
code = fuword(params);
|
|
params += sizeof (int);
|
|
}
|
|
if (code < 0 || code >= nsysent)
|
|
callp = &sysent[0];
|
|
else
|
|
callp = &sysent[code];
|
|
|
|
if ((i = callp->sy_narg * sizeof (int)) &&
|
|
(error = copyin(params, (caddr_t)args, (u_int)i))) {
|
|
frame.tf_eax = error;
|
|
frame.tf_eflags |= PSL_C; /* carry bit */
|
|
#ifdef KTRACE
|
|
if (KTRPOINT(p, KTR_SYSCALL))
|
|
ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
|
|
#endif
|
|
goto done;
|
|
}
|
|
#ifdef KTRACE
|
|
if (KTRPOINT(p, KTR_SYSCALL))
|
|
ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
|
|
#endif
|
|
rval[0] = 0;
|
|
rval[1] = frame.tf_edx;
|
|
/*pg("%d. s %d\n", p->p_pid, code);*/
|
|
error = (*callp->sy_call)(p, args, rval);
|
|
if (error == ERESTART)
|
|
frame.tf_eip = opc;
|
|
else if (error != EJUSTRETURN) {
|
|
if (error) {
|
|
/*pg("error %d", error);*/
|
|
frame.tf_eax = error;
|
|
frame.tf_eflags |= PSL_C; /* carry bit */
|
|
} else {
|
|
frame.tf_eax = rval[0];
|
|
frame.tf_edx = rval[1];
|
|
frame.tf_eflags &= ~PSL_C; /* carry bit */
|
|
}
|
|
}
|
|
/* else if (error == EJUSTRETURN) */
|
|
/* nothing to do */
|
|
done:
|
|
/*
|
|
* Reinitialize proc pointer `p' as it may be different
|
|
* if this is a child returning from fork syscall.
|
|
*/
|
|
p = curproc;
|
|
while (i = CURSIG(p))
|
|
psig(i);
|
|
p->p_pri = p->p_usrpri;
|
|
if (want_resched) {
|
|
int s;
|
|
/*
|
|
* Since we are curproc, clock will normally just change
|
|
* our priority without moving us from one queue to another
|
|
* (since the running process is not on a queue.)
|
|
* If that happened after we setrq ourselves but before we
|
|
* swtch()'ed, we might not be on the queue indicated by
|
|
* our priority.
|
|
*/
|
|
s = splclock();
|
|
setrq(p);
|
|
p->p_stats->p_ru.ru_nivcsw++;
|
|
swtch();
|
|
splx(s);
|
|
while (i = CURSIG(p))
|
|
psig(i);
|
|
}
|
|
if (p->p_stats->p_prof.pr_scale) {
|
|
int ticks;
|
|
struct timeval *tv = &p->p_stime;
|
|
|
|
ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
|
|
(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
|
|
if (ticks) {
|
|
#ifdef PROFTIMER
|
|
extern int profscale;
|
|
addupc(frame.tf_eip, &p->p_stats->p_prof,
|
|
ticks * profscale);
|
|
#else
|
|
addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
|
|
#endif
|
|
}
|
|
}
|
|
curpri = p->p_pri;
|
|
#ifdef KTRACE
|
|
if (KTRPOINT(p, KTR_SYSRET))
|
|
ktrsysret(p->p_tracep, code, error, rval[0]);
|
|
#endif
|
|
#ifdef DIAGNOSTICx
|
|
{ extern int _udatasel, _ucodesel;
|
|
if (frame.tf_ss != _udatasel)
|
|
printf("ss %x call %d\n", frame.tf_ss, code);
|
|
if ((frame.tf_cs&0xffff) != _ucodesel)
|
|
printf("cs %x call %d\n", frame.tf_cs, code);
|
|
if (frame.tf_eip > VM_MAXUSER_ADDRESS) {
|
|
printf("eip %x call %d\n", frame.tf_eip, code);
|
|
frame.tf_eip = 0;
|
|
}
|
|
}
|
|
#endif
|
|
}
|