diff --git a/sys/compat/svr4/svr4_stream.c b/sys/compat/svr4/svr4_stream.c index 3a2e2750c377..ddf082b5b204 100644 --- a/sys/compat/svr4/svr4_stream.c +++ b/sys/compat/svr4/svr4_stream.c @@ -1481,8 +1481,6 @@ svr4_do_putmsg(td, uap, fp) uap->dat, uap->flags); #endif /* DEBUG_SVR4 */ - FILE_LOCK_ASSERT(fp, MA_NOTOWNED); - if (uap->ctl != NULL) { if ((error = copyin(uap->ctl, &ctl, sizeof(ctl))) != 0) { #ifdef DEBUG_SVR4 @@ -1656,8 +1654,6 @@ svr4_do_getmsg(td, uap, fp) error = 0; afp = NULL; - FILE_LOCK_ASSERT(fp, MA_NOTOWNED); - memset(&sc, 0, sizeof(sc)); #ifdef DEBUG_SVR4 diff --git a/sys/dev/streams/streams.c b/sys/dev/streams/streams.c index dc67fc53811c..55df9e5913eb 100644 --- a/sys/dev/streams/streams.c +++ b/sys/dev/streams/streams.c @@ -251,12 +251,7 @@ streamsopen(struct cdev *dev, int oflags, int devtype, struct thread *td) return error; } - FILE_LOCK(fp); - fp->f_data = so; - fp->f_flag = FREAD|FWRITE; - fp->f_ops = &svr4_netops; - fp->f_type = DTYPE_SOCKET; - FILE_UNLOCK(fp); + finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &svr4_netops); /* * Allocate a stream structure and attach it to this socket. diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index ba0131814478..269f2c3a72a1 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -800,12 +800,9 @@ devfs_open(struct vop_open_args *ap) if(fp == NULL) return (error); #endif - FILE_LOCK(fp); KASSERT(fp->f_ops == &badfileops, ("Could not vnode bypass device on fdops %p", fp->f_ops)); - fp->f_data = dev; - fp->f_ops = &devfs_ops_f; - FILE_UNLOCK(fp); + finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f); return (error); } diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c index 2d3254f968dd..78718dbfed60 100644 --- a/sys/fs/fifofs/fifo_vnops.c +++ b/sys/fs/fifofs/fifo_vnops.c @@ -294,11 +294,8 @@ fail1: } mtx_unlock(&fifo_mtx); KASSERT(fp != NULL, ("can't fifo/vnode bypass")); - FILE_LOCK(fp); KASSERT(fp->f_ops == &badfileops, ("not badfileops in fifo_open")); - fp->f_data = fip; - fp->f_ops = &fifo_ops_f; - FILE_UNLOCK(fp); + finit(fp, fp->f_flag, DTYPE_FIFO, fip, &fifo_ops_f); return (0); } diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 274522f14c21..070fac7516df 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -95,7 +95,6 @@ static int do_dup(struct thread *td, enum dup_type type, int old, int new, static int fd_first_free(struct filedesc *, int, int); static int fd_last_used(struct filedesc *, int, int); static void fdgrowtable(struct filedesc *, int); -static int fdrop_locked(struct file *fp, struct thread *td); static void fdunused(struct filedesc *fdp, int fd); static void fdused(struct filedesc *fdp, int fd); @@ -137,9 +136,7 @@ struct filedesc0 { /* * Descriptor management. */ -struct filelist filehead; /* head of list of open files */ -int openfiles; /* actual number of open files */ -struct sx filelist_lock; /* sx to protect filelist */ +volatile int openfiles; /* actual number of open files */ struct mtx sigio_lock; /* mtx to protect pointers to sigio */ void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); @@ -428,9 +425,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - FILE_LOCK(fp); td->td_retval[0] = OFLAGS(fp->f_flag); - FILE_UNLOCK(fp); FILEDESC_SUNLOCK(fdp); break; @@ -441,12 +436,13 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - FILE_LOCK(fp); - fhold_locked(fp); - fp->f_flag &= ~FCNTLFLAGS; - fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; - FILE_UNLOCK(fp); + fhold(fp); FILEDESC_SUNLOCK(fdp); + do { + tmp = flg = fp->f_flag; + tmp &= ~FCNTLFLAGS; + tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS; + } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); if (error) { @@ -459,9 +455,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) fdrop(fp, td); break; } - FILE_LOCK(fp); - fp->f_flag &= ~FNONBLOCK; - FILE_UNLOCK(fp); + atomic_clear_int(&fp->f_flag, FNONBLOCK); tmp = 0; (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); fdrop(fp, td); @@ -1359,15 +1353,13 @@ int falloc(struct thread *td, struct file **resultfp, int *resultfd) { struct proc *p = td->td_proc; - struct file *fp, *fq; + struct file *fp; int error, i; int maxuserfiles = maxfiles - (maxfiles / 20); static struct timeval lastfail; static int curfail; fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); - sx_xlock(&filelist_lock); - if ((openfiles >= maxuserfiles && priv_check(td, PRIV_MAXFILES) != 0) || openfiles >= maxfiles) { @@ -1375,18 +1367,16 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd) printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n", td->td_ucred->cr_ruid); } - sx_xunlock(&filelist_lock); uma_zfree(file_zone, fp); return (ENFILE); } - openfiles++; + atomic_add_int(&openfiles, 1); /* * If the process has file descriptor zero open, add the new file * descriptor to the list of open files at that point, otherwise * put it at the front of the list of open files. */ - fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep); fp->f_count = 1; if (resultfp) fp->f_count++; @@ -1395,12 +1385,6 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd) fp->f_data = NULL; fp->f_vnode = NULL; FILEDESC_XLOCK(p->p_fd); - if ((fq = p->p_fd->fd_ofiles[0])) { - LIST_INSERT_AFTER(fq, fp, f_list); - } else { - LIST_INSERT_HEAD(&filehead, fp, f_list); - } - sx_xunlock(&filelist_lock); if ((error = fdalloc(td, 0, &i))) { FILEDESC_XUNLOCK(p->p_fd); fdrop(fp, td); @@ -1961,6 +1945,23 @@ closef(struct file *fp, struct thread *td) return (fdrop(fp, td)); } +/* + * Initialize the file pointer with the specified properties. + * + * The ops are set with release semantics to be certain that the flags, type, + * and data are visible when ops is. This is to prevent ops methods from being + * called with bad data. + */ +void +finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) +{ + fp->f_data = data; + fp->f_flag = flag; + fp->f_type = type; + atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); +} + + /* * Extract the file pointer associated with the specified descriptor for the * current user process. @@ -2135,54 +2136,20 @@ fputsock(struct socket *so) sorele(so); } -int -fdrop(struct file *fp, struct thread *td) -{ - - FILE_LOCK(fp); - return (fdrop_locked(fp, td)); -} - /* - * Drop reference on struct file passed in, may call closef if the - * reference hits zero. - * Expects struct file locked, and will unlock it. + * Handle the last reference to a file being closed. */ -static int -fdrop_locked(struct file *fp, struct thread *td) +int +_fdrop(struct file *fp, struct thread *td) { int error; - FILE_LOCK_ASSERT(fp, MA_OWNED); - - if (--fp->f_count > 0) { - FILE_UNLOCK(fp); - return (0); - } - - /* - * We might have just dropped the last reference to a file - * object that is for a UNIX domain socket whose message - * buffers are being examined in unp_gc(). If that is the - * case, FWAIT will be set in f_gcflag and we need to wait for - * unp_gc() to finish its scan. - */ - while (fp->f_gcflag & FWAIT) - msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0); - - /* We have the last ref so we can proceed without the file lock. */ - FILE_UNLOCK(fp); - if (fp->f_count < 0) - panic("fdrop: count < 0"); + error = 0; + if (fp->f_count != 0) + panic("fdrop: count %d", fp->f_count); if (fp->f_ops != &badfileops) error = fo_close(fp, td); - else - error = 0; - - sx_xlock(&filelist_lock); - LIST_REMOVE(fp, f_list); - openfiles--; - sx_xunlock(&filelist_lock); + atomic_subtract_int(&openfiles, 1); crfree(fp->f_cred); uma_zfree(file_zone, fp); @@ -2225,9 +2192,7 @@ flock(struct thread *td, struct flock_args *uap) lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; - FILE_LOCK(fp); - fp->f_flag &= ~FHASLOCK; - FILE_UNLOCK(fp); + atomic_clear_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); goto done2; } @@ -2239,9 +2204,7 @@ flock(struct thread *td, struct flock_args *uap) error = EBADF; goto done2; } - FILE_LOCK(fp); - fp->f_flag |= FHASLOCK; - FILE_UNLOCK(fp); + atomic_set_int(&fp->f_flag, FHASLOCK); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); done2: @@ -2286,9 +2249,7 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ - FILE_LOCK(wfp); if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { - FILE_UNLOCK(wfp); FILEDESC_XUNLOCK(fdp); return (EACCES); } @@ -2297,8 +2258,7 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; if (fp == NULL) fdused(fdp, indx); - fhold_locked(wfp); - FILE_UNLOCK(wfp); + fhold(wfp); FILEDESC_XUNLOCK(fdp); if (fp != NULL) /* @@ -2419,29 +2379,23 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) struct proc *p; int error, n; - /* - * Note: because the number of file descriptors is calculated - * in different ways for sizing vs returning the data, - * there is information leakage from the first loop. However, - * it is of a similar order of magnitude to the leakage from - * global system statistics such as kern.openfiles. - */ error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); if (req->oldptr == NULL) { - n = 16; /* A slight overestimate. */ - sx_slock(&filelist_lock); - LIST_FOREACH(fp, &filehead, f_list) { - /* - * We should grab the lock, but this is an - * estimate, so does it really matter? - */ - /* mtx_lock(fp->f_mtxp); */ - n += fp->f_count; - /* mtx_unlock(f->f_mtxp); */ + n = 0; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_state == PRS_NEW) + continue; + fdp = fdhold(p); + if (fdp == NULL) + continue; + /* overestimates sparse tables. */ + n += fdp->fd_lastfile; + fddrop(fdp); } - sx_sunlock(&filelist_lock); + sx_sunlock(&allproc_lock); return (SYSCTL_OUT(req, 0, n * sizeof(xf))); } error = 0; @@ -2472,7 +2426,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) xf.xf_vnode = fp->f_vnode; xf.xf_type = fp->f_type; xf.xf_count = fp->f_count; - xf.xf_msgcount = fp->f_msgcount; + xf.xf_msgcount = 0; xf.xf_offset = fp->f_offset; xf.xf_flag = fp->f_flag; error = SYSCTL_OUT(req, &xf, sizeof(xf)); @@ -2523,7 +2477,6 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) continue; bzero(kif, sizeof(*kif)); kif->kf_structsize = sizeof(*kif); - FILE_LOCK(fp); vp = NULL; so = NULL; kif->kf_fd = i; @@ -2531,7 +2484,6 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) case DTYPE_VNODE: kif->kf_type = KF_TYPE_VNODE; vp = fp->f_vnode; - vref(vp); break; case DTYPE_SOCKET: @@ -2583,8 +2535,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) if (fp->f_flag & FHASLOCK) kif->kf_flags |= KF_FLAG_HASLOCK; kif->kf_offset = fp->f_offset; - FILE_UNLOCK(fp); if (vp != NULL) { + vref(vp); switch (vp->v_type) { case VNON: kif->kf_vnode_type = KF_VTYPE_VNON; @@ -2736,7 +2688,7 @@ db_print_file(struct file *fp, int header) p = file_to_first_proc(fp); db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, - fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode, + 0, fp->f_count, 0, fp->f_vnode, p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-"); } @@ -2754,13 +2706,24 @@ DB_SHOW_COMMAND(file, db_show_file) DB_SHOW_COMMAND(files, db_show_files) { + struct filedesc *fdp; struct file *fp; + struct proc *p; int header; + int n; header = 1; - LIST_FOREACH(fp, &filehead, f_list) { - db_print_file(fp, header); - header = 0; + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_state == PRS_NEW) + continue; + if ((fdp = p->p_fd) == NULL) + continue; + for (n = 0; n < fdp->fd_nfiles; ++n) { + if ((fp = fdp->fd_ofiles[n]) == NULL) + continue; + db_print_file(fp, header); + header = 0; + } } } #endif @@ -2772,7 +2735,7 @@ SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "Maximum number of files"); SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, - &openfiles, 0, "System-wide number of open files"); + __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files"); /* ARGSUSED*/ static void @@ -2781,7 +2744,6 @@ filelistinit(void *dummy) file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - sx_init(&filelist_lock, "filelist lock"); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); } diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 4d75822dbff2..b5d01d07621c 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -531,12 +531,7 @@ kqueue(struct thread *td, struct kqueue_args *uap) SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); FILEDESC_XUNLOCK(fdp); - FILE_LOCK(fp); - fp->f_flag = FREAD | FWRITE; - fp->f_type = DTYPE_KQUEUE; - fp->f_data = kq; - fp->f_ops = &kqueueops; - FILE_UNLOCK(fp); + finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); fdrop(fp, td); td->td_retval[0] = fd; @@ -990,24 +985,17 @@ kqueue_acquire(struct file *fp, struct kqueue **kqp) error = 0; - FILE_LOCK(fp); - do { - kq = fp->f_data; - if (fp->f_type != DTYPE_KQUEUE || kq == NULL) { - error = EBADF; - break; - } - *kqp = kq; - KQ_LOCK(kq); - if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { - KQ_UNLOCK(kq); - error = EBADF; - break; - } - kq->kq_refcnt++; + kq = fp->f_data; + if (fp->f_type != DTYPE_KQUEUE || kq == NULL) + return (EBADF); + *kqp = kq; + KQ_LOCK(kq); + if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { KQ_UNLOCK(kq); - } while (0); - FILE_UNLOCK(fp); + return (EBADF); + } + kq->kq_refcnt++; + KQ_UNLOCK(kq); return error; } diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 0800c08073e6..9c800f37d342 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -646,21 +646,17 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) FILEDESC_XUNLOCK(fdp); goto out; case FIONBIO: - FILE_LOCK(fp); if ((tmp = *(int *)data)) - fp->f_flag |= FNONBLOCK; + atomic_set_int(&fp->f_flag, FNONBLOCK); else - fp->f_flag &= ~FNONBLOCK; - FILE_UNLOCK(fp); + atomic_clear_int(&fp->f_flag, FNONBLOCK); data = (void *)&tmp; break; case FIOASYNC: - FILE_LOCK(fp); if ((tmp = *(int *)data)) - fp->f_flag |= FASYNC; + atomic_set_int(&fp->f_flag, FASYNC); else - fp->f_flag &= ~FASYNC; - FILE_UNLOCK(fp); + atomic_clear_int(&fp->f_flag, FASYNC); data = (void *)&tmp; break; } diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 262ef0c1fec8..27ecf80896da 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -363,12 +363,7 @@ pipe(td, uap) * to avoid races against processes which manage to dup() the read * side while we are blocked trying to allocate the write side. */ - FILE_LOCK(rf); - rf->f_flag = FREAD | FWRITE; - rf->f_type = DTYPE_PIPE; - rf->f_data = rpipe; - rf->f_ops = &pipeops; - FILE_UNLOCK(rf); + finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops); error = falloc(td, &wf, &fd); if (error) { fdclose(fdp, rf, td->td_retval[0], td); @@ -378,12 +373,7 @@ pipe(td, uap) return (error); } /* An extra reference on `wf' has been held for us by falloc(). */ - FILE_LOCK(wf); - wf->f_flag = FREAD | FWRITE; - wf->f_type = DTYPE_PIPE; - wf->f_data = wpipe; - wf->f_ops = &pipeops; - FILE_UNLOCK(wf); + finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops); fdrop(wf, td); td->td_retval[1] = fd; fdrop(rf, td); diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c index 1c5cadbc3311..8fe34bcba0a9 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -1999,12 +1999,8 @@ kmq_open(struct thread *td, struct kmq_open_args *uap) mqnode_addref(pn); sx_xunlock(&mqfs_data.mi_lock); - FILE_LOCK(fp); - fp->f_flag = (flags & (FREAD | FWRITE | O_NONBLOCK)); - fp->f_type = DTYPE_MQUEUE; - fp->f_data = pn; - fp->f_ops = &mqueueops; - FILE_UNLOCK(fp); + finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn, + &mqueueops); FILEDESC_XLOCK(fdp); if (fdp->fd_ofiles[fd] == fp) @@ -2097,6 +2093,7 @@ kmq_setattr(struct thread *td, struct kmq_setattr_args *uap) struct mqueue *mq; struct file *fp; struct mq_attr attr, oattr; + u_int oflag, flag; int error; if (uap->attr) { @@ -2112,13 +2109,15 @@ kmq_setattr(struct thread *td, struct kmq_setattr_args *uap) oattr.mq_maxmsg = mq->mq_maxmsg; oattr.mq_msgsize = mq->mq_msgsize; oattr.mq_curmsgs = mq->mq_curmsgs; - FILE_LOCK(fp); - oattr.mq_flags = (O_NONBLOCK & fp->f_flag); if (uap->attr) { - fp->f_flag &= ~O_NONBLOCK; - fp->f_flag |= (attr.mq_flags & O_NONBLOCK); - } - FILE_UNLOCK(fp); + do { + oflag = flag = fp->f_flag; + flag &= ~O_NONBLOCK; + flag |= (attr.mq_flags & O_NONBLOCK); + } while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0); + } else + oflag = fp->f_flag; + oattr.mq_flags = (O_NONBLOCK & oflag); fdrop(fp, td); if (uap->oattr) error = copyout(&oattr, uap->oattr, sizeof(oattr)); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index faf7f24a66cc..616afa0e5d65 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -180,12 +180,7 @@ socket(td, uap) if (error) { fdclose(fdp, fp, fd, td); } else { - FILE_LOCK(fp); - fp->f_data = so; /* already has ref count */ - fp->f_flag = FREAD|FWRITE; - fp->f_type = DTYPE_SOCKET; - fp->f_ops = &socketops; - FILE_UNLOCK(fp); + finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); td->td_retval[0] = fd; } fdrop(fp, td); @@ -423,12 +418,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name, if (pgid != 0) fsetown(pgid, &so->so_sigio); - FILE_LOCK(nfp); - nfp->f_data = so; /* nfp has ref count from falloc */ - nfp->f_flag = fflag; - nfp->f_type = DTYPE_SOCKET; - nfp->f_ops = &socketops; - FILE_UNLOCK(nfp); + finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); /* Sync socket nonblocking/async state with file flags */ tmp = fflag & FNONBLOCK; (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); @@ -640,16 +630,8 @@ socketpair(td, uap) if (error) goto free4; } - FILE_LOCK(fp1); - fp1->f_flag = FREAD|FWRITE; - fp1->f_type = DTYPE_SOCKET; - fp1->f_ops = &socketops; - FILE_UNLOCK(fp1); - FILE_LOCK(fp2); - fp2->f_flag = FREAD|FWRITE; - fp2->f_type = DTYPE_SOCKET; - fp2->f_ops = &socketops; - FILE_UNLOCK(fp2); + finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); + finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); so1 = so2 = NULL; error = copyout(sv, uap->rsv, 2 * sizeof (int)); if (error) @@ -2270,12 +2252,7 @@ sctp_peeloff(td, uap) so->so_qstate &= ~SQ_COMP; so->so_head = NULL; ACCEPT_UNLOCK(); - FILE_LOCK(nfp); - nfp->f_data = so; - nfp->f_flag = fflag; - nfp->f_type = DTYPE_SOCKET; - nfp->f_ops = &socketops; - FILE_UNLOCK(nfp); + finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); if (error) goto noconnection; diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 9fea71b4b626..1d6cc464da0f 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -233,10 +233,11 @@ static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *, int); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct file *)); -static void unp_mark(struct file *); static void unp_discard(struct file *); static void unp_freerights(struct file **, int); static int unp_internalize(struct mbuf **, struct thread *); +static void unp_internalize_fp(struct file *); +static void unp_externalize_fp(struct file *); static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); /* @@ -586,9 +587,9 @@ uipc_detach(struct socket *so) unp_drop(ref, ECONNRESET); UNP_PCB_UNLOCK(ref); } + local_unp_rights = unp_rights; UNP_GLOBAL_WUNLOCK(); unp->unp_socket->so_pcb = NULL; - local_unp_rights = unp_rights; saved_unp_addr = unp->unp_addr; unp->unp_addr = NULL; unp->unp_refcount--; @@ -1600,10 +1601,7 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) panic("unp_externalize fdalloc failed"); fp = *rp++; td->td_proc->p_fd->fd_ofiles[f] = fp; - FILE_LOCK(fp); - fp->f_msgcount--; - FILE_UNLOCK(fp); - unp_rights--; + unp_externalize_fp(fp); *fdp++ = f; } FILEDESC_XUNLOCK(td->td_proc->p_fd); @@ -1765,11 +1763,8 @@ unp_internalize(struct mbuf **controlp, struct thread *td) for (i = 0; i < oldfds; i++) { fp = fdescp->fd_ofiles[*fdp++]; *rp++ = fp; - FILE_LOCK(fp); - fp->f_count++; - fp->f_msgcount++; - FILE_UNLOCK(fp); - unp_rights++; + fhold(fp); + unp_internalize_fp(fp); } FILEDESC_SUNLOCK(fdescp); break; @@ -1860,230 +1855,198 @@ unp_addsockcred(struct thread *td, struct mbuf *control) return (m); } +static struct unpcb * +fptounp(struct file *fp) +{ + struct socket *so; + + if (fp->f_type != DTYPE_SOCKET) + return (NULL); + if ((so = fp->f_data) == NULL) + return (NULL); + if (so->so_proto->pr_domain != &localdomain) + return (NULL); + return sotounpcb(so); +} + +static void +unp_discard(struct file *fp) +{ + + unp_externalize_fp(fp); + (void) closef(fp, (struct thread *)NULL); +} + +static void +unp_internalize_fp(struct file *fp) +{ + struct unpcb *unp; + + UNP_GLOBAL_WLOCK(); + if ((unp = fptounp(fp)) != NULL) { + unp->unp_file = fp; + unp->unp_msgcount++; + } + unp_rights++; + UNP_GLOBAL_WUNLOCK(); +} + +static void +unp_externalize_fp(struct file *fp) +{ + struct unpcb *unp; + + UNP_GLOBAL_WLOCK(); + if ((unp = fptounp(fp)) != NULL) + unp->unp_msgcount--; + unp_rights--; + UNP_GLOBAL_WUNLOCK(); +} + /* * unp_defer indicates whether additional work has been defered for a future * pass through unp_gc(). It is thread local and does not require explicit * synchronization. */ -static int unp_defer; +static int unp_marked; +static int unp_unreachable; -static int unp_taskcount; -SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, ""); +static void +unp_accessable(struct file *fp) +{ + struct unpcb *unp; + + unp = fptounp(fp); + if (fp == NULL) + return; + if (unp->unp_gcflag & UNPGC_REF) + return; + unp->unp_gcflag &= ~UNPGC_DEAD; + unp->unp_gcflag |= UNPGC_REF; + unp_marked++; +} + +static void +unp_gc_process(struct unpcb *unp) +{ + struct socket *soa; + struct socket *so; + struct file *fp; + + /* Already processed. */ + if (unp->unp_gcflag & UNPGC_SCANNED) + return; + fp = unp->unp_file; + /* + * Check for a socket potentially in a cycle. It must be in a + * queue as indicated by msgcount, and this must equal the file + * reference count. Note that when msgcount is 0 the file is NULL. + */ + if (unp->unp_msgcount != 0 && fp->f_count != 0 && + fp->f_count == unp->unp_msgcount) { + unp->unp_gcflag |= UNPGC_DEAD; + unp_unreachable++; + return; + } + /* + * Mark all sockets we reference with RIGHTS. + */ + so = unp->unp_socket; + SOCKBUF_LOCK(&so->so_rcv); + unp_scan(so->so_rcv.sb_mb, unp_accessable); + SOCKBUF_UNLOCK(&so->so_rcv); + /* + * Mark all sockets in our accept queue. + */ + ACCEPT_LOCK(); + TAILQ_FOREACH(soa, &so->so_comp, so_list) { + SOCKBUF_LOCK(&soa->so_rcv); + unp_scan(soa->so_rcv.sb_mb, unp_accessable); + SOCKBUF_UNLOCK(&soa->so_rcv); + } + ACCEPT_UNLOCK(); + unp->unp_gcflag |= UNPGC_SCANNED; +} static int unp_recycled; SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, ""); +static int unp_taskcount; +SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, ""); + static void unp_gc(__unused void *arg, int pending) { - struct file *fp, *nextfp; - struct socket *so; - struct file **extra_ref, **fpp; - int nunref, i; - int nfiles_snap; - int nfiles_slack = 20; + struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL }; + struct unp_head **head; + struct file **unref; + struct unpcb *unp; + int i; unp_taskcount++; - unp_defer = 0; + UNP_GLOBAL_RLOCK(); /* - * Before going through all this, set all FDs to be NOT deferred and - * NOT externally accessible. + * First clear all gc flags from previous runs. + */ + for (head = heads; *head != NULL; head++) + LIST_FOREACH(unp, *head, unp_link) + unp->unp_gcflag &= ~(UNPGC_REF|UNPGC_DEAD); + /* + * Scan marking all reachable sockets with UNPGC_REF. Once a socket + * is reachable all of the sockets it references are reachable. + * Stop the scan once we do a complete loop without discovering + * a new reachable socket. */ - sx_slock(&filelist_lock); - LIST_FOREACH(fp, &filehead, f_list) - fp->f_gcflag &= ~(FMARK|FDEFER); do { - KASSERT(unp_defer >= 0, ("unp_gc: unp_defer %d", unp_defer)); - LIST_FOREACH(fp, &filehead, f_list) { - FILE_LOCK(fp); - /* - * If the file is not open, skip it -- could be a - * file in the process of being opened, or in the - * process of being closed. If the file is - * "closing", it may have been marked for deferred - * consideration. Clear the flag now if so. - */ - if (fp->f_count == 0) { - if (fp->f_gcflag & FDEFER) - unp_defer--; - fp->f_gcflag &= ~(FMARK|FDEFER); - FILE_UNLOCK(fp); - continue; - } - /* - * If we already marked it as 'defer' in a - * previous pass, then try to process it this - * time and un-mark it. - */ - if (fp->f_gcflag & FDEFER) { - fp->f_gcflag &= ~FDEFER; - unp_defer--; - } else { - /* - * If it's not deferred, then check if it's - * already marked.. if so skip it - */ - if (fp->f_gcflag & FMARK) { - FILE_UNLOCK(fp); - continue; - } - /* - * If all references are from messages in - * transit, then skip it. it's not externally - * accessible. - */ - if (fp->f_count == fp->f_msgcount) { - FILE_UNLOCK(fp); - continue; - } - /* - * If it got this far then it must be - * externally accessible. - */ - fp->f_gcflag |= FMARK; - } - /* - * Either it was deferred, or it is externally - * accessible and not already marked so. Now check - * if it is possibly one of OUR sockets. - */ - if (fp->f_type != DTYPE_SOCKET || - (so = fp->f_data) == NULL) { - FILE_UNLOCK(fp); - continue; - } - if (so->so_proto->pr_domain != &localdomain || - (so->so_proto->pr_flags & PR_RIGHTS) == 0) { - FILE_UNLOCK(fp); - continue; - } - - /* - * Tell any other threads that do a subsequent - * fdrop() that we are scanning the message - * buffers. - */ - fp->f_gcflag |= FWAIT; - FILE_UNLOCK(fp); - - /* - * So, Ok, it's one of our sockets and it IS - * externally accessible (or was deferred). Now we - * look to see if we hold any file descriptors in its - * message buffers. Follow those links and mark them - * as accessible too. - */ - SOCKBUF_LOCK(&so->so_rcv); - unp_scan(so->so_rcv.sb_mb, unp_mark); - SOCKBUF_UNLOCK(&so->so_rcv); - - /* - * Wake up any threads waiting in fdrop(). - */ - FILE_LOCK(fp); - fp->f_gcflag &= ~FWAIT; - wakeup(&fp->f_gcflag); - FILE_UNLOCK(fp); - } - } while (unp_defer); - sx_sunlock(&filelist_lock); + unp_unreachable = 0; + unp_marked = 0; + for (head = heads; *head != NULL; head++) + LIST_FOREACH(unp, *head, unp_link) + unp_gc_process(unp); + } while (unp_marked); + UNP_GLOBAL_RUNLOCK(); + if (unp_unreachable == 0) + return; /* - * XXXRW: The following comments need updating for a post-SMPng and - * deferred unp_gc() world, but are still generally accurate. - * - * We grab an extra reference to each of the file table entries that - * are not otherwise accessible and then free the rights that are - * stored in messages on them. - * - * The bug in the orginal code is a little tricky, so I'll describe - * what's wrong with it here. - * - * It is incorrect to simply unp_discard each entry for f_msgcount - * times -- consider the case of sockets A and B that contain - * references to each other. On a last close of some other socket, - * we trigger a gc since the number of outstanding rights (unp_rights) - * is non-zero. If during the sweep phase the gc code unp_discards, - * we end up doing a (full) closef on the descriptor. A closef on A - * results in the following chain. Closef calls soo_close, which - * calls soclose. Soclose calls first (through the switch - * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply - * returns because the previous instance had set unp_gcing, and we - * return all the way back to soclose, which marks the socket with - * SS_NOFDREF, and then calls sofree. Sofree calls sorflush to free - * up the rights that are queued in messages on the socket A, i.e., - * the reference on B. The sorflush calls via the dom_dispose switch - * unp_dispose, which unp_scans with unp_discard. This second - * instance of unp_discard just calls closef on B. - * - * Well, a similar chain occurs on B, resulting in a sorflush on B, - * which results in another closef on A. Unfortunately, A is already - * being closed, and the descriptor has already been marked with - * SS_NOFDREF, and soclose panics at this point. - * - * Here, we first take an extra reference to each inaccessible - * descriptor. Then, we call sorflush ourself, since we know it is a - * Unix domain socket anyhow. After we destroy all the rights - * carried in messages, we do a last closef to get rid of our extra - * reference. This is the last close, and the unp_detach etc will - * shut down the socket. - * - * 91/09/19, bsy@cs.cmu.edu + * Allocate space for a local list of dead unpcbs. */ -again: - nfiles_snap = openfiles + nfiles_slack; /* some slack */ - extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP, - M_WAITOK); - sx_slock(&filelist_lock); - if (nfiles_snap < openfiles) { - sx_sunlock(&filelist_lock); - free(extra_ref, M_TEMP); - nfiles_slack += 20; - goto again; - } - for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; - fp != NULL; fp = nextfp) { - nextfp = LIST_NEXT(fp, f_list); - FILE_LOCK(fp); - /* - * If it's not open, skip it - */ - if (fp->f_count == 0) { - FILE_UNLOCK(fp); - continue; - } - /* - * If all refs are from msgs, and it's not marked accessible - * then it must be referenced from some unreachable cycle of - * (shut-down) FDs, so include it in our list of FDs to - * remove. - */ - if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { - *fpp++ = fp; - nunref++; - fp->f_count++; - } - FILE_UNLOCK(fp); - } - sx_sunlock(&filelist_lock); + unref = malloc(unp_unreachable * sizeof(struct file *), + M_TEMP, M_WAITOK); /* - * For each FD on our hit list, do the following two things: + * Iterate looking for sockets which have been specifically marked + * as as unreachable and store them locally. */ - for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { - struct file *tfp = *fpp; - FILE_LOCK(tfp); - if (tfp->f_type == DTYPE_SOCKET && - tfp->f_data != NULL) { - FILE_UNLOCK(tfp); - sorflush(tfp->f_data); - } else { - FILE_UNLOCK(tfp); - } - } - for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { - closef(*fpp, (struct thread *) NULL); - unp_recycled++; - } - free(extra_ref, M_TEMP); + UNP_GLOBAL_RLOCK(); + for (i = 0, head = heads; *head != NULL; head++) + LIST_FOREACH(unp, *head, unp_link) + if (unp->unp_gcflag & UNPGC_DEAD) { + unref[i++] = unp->unp_file; + KASSERT(unp->unp_file != NULL, + ("unp_gc: Invalid unpcb.")); + KASSERT(i <= unp_unreachable, + ("unp_gc: incorrect unreachable count.")); + } + UNP_GLOBAL_RUNLOCK(); + /* + * All further operation is now done on a local list. We first ref + * all sockets to avoid closing them until all are flushed. + */ + for (i = 0; i < unp_unreachable; i++) + fhold(unref[i]); + /* + * Now flush all sockets, free'ing rights. This will free the + * struct files associated with these sockets but leave each socket + * with one remaining ref. + */ + for (i = 0; i < unp_unreachable; i++) + sorflush(unref[i]->f_data); + /* + * And finally release the sockets so they can be reclaimed. + */ + for (i = 0; i < unp_unreachable; i++) + fdrop(unref[i], NULL); + unp_recycled += unp_unreachable; + free(unref, M_TEMP); } void @@ -2143,31 +2106,6 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *)) } } -static void -unp_mark(struct file *fp) -{ - - /* XXXRW: Should probably assert file list lock here. */ - - if (fp->f_gcflag & FMARK) - return; - unp_defer++; - fp->f_gcflag |= (FMARK|FDEFER); -} - -static void -unp_discard(struct file *fp) -{ - - UNP_GLOBAL_WLOCK(); - FILE_LOCK(fp); - fp->f_msgcount--; - unp_rights--; - FILE_UNLOCK(fp); - UNP_GLOBAL_WUNLOCK(); - (void) closef(fp, (struct thread *)NULL); -} - #ifdef DDB static void db_print_indent(int indent) diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 20d722efb646..0e42ea393f3f 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1022,6 +1022,8 @@ kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, return (error); /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; + /* Set the flags early so the finit in devfs can pick them up. */ + fp->f_flag = flags & FMASK; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, td); td->td_dupfd = -1; /* XXX check for fdopen */ @@ -1067,16 +1069,16 @@ kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; - FILE_LOCK(fp); - fp->f_vnode = vp; - if (fp->f_data == NULL) - fp->f_data = vp; - fp->f_flag = flags & FMASK; - fp->f_seqcount = 1; - fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); - if (fp->f_ops == &badfileops) - fp->f_ops = &vnops; - FILE_UNLOCK(fp); + fp->f_vnode = vp; /* XXX Does devfs need this? */ + /* + * If the file wasn't claimed by devfs bind it to the normal + * vnode operations here. + */ + if (fp->f_ops == &badfileops) { + KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); + fp->f_seqcount = 1; + finit(fp, flags & FMASK, DTYPE_VNODE, vp, &vnops); + } VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { @@ -1093,7 +1095,7 @@ kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) goto bad; - fp->f_flag |= FHASLOCK; + atomic_set_int(&fp->f_flag, FHASLOCK); } if (flags & O_TRUNC) { if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) @@ -4179,14 +4181,8 @@ fhopen(td, uap) } /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; - - FILE_LOCK(nfp); nfp->f_vnode = vp; - nfp->f_data = vp; - nfp->f_flag = fmode & FMASK; - nfp->f_type = DTYPE_VNODE; - nfp->f_ops = &vnops; - FILE_UNLOCK(nfp); + finit(nfp, fmode & FMASK, DTYPE_VNODE, vp, &vnops); if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -4215,7 +4211,7 @@ fhopen(td, uap) goto out; } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - fp->f_flag |= FHASLOCK; + atomic_set_int(&fp->f_flag, FHASLOCK); } VOP_UNLOCK(vp, 0, td); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 50835747e78f..c7df6adef59f 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -488,10 +488,12 @@ vn_read(fp, uio, active_cred, flags, td) { struct vnode *vp; int error, ioflag; + struct mtx *mtxp; int vfslocked; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); + mtxp = NULL; vp = fp->f_vnode; ioflag = 0; if (fp->f_flag & FNONBLOCK) @@ -505,13 +507,15 @@ vn_read(fp, uio, active_cred, flags, td) * It is now protected by the FOFFSET_LOCKED flag. */ if ((flags & FOF_OFFSET) == 0) { - FILE_LOCK(fp); + mtxp = mtx_pool_find(mtxpool_sleep, fp); + mtx_lock(mtxp); while(fp->f_vnread_flags & FOFFSET_LOCKED) { fp->f_vnread_flags |= FOFFSET_LOCK_WAITING; - msleep(&fp->f_vnread_flags,fp->f_mtxp,PUSER -1,"vnread offlock",0); + msleep(&fp->f_vnread_flags, mtxp, PUSER -1, + "vnread offlock", 0); } fp->f_vnread_flags |= FOFFSET_LOCKED; - FILE_UNLOCK(fp); + mtx_unlock(mtxp); vn_lock(vp, LK_SHARED | LK_RETRY, td); uio->uio_offset = fp->f_offset; } else @@ -526,11 +530,11 @@ vn_read(fp, uio, active_cred, flags, td) error = VOP_READ(vp, uio, ioflag, fp->f_cred); if ((flags & FOF_OFFSET) == 0) { fp->f_offset = uio->uio_offset; - FILE_LOCK(fp); + mtx_lock(mtxp); if (fp->f_vnread_flags & FOFFSET_LOCK_WAITING) wakeup(&fp->f_vnread_flags); fp->f_vnread_flags = 0; - FILE_UNLOCK(fp); + mtx_unlock(mtxp); } fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0, td); diff --git a/sys/netgraph/ng_socket.c b/sys/netgraph/ng_socket.c index 377182b2a68c..b4447940bdd0 100644 --- a/sys/netgraph/ng_socket.c +++ b/sys/netgraph/ng_socket.c @@ -689,7 +689,7 @@ ng_internalize(struct mbuf *control, struct thread *td) vn = fp->f_data; if (vn && (vn->v_type == VCHR)) { /* for a VCHR, actually reference the FILE */ - fp->f_count++; + fhold(fp); /* XXX then what :) */ /* how to pass on to other modules? */ } else { diff --git a/sys/opencrypto/cryptodev.c b/sys/opencrypto/cryptodev.c index a940a3cd1913..c9fc6d264fc7 100644 --- a/sys/opencrypto/cryptodev.c +++ b/sys/opencrypto/cryptodev.c @@ -840,12 +840,7 @@ cryptoioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread return (error); } /* falloc automatically provides an extra reference to 'f'. */ - FILE_LOCK(f); - f->f_flag = FREAD | FWRITE; - f->f_type = DTYPE_CRYPTO; - f->f_data = fcr; - f->f_ops = &cryptofops; - FILE_UNLOCK(f); + finit(f, FREAD | FWRITE, DTYPE_CRYPTO, fcr, &cryptofops); *(u_int32_t *)data = fd; fdrop(f, td); break; diff --git a/sys/sys/file.h b/sys/sys/file.h index 58501fa83f37..c5f4afbfadae 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -99,49 +99,37 @@ struct fileops { * * Below is the list of locks that protects members in struct file. * - * (fl) filelist_lock - * (f) f_mtx in struct file + * (f) protected with mtx_lock(mtx_pool_find(fp)) * none not locked */ struct file { - LIST_ENTRY(file) f_list;/* (fl) list of active files */ - short f_type; /* descriptor type */ - void *f_data; /* file descriptor specific data */ - u_int f_flag; /* see fcntl.h */ - struct mtx *f_mtxp; /* mutex to protect data */ - struct fileops *f_ops; /* File operations */ - struct ucred *f_cred; /* credentials associated with descriptor */ - int f_count; /* (f) reference count */ - struct vnode *f_vnode; /* NULL or applicable vnode */ - - /* DFLAG_SEEKABLE specific fields */ - off_t f_offset; - short f_vnread_flags; /* - * (f) home grown sleep lock for f_offset - * Used only for shared vnode locking in - * vnread() - */ -#define FOFFSET_LOCKED 0x1 -#define FOFFSET_LOCK_WAITING 0x2 - /* DTYPE_SOCKET specific fields */ - short f_gcflag; /* used by thread doing fd garbage collection */ -#define FMARK 0x1 /* mark during gc() */ -#define FDEFER 0x2 /* defer for next gc pass */ -#define FWAIT 0x4 /* gc is scanning message buffers */ - int f_msgcount; /* (f) references from message queue */ - - /* DTYPE_VNODE specific fields */ - int f_seqcount; /* - * count of sequential accesses -- cleared - * by most seek operations. - */ - off_t f_nextoff; /* - * offset of next expected read or write - */ - void *f_label; /* Place-holder for struct label pointer. */ + void *f_data; /* file descriptor specific data */ + struct fileops *f_ops; /* File operations */ + struct ucred *f_cred; /* associated credentials. */ + struct vnode *f_vnode; /* NULL or applicable vnode */ + short f_type; /* descriptor type */ + short f_vnread_flags; /* (f) Sleep lock for f_offset */ + volatile u_int f_flag; /* see fcntl.h */ + volatile int f_count; /* reference count */ + /* + * DTYPE_VNODE specific fields. + */ + int f_seqcount; /* Count of sequential accesses. */ + off_t f_nextoff; /* next expected read/write offset. */ + /* + * DFLAG_SEEKABLE specific fields + */ + off_t f_offset; + /* + * Mandatory Access control information. + */ + void *f_label; /* Place-holder for MAC label. */ }; +#define FOFFSET_LOCKED 0x1 +#define FOFFSET_LOCK_WAITING 0x2 + #endif /* _KERNEL */ /* @@ -168,20 +156,17 @@ struct xfile { MALLOC_DECLARE(M_FILE); #endif -LIST_HEAD(filelist, file); -extern struct filelist filehead; /* (fl) head of list of open files */ extern struct fileops vnops; extern struct fileops badfileops; extern struct fileops socketops; extern int maxfiles; /* kernel limit on number of open files */ extern int maxfilesperproc; /* per process limit on number of open files */ -extern int openfiles; /* (fl) actual number of open files */ -extern struct sx filelist_lock; /* sx to protect filelist and openfiles */ +extern volatile int openfiles; /* actual number of open files */ int fget(struct thread *td, int fd, struct file **fpp); int fget_read(struct thread *td, int fd, struct file **fpp); int fget_write(struct thread *td, int fd, struct file **fpp); -int fdrop(struct file *fp, struct thread *td); +int _fdrop(struct file *fp, struct thread *td); /* * The socket operations are used a couple of places. @@ -196,12 +181,7 @@ fo_kqfilter_t soo_kqfilter; fo_stat_t soo_stat; fo_close_t soo_close; -/* Lock a file. */ -#define FILE_LOCK(f) mtx_lock((f)->f_mtxp) -#define FILE_UNLOCK(f) mtx_unlock((f)->f_mtxp) -#define FILE_LOCKED(f) mtx_owned((f)->f_mtxp) -#define FILE_LOCK_ASSERT(f, type) mtx_assert((f)->f_mtxp, (type)) - +void finit(struct file *, u_int, short, void *, struct fileops *); int fgetvp(struct thread *td, int fd, struct vnode **vpp); int fgetvp_read(struct thread *td, int fd, struct vnode **vpp); int fgetvp_write(struct thread *td, int fd, struct vnode **vpp); @@ -209,18 +189,9 @@ int fgetvp_write(struct thread *td, int fd, struct vnode **vpp); int fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp); void fputsock(struct socket *sp); -#define fhold_locked(fp) \ - do { \ - FILE_LOCK_ASSERT(fp, MA_OWNED); \ - (fp)->f_count++; \ - } while (0) - -#define fhold(fp) \ - do { \ - FILE_LOCK(fp); \ - (fp)->f_count++; \ - FILE_UNLOCK(fp); \ - } while (0) +#define fhold(fp) atomic_add_int(&(fp)->f_count, 1) +#define fdrop(fp, td) \ + (atomic_fetchadd_int(&(fp)->f_count, -1) <= 1 ? _fdrop((fp), (td)) : 0) static __inline fo_rdwr_t fo_read; static __inline fo_rdwr_t fo_write; diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index c7b3a44588ef..4d69f3e5ace5 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -67,6 +67,7 @@ LIST_HEAD(unp_head, unpcb); struct unpcb { LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */ struct socket *unp_socket; /* pointer back to socket */ + struct file *unp_file; /* back-pointer to file for gc. */ struct vnode *unp_vnode; /* if associated with file */ ino_t unp_ino; /* fake inode number */ struct unpcb *unp_conn; /* control block of connected socket */ @@ -76,9 +77,11 @@ struct unpcb { int unp_cc; /* copy of rcv.sb_cc */ int unp_mbcnt; /* copy of rcv.sb_mbcnt */ unp_gen_t unp_gencnt; /* generation count of this instance */ - int unp_flags; /* flags */ + short unp_flags; /* flags */ + short unp_gcflag; /* Garbage collector flags. */ struct xucred unp_peercred; /* peer credentials, if applicable */ u_int unp_refcount; + u_int unp_msgcount; /* references from message queue */ struct mtx unp_mtx; /* mutex */ }; @@ -100,6 +103,10 @@ struct unpcb { #define UNP_WANTCRED 0x004 /* credentials wanted */ #define UNP_CONNWAIT 0x008 /* connect blocks until accepted */ +#define UNPGC_REF 0x1 /* unpcb has external ref. */ +#define UNPGC_DEAD 0x2 /* unpcb might be dead. */ +#define UNPGC_SCANNED 0x4 /* Has been scanned. */ + /* * These flags are used to handle non-atomicity in connect() and bind() * operations on a socket: in particular, to avoid races between multiple