src/sys/kern/sysv_shm.c

620 lines
17 KiB
C

/* $OpenBSD: sysv_shm.c,v 1.80 2022/08/14 01:58:28 jsg Exp $ */
/* $NetBSD: sysv_shm.c,v 1.50 1998/10/21 22:24:29 tron Exp $ */
/*
* Copyright (c) 2002 Todd C. Miller <millert@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*
* Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Adam Glass and Charles M.
* Hannum.
* 4. The names of the authors may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/pool.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <uvm/uvm_extern.h>
/* System-wide shm limits (shmmni, shmseg, shmmax, ...); only declared here. */
extern struct shminfo shminfo;
struct shmid_ds **shmsegs; /* linear mapping of shmid -> shmseg */
/* Pool from which segment descriptors are allocated; set up in shminit(). */
struct pool shm_pool;
unsigned short *shmseqs; /* array of shm sequence numbers */
/*
 * Map a full shmid to its segment descriptor; returns NULL when the id
 * does not name a currently allocated segment.
 */
struct shmid_ds *shm_find_segment_by_shmid(int);
/*
* Provides the following externally accessible functions:
*
* shminit(void); initialization
* shmexit(struct vmspace *) cleanup
* shmfork(struct vmspace *, struct vmspace *) fork handling
* shmsys(arg1, arg2, arg3, arg4); shm{at,ctl,dt,get}(arg2, arg3, arg4)
*
* Structures:
* shmsegs (an array of 'struct shmid_ds *')
* per proc 'struct shmmap_head' with an array of 'struct shmmap_state'
*/
/*
 * Flag kept in shm_perm.mode: the segment has been removed with IPC_RMID
 * and is freed as soon as its attach count drops to zero.
 */
#define SHMSEG_REMOVED 0x0200 /* can't overlap ACCESSPERMS */
/*
 * shm_last_free: index of a shmsegs[] slot believed to be free, or -1 when
 *                there is no hint and the table must be searched.
 * shm_nused:     number of segments currently allocated.
 * shm_committed: number of pages currently charged to segments.
 */
int shm_last_free, shm_nused, shm_committed;
/*
 * Private per-segment data.  It is stored directly after the struct
 * shmid_ds inside the same pool item and reached through shm_internal.
 */
struct shm_handle {
struct uvm_object *shm_object;
};
/*
 * One attachment of a segment in a process.  A slot with shmid == -1 is
 * unused.
 */
struct shmmap_state {
vaddr_t va;
int shmid;
};
/*
 * Per-vmspace attach table, hung off vm_shm.  shmseg is the number of
 * entries in state[]; the array is declared with one element but the
 * table is allocated with room for shmseg of them (sized as
 * sizeof(int) + shmseg * sizeof(struct shmmap_state) by the users below).
 */
struct shmmap_head {
int shmseg;
struct shmmap_state state[1];
};
/* Returns the shmsegs[] index of the segment with the given key, or -1. */
int shm_find_segment_by_key(key_t);
/* Releases a segment's backing object, pool item and accounting. */
void shm_deallocate_segment(struct shmid_ds *);
/* Unmaps one attachment and frees the segment if it was the last one. */
int shm_delete_mapping(struct vmspace *, struct shmmap_state *);
/* shmget() on a key that already has a segment (index passed in). */
int shmget_existing(struct proc *, struct sys_shmget_args *,
int, int, register_t *);
/* shmget() path that creates a new segment. */
int shmget_allocate_segment(struct proc *, struct sys_shmget_args *,
int, register_t *);
/*
 * Scan the segment table for a segment whose key equals `key'.
 *
 * Returns the table index of the first match, or -1 when no allocated
 * segment carries that key.
 */
int
shm_find_segment_by_key(key_t key)
{
	int slot = 0;

	while (slot < shminfo.shmmni) {
		struct shmid_ds *cand = shmsegs[slot];

		/* Skip empty slots and segments with a different key. */
		if (cand == NULL || cand->shm_perm.key != key) {
			slot++;
			continue;
		}
		return (slot);
	}
	return (-1);
}
struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
int segnum;
struct shmid_ds *shmseg;
segnum = IPCID_TO_IX(shmid);
if (segnum < 0 || segnum >= shminfo.shmmni ||
(shmseg = shmsegs[segnum]) == NULL ||
shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid))
return (NULL);
return (shmseg);
}
void
shm_deallocate_segment(struct shmid_ds *shmseg)
{
struct shm_handle *shm_handle;
size_t size;
shm_handle = shmseg->shm_internal;
size = round_page(shmseg->shm_segsz);
uao_detach(shm_handle->shm_object);
pool_put(&shm_pool, shmseg);
shm_committed -= atop(size);
shm_nused--;
}
int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
struct shmid_ds *shmseg;
int segnum;
vaddr_t end;
segnum = IPCID_TO_IX(shmmap_s->shmid);
if (segnum < 0 || segnum >= shminfo.shmmni ||
(shmseg = shmsegs[segnum]) == NULL)
return (EINVAL);
end = round_page(shmmap_s->va+shmseg->shm_segsz);
uvm_unmap(&vm->vm_map, trunc_page(shmmap_s->va), end);
shmmap_s->shmid = -1;
shmseg->shm_dtime = gettime();
if ((--shmseg->shm_nattch <= 0) &&
(shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
shm_deallocate_segment(shmseg);
shm_last_free = segnum;
shmsegs[shm_last_free] = NULL;
}
return (0);
}
/*
 * shmdt(2): detach the segment attached at exactly shmaddr.
 *
 * Returns EINVAL when the process has no attach table or no in-use entry
 * is mapped at that address; otherwise the result of
 * shm_delete_mapping().
 */
int
sys_shmdt(struct proc *p, void *v, register_t *retval)
{
	struct sys_shmdt_args /* {
		syscallarg(const void *) shmaddr;
	} */ *uap = v;
	struct shmmap_head *table;
	struct shmmap_state *ent;
	int n;

	table = (struct shmmap_head *)p->p_vmspace->vm_shm;
	if (table == NULL)
		return (EINVAL);

	for (n = 0; n < table->shmseg; n++) {
		ent = table->state + n;
		if (ent->shmid == -1)
			continue;
		if (ent->va != (vaddr_t)SCARG(uap, shmaddr))
			continue;
		return (shm_delete_mapping(p->p_vmspace, ent));
	}
	return (EINVAL);
}
/*
 * shmat(2): attach the segment identified by shmid to the calling process.
 *
 * The per-vmspace attach table is created on first use.  The caller needs
 * read permission on the segment, plus write permission unless SHM_RDONLY
 * is given.  A non-NULL shmaddr requests a fixed mapping address: it must
 * be SHMLBA-aligned, or it is rounded down to SHMLBA when SHM_RND is set.
 *
 * On success the mapped address is stored in *retval and 0 is returned.
 * Errors: ENOMEM (attach table allocation failed), EINVAL (unknown shmid
 * or misaligned shmaddr), EMFILE (attach table full), an error from
 * ipcperm(), or the error returned by uvm_map().
 */
int
sys_shmat(struct proc *p, void *v, register_t *retval)
{
	struct sys_shmat_args /* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */ *uap = v;
	int error, i, flags = 0;
	struct ucred *cred = p->p_ucred;
	struct shmid_ds *shmseg;
	struct shmmap_head *shmmap_h;
	struct shmmap_state *shmmap_s;
	struct shm_handle *shm_handle;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/*
	 * Lazily create the attach table.  The size formula must stay in
	 * step with the ones used by shmfork() and shmexit().
	 */
	shmmap_h = (struct shmmap_head *)p->p_vmspace->vm_shm;
	if (shmmap_h == NULL) {
		size = sizeof(int) +
		    shminfo.shmseg * sizeof(struct shmmap_state);
		shmmap_h = malloc(size, M_SHM, M_WAITOK | M_CANFAIL);
		if (shmmap_h == NULL)
			return (ENOMEM);
		shmmap_h->shmseg = shminfo.shmseg;
		for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg;
		    i++, shmmap_s++)
			shmmap_s->shmid = -1;
		p->p_vmspace->vm_shm = (caddr_t)shmmap_h;
	}

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL)
		return (EINVAL);
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		return (error);

	/* Find an unused slot in the attach table. */
	for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg; i++) {
		if (shmmap_s->shmid == -1)
			break;
		shmmap_s++;
	}
	if (i >= shmmap_h->shmseg)
		return (EMFILE);

	/*
	 * Widen before rounding; if shm_segsz is a signed int (its type is
	 * not visible here, TODO confirm) a size close to INT_MAX could
	 * otherwise overflow while being rounded up.
	 */
	size = round_page((vsize_t)shmseg->shm_segsz);
	prot = PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= PROT_WRITE;
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else
			return (EINVAL);
	} else
		attach_va = 0;

	/*
	 * Since uvm_map() could end up sleeping, grab a reference to prevent
	 * the segment from being deallocated while sleeping.
	 */
	shmseg->shm_nattch++;
	shm_handle = shmseg->shm_internal;
	/* Reference that the new mapping will own if uvm_map() succeeds. */
	uao_reference(shm_handle->shm_object);
	error = uvm_map(&p->p_vmspace->vm_map, &attach_va, size,
	    shm_handle->shm_object, 0, 0, UVM_MAPFLAG(prot, prot,
	    MAP_INHERIT_SHARE, MADV_RANDOM, flags));
	if (error) {
		/*
		 * The mapping was not created, so the object reference
		 * taken above has to be dropped on every failure path.
		 * Previously it was dropped only when the segment
		 * survived; when this failed attach turned out to be the
		 * last user of a removed segment, shm_deallocate_segment()
		 * released just the segment's own reference and the extra
		 * one was leaked.  Detach first: shm_handle lives in the
		 * same pool item as shmseg and is invalid once the segment
		 * has been deallocated.
		 */
		uao_detach(shm_handle->shm_object);
		if ((--shmseg->shm_nattch <= 0) &&
		    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
			shm_deallocate_segment(shmseg);
			shm_last_free = IPCID_TO_IX(SCARG(uap, shmid));
			shmsegs[shm_last_free] = NULL;
		}
		return (error);
	}

	/* Record the attachment and update the segment's bookkeeping. */
	shmmap_s->va = attach_va;
	shmmap_s->shmid = SCARG(uap, shmid);
	shmseg->shm_lpid = p->p_p->ps_pid;
	shmseg->shm_atime = gettime();
	*retval = attach_va;
	return (0);
}
/*
 * shmctl(2): query, modify or remove the segment identified by shmid.
 *
 *   IPC_STAT  copy the segment descriptor to buf (needs IPC_R).
 *   IPC_SET   set uid, gid and the ACCESSPERMS bits of mode from buf,
 *             and stamp the change time (needs IPC_M).
 *   IPC_RMID  make the segment private and flag it SHMSEG_REMOVED; it is
 *             freed right away when nothing is attached, otherwise on
 *             the last detach (needs IPC_M).
 *
 * SHM_LOCK, SHM_UNLOCK and unknown commands return EINVAL, as does an
 * unknown shmid.  Errors from copyin(), copyout() and ipcperm() are
 * passed through.
 */
int
sys_shmctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_shmctl_args /* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */ *uap = v;
	int shmid = SCARG(uap, shmid);
	int cmd = SCARG(uap, cmd);
	void *buf = SCARG(uap, buf);
	struct ucred *cred = p->p_ucred;
	struct shmid_ds inbuf, outbuf, *shmseg;
	int error;

	/* Fetch the user's descriptor before looking at the segment. */
	if (cmd == IPC_SET) {
		error = copyin(buf, &inbuf, sizeof(inbuf));
		if (error)
			return (error);
	}

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL)
		return (EINVAL);

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			return (error);
		/*
		 * shm_internal is a kernel pointer to the segment's private
		 * handle; do not disclose it to userland.  A byte copy is
		 * used so that everything else handed out is exactly what
		 * the segment holds.
		 */
		memcpy(&outbuf, shmseg, sizeof(outbuf));
		outbuf.shm_internal = NULL;
		error = copyout(&outbuf, buf, sizeof(outbuf));
		if (error)
			return (error);
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			return (error);
		shmseg->shm_perm.uid = inbuf.shm_perm.uid;
		shmseg->shm_perm.gid = inbuf.shm_perm.gid;
		/* Only the permission bits may change; keep internal flags. */
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (inbuf.shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = gettime();
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			return (error);
		/* The key can no longer be used to find this segment. */
		shmseg->shm_perm.key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			shm_deallocate_segment(shmseg);
			shm_last_free = IPCID_TO_IX(shmid);
			shmsegs[shm_last_free] = NULL;
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
	default:
		return (EINVAL);
	}
	return (0);
}
/*
 * shmget(2) on a key that already has a segment in slot `segnum'.
 *
 * Checks, in order: the caller's permission for `mode' (ipcperm() error),
 * that a non-zero requested size does not exceed the segment's size
 * (EINVAL), and that the caller did not ask for exclusive creation
 * (EEXIST).  On success the segment's id is stored in *retval.
 */
int
shmget_existing(struct proc *p,
    struct sys_shmget_args /* {
	syscallarg(key_t) key;
	syscallarg(size_t) size;
	syscallarg(int) shmflg;
    } */ *uap,
    int mode, int segnum, register_t *retval)
{
	struct shmid_ds *seg;
	int rv;

	/* The caller guarantees that segnum names an occupied slot. */
	seg = shmsegs[segnum];

	rv = ipcperm(p->p_ucred, &seg->shm_perm, mode);
	if (rv != 0)
		return (rv);

	if (SCARG(uap, size) && SCARG(uap, size) > seg->shm_segsz)
		return (EINVAL);

	switch (SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) {
	case IPC_CREAT | IPC_EXCL:
		return (EEXIST);
	default:
		break;
	}

	*retval = IXSEQ_TO_IPCID(segnum, seg->shm_perm);
	return (0);
}
/*
 * shmget(2) path that creates a new segment.
 *
 * Validates the requested size against shmmin/shmmax (EINVAL), the
 * number of segments against shmmni (ENOSPC) and the page total against
 * shmall (ENOMEM).  It then allocates a descriptor, picks a free
 * shmsegs[] slot, creates the backing object and fills in the
 * descriptor.  The new id is stored in *retval.
 *
 * Returns EAGAIN when the allocation had to sleep and another caller
 * created a segment with the same key in the meantime; the caller is
 * expected to retry the lookup.
 */
int
shmget_allocate_segment(struct proc *p,
    struct sys_shmget_args /* {
	syscallarg(key_t) key;
	syscallarg(size_t) size;
	syscallarg(int) shmflg;
    } */ *uap,
    int mode, register_t *retval)
{
	size_t size;
	key_t key;
	int segnum;
	struct ucred *cred = p->p_ucred;
	struct shmid_ds *shmseg;
	struct shm_handle *shm_handle;

	if (SCARG(uap, size) < shminfo.shmmin ||
	    SCARG(uap, size) > shminfo.shmmax)
		return (EINVAL);
	if (shm_nused >= shminfo.shmmni) /* any shmids left? */
		return (ENOSPC);
	size = round_page(SCARG(uap, size));
	if (shm_committed + atop(size) > shminfo.shmall)
		return (ENOMEM);

	/* Charge the segment before any allocation that might sleep. */
	shm_nused++;
	shm_committed += atop(size);

	/*
	 * If a key has been specified and we had to wait for memory
	 * to be freed up we need to verify that no one has allocated
	 * the key we want in the meantime. Yes, this is ugly.
	 *
	 * The item is requested zeroed: only some fields are assigned
	 * below, while IPC_STAT copies the whole descriptor to userland,
	 * so anything left unset must not contain stale pool contents.
	 */
	key = SCARG(uap, key);
	shmseg = pool_get(&shm_pool,
	    (key == IPC_PRIVATE ? PR_WAITOK : PR_NOWAIT) | PR_ZERO);
	if (shmseg == NULL) {
		shmseg = pool_get(&shm_pool, PR_WAITOK | PR_ZERO);
		if (shm_find_segment_by_key(key) != -1) {
			/* Lost the race; undo the charge and let them retry. */
			pool_put(&shm_pool, shmseg);
			shm_nused--;
			shm_committed -= atop(size);
			return (EAGAIN);
		}
	}

	/* XXX - hash shmids instead */
	if (shm_last_free < 0) {
		/* No hint: take the first empty slot. */
		for (segnum = 0; segnum < shminfo.shmmni && shmsegs[segnum];
		    segnum++)
			;
		if (segnum == shminfo.shmmni)
			panic("shmseg free count inconsistent");
	} else {
		segnum = shm_last_free;
		/* Keep the hint only if the following slot is also free. */
		if (++shm_last_free >= shminfo.shmmni || shmsegs[shm_last_free])
			shm_last_free = -1;
	}
	shmsegs[segnum] = shmseg;

	/* The private handle sits right behind the descriptor. */
	shm_handle = (struct shm_handle *)((caddr_t)shmseg + sizeof(*shmseg));
	shm_handle->shm_object = uao_create(size, 0);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
	shmseg->shm_perm.mode = (mode & ACCESSPERMS);
	/* Bump the slot's sequence number so stale ids stop matching. */
	shmseg->shm_perm.seq = shmseqs[segnum] = (shmseqs[segnum] + 1) & 0x7fff;
	shmseg->shm_perm.key = key;
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = p->p_p->ps_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = gettime();
	shmseg->shm_internal = shm_handle;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return (0);
}
/*
 * shmget(2): return the id of the segment for `key', creating it when
 * requested.
 *
 * IPC_PRIVATE always allocates a new segment.  For any other key an
 * existing segment is handed to shmget_existing(); if there is none,
 * ENOENT is returned unless IPC_CREAT is set, in which case a segment is
 * allocated.  Whenever the allocation reports EAGAIN the key lookup is
 * repeated.
 */
int
sys_shmget(struct proc *p, void *v, register_t *retval)
{
	struct sys_shmget_args /* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */ *uap = v;
	int slot, perms, rv;

	perms = SCARG(uap, shmflg) & ACCESSPERMS;

	/* Private segments skip the first key lookup. */
	if (SCARG(uap, key) == IPC_PRIVATE) {
		rv = shmget_allocate_segment(p, uap, perms, retval);
		if (rv != EAGAIN)
			return (rv);
	}

	for (;;) {
		slot = shm_find_segment_by_key(SCARG(uap, key));
		if (slot >= 0)
			return (shmget_existing(p, uap, perms, slot, retval));
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0)
			return (ENOENT);

		rv = shmget_allocate_segment(p, uap, perms, retval);
		if (rv != EAGAIN)
			return (rv);
	}
}
void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
struct shmmap_head *shmmap_h;
struct shmmap_state *shmmap_s;
struct shmid_ds *shmseg;
size_t size;
int i;
if (vm1->vm_shm == NULL) {
vm2->vm_shm = NULL;
return;
}
shmmap_h = (struct shmmap_head *)vm1->vm_shm;
size = sizeof(int) + shmmap_h->shmseg * sizeof(struct shmmap_state);
vm2->vm_shm = malloc(size, M_SHM, M_WAITOK);
memcpy(vm2->vm_shm, vm1->vm_shm, size);
for (i = 0, shmmap_s = shmmap_h->state; i < shmmap_h->shmseg;
i++, shmmap_s++) {
if (shmmap_s->shmid != -1 &&
(shmseg = shmsegs[IPCID_TO_IX(shmmap_s->shmid)]) != NULL)
shmseg->shm_nattch++;
}
}
/*
 * Exit handling: detach every segment still attached in vm and free the
 * attach table.  Does nothing when vm has no table.
 */
void
shmexit(struct vmspace *vm)
{
	struct shmmap_head *table;
	struct shmmap_state *ent;
	size_t nbytes;
	int n;

	table = (struct shmmap_head *)vm->vm_shm;
	if (table == NULL)
		return;

	/* Same size formula that was used to allocate the table. */
	nbytes = sizeof(int) + table->shmseg * sizeof(struct shmmap_state);

	for (n = 0; n < table->shmseg; n++) {
		ent = table->state + n;
		if (ent->shmid == -1)
			continue;
		shm_delete_mapping(vm, ent);
	}

	free(vm->vm_shm, M_SHM, nbytes);
	vm->vm_shm = NULL;
}
void
shminit(void)
{
pool_init(&shm_pool,
sizeof(struct shmid_ds) + sizeof(struct shm_handle), 0,
IPL_NONE, PR_WAITOK, "shmpl", NULL);
shmsegs = mallocarray(shminfo.shmmni, sizeof(struct shmid_ds *),
M_SHM, M_WAITOK|M_ZERO);
shmseqs = mallocarray(shminfo.shmmni, sizeof(unsigned short),
M_SHM, M_WAITOK|M_ZERO);
shminfo.shmmax *= PAGE_SIZE; /* actually in pages */
shm_last_free = 0;
shm_nused = 0;
shm_committed = 0;
}
/* Expand shmsegs and shmseqs arrays */
void
shm_reallocate(int val)
{
struct shmid_ds **newsegs;
unsigned short *newseqs;
newsegs = mallocarray(val, sizeof(struct shmid_ds *),
M_SHM, M_WAITOK | M_ZERO);
memcpy(newsegs, shmsegs,
shminfo.shmmni * sizeof(struct shmid_ds *));
free(shmsegs, M_SHM,
shminfo.shmmni * sizeof(struct shmid_ds *));
shmsegs = newsegs;
newseqs = mallocarray(val, sizeof(unsigned short), M_SHM,
M_WAITOK | M_ZERO);
memcpy(newseqs, shmseqs,
shminfo.shmmni * sizeof(unsigned short));
free(shmseqs, M_SHM, shminfo.shmmni * sizeof(unsigned short));
shmseqs = newseqs;
shminfo.shmmni = val;
}
/*
 * Userland access to struct shminfo.
 *
 * Only leaf names are accepted (ENOTDIR otherwise).  Supported names:
 *
 *   KERN_SHMINFO_SHMMAX  0..INT_MAX; shmall is raised when the new
 *                        maximum no longer fits in it.
 *   KERN_SHMINFO_SHMMIN  1..INT_MAX.
 *   KERN_SHMINFO_SHMMNI  may only grow, up to 0xffff; growing it
 *                        reallocates the segment tables.
 *   KERN_SHMINFO_SHMSEG  1..INT_MAX.
 *   KERN_SHMINFO_SHMALL  may only grow, up to INT_MAX.
 *
 * Any other name returns EOPNOTSUPP.
 */
int
sysctl_sysvshm(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error, val, maxpages;

	if (namelen != 1)
		return (ENOTDIR);		/* leaf-only */

	switch (name[0]) {
	case KERN_SHMINFO_SHMMAX:
		if ((error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &shminfo.shmmax, 0, INT_MAX)) || newp == NULL)
			return (error);

		/*
		 * If new shmmax > shmall, crank shmall.
		 *
		 * shmmax may be as large as INT_MAX (see the bound above),
		 * so widen it before rounding up to a page boundary;
		 * rounding in int arithmetic could overflow (assumes
		 * round_page() does not widen by itself, TODO confirm).
		 * The resulting page count always fits in an int.
		 */
		maxpages = (int)atop(round_page((size_t)shminfo.shmmax));
		if (maxpages > shminfo.shmall)
			shminfo.shmall = maxpages;
		return (0);
	case KERN_SHMINFO_SHMMIN:
		return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &shminfo.shmmin, 1, INT_MAX));
	case KERN_SHMINFO_SHMMNI:
		/* Work on a copy so the tables can be resized first. */
		val = shminfo.shmmni;
		/* can't decrease shmmni */
		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &val, val, 0xffff);
		/* returns success and skips reallocation if val is unchanged */
		if (error || val == shminfo.shmmni)
			return (error);
		shm_reallocate(val);
		return (0);
	case KERN_SHMINFO_SHMSEG:
		return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &shminfo.shmseg, 1, INT_MAX));
	case KERN_SHMINFO_SHMALL:
		/* can't decrease shmall */
		return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &shminfo.shmall, shminfo.shmall, INT_MAX));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}