/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $Id: vm_map.c,v 1.13 1995/02/02 09:08:40 davidg Exp $ */ /* * Virtual memory mapping module. */ #include #include #include #include #include #include #include /* * Virtual memory maps provide for the mapping, protection, * and sharing of virtual memory objects. In addition, * this module provides for an efficient virtual copy of * memory from one map to another. * * Synchronization is required prior to most operations. * * Maps consist of an ordered doubly-linked list of simple * entries; a single hint is used to speed up lookups. * * In order to properly represent the sharing of virtual * memory regions among maps, the map structure is bi-level. * Top-level ("address") maps refer to regions of sharable * virtual memory. These regions are implemented as * ("sharing") maps, which then refer to the actual virtual * memory objects. 
When two address maps "share" memory, * their top-level maps both have references to the same * sharing map. When memory is virtual-copied from one * address map to another, the references in the sharing * maps are actually copied -- no copying occurs at the * virtual memory object level. * * Since portions of maps are specified by start/end addresses, * which may not align with existing map entries, all * routines merely "clip" entries to these start/end values. * [That is, an entry is split into two, bordering at a * start or end value.] Note that these clippings may not * always be necessary (as the two resulting entries are then * not changed); however, the clipping is done for convenience. * No attempt is currently made to "glue back together" two * abutting entries. * * As mentioned above, virtual copy operations are performed * by copying VM object references from one sharing map to * another, and then marking both regions as copy-on-write. * It is important to note that only one writeable reference * to a VM object region exists in any map -- this means that * shadow object creation can be delayed until a write operation * occurs. */ /* * vm_map_startup: * * Initialize the vm_map module. Must be called before * any other vm_map routines. * * Map and entry structures are allocated from the general * purpose memory pool with some exceptions: * * - The kernel map and kmem submap are allocated statically. * - Kernel map entries are allocated out of a static pool. * * These restrictions are necessary since malloc() uses the * maps and requires map entries.
*/ vm_offset_t kentry_data; vm_size_t kentry_data_size; vm_map_entry_t kentry_free; vm_map_t kmap_free; int kentry_count; static vm_offset_t mapvm_start = 0, mapvm = 0, mapvmmax; static int mapvmpgcnt = 0; static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); void vm_map_startup() { register int i; register vm_map_entry_t mep; vm_map_t mp; /* * Static map structures for allocation before initialization of * kernel map or kmem map. vm_map_create knows how to deal with them. */ kmap_free = mp = (vm_map_t) kentry_data; i = MAX_KMAP; while (--i > 0) { mp->header.next = (vm_map_entry_t) (mp + 1); mp++; } mp++->header.next = NULL; /* * Form a free list of statically allocated kernel map entries with * the rest. */ kentry_free = mep = (vm_map_entry_t) mp; kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep; while (--i > 0) { mep->next = mep + 1; mep++; } mep->next = NULL; } /* * Allocate a vmspace structure, including a vm_map and pmap, * and initialize those structures. The refcnt is set to 1. * The remaining fields must be initialized by the caller. 
*/ struct vmspace * vmspace_alloc(min, max, pageable) vm_offset_t min, max; int pageable; { register struct vmspace *vm; if (mapvmpgcnt == 0 && mapvm == 0) { int s; mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; s = splhigh(); mapvm_start = mapvm = kmem_alloc_pageable(kmem_map, mapvmpgcnt * PAGE_SIZE); mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE; splx(s); if (!mapvm) mapvmpgcnt = 0; } MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK); bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm); vm_map_init(&vm->vm_map, min, max, pageable); pmap_pinit(&vm->vm_pmap); vm->vm_map.pmap = &vm->vm_pmap; /* XXX */ vm->vm_refcnt = 1; return (vm); } void vmspace_free(vm) register struct vmspace *vm; { if (vm->vm_refcnt == 0) panic("vmspace_free: attempt to free already freed vmspace"); if (--vm->vm_refcnt == 0) { /* * Lock the map, to wait out all other references to it. * Delete all of the mappings and pages they hold, then call * the pmap module to reclaim anything left. */ vm_map_lock(&vm->vm_map); (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); vm_map_unlock(&vm->vm_map); while( vm->vm_map.ref_count != 1) tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0); --vm->vm_map.ref_count; pmap_release(&vm->vm_pmap); FREE(vm, M_VMMAP); } } /* * vm_map_create: * * Creates and returns a new empty VM map with * the given physical map structure, and having * the given lower and upper address bounds. 
*/ vm_map_t vm_map_create(pmap, min, max, pageable) pmap_t pmap; vm_offset_t min, max; boolean_t pageable; { register vm_map_t result; if (kmem_map == NULL) { result = kmap_free; kmap_free = (vm_map_t) result->header.next; if (result == NULL) panic("vm_map_create: out of maps"); } else MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK); vm_map_init(result, min, max, pageable); result->pmap = pmap; return (result); } /* * Initialize an existing vm_map structure * such as that in the vmspace structure. * The pmap is set elsewhere. */ void vm_map_init(map, min, max, pageable) register struct vm_map *map; vm_offset_t min, max; boolean_t pageable; { map->header.next = map->header.prev = &map->header; map->nentries = 0; map->size = 0; map->ref_count = 1; map->is_main_map = TRUE; map->min_offset = min; map->max_offset = max; map->entries_pageable = pageable; map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; lock_init(&map->lock, TRUE); simple_lock_init(&map->ref_lock); simple_lock_init(&map->hint_lock); } /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. * No entry fields are filled in. This routine is */ static struct vm_map_entry *mappool; static int mappoolcnt; vm_map_entry_t vm_map_entry_create(map) vm_map_t map; { vm_map_entry_t entry; int i; #define KENTRY_LOW_WATER 64 #define MAPENTRY_LOW_WATER 128 /* * This is a *very* nasty (and sort of incomplete) hack!!!! */ if (kentry_count < KENTRY_LOW_WATER) { if (mapvmpgcnt && mapvm) { vm_page_t m; m = vm_page_alloc(kmem_object, mapvm - vm_map_min(kmem_map), (map == kmem_map) ? 
VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL); if (m) { int newentries; newentries = (NBPG / sizeof(struct vm_map_entry)); vm_page_wire(m); m->flags &= ~PG_BUSY; pmap_enter(vm_map_pmap(kmem_map), mapvm, VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1); entry = (vm_map_entry_t) mapvm; mapvm += NBPG; --mapvmpgcnt; for (i = 0; i < newentries; i++) { vm_map_entry_dispose(kernel_map, entry); entry++; } } } } if (map == kernel_map || map == kmem_map || map == pager_map) { entry = kentry_free; if (entry) { kentry_free = entry->next; --kentry_count; return entry; } entry = mappool; if (entry) { mappool = entry->next; --mappoolcnt; return entry; } } else { entry = mappool; if (entry) { mappool = entry->next; --mappoolcnt; return entry; } MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), M_VMMAPENT, M_WAITOK); } if (entry == NULL) panic("vm_map_entry_create: out of map entries"); return (entry); } /* * vm_map_entry_dispose: [ internal use only ] * * Inverse of vm_map_entry_create. */ void vm_map_entry_dispose(map, entry) vm_map_t map; vm_map_entry_t entry; { if ((kentry_count < KENTRY_LOW_WATER) || ((vm_offset_t) entry >= kentry_data && (vm_offset_t) entry < (kentry_data + kentry_data_size)) || ((vm_offset_t) entry >= mapvm_start && (vm_offset_t) entry < mapvmmax)) { entry->next = kentry_free; kentry_free = entry; ++kentry_count; return; } else { if (mappoolcnt < MAPENTRY_LOW_WATER) { entry->next = mappool; mappool = entry; ++mappoolcnt; return; } FREE(entry, M_VMMAPENT); } } /* * vm_map_entry_{un,}link: * * Insert/remove entries from maps. */ #define vm_map_entry_link(map, after_where, entry) \ { \ (map)->nentries++; \ (entry)->prev = (after_where); \ (entry)->next = (after_where)->next; \ (entry)->prev->next = (entry); \ (entry)->next->prev = (entry); \ } #define vm_map_entry_unlink(map, entry) \ { \ (map)->nentries--; \ (entry)->next->prev = (entry)->prev; \ (entry)->prev->next = (entry)->next; \ } /* * vm_map_reference: * * Creates another valid reference to the given map. 
* */ void vm_map_reference(map) register vm_map_t map; { if (map == NULL) return; simple_lock(&map->ref_lock); map->ref_count++; simple_unlock(&map->ref_lock); } /* * vm_map_deallocate: * * Removes a reference from the specified map, * destroying it if no references remain. * The map should not be locked. */ void vm_map_deallocate(map) register vm_map_t map; { register int c; if (map == NULL) return; simple_lock(&map->ref_lock); c = map->ref_count; simple_unlock(&map->ref_lock); if (c == 0) panic("vm_map_deallocate: deallocating already freed map"); if (c != 1) { --map->ref_count; wakeup((caddr_t) &map->ref_count); return; } /* * Lock the map, to wait out all other references to it. */ vm_map_lock(map); (void) vm_map_delete(map, map->min_offset, map->max_offset); --map->ref_count; if( map->ref_count != 0) { vm_map_unlock(map); return; } pmap_destroy(map->pmap); FREE(map, M_VMMAP); } /* * vm_map_insert: * * Inserts the given whole VM object into the target * map at the specified address range. The object's * size should match that of the address range. * * Requires that the map be locked, and leaves it so. */ int vm_map_insert(map, object, offset, start, end) vm_map_t map; vm_object_t object; vm_offset_t offset; vm_offset_t start; vm_offset_t end; { register vm_map_entry_t new_entry; register vm_map_entry_t prev_entry; vm_map_entry_t temp_entry; /* * Check that the start and end points are not bogus. */ if ((start < map->min_offset) || (end > map->max_offset) || (start >= end)) return (KERN_INVALID_ADDRESS); /* * Find the entry prior to the proposed starting address; if it's part * of an existing entry, this range is bogus. */ if (vm_map_lookup_entry(map, start, &temp_entry)) return (KERN_NO_SPACE); prev_entry = temp_entry; /* * Assert that the next entry doesn't overlap the end point. 
*/ if ((prev_entry->next != &map->header) && (prev_entry->next->start < end)) return (KERN_NO_SPACE); /* * See if we can avoid creating a new entry by extending one of our * neighbors. */ if (object == NULL) { if ((prev_entry != &map->header) && (prev_entry->end == start) && (map->is_main_map) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (prev_entry->inheritance == VM_INHERIT_DEFAULT) && (prev_entry->protection == VM_PROT_DEFAULT) && (prev_entry->max_protection == VM_PROT_DEFAULT) && (prev_entry->wired_count == 0)) { if (vm_object_coalesce(prev_entry->object.vm_object, NULL, prev_entry->offset, (vm_offset_t) 0, (vm_size_t) (prev_entry->end - prev_entry->start), (vm_size_t) (end - prev_entry->end))) { /* * Coalesced the two objects - can extend the * previous map entry to include the new * range. */ map->size += (end - prev_entry->end); prev_entry->end = end; return (KERN_SUCCESS); } } } /* * Create a new entry */ new_entry = vm_map_entry_create(map); new_entry->start = start; new_entry->end = end; new_entry->is_a_map = FALSE; new_entry->is_sub_map = FALSE; new_entry->object.vm_object = object; new_entry->offset = offset; new_entry->copy_on_write = FALSE; new_entry->needs_copy = FALSE; if (map->is_main_map) { new_entry->inheritance = VM_INHERIT_DEFAULT; new_entry->protection = VM_PROT_DEFAULT; new_entry->max_protection = VM_PROT_DEFAULT; new_entry->wired_count = 0; } /* * Insert the new entry into the list */ vm_map_entry_link(map, prev_entry, new_entry); map->size += new_entry->end - new_entry->start; /* * Update the free space hint */ if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) map->first_free = new_entry; return (KERN_SUCCESS); } /* * SAVE_HINT: * * Saves the specified entry as the hint for * future lookups. Performs necessary interlocks. 
*/ #define SAVE_HINT(map,value) \ simple_lock(&(map)->hint_lock); \ (map)->hint = (value); \ simple_unlock(&(map)->hint_lock); /* * vm_map_lookup_entry: [ internal use only ] * * Finds the map entry containing (or * immediately preceding) the specified address * in the given map; the entry is returned * in the "entry" parameter. The boolean * result indicates whether the address is * actually contained in the map. */ boolean_t vm_map_lookup_entry(map, address, entry) register vm_map_t map; register vm_offset_t address; vm_map_entry_t *entry; /* OUT */ { register vm_map_entry_t cur; register vm_map_entry_t last; /* * Start looking either from the head of the list, or from the hint. */ simple_lock(&map->hint_lock); cur = map->hint; simple_unlock(&map->hint_lock); if (cur == &map->header) cur = cur->next; if (address >= cur->start) { /* * Go from hint to end of list. * * But first, make a quick check to see if we are already looking * at the entry we want (which is usually the case). Note also * that we don't need to save the hint here... it is the same * hint (unless we are at the header, in which case the hint * didn't buy us anything anyway). */ last = &map->header; if ((cur != last) && (cur->end > address)) { *entry = cur; return (TRUE); } } else { /* * Go from start to hint, *inclusively* */ last = cur->next; cur = map->header.next; } /* * Search linearly */ while (cur != last) { if (cur->end > address) { if (address >= cur->start) { /* * Save this lookup for future hints, and * return */ *entry = cur; SAVE_HINT(map, cur); return (TRUE); } break; } cur = cur->next; } *entry = cur->prev; SAVE_HINT(map, *entry); return (FALSE); } /* * Find sufficient space for `length' bytes in the given map, starting at * `start'. The map must be locked. Returns 0 on success, 1 on no space. 
*/ int vm_map_findspace(map, start, length, addr) register vm_map_t map; register vm_offset_t start; vm_size_t length; vm_offset_t *addr; { register vm_map_entry_t entry, next; register vm_offset_t end; if (start < map->min_offset) start = map->min_offset; if (start > map->max_offset) return (1); /* * Look for the first possible address; if there's already something * at this address, we have to start after it. */ if (start == map->min_offset) { if ((entry = map->first_free) != &map->header) start = entry->end; } else { vm_map_entry_t tmp; if (vm_map_lookup_entry(map, start, &tmp)) start = tmp->end; entry = tmp; } /* * Look through the rest of the map, trying to fit a new region in the * gap between existing regions, or after the very last region. */ for (;; start = (entry = next)->end) { /* * Find the end of the proposed new region. Be sure we didn't * go beyond the end of the map, or wrap around the address; * if so, we lose. Otherwise, if this is the last entry, or * if the proposed new region fits before the next entry, we * win. */ end = start + length; if (end > map->max_offset || end < start) return (1); next = entry->next; if (next == &map->header || next->start >= end) break; } SAVE_HINT(map, entry); *addr = start; if (map == kernel_map && round_page(start + length) > kernel_vm_end) pmap_growkernel(round_page(start + length)); return (0); } /* * vm_map_find finds an unallocated region in the target address * map with the given length. The search is defined to be * first-fit from the specified address; the region found is * returned in the same parameter. 
* */ int vm_map_find(map, object, offset, addr, length, find_space) vm_map_t map; vm_object_t object; vm_offset_t offset; vm_offset_t *addr; /* IN/OUT */ vm_size_t length; boolean_t find_space; { register vm_offset_t start; int result, s = 0; start = *addr; vm_map_lock(map); if (map == kmem_map) s = splhigh(); if (find_space) { if (vm_map_findspace(map, start, length, addr)) { vm_map_unlock(map); if (map == kmem_map) splx(s); return (KERN_NO_SPACE); } start = *addr; } result = vm_map_insert(map, object, offset, start, start + length); vm_map_unlock(map); if (map == kmem_map) splx(s); return (result); } /* * vm_map_simplify_entry: [ internal use only ] * * Simplify the given map entry by: * removing extra sharing maps * [XXX maybe later] merging with a neighbor */ void vm_map_simplify_entry(map, entry) vm_map_t map; vm_map_entry_t entry; { #ifdef lint map++; #endif /* * If this entry corresponds to a sharing map, then see if we can * remove the level of indirection. If it's not a sharing map, then it * points to a VM object, so see if we can merge with either of our * neighbors. */ if (entry->is_sub_map) return; if (entry->is_a_map) { #if 0 vm_map_t my_share_map; int count; my_share_map = entry->object.share_map; simple_lock(&my_share_map->ref_lock); count = my_share_map->ref_count; simple_unlock(&my_share_map->ref_lock); if (count == 1) { /* * Can move the region from entry->start to entry->end * (+ entry->offset) in my_share_map into place of * entry. Later. */ } #endif } else { /* * Try to merge with our neighbors. * * Conditions for merge are: * * 1. entries are adjacent. 2. both entries point to objects * with null pagers. * * If a merge is possible, we replace the two entries with a * single entry, then merge the two objects into a single * object. * * Now, all that is left to do is write the code! 
*/ } } /* * vm_map_clip_start: [ internal use only ] * * Asserts that the given entry begins at or after * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_start(map, entry, startaddr) \ { \ if (startaddr > entry->start) \ _vm_map_clip_start(map, entry, startaddr); \ } /* * This routine is called only when it is known that * the entry must be split. */ static void _vm_map_clip_start(map, entry, start) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t start; { register vm_map_entry_t new_entry; /* * See if we can simplify this entry first */ /* vm_map_simplify_entry(map, entry); */ /* * Split off the front portion -- note that we must insert the new * entry BEFORE this one, so that this entry has the specified * starting address. */ new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->end = start; entry->offset += (start - entry->start); entry->start = start; vm_map_entry_link(map, entry->prev, new_entry); if (entry->is_a_map || entry->is_sub_map) vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } /* * vm_map_clip_end: [ internal use only ] * * Asserts that the given entry ends at or before * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_end(map, entry, endaddr) \ { \ if (endaddr < entry->end) \ _vm_map_clip_end(map, entry, endaddr); \ } /* * This routine is called only when it is known that * the entry must be split. 
*/ static void _vm_map_clip_end(map, entry, end) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t end; { register vm_map_entry_t new_entry; /* * Create a new entry and insert it AFTER the specified entry */ new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->start = entry->end = end; new_entry->offset += (end - entry->start); vm_map_entry_link(map, entry, new_entry); if (entry->is_a_map || entry->is_sub_map) vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } /* * VM_MAP_RANGE_CHECK: [ internal use only ] * * Asserts that the starting and ending region * addresses fall within the valid range of the map. */ #define VM_MAP_RANGE_CHECK(map, start, end) \ { \ if (start < vm_map_min(map)) \ start = vm_map_min(map); \ if (end > vm_map_max(map)) \ end = vm_map_max(map); \ if (start > end) \ start = end; \ } /* * vm_map_submap: [ kernel use only ] * * Mark the given range as handled by a subordinate map. * * This range must have been created with vm_map_find, * and no other operations may have been performed on this * range prior to calling vm_map_submap. * * Only a limited number of operations can be performed * within this rage after calling vm_map_submap: * vm_fault * [Don't try vm_map_copy!] * * To remove a submapping, one must first remove the * range from the superior map, and then destroy the * submap (if desired). [Better yet, don't try it.] 
*/ int vm_map_submap(map, start, end, submap) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; vm_map_t submap; { vm_map_entry_t entry; register int result = KERN_INVALID_ARGUMENT; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; vm_map_clip_end(map, entry, end); if ((entry->start == start) && (entry->end == end) && (!entry->is_a_map) && (entry->object.vm_object == NULL) && (!entry->copy_on_write)) { entry->is_a_map = FALSE; entry->is_sub_map = TRUE; vm_map_reference(entry->object.sub_map = submap); result = KERN_SUCCESS; } vm_map_unlock(map); return (result); } /* * vm_map_protect: * * Sets the protection of the specified address * region in the target map. If "set_max" is * specified, the maximum protection is to be set; * otherwise, only the current protection is affected. */ int vm_map_protect(map, start, end, new_prot, set_max) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register vm_prot_t new_prot; register boolean_t set_max; { register vm_map_entry_t current; vm_map_entry_t entry; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; /* * Make a first pass to check for protection violations. */ current = entry; while ((current != &map->header) && (current->start < end)) { if (current->is_sub_map) { vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } current = current->next; } /* * Go back and fix up protections. [Note that clipping is not * necessary the second time.] 
*/ current = entry; while ((current != &map->header) && (current->start < end)) { vm_prot_t old_prot; vm_map_clip_end(map, current, end); old_prot = current->protection; if (set_max) current->protection = (current->max_protection = new_prot) & old_prot; else current->protection = new_prot; /* * Update physical map if necessary. Worry about copy-on-write * here -- CHECK THIS XXX */ if (current->protection != old_prot) { #define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \ VM_PROT_ALL) #define max(a,b) ((a) > (b) ? (a) : (b)) if (current->is_a_map) { vm_map_entry_t share_entry; vm_offset_t share_end; vm_map_lock(current->object.share_map); (void) vm_map_lookup_entry( current->object.share_map, current->offset, &share_entry); share_end = current->offset + (current->end - current->start); while ((share_entry != ¤t->object.share_map->header) && (share_entry->start < share_end)) { pmap_protect(map->pmap, (max(share_entry->start, current->offset) - current->offset + current->start), min(share_entry->end, share_end) - current->offset + current->start, current->protection & MASK(share_entry)); share_entry = share_entry->next; } vm_map_unlock(current->object.share_map); } else pmap_protect(map->pmap, current->start, current->end, current->protection & MASK(entry)); #undef max #undef MASK } current = current->next; } vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_inherit: * * Sets the inheritance of the specified address * range in the target map. Inheritance * affects how the map will be shared with * child maps at the time of vm_map_fork. 
*/ int vm_map_inherit(map, start, end, new_inheritance) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register vm_inherit_t new_inheritance; { register vm_map_entry_t entry; vm_map_entry_t temp_entry; switch (new_inheritance) { case VM_INHERIT_NONE: case VM_INHERIT_COPY: case VM_INHERIT_SHARE: break; default: return (KERN_INVALID_ARGUMENT); } vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; vm_map_clip_start(map, entry, start); } else entry = temp_entry->next; while ((entry != &map->header) && (entry->start < end)) { vm_map_clip_end(map, entry, end); entry->inheritance = new_inheritance; entry = entry->next; } vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_pageable: * * Sets the pageability of the specified address * range in the target map. Regions specified * as not pageable require locked-down physical * memory and physical page maps. * * The map must not be locked, but a reference * must remain to the map throughout the call. */ int vm_map_pageable(map, start, end, new_pageable) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register boolean_t new_pageable; { register vm_map_entry_t entry; vm_map_entry_t start_entry; register vm_offset_t failed = 0; int rv; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); /* * Only one pageability change may take place at one time, since * vm_fault assumes it will be called only once for each * wiring/unwiring. Therefore, we have to make sure we're actually * changing the pageability for the entire region. We do so before * making any changes. */ if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } entry = start_entry; /* * Actions are rather different for wiring and unwiring, so we have * two separate cases. */ if (new_pageable) { vm_map_clip_start(map, entry, start); /* * Unwiring. 
First ensure that the range to be unwired is * really wired down and that there are no holes. */ while ((entry != &map->header) && (entry->start < end)) { if (entry->wired_count == 0 || (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end))) { vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } entry = entry->next; } /* * Now decrement the wiring count for each region. If a region * becomes completely unwired, unwire its physical pages and * mappings. */ lock_set_recursive(&map->lock); entry = start_entry; while ((entry != &map->header) && (entry->start < end)) { vm_map_clip_end(map, entry, end); entry->wired_count--; if (entry->wired_count == 0) vm_fault_unwire(map, entry->start, entry->end); entry = entry->next; } lock_clear_recursive(&map->lock); } else { /* * Wiring. We must do this in two passes: * * 1. Holding the write lock, we create any shadow or zero-fill * objects that need to be created. Then we clip each map * entry to the region to be wired and increment its wiring * count. We create objects before clipping the map entries * to avoid object proliferation. * * 2. We downgrade to a read lock, and call vm_fault_wire to * fault in the pages for any newly wired area (wired_count is * 1). * * Downgrading to a read lock for vm_fault_wire avoids a possible * deadlock with another thread that may have faulted on one * of the pages to be wired (it would mark the page busy, * blocking us, then in turn block on the map lock that we * hold). Because of problems in the recursive lock package, * we cannot upgrade to a write lock in vm_map_lookup. Thus, * any actions that require the write lock must be done * beforehand. Because we keep the read lock on the map, the * copy-on-write status of the entries we modify here cannot * change. */ /* * Pass 1. 
*/ while ((entry != &map->header) && (entry->start < end)) { if (entry->wired_count == 0) { /* * Perform actions of vm_map_lookup that need * the write lock on the map: create a shadow * object for a copy-on-write region, or an * object for a zero-fill region. * * We don't have to do this for entries that * point to sharing maps, because we won't * hold the lock on the sharing map. */ if (!entry->is_a_map && !entry->is_sub_map) { if (entry->needs_copy && ((entry->protection & VM_PROT_WRITE) != 0)) { vm_object_shadow(&entry->object.vm_object, &entry->offset, (vm_size_t) (entry->end - entry->start)); entry->needs_copy = FALSE; } else if (entry->object.vm_object == NULL) { entry->object.vm_object = vm_object_allocate((vm_size_t) (entry->end - entry->start)); entry->offset = (vm_offset_t) 0; } } } vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); entry->wired_count++; /* * Check for holes */ if (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end)) { /* * Found one. Object creation actions do not * need to be undone, but the wired counts * need to be restored. */ while (entry != &map->header && entry->end > start) { entry->wired_count--; entry = entry->prev; } vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } entry = entry->next; } /* * Pass 2. */ /* * HACK HACK HACK HACK * * If we are wiring in the kernel map or a submap of it, unlock * the map to avoid deadlocks. We trust that the kernel * threads are well-behaved, and therefore will not do * anything destructive to this region of the map while we * have it unlocked. We cannot trust user threads to do the * same. * * HACK HACK HACK HACK */ if (vm_map_pmap(map) == kernel_pmap) { vm_map_unlock(map); /* trust me ... */ } else { lock_set_recursive(&map->lock); lock_write_to_read(&map->lock); } rv = 0; entry = start_entry; while (entry != &map->header && entry->start < end) { /* * If vm_fault_wire fails for any page we need to undo * what has been done. 
We decrement the wiring count
			 * for those pages which have not yet been wired (now)
			 * and unwire those that have (later).
			 *
			 * XXX this violates the locking protocol on the map,
			 * needs to be fixed.
			 */
			if (rv)
				entry->wired_count--;
			else if (entry->wired_count == 1) {
				rv = vm_fault_wire(map, entry->start, entry->end);
				if (rv) {
					failed = entry->start;
					entry->wired_count--;
				}
			}
			entry = entry->next;
		}

		if (vm_map_pmap(map) == kernel_pmap) {
			vm_map_lock(map);
		} else {
			lock_clear_recursive(&map->lock);
		}
		if (rv) {
			vm_map_unlock(map);
			(void) vm_map_pageable(map, start, failed, TRUE);
			return (rv);
		}
	}
	vm_map_unlock(map);

	return (KERN_SUCCESS);
}

/*
 * vm_map_clean
 *
 * Push any dirty cached pages in the address range to their pager.
 * If syncio is TRUE, dirty pages are written synchronously.
 * If invalidate is TRUE, any cached pages are freed as well.
 *
 * The map is held read-locked for the whole call.  Only objects whose
 * pager has type PG_VNODE are cleaned or invalidated; entries backed by
 * anything else are stepped over without error.
 *
 * Returns:
 *	KERN_SUCCESS		the whole range was walked
 *	KERN_INVALID_ADDRESS	start is not inside any entry, or the range
 *				contains a hole
 *	KERN_INVALID_ARGUMENT	an entry in the range is a submap
 *	KERN_FAILURE		vm_object_page_clean returned zero for a
 *				writable entry
 */
int
vm_map_clean(map, start, end, syncio, invalidate)
	vm_map_t map;
	vm_offset_t start;
	vm_offset_t end;
	boolean_t syncio;
	boolean_t invalidate;
{
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_size_t size;
	vm_object_t object;
	vm_offset_t offset;

	vm_map_lock_read(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (!vm_map_lookup_entry(map, start, &entry)) {
		vm_map_unlock_read(map);
		return (KERN_INVALID_ADDRESS);
	}

	/*
	 * Make a first pass to check for holes.  Nothing is modified in
	 * this pass, so every error return only has to drop the read lock.
	 */
	for (current = entry; current->start < end; current = current->next) {
		if (current->is_sub_map) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ARGUMENT);
		}
		/*
		 * The range continues past this entry, so the next entry
		 * must exist and begin exactly where this one ends.
		 */
		if (end > current->end &&
		    (current->next == &map->header ||
			current->end != current->next->start)) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ADDRESS);
		}
	}

	/*
	 * Make a second pass, cleaning/uncaching pages from the indicated
	 * objects as we go.  "start" is advanced as each piece is handled.
	 */
	for (current = entry; current->start < end; current = current->next) {
		/*
		 * Object offset corresponding to "start", and the number of
		 * bytes of [start, end) that lie inside this entry.
		 */
		offset = current->offset + (start - current->start);
		size = (end <= current->end ? end : current->end) - start;
		if (current->is_a_map || current->is_sub_map) {
			register vm_map_t smap;
			vm_map_entry_t tentry;
			vm_size_t tsize;

			/*
			 * Translate through the share map to the entry that
			 * actually holds the object.  The is_sub_map half of
			 * the test cannot be true here: the first pass has
			 * already returned for any submap entry in range.
			 *
			 * NOTE(review): the lookup result is ignored, so
			 * tentry is used even if "offset" is not mapped in
			 * smap -- confirm this cannot happen.
			 */
			smap = current->object.share_map;
			vm_map_lock_read(smap);
			(void) vm_map_lookup_entry(smap, offset, &tentry);
			/* Do not run past the end of the share-map entry. */
			tsize = tentry->end - offset;
			if (tsize < size)
				size = tsize;
			object = tentry->object.vm_object;
			offset = tentry->offset + (offset - tentry->start);
			vm_map_unlock_read(smap);
		} else {
			object = current->object.vm_object;
		}
		if (object && (object->pager != NULL) &&
		    (object->pager->pg_type == PG_VNODE)) {
			vm_object_lock(object);
			/*
			 * Flush pages if writing is allowed. XXX should we continue
			 * on an error?
			 */
			if ((current->protection & VM_PROT_WRITE) &&
			    !vm_object_page_clean(object, offset, offset + size,
				syncio, FALSE)) {
				vm_object_unlock(object);
				vm_map_unlock_read(map);
				return (KERN_FAILURE);
			}
			if (invalidate)
				vm_object_page_remove(object, offset, offset + size);
			vm_object_unlock(object);
		}
		/*
		 * NOTE(review): when size was shortened to tsize above, start
		 * stops short of current->end but the loop still moves on to
		 * current->next, so the remainder of this entry looks like
		 * it is skipped -- confirm.
		 */
		start += size;
	}

	vm_map_unlock_read(map);
	return (KERN_SUCCESS);
}

/*
 * vm_map_entry_unwire:	[ internal use only ]
 *
 *	Make the region specified by this entry pageable.
 *
 *	The map in question should be locked.
 *	[This is the reason for this routine's existence.]
 *
 *	Calls vm_fault_unwire over [entry->start, entry->end) and then
 *	forces the entry's wired_count to zero, whatever it was before.
 */
void
vm_map_entry_unwire(map, entry)
	vm_map_t map;
	register vm_map_entry_t entry;
{
	vm_fault_unwire(map, entry->start, entry->end);
	entry->wired_count = 0;
}

/*
 * vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 *
 *	In order: unwires the entry if it is wired, unlinks it from the
 *	map's entry list, subtracts its length from map->size, calls
 *	vm_map_deallocate (share map / submap entries) or
 *	vm_object_deallocate (object entries) on what it referred to, and
 *	finally disposes of the entry itself.
 */
void
vm_map_entry_delete(map, entry)
	register vm_map_t map;
	register vm_map_entry_t entry;
{
	if (entry->wired_count != 0)
		vm_map_entry_unwire(map, entry);

	vm_map_entry_unlink(map, entry);
	map->size -= entry->end - entry->start;

	/* object.share_map and object.vm_object are members of one union. */
	if (entry->is_a_map || entry->is_sub_map)
		vm_map_deallocate(entry->object.share_map);
	else
		vm_object_deallocate(entry->object.vm_object);

	vm_map_entry_dispose(map, entry);
}

/*
 * vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target
 *	map.
* * When called with a sharing map, removes pages from * that region from all physical maps. */ int vm_map_delete(map, start, end) register vm_map_t map; vm_offset_t start; register vm_offset_t end; { register vm_map_entry_t entry; vm_map_entry_t first_entry; /* * Find the start of the region, and clip it */ if (!vm_map_lookup_entry(map, start, &first_entry)) entry = first_entry->next; else { entry = first_entry; vm_map_clip_start(map, entry, start); /* * Fix the lookup hint now, rather than each time though the * loop. */ SAVE_HINT(map, entry->prev); } /* * Save the free space hint */ if (map->first_free->start >= start) map->first_free = entry->prev; /* * Step through all entries in this region */ while ((entry != &map->header) && (entry->start < end)) { vm_map_entry_t next; register vm_offset_t s, e; register vm_object_t object; vm_map_clip_end(map, entry, end); next = entry->next; s = entry->start; e = entry->end; /* * Unwire before removing addresses from the pmap; otherwise, * unwiring will put the entries back in the pmap. */ object = entry->object.vm_object; if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); /* * If this is a sharing map, we must remove *all* references * to this data, since we can't find all of the physical maps * which are sharing it. */ if (object == kernel_object || object == kmem_object) vm_object_page_remove(object, entry->offset, entry->offset + (e - s)); else if (!map->is_main_map) vm_object_pmap_remove(object, entry->offset, entry->offset + (e - s)); else pmap_remove(map->pmap, s, e); /* * Delete the entry (which may delete the object) only after * removing all pmap entries pointing to its pages. * (Otherwise, its page frames may be reallocated, and any * modify bits will be set in the wrong object!) */ vm_map_entry_delete(map, entry); entry = next; } return (KERN_SUCCESS); } /* * vm_map_remove: * * Remove the given address range from the target map. * This is the exported form of vm_map_delete. 
*/
int
vm_map_remove(map, start, end)
	register vm_map_t map;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register int result, s = 0;

	/*
	 * For kmem_map only, the operation is bracketed by
	 * splhigh()/splx().  NOTE(review): presumably because kmem_map is
	 * also manipulated at interrupt level -- confirm.
	 */
	if (map == kmem_map)
		s = splhigh();

	/* Take the write lock and hand the checked range to vm_map_delete. */
	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end);
	vm_map_unlock(map);

	if (map == kmem_map)
		splx(s);

	return (result);
}

/*
 * vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 *
 *	Returns TRUE only if [start, end) is covered by entries with no
 *	gaps and every one of them has all bits of "protection" set in
 *	its current protection; FALSE otherwise.
 *
 *	No locking is done here; vm_map_copy in this file calls it with
 *	the map already locked.
 */
boolean_t
vm_map_check_protection(map, start, end, protection)
	register vm_map_t map;
	register vm_offset_t start;
	register vm_offset_t end;
	register vm_prot_t protection;
{
	register vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	/* The first address itself must be mapped. */
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		return (FALSE);
	}
	entry = tmp_entry;

	/* "start" is advanced to the end of each entry that passes. */
	while (start < end) {
		/* Ran off the end of the entry list before reaching "end". */
		if (entry == &map->header) {
			return (FALSE);
		}
		/*
		 * No holes allowed!
		 */

		if (start < entry->start) {
			return (FALSE);
		}
		/*
		 * Check protection associated with entry.
		 */

		if ((entry->protection & protection) != protection) {
			return (FALSE);
		}
		/* go to next entry */

		start = entry->end;
		entry = entry->next;
	}
	return (TRUE);
}

/*
 * vm_map_copy_entry:
 *
 *	Copies the contents of the source entry to the destination
 *	entry.  The entries *must* be aligned properly.
 *
 *	Does nothing if either entry is a submap.  For an unwired source
 *	the copy is virtual: the source is write-protected, the
 *	destination is given the result of vm_object_copy, and both
 *	entries are marked copy_on_write.  For a wired source the work is
 *	handed to vm_fault_copy_entry.
 */
void
vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
	vm_map_t src_map, dst_map;
	register vm_map_entry_t src_entry, dst_entry;
{
	vm_object_t temp_object;

	if (src_entry->is_sub_map || dst_entry->is_sub_map)
		return;

	/* Diagnostic only: the copy still proceeds after the message. */
	if (dst_entry->object.vm_object != NULL &&
	    (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
		printf("vm_map_copy_entry: copying over permanent data!\n");

	/*
	 * If our destination map was wired down, unwire it now.
	 */

	if (dst_entry->wired_count != 0)
		vm_map_entry_unwire(dst_map, dst_entry);

	/*
	 * If we're dealing with a sharing map, we must remove the destination
	 * pages from all maps (since we cannot know which maps this sharing
	 * map belongs in).
	 */

	if (dst_map->is_main_map)
		pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
	else
		vm_object_pmap_remove(dst_entry->object.vm_object,
		    dst_entry->offset,
		    dst_entry->offset +
		    (dst_entry->end - dst_entry->start));

	if (src_entry->wired_count == 0) {

		boolean_t src_needs_copy;

		/*
		 * If the source entry is marked needs_copy, it is already
		 * write-protected.
		 */
		if (!src_entry->needs_copy) {

			boolean_t su;

			/*
			 * If the source entry has only one mapping, we can
			 * just protect the virtual address range.
			 *
			 * A main map always counts as single-use; a sharing
			 * map does only while its ref_count is 1.
			 */
			if (!(su = src_map->is_main_map)) {
				simple_lock(&src_map->ref_lock);
				su = (src_map->ref_count == 1);
				simple_unlock(&src_map->ref_lock);
			}
			if (su) {
				pmap_protect(src_map->pmap,
				    src_entry->start,
				    src_entry->end,
				    src_entry->protection & ~VM_PROT_WRITE);
			} else {
				vm_object_pmap_copy(src_entry->object.vm_object,
				    src_entry->offset,
				    src_entry->offset + (src_entry->end
					- src_entry->start));
			}
		}
		/*
		 * Make a copy of the object.  The destination's previous
		 * object is remembered in temp_object because vm_object_copy
		 * is handed the address of dst_entry's object and offset
		 * fields to fill in.
		 */
		temp_object = dst_entry->object.vm_object;
		vm_object_copy(src_entry->object.vm_object,
		    src_entry->offset,
		    (vm_size_t) (src_entry->end -
			src_entry->start),
		    &dst_entry->object.vm_object,
		    &dst_entry->offset,
		    &src_needs_copy);
		/*
		 * If we didn't get a copy-object now, mark the source map
		 * entry so that a shadow will be created to hold its changed
		 * pages.
		 */
		if (src_needs_copy)
			src_entry->needs_copy = TRUE;

		/*
		 * The destination always needs to have a shadow created.
		 */
		dst_entry->needs_copy = TRUE;

		/*
		 * Mark the entries copy-on-write, so that write-enabling the
		 * entry won't make copy-on-write pages writable.
		 */
		src_entry->copy_on_write = TRUE;
		dst_entry->copy_on_write = TRUE;
		/*
		 * Get rid of the old object.  temp_object may be NULL here
		 * (vmspace_fork clears the destination's object before
		 * calling this routine).
		 */
		vm_object_deallocate(temp_object);

		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
		    dst_entry->end - dst_entry->start, src_entry->start);
	} else {
		/*
		 * Of course, wired down pages can't be set copy-on-write.
		 * Cause wired pages to be copied into the new map by
		 * simulating faults (the new pages are pageable)
		 */
		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
	}
}

/*
 * vm_map_copy:
 *
 *	Perform a virtual memory copy from the source
 *	address map/range to the destination map/range.
 *
 *	If src_destroy or dst_alloc is requested,
 *	the source and destination regions should be
 *	disjoint, not only in the top-level map, but
 *	in the sharing maps as well.  [The best way
 *	to guarantee this is to use a new intermediate
 *	map to make copies.  This also reduces map
 *	fragmentation.]
 *
 *	Returns KERN_NO_SPACE if either range wraps around the address
 *	space, KERN_PROTECTION_FAILURE if the source is not entirely
 *	readable or the destination not entirely writable, the
 *	vm_map_insert error if dst_alloc fails, and KERN_SUCCESS
 *	otherwise.  The protection checks and the dst_alloc insert are
 *	made only when both maps are main maps.
 */
int
vm_map_copy(dst_map, src_map,
    dst_addr, len, src_addr,
    dst_alloc, src_destroy)
	vm_map_t dst_map;
	vm_map_t src_map;
	vm_offset_t dst_addr;
	vm_size_t len;
	vm_offset_t src_addr;
	boolean_t dst_alloc;
	boolean_t src_destroy;
{
	register
	vm_map_entry_t src_entry;
	register
	vm_map_entry_t dst_entry;
	vm_map_entry_t tmp_entry;
	vm_offset_t src_start;
	vm_offset_t src_end;
	vm_offset_t dst_start;
	vm_offset_t dst_end;
	vm_offset_t src_clip;
	vm_offset_t dst_clip;
	int result;
	boolean_t old_src_destroy;

	/*
	 * XXX While we figure out why src_destroy screws up, we'll do it by
	 * explicitly vm_map_delete'ing at the end.
	 *
	 * With src_destroy forced to FALSE here, the two src_destroy tests
	 * further down never fire; old_src_destroy drives the delete at
	 * the Return label instead.
	 */
	old_src_destroy = src_destroy;
	src_destroy = FALSE;

	/*
	 * Compute start and end of region in both maps
	 */

	src_start = src_addr;
	src_end = src_start + len;
	dst_start = dst_addr;
	dst_end = dst_start + len;

	/*
	 * Check that the region can exist in both source and destination.
	 * (An end below its start means the addition wrapped around.)
	 */

	if ((dst_end < dst_start) || (src_end < src_start))
		return (KERN_NO_SPACE);

	/*
	 * Lock the maps in question -- we avoid deadlock by ordering lock
	 * acquisition by map value
	 *
	 * NOTE(review): the map pointers are compared after a cast to int;
	 * where pointers are wider than int, two distinct maps could
	 * compare equal and the ordering would no longer be consistent --
	 * confirm target pointer width.
	 */

	if (src_map == dst_map) {
		vm_map_lock(src_map);
	} else if ((int) src_map < (int) dst_map) {
		vm_map_lock(src_map);
		vm_map_lock(dst_map);
	} else {
		vm_map_lock(dst_map);
		vm_map_lock(src_map);
	}

	result = KERN_SUCCESS;

	/*
	 * Check protections... source must be completely readable and
	 * destination must be completely writable.  [Note that if we're
	 * allocating the destination region, we don't have to worry about
	 * protection, but instead about whether the region exists.]
	 */

	if (src_map->is_main_map && dst_map->is_main_map) {
		if (!vm_map_check_protection(src_map, src_start, src_end,
			VM_PROT_READ)) {
			result = KERN_PROTECTION_FAILURE;
			goto Return;
		}
		if (dst_alloc) {
			/* XXX Consider making this a vm_map_find instead */
			if ((result = vm_map_insert(dst_map, NULL,
				    (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
				goto Return;
		} else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
			VM_PROT_WRITE)) {
			result = KERN_PROTECTION_FAILURE;
			goto Return;
		}
	}
	/*
	 * Find the start entries and clip.
	 *
	 * Note that checking protection asserts that the lookup cannot fail.
	 *
	 * Also note that we wait to do the second lookup until we have done the
	 * first clip, as the clip may affect which entry we get!
	 */

	(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
	src_entry = tmp_entry;
	vm_map_clip_start(src_map, src_entry, src_start);

	(void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
	dst_entry = tmp_entry;
	vm_map_clip_start(dst_map, dst_entry, dst_start);

	/*
	 * If both source and destination entries are the same, retry the
	 * first lookup, as it may have changed.
	 */

	if (src_entry == dst_entry) {
		(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
		src_entry = tmp_entry;
	}
	/*
	 * If source and destination entries are still the same, a null copy
	 * is being performed.
	 */

	if (src_entry == dst_entry)
		goto Return;

	/*
	 * Go through entries until we get to the end of the region.
	 */

	while (src_start < src_end) {
		/*
		 * Clip the entries to the endpoint of the entire region.
		 */

		vm_map_clip_end(src_map, src_entry, src_end);
		vm_map_clip_end(dst_map, dst_entry, dst_end);

		/*
		 * Clip each entry to the endpoint of the other entry.
		 */

		src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
		vm_map_clip_end(src_map, src_entry, src_clip);

		dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
		vm_map_clip_end(dst_map, dst_entry, dst_clip);

		/*
		 * Both entries now match in size and relative endpoints.
		 *
		 * If both entries refer to a VM object, we can deal with them
		 * now.
		 */

		if (!src_entry->is_a_map && !dst_entry->is_a_map) {
			vm_map_copy_entry(src_map, dst_map, src_entry,
			    dst_entry);
		} else {
			register vm_map_t new_dst_map;
			vm_offset_t new_dst_start;
			vm_size_t new_size;
			vm_map_t new_src_map;
			vm_offset_t new_src_start;

			/*
			 * We have to follow at least one sharing map.
			 *
			 * For each side, either descend into its sharing map
			 * or stay in the current map.  Staying means the
			 * recursive call below will lock this map again, so
			 * its lock is marked recursive first.
			 */

			new_size = (dst_entry->end - dst_entry->start);

			if (src_entry->is_a_map) {
				new_src_map = src_entry->object.share_map;
				new_src_start = src_entry->offset;
			} else {
				new_src_map = src_map;
				new_src_start = src_entry->start;
				lock_set_recursive(&src_map->lock);
			}

			if (dst_entry->is_a_map) {
				vm_offset_t new_dst_end;

				new_dst_map = dst_entry->object.share_map;
				new_dst_start = dst_entry->offset;

				/*
				 * Since the destination sharing entries will
				 * be merely deallocated, we can do that now,
				 * and replace the region with a null object.
				 * [This prevents splitting the source map to
				 * match the form of the destination map.]
				 * Note that we can only do so if the source
				 * and destination do not overlap.
				 */

				new_dst_end = new_dst_start + new_size;

				if (new_dst_map != new_src_map) {
					vm_map_lock(new_dst_map);
					(void) vm_map_delete(new_dst_map,
					    new_dst_start,
					    new_dst_end);
					(void) vm_map_insert(new_dst_map,
					    NULL,
					    (vm_offset_t) 0,
					    new_dst_start,
					    new_dst_end);
					vm_map_unlock(new_dst_map);
				}
			} else {
				new_dst_map = dst_map;
				new_dst_start = dst_entry->start;
				lock_set_recursive(&dst_map->lock);
			}

			/*
			 * Recursively copy the sharing map.  The result of
			 * the nested copy is discarded.
			 */

			(void) vm_map_copy(new_dst_map, new_src_map,
			    new_dst_start, new_size, new_src_start,
			    FALSE, FALSE);

			/* Undo the recursive marking applied above. */
			if (dst_map == new_dst_map)
				lock_clear_recursive(&dst_map->lock);
			if (src_map == new_src_map)
				lock_clear_recursive(&src_map->lock);
		}

		/*
		 * Update variables for next pass through the loop.
		 */

		src_start = src_entry->end;
		src_entry = src_entry->next;
		dst_start = dst_entry->end;
		dst_entry = dst_entry->next;

		/*
		 * If the source is to be destroyed, here is the place to do
		 * it.
		 */

		if (src_destroy && src_map->is_main_map &&
		    dst_map->is_main_map)
			vm_map_entry_delete(src_map, src_entry->prev);
	}

	/*
	 * Update the physical maps as appropriate
	 */

	if (src_map->is_main_map && dst_map->is_main_map) {
		if (src_destroy)
			pmap_remove(src_map->pmap, src_addr, src_addr + len);
	}
	/*
	 * Unlock the maps
	 *
	 * NOTE(review): every "goto Return" above lands here too, so with
	 * old_src_destroy set the source range is deleted even when result
	 * carries a failure code -- confirm this is intended.
	 */

Return:;

	if (old_src_destroy)
		vm_map_delete(src_map, src_addr, src_addr + len);

	vm_map_unlock(src_map);
	if (src_map != dst_map)
		vm_map_unlock(dst_map);

	return (result);
}

/*
 * vmspace_fork:
 * Create a new process vmspace structure and vm_map
 * based on those of an existing process.  The new map
 * is based on the old map, according to the inheritance
 * values on the regions in that map.
 *
 * The source map must not be locked.
*/
struct vmspace *
vmspace_fork(vm1)
	register struct vmspace *vm1;
{
	struct vmspace *child;
	vm_map_t parent_map = &vm1->vm_map;
	vm_map_t child_map;
	vm_map_entry_t parent_ent;
	vm_map_entry_t child_ent;
	pmap_t child_pmap;

	/* The parent map stays locked until the walk below is finished. */
	vm_map_lock(parent_map);

	/*
	 * Build the child vmspace with the parent's address bounds and
	 * pageability, then copy across everything in the parent
	 * structure from vm_startcopy to its end.
	 */
	child = vmspace_alloc(parent_map->min_offset, parent_map->max_offset,
	    parent_map->entries_pageable);
	bcopy(&vm1->vm_startcopy, &child->vm_startcopy,
	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
	child_pmap = &child->vm_pmap;	/* XXX */
	child_map = &child->vm_map;	/* XXX */

	/*
	 * Visit each parent entry in address order and act on its
	 * inheritance value.
	 */
	for (parent_ent = parent_map->header.next;
	    parent_ent != &parent_map->header;
	    parent_ent = parent_ent->next) {

		if (parent_ent->is_sub_map)
			panic("vm_map_fork: encountered a submap");

		switch (parent_ent->inheritance) {
		case VM_INHERIT_NONE:
			/* The child gets nothing for this region. */
			break;

		case VM_INHERIT_SHARE:
			/*
			 * Parent and child must both go through a sharing
			 * map.  Convert the parent entry first if it does
			 * not refer to one yet.
			 */
			if (!parent_ent->is_a_map) {
				vm_map_t shmap;
				vm_map_entry_t shent;

				/*
				 * New sharing map spanning exactly this
				 * entry's addresses.
				 */
				shmap = vm_map_create(NULL,
				    parent_ent->start,
				    parent_ent->end,
				    TRUE);
				shmap->is_main_map = FALSE;

				/*
				 * Its single entry is a copy of the parent
				 * entry with the wiring dropped.
				 */
				shent = vm_map_entry_create(shmap);
				*shent = *parent_ent;
				shent->wired_count = 0;

				/*
				 * Link that entry into the sharing map.
				 */
				vm_map_entry_link(shmap,
				    shmap->header.prev,
				    shent);

				/*
				 * Re-point the parent entry at the sharing
				 * map.
				 */
				parent_ent->is_a_map = TRUE;
				parent_ent->object.share_map = shmap;
				parent_ent->offset = parent_ent->start;
			}

			/*
			 * The child entry is a clone of the (now sharing)
			 * parent entry and takes its own reference on the
			 * sharing map.
			 */
			child_ent = vm_map_entry_create(child_map);
			*child_ent = *parent_ent;
			child_ent->wired_count = 0;
			vm_map_reference(child_ent->object.share_map);

			/*
			 * Entries are visited in order, so the clone always
			 * belongs at the tail of the child map.
			 */
			vm_map_entry_link(child_map, child_map->header.prev,
			    child_ent);

			/*
			 * Bring the child's physical map up to date.
			 */
			pmap_copy(child_map->pmap, parent_map->pmap,
			    child_ent->start,
			    (parent_ent->end - parent_ent->start),
			    parent_ent->start);
			break;

		case VM_INHERIT_COPY:
			/*
			 * Clone the entry with no object, no wiring and no
			 * sharing map, link it at the tail of the child map,
			 * then fill it in with a virtual copy.
			 */
			child_ent = vm_map_entry_create(child_map);
			*child_ent = *parent_ent;
			child_ent->wired_count = 0;
			child_ent->object.vm_object = NULL;
			child_ent->is_a_map = FALSE;
			vm_map_entry_link(child_map, child_map->header.prev,
			    child_ent);

			if (!parent_ent->is_a_map) {
				/* Plain object entry: copy it directly. */
				vm_map_copy_entry(parent_map, child_map,
				    parent_ent, child_ent);
			} else {
				int rv;

				/*
				 * The parent region lives in a sharing map:
				 * copy out of that map into the child.
				 */
				rv = vm_map_copy(child_map,
				    parent_ent->object.share_map,
				    child_ent->start,
				    (vm_size_t) (child_ent->end -
					child_ent->start),
				    parent_ent->offset,
				    FALSE, FALSE);
				if (rv != KERN_SUCCESS)
					printf("vm_map_fork: copy in share_map region failed\n");
			}
			break;
		}
	}

	child_map->size = parent_map->size;
	vm_map_unlock(parent_map);

	return (child);
}

/*
 * vm_map_lookup:
 *
 *	Finds the VM object, offset, and
 *	protection for a given virtual address in the
 *	specified map, assuming a page fault of the
 *	type specified.
 *
 *	Leaves the map in question locked for read; return
 *	values are guaranteed until a vm_map_lookup_done
 *	call is performed.  Note that the map argument
 *	is in/out; the returned map must be used in
 *	the call to vm_map_lookup_done.
 *
 *	A handle (out_entry) is returned for use in
 *	vm_map_lookup_done, to make that fast.
 *
 *	If a lookup is requested with "write protection"
 *	specified, the map may be changed to perform virtual
 *	copying operations, although the data referenced will
 *	remain the same.
*/ int vm_map_lookup(var_map, vaddr, fault_type, out_entry, object, offset, out_prot, wired, single_use) vm_map_t *var_map; /* IN/OUT */ register vm_offset_t vaddr; register vm_prot_t fault_type; vm_map_entry_t *out_entry; /* OUT */ vm_object_t *object; /* OUT */ vm_offset_t *offset; /* OUT */ vm_prot_t *out_prot; /* OUT */ boolean_t *wired; /* OUT */ boolean_t *single_use; /* OUT */ { vm_map_t share_map; vm_offset_t share_offset; register vm_map_entry_t entry; register vm_map_t map = *var_map; register vm_prot_t prot; register boolean_t su; RetryLookup:; /* * Lookup the faulting address. */ vm_map_lock_read(map); #define RETURN(why) \ { \ vm_map_unlock_read(map); \ return(why); \ } /* * If the map has an interesting hint, try it before calling full * blown lookup routine. */ simple_lock(&map->hint_lock); entry = map->hint; simple_unlock(&map->hint_lock); *out_entry = entry; if ((entry == &map->header) || (vaddr < entry->start) || (vaddr >= entry->end)) { vm_map_entry_t tmp_entry; /* * Entry was either not a valid hint, or the vaddr was not * contained in the entry, so do a full lookup. */ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); entry = tmp_entry; *out_entry = entry; } /* * Handle submaps. */ if (entry->is_sub_map) { vm_map_t old_map = map; *var_map = map = entry->object.sub_map; vm_map_unlock_read(old_map); goto RetryLookup; } /* * Check whether this task is allowed to have this page. */ prot = entry->protection; if ((fault_type & (prot)) != fault_type) RETURN(KERN_PROTECTION_FAILURE); /* * If this page is not pageable, we have to get it for all possible * accesses. */ *wired = (entry->wired_count != 0); if (*wired) prot = fault_type = entry->protection; /* * If we don't already have a VM object, track it down. */ su = !entry->is_a_map; if (su) { share_map = map; share_offset = vaddr; } else { vm_map_entry_t share_entry; /* * Compute the sharing map, and offset into it. 
*/ share_map = entry->object.share_map; share_offset = (vaddr - entry->start) + entry->offset; /* * Look for the backing store object and offset */ vm_map_lock_read(share_map); if (!vm_map_lookup_entry(share_map, share_offset, &share_entry)) { vm_map_unlock_read(share_map); RETURN(KERN_INVALID_ADDRESS); } entry = share_entry; } /* * If the entry was copy-on-write, we either ... */ if (entry->needs_copy) { /* * If we want to write the page, we may as well handle that * now since we've got the sharing map locked. * * If we don't need to write the page, we just demote the * permissions allowed. */ if (fault_type & VM_PROT_WRITE) { /* * Make a new object, and place it in the object * chain. Note that no new references have appeared * -- one just moved from the share map to the new * object. */ if (lock_read_to_write(&share_map->lock)) { if (share_map != map) vm_map_unlock_read(map); goto RetryLookup; } vm_object_shadow( &entry->object.vm_object, &entry->offset, (vm_size_t) (entry->end - entry->start)); entry->needs_copy = FALSE; lock_write_to_read(&share_map->lock); } else { /* * We're attempting to read a copy-on-write page -- * don't allow writes. */ prot &= (~VM_PROT_WRITE); } } /* * Create an object if necessary. */ if (entry->object.vm_object == NULL) { if (lock_read_to_write(&share_map->lock)) { if (share_map != map) vm_map_unlock_read(map); goto RetryLookup; } entry->object.vm_object = vm_object_allocate( (vm_size_t) (entry->end - entry->start)); entry->offset = 0; lock_write_to_read(&share_map->lock); } /* * Return the object/offset from this entry. If the entry was * copy-on-write or empty, it has been fixed up. */ *offset = (share_offset - entry->start) + entry->offset; *object = entry->object.vm_object; /* * Return whether this is the only map sharing this data. 
*/ if (!su) { simple_lock(&share_map->ref_lock); su = (share_map->ref_count == 1); simple_unlock(&share_map->ref_lock); } *out_prot = prot; *single_use = su; return (KERN_SUCCESS); #undef RETURN } /* * vm_map_lookup_done: * * Releases locks acquired by a vm_map_lookup * (according to the handle returned by that lookup). */ void vm_map_lookup_done(map, entry) register vm_map_t map; vm_map_entry_t entry; { /* * If this entry references a map, unlock it first. */ if (entry->is_a_map) vm_map_unlock_read(entry->object.share_map); /* * Unlock the main-level map */ vm_map_unlock_read(map); } /* * Routine: vm_map_simplify * Purpose: * Attempt to simplify the map representation in * the vicinity of the given starting address. * Note: * This routine is intended primarily to keep the * kernel maps more compact -- they generally don't * benefit from the "expand a map entry" technology * at allocation time because the adjacent entry * is often wired down. */ void vm_map_simplify(map, start) vm_map_t map; vm_offset_t start; { vm_map_entry_t this_entry; vm_map_entry_t prev_entry; vm_map_lock(map); if ( (vm_map_lookup_entry(map, start, &this_entry)) && ((prev_entry = this_entry->prev) != &map->header) && (prev_entry->end == start) && (map->is_main_map) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (this_entry->is_a_map == FALSE) && (this_entry->is_sub_map == FALSE) && (prev_entry->inheritance == this_entry->inheritance) && (prev_entry->protection == this_entry->protection) && (prev_entry->max_protection == this_entry->max_protection) && (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->copy_on_write == this_entry->copy_on_write) && (prev_entry->needs_copy == this_entry->needs_copy) && (prev_entry->object.vm_object == this_entry->object.vm_object) && ((prev_entry->offset + (prev_entry->end - prev_entry->start)) == this_entry->offset) ) { if (map->first_free == this_entry) map->first_free = prev_entry; if 
(!this_entry->object.vm_object->paging_in_progress) { SAVE_HINT(map, prev_entry); vm_map_entry_unlink(map, this_entry); prev_entry->end = this_entry->end; vm_object_deallocate(this_entry->object.vm_object); vm_map_entry_dispose(map, this_entry); } } vm_map_unlock(map); } /* * vm_map_print: [ debug ] */ void vm_map_print(map, full) register vm_map_t map; boolean_t full; { register vm_map_entry_t entry; extern int indent; iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n", (map->is_main_map ? "Task" : "Share"), (int) map, (int) (map->pmap), map->ref_count, map->nentries, map->timestamp); if (!full && indent) return; indent += 2; for (entry = map->header.next; entry != &map->header; entry = entry->next) { iprintf("map entry 0x%x: start=0x%x, end=0x%x, ", (int) entry, (int) entry->start, (int) entry->end); if (map->is_main_map) { static char *inheritance_name[4] = {"share", "copy", "none", "donate_copy"}; printf("prot=%x/%x/%s, ", entry->protection, entry->max_protection, inheritance_name[entry->inheritance]); if (entry->wired_count != 0) printf("wired, "); } if (entry->is_a_map || entry->is_sub_map) { printf("share=0x%x, offset=0x%x\n", (int) entry->object.share_map, (int) entry->offset); if ((entry->prev == &map->header) || (!entry->prev->is_a_map) || (entry->prev->object.share_map != entry->object.share_map)) { indent += 2; vm_map_print(entry->object.share_map, full); indent -= 2; } } else { printf("object=0x%x, offset=0x%x", (int) entry->object.vm_object, (int) entry->offset); if (entry->copy_on_write) printf(", copy (%s)", entry->needs_copy ? "needed" : "done"); printf("\n"); if ((entry->prev == &map->header) || (entry->prev->is_a_map) || (entry->prev->object.vm_object != entry->object.vm_object)) { indent += 2; vm_object_print(entry->object.vm_object, full); indent -= 2; } } } indent -= 2; }