From a85e535e696138054a7b461ea8072f0d76c5b668 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 20 Apr 2006 22:56:54 +0000 Subject: [PATCH] Major revisions to documentation. Rewrite VM assignment entirely, adding implementation order and terminology suggested by Godmar Back. Make the terminology used elsewhere agree. Rename "Tour" to "Reference Guide" and move to appendix. Add "Virtual Addresses" and "Page Tables" sections to reference guide. Rename "References" to "Bibliography" (to avoid confusion) and move to end of documentation. Get rid of option to eagerly load partial pages. Get rid of fullpage.lds linker script, so rename the normal one from normal.lds to user.lds. Break mmu.h into vaddr.h and pte.h. Add "mcat" to examples as a simpler example of mmap. NB: Any webpages that reference chapters in the documentation will need to be updated, because the chapters have been reordered. --- doc/44bsd.texi | 2 +- doc/Makefile | 4 +- doc/{references.texi => bibliography.texi} | 4 +- doc/debug.texi | 16 +- doc/devel.texi | 2 +- doc/doc.texi | 2 +- doc/filesys.texi | 2 +- doc/intro.texi | 8 +- doc/pintos.texi | 9 +- doc/{tour.texi => reference.texi} | 583 ++++++++++++-- doc/standards.texi | 4 +- doc/threads.texi | 25 +- doc/userprog.texi | 166 ++-- doc/vm.texi | 849 +++++++++++---------- doc/vm.tmpl | 40 +- solutions/p1.patch | 2 +- solutions/p2.patch | 4 +- solutions/p3.patch | 14 +- solutions/p4.patch | 14 +- src/Makefile.userprog | 5 +- src/devices/vga.c | 2 +- src/examples/Makefile | 3 +- src/examples/mcat.c | 45 ++ src/filesys/fsutil.c | 2 +- src/lib/user/fullpage.lds | 50 -- src/lib/user/{normal.lds => user.lds} | 0 src/threads/init.c | 2 +- src/threads/interrupt.c | 2 +- src/threads/malloc.c | 2 +- src/threads/mmu.h | 214 ------ src/threads/palloc.c | 2 +- src/threads/pte.h | 107 +++ src/threads/thread.c | 2 +- src/threads/vaddr.h | 89 +++ src/userprog/gdt.c | 2 +- src/userprog/pagedir.c | 26 +- src/userprog/process.c | 2 +- src/userprog/tss.c | 2 +- 38 files changed, 1348 insertions(+), 961 deletions(-) rename doc/{references.texi => bibliography.texi} (97%) rename doc/{tour.texi => reference.texi} (78%) create mode 100644 src/examples/mcat.c delete mode 100644 src/lib/user/fullpage.lds rename src/lib/user/{normal.lds => user.lds} (100%) delete mode 100644 src/threads/mmu.h create mode 100644 src/threads/pte.h create mode 100644 src/threads/vaddr.h diff --git a/doc/44bsd.texi b/doc/44bsd.texi index e442326..8cff146 100644 --- a/doc/44bsd.texi +++ b/doc/44bsd.texi @@ -1,4 +1,4 @@ -@node 4.4BSD Scheduler, Coding Standards, References, Top +@node 4.4BSD Scheduler @appendix 4.4@acronym{BSD} Scheduler @iftex diff --git a/doc/Makefile b/doc/Makefile index eb5c68e..53f232e 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,5 +1,5 @@ -TEXIS = pintos.texi intro.texi tour.texi threads.texi userprog.texi \ -vm.texi filesys.texi references.texi standards.texi doc.texi \ +TEXIS = pintos.texi intro.texi threads.texi userprog.texi vm.texi \ +filesys.texi bibliography.texi reference.texi standards.texi doc.texi \ sample.tmpl.texi devel.texi debug.texi 44bsd.texi all: pintos.html pintos.info pintos.dvi pintos.ps pintos.pdf diff --git a/doc/references.texi b/doc/bibliography.texi similarity index 97% rename from doc/references.texi rename to doc/bibliography.texi index 2a3b8d2..360aade 100644 --- a/doc/references.texi +++ b/doc/bibliography.texi @@ -1,5 +1,5 @@ -@node References, 4.4BSD Scheduler, Project 4--File Systems, Top -@appendix References +@node Bibliography +@unnumbered Bibliography @macro bibdfn{cite} @noindent @anchor{\cite\} diff --git a/doc/debug.texi b/doc/debug.texi index b478afd..8a52be5 100644 --- a/doc/debug.texi +++ b/doc/debug.texi @@ -1,4 +1,4 @@ -@node Debugging Tools, Development Tools, Project Documentation, Top +@node Debugging Tools @appendix Debugging Tools Many tools lie at your disposal for debugging Pintos. This appendix @@ -340,8 +340,8 @@ Example: @code{dumplist all_list thread all_elem} prints all elements of Shows the backtrace of @var{thread}, which is a pointer to the @struct{thread} of the thread whose backtrace it should show. For the current thread, this is identical to the @code{bt} (backtrace) command. -It also works for threads that are suspended in @func{schedule}, -provided you know where their kernel stack page is located. +It also works for any thread suspended in @func{schedule}, +provided you know where its kernel stack page is located. @end deffn @deffn {GDB Macro} btthreadlist list element @@ -395,8 +395,8 @@ break point in @func{page_fault} in @file{exception.c}, which you will need to modify accordingly. In Project 3, a page fault in a user process no longer automatically -leads to the termination of a process. Rather, you may have to page in -the page containing the address the process was trying to access, either +leads to the termination of a process. Instead, it may require reading in +data for the page the process was trying to access, either because it was swapped out or because this is the first time it's accessed. In either case, you will reach @func{page_fault} and need to take the appropriate action there. @@ -710,13 +710,13 @@ above, a good place to start adding @func{printf}s is @section Tips The page allocator in @file{threads/palloc.c} and the block allocator in -@file{threads/malloc.c} both clear all the bytes in pages and blocks to -@t{0xcc} when they are freed. Thus, if you see an attempt to +@file{threads/malloc.c} clear all the bytes in memory to +@t{0xcc} at time of free. Thus, if you see an attempt to dereference a pointer like @t{0xcccccccc}, or some other reference to @t{0xcc}, there's a good chance you're trying to reuse a page that's already been freed. Also, byte @t{0xcc} is the CPU opcode for ``invoke interrupt 3,'' so if you see an error like @code{Interrupt 0x03 (#BP -Breakpoint Exception)}, Pintos tried to execute code in a freed page or +Breakpoint Exception)}, then Pintos tried to execute code in a freed page or block. An assertion failure on the expression @code{sec_no < d->capacity} diff --git a/doc/devel.texi b/doc/devel.texi index 60851df..48ed58d 100644 --- a/doc/devel.texi +++ b/doc/devel.texi @@ -1,4 +1,4 @@ -@node Development Tools, , Debugging Tools, Top +@node Development Tools @appendix Development Tools Here are some tools that you might find useful while developing code. diff --git a/doc/doc.texi b/doc/doc.texi index c43a5bb..5d516d3 100644 --- a/doc/doc.texi +++ b/doc/doc.texi @@ -1,4 +1,4 @@ -@node Project Documentation, Debugging Tools, Coding Standards, Top +@node Project Documentation @appendix Project Documentation This chapter presents a sample assignment and a filled-in design diff --git a/doc/filesys.texi b/doc/filesys.texi index e724d96..4db5665 100644 --- a/doc/filesys.texi +++ b/doc/filesys.texi @@ -1,4 +1,4 @@ -@node Project 4--File Systems, References, Project 3--Virtual Memory, Top +@node Project 4--File Systems @chapter Project 4: File Systems In the previous two assignments, you made extensive use of a diff --git a/doc/intro.texi b/doc/intro.texi index 5a0da39..4a517f2 100644 --- a/doc/intro.texi +++ b/doc/intro.texi @@ -1,4 +1,4 @@ -@node Introduction, Pintos Tour, Top, Top +@node Introduction @chapter Introduction Welcome to Pintos. Pintos is a simple operating system framework for @@ -531,16 +531,14 @@ work. The original structure and form of Pintos was inspired by the Nachos instructional operating system from the University of California, -Berkeley. A few of the source files were originally more-or-less -literal translations of the Nachos C++ code into C. These files bear -the original UCB license notice. +Berkeley. A few of the Pintos source files are derived from code used in the Massachusetts Institute of Technology's 6.828 advanced operating systems course. These files bear the original MIT license notice. The Pintos projects and documentation originated with those designed for -Nachos by current and former CS140 teaching assistants at Stanford +Nachos by current and former CS 140 teaching assistants at Stanford University, including at least Yu Ping, Greg Hutchins, Kelly Shaw, Paul Twohey, Sameer Qureshi, and John Rector. diff --git a/doc/pintos.texi b/doc/pintos.texi index 02db016..777b2f4 100644 --- a/doc/pintos.texi +++ b/doc/pintos.texi @@ -41,6 +41,7 @@ @author by Ben Pfaff @end titlepage +@shortcontents @contents @ifnottex @@ -50,30 +51,30 @@ @menu * Introduction:: -* Pintos Tour:: * Project 1--Threads:: * Project 2--User Programs:: * Project 3--Virtual Memory:: * Project 4--File Systems:: -* References:: +* Reference Guide:: * 4.4BSD Scheduler:: * Coding Standards:: * Project Documentation:: * Debugging Tools:: * Development Tools:: +* Bibliography:: @end menu @include intro.texi -@include tour.texi @include threads.texi @include userprog.texi @include vm.texi @include filesys.texi -@include references.texi +@include reference.texi @include 44bsd.texi @include standards.texi @include doc.texi @include debug.texi @include devel.texi +@include bibliography.texi @bye diff --git a/doc/tour.texi b/doc/reference.texi similarity index 78% rename from doc/tour.texi rename to doc/reference.texi index 2e15b38..b51c44e 100644 --- a/doc/tour.texi +++ b/doc/reference.texi @@ -1,23 +1,25 @@ -@node Pintos Tour, Project 1--Threads, Introduction, Top -@chapter A Tour Through Pintos +@node Reference Guide +@appendix Reference Guide -This chapter is a brief tour through the Pintos code. It covers the +This chapter is a reference for the Pintos code. It covers the entire code base, but you'll only be using Pintos one part at a time, so you may find that you want to read each part as you work on the -corresponding project. +project where it becomes important. -(Actually, the tour is currently incomplete. It fully covers only the -threads project.) +(Actually, the reference guide is currently incomplete.) We recommend using ``tags'' to follow along with references to function and variable names (@pxref{Tags}). @menu * Pintos Loading:: -* Threads Tour:: -* User Programs Tour:: -* Virtual Memory Tour:: -* File Systems Tour:: +* Threads:: +* Synchronization:: +* Interrupt Handling:: +* Memory Allocation:: +* Virtual Addresses:: +* Page Table:: +* Hash Table:: @end menu @node Pintos Loading @@ -115,7 +117,7 @@ encounter in Pintos from here on out. When @func{main} starts, the system is in a pretty raw state. We're in protected mode with paging enabled, but hardly anything else is ready. Thus, the @func{main} function consists primarily of calls -into other Pintos modules' initialization functions. +into other Pintos modules' initialization functions. These are usually named @func{@var{module}_init}, where @var{module} is the module's name, @file{@var{module}.c} is the module's source code, and @file{@var{module}.h} is the module's @@ -197,18 +199,8 @@ call @func{power_off} to terminate the machine simulator. Otherwise, @func{main} calls @func{thread_exit}, which allows any other running threads to continue running. -@node Threads Tour -@section Threads Project - -@menu -* Thread Support:: -* Synchronization:: -* Interrupt Handling:: -* Memory Allocation:: -@end menu - -@node Thread Support -@subsection Thread Support +@node Threads +@section Threads @menu * struct thread:: @@ -217,12 +209,12 @@ threads to continue running. @end menu @node struct thread -@subsubsection @code{struct thread} +@subsection @code{struct thread} The main Pintos data structure for threads is @struct{thread}, declared in @file{threads/thread.h}. -@deftp {Structure} {@struct{thread}} +@deftp {Structure} {struct thread} Represents a thread or a user process. In the projects, you will have to add your own members to @struct{thread}. You may also change or delete the definitions of existing members. @@ -355,7 +347,7 @@ the final member. @end deftypecv @node Thread Functions -@subsubsection Thread Functions +@subsection Thread Functions @file{threads/thread.c} implements several public functions for thread support. Let's take a look at the most useful: @@ -464,7 +456,7 @@ Skeletons for the advanced scheduler. @xref{4.4BSD Scheduler}. @end deftypefun @node Thread Switching -@subsubsection Thread Switching +@subsection Thread Switching @func{schedule} is the function responsible for switching threads. It is internal to @file{threads/thread.c} and called only by the three @@ -531,7 +523,7 @@ interrupts and calls the thread's function (the function passed to @end itemize @node Synchronization -@subsection Synchronization +@section Synchronization If sharing of resources between threads is not handled in a careful, controlled fashion, then the result is usually a big mess. @@ -548,7 +540,7 @@ synchronization primitives to help out. @end menu @node Disabling Interrupts -@subsubsection Disabling Interrupts +@subsection Disabling Interrupts The crudest way to do synchronization is to disable interrupts, that is, to temporarily prevent the CPU from responding to interrupts. If @@ -599,7 +591,7 @@ Turns interrupts off. Returns the previous interrupt state. @end deftypefun @node Semaphores -@subsubsection Semaphores +@subsection Semaphores Pintos' semaphore type and operations are declared in @file{threads/synch.h}. @@ -669,7 +661,7 @@ a list of waiting threads, using the linked list implementation in @file{lib/kernel/list.c}. @node Locks -@subsubsection Locks +@subsection Locks Lock types and functions are declared in @file{threads/synch.h}. @@ -714,7 +706,7 @@ false otherwise. @end deftypefun @node Condition Variables -@subsubsection Condition Variables +@subsection Condition Variables Condition variable types and functions are declared in @file{threads/synch.h}. @@ -768,7 +760,7 @@ monitor lock @var{lock}). @var{lock} must be held before calling this function. @end deftypefun -@subsubheading Monitor Example +@subsubsection Monitor Example The classical example of a monitor is handling a buffer into which one ``producer'' thread writes characters and out of which a second @@ -810,7 +802,7 @@ char get (void) @{ @end example @node Memory Barriers -@subsubsection Memory Barriers +@subsection Memory Barriers Suppose we add a ``feature'' that, whenever a timer interrupt occurs, the character in global variable @code{timer_put_char} is @@ -870,7 +862,7 @@ intr_set_level (old_level); A second possibility is to mark the declarations of @code{timer_put_char} and @code{timer_do_put} as @samp{volatile}. This keyword tells the compiler that the variables are externally observable -and allows it less latitude for optimization. However, the semantics of +and restricts its latitude for optimization. However, the semantics of @samp{volatile} are not well-defined, so it is not a good general solution. @@ -901,7 +893,7 @@ definition, because the compiler may read and parse the entire source file before performing optimization. @node Interrupt Handling -@subsection Interrupt Handling +@section Interrupt Handling An @dfn{interrupt} notifies the CPU of some event. Much of the work of an operating system relates to interrupts in one way or another. @@ -946,7 +938,7 @@ also want to skim chapter 5, ``Interrupt and Exception Handling,'' in @end menu @node Interrupt Infrastructure -@subsubsection Interrupt Infrastructure +@subsection Interrupt Infrastructure When an interrupt occurs while the kernel is running, the CPU saves its most essential state on the stack and jumps to an interrupt @@ -1017,7 +1009,7 @@ interrupts. @deftypecv {Member} {@struct{intr_frame}} void (*eip) (void) The address of the next instruction to be executed by the interrupted -thread. +thread. @end deftypecv @deftypecv {Member} {@struct{intr_frame}} {void *} esp @@ -1030,7 +1022,7 @@ Returns the name of the interrupt numbered @var{vec}, or @end deftypefun @node Internal Interrupt Handling -@subsubsection Internal Interrupt Handling +@subsection Internal Interrupt Handling When an internal interrupt occurs, it is because the running kernel thread (or, starting from project 2, the running user process) has @@ -1068,7 +1060,7 @@ starting with project 2). @end deftypefun @node External Interrupt Handling -@subsubsection External Interrupt Handling +@subsection External Interrupt Handling Whereas an internal interrupt runs in the context of the thread that caused it, external interrupts do not have any predictable context. @@ -1140,7 +1132,7 @@ when a thread's time slice expires. @end deftypefun @node Memory Allocation -@subsection Memory Allocation +@section Memory Allocation Pintos contains two memory allocators, one that allocates memory in units of a page, and one that can allocate blocks of any size. @@ -1151,7 +1143,7 @@ units of a page, and one that can allocate blocks of any size. @end menu @node Page Allocator -@subsubsection Page Allocator +@subsection Page Allocator The page allocator declared in @file{threads/palloc.h} allocates memory in units of a page. It is most often used to allocate memory @@ -1237,7 +1229,7 @@ or @func{palloc_get_multiple}. @end deftypefun @node Block Allocator -@subsubsection Block Allocator +@subsection Block Allocator The block allocator, declared in @file{threads/malloc.h}, can allocate blocks of any size. It is layered on top of the page allocator @@ -1247,7 +1239,7 @@ allocator are obtained from the kernel pool. The block allocator uses two different strategies for allocating memory. The first of these applies to ``small'' blocks, those 1 kB or smaller (one -fourth of the the page size). These allocations are rounded up to the +fourth of the page size). These allocations are rounded up to the nearest power of 2, or 16 bytes, whichever is larger. Then they are grouped into a page used only for allocations of the smae size. @@ -1256,7 +1248,7 @@ The second strategy applies to allocating ``large'' blocks, those larger than 1 kB. These allocations (plus a small amount of overhead) are rounded up to the nearest page in size, and then the block allocator requests that -number of contiguous pages from the page allocator. +number of contiguous pages from the page allocator. In either case, the difference between the allocation requested size and the actual block size is wasted. A real operating system would @@ -1280,31 +1272,458 @@ a debugging aid (@pxref{Debugging Tips}). The block allocator may not be called from interrupt context. -@node User Programs Tour -@section User Programs Project +@node Virtual Addresses +@section Virtual Addresses -No tour for this project is available. +A 32-bit virtual address can be divided into a 20-bit @dfn{page number} +and a 12-bit @dfn{page offset} (or just @dfn{offset}), like this: + +@example +@group + 31 12 11 0 + +-------------------+-----------+ + | Page Number | Offset | + +-------------------+-----------+ + Virtual Address +@end group +@end example + +Header @file{threads/vaddr.h} defines these functions and macros for +working with virtual addresses: + +@defmac PGSHIFT +@defmacx PGBITS +The bit index (0) and number of bits (12) in the offset part of a +virtual address, respectively. +@end defmac + +@defmac PGMASK +A bit mask with value @t{0xfff}, so that the bits in the page offset are +set to 1 and other bits set to 0. +@end defmac + +@defmac PGSIZE +The page size in bytes (4096). +@end defmac + +@deftypefun unsigned pg_ofs (const void *@var{va}) +Extracts and returns the page offset in virtual address @var{va}. +@end deftypefun + +@deftypefun uintptr_t pg_no (const void *@var{va}) +Extracts and returns the page number in virtual address @var{va}. +@end deftypefun + +@deftypefun {void *} pg_round_down (const void *@var{va}) +Returns the start of the virtual page that @var{va} points within, that +is, @var{va} with the page offset set to 0. +@end deftypefun + +@deftypefun {void *} pg_round_up (const void *@var{va}) +Returns @var{va} rounded up to the nearest page boundary. +@end deftypefun -@node Virtual Memory Tour -@section Virtual Memory Project +Virtual memory in Pintos is divided into two regions: user virtual +memory and kernel virtual memory. The boundary between them is +@code{PHYS_BASE}: -Only some parts of the tour for this project are available. +@defmac PHYS_BASE +Base address of kernel virtual memory. It defaults to @t{0xc0000000} (3 +GB), but it may be changed to any multiple of @t{0x10000000} from +@t{0x80000000} to @t{0xf0000000}. + +User virtual memory ranges from virtual address 0 up to +@code{PHYS_BASE}. Kernel virtual memory occupies the rest of the +virtual address space, from @code{PHYS_BASE} up to 4 GB. +@end defmac + +@deftypefun {bool} is_user_vaddr (const void *@var{va}) +@deftypefunx {bool} is_kernel_vaddr (const void *@var{va}) +Returns true if @var{va} is a user or kernel virtual address, +respectively, false otherwise. +@end deftypefun + +The 80@var{x}86 doesn't provide any way to directly access memory given +a physical address. This ability is often necessary in an operating +system kernel, so Pintos works around it by mapping kernel virtual +memory one-to-one to physical memory. That is, virtual address +@code{PHYS_BASE} accesses physical address 0, virtual address +@code{PHYS_BASE} + @t{0x1234} accesses physical address @t{0x1234}, and +so on up to the size of the machine's physical memory. Thus, adding +@code{PHYS_BASE} to a physical address obtains a kernel virtual address +that accesses that address; conversely, subtracting @code{PHYS_BASE} +from a kernel virtual address obtains the corresponding physical +address. Header @file{threads/vaddr.h} provides a pair of functions to +do these translations: + +@deftypefun {void *} ptov (uintptr_t @var{pa}) +Returns the kernel virtual address corresponding to physical address +@var{pa}, which should be between 0 and the number of bytes of physical +memory. +@end deftypefun + +@deftypefun {uintptr_t} vtop (void *@var{va}) +Returns the physical address corresponding to @var{va}, which must be a +kernel virtual address. +@end deftypefun + +@node Page Table +@section Page Table + +The code in @file{pagedir.c} is an abstract interface to the 80@var{x}86 +hardware page table, also called a ``page directory'' by Intel processor +documentation. The page table interface uses a @code{uint32_t *} to +represent a page table because this is convenient for accessing their +internal structure. + +The sections below describe the page table interface and internals. @menu -* Hash Table:: +* Page Table Creation Destruction Activation:: +* Page Tables Inspection and Updates:: +* Page Table Accessed and Dirty Bits:: +* Page Table Details:: @end menu -@node Hash Table -@subsection Hash Table +@node Page Table Creation Destruction Activation +@subsection Creation, Destruction, and Activation -Most implementations of the virtual memory project use a hash table to -translate virtual page frames to physical page frames. It is possible -to do this translation without adding a new data structure, by modifying -the code in @file{userprog/pagedir.c}. However, if you do that you'll -need to carefully study and understand section 3.7, ``Page Translation -Using 32-Bit Physical Addressing,'' in @bibref{IA32-v3a}, and in practice -it is probably easier to add a new data structure. You may find other -uses for hash tables as well. +These functions create, destroy, and activate page tables. The base +Pintos code already calls these functions where necessary, so it should +not be necessary to call them yourself. + +@deftypefun {uint32_t *} pagedir_create (void) +Creates and returns a new page table. The new page table contains +Pintos's normal kernel virtual page mappings, but no user virtual +mappings. + +Returns a null pointer if memory cannot be obtained. +@end deftypefun + +@deftypefun void pagedir_destroy (uint32_t *@var{pd}) +Frees all of the resources held by @var{pd}, including the page table +itself and the frames that it maps. +@end deftypefun + +@deftypefun void pagedir_activate (uint32_t *@var{pd}) +Activates @var{pd}. The active page table is the one used by the CPU to +translate memory references. +@end deftypefun + +@node Page Tables Inspection and Updates +@subsection Inspection and Updates + +These functions examine or update the mappings from pages to frames +encapsulated by a page table. They work on both active and inactive +page tables (that is, those for running and suspended processes), +flushing the TLB as necessary. + +User page parameters (@var{upage})to these functions should be user +virtual addresses. Kernel page parameters (@var{kpage}) should be +kernel virtual addresses and should have been obtained from the user +pool with @code{palloc_get_page(PAL_USER)} (@pxref{Why PAL_USER?}). + +@deftypefun bool pagedir_set_page (uint32_t *@var{pd}, void *@var{upage}, void *@var{kpage}, bool @var{writable}) +Adds to @var{pd} a mapping from page @var{upage} to the frame identified +by kernel virtual address @var{kpage}. If @var{writable} is true, the +page is mapped read/write; otherwise, it is mapped read-only. + +Page @var{upage} must not already be mapped. If it is, the kernel +panics. + +Returns true if successful, false on failure. Failure will occur if +additional memory required for the page table cannot be obtained. +@end deftypefun + +@deftypefun {void *} pagedir_get_page (uint32_t *@var{pd}, const void *@var{uaddr}) +Looks up the frame mapped to @var{upage} in @var{pd}. Returns the +kernel virtual address for that frame, if @var{upage} is mapped, or a +null pointer if it is not. +@end deftypefun + +@deftypefun void pagedir_clear_page (uint32_t *@var{pd}, void *@var{upage}) +Marks page @var{upage} ``not present'' in @var{pd}. Later accesses to +the page will fault. + +Other bits in the page table for @var{upage} are preserved, permitting +the accessed and dirty bits (see the next section) to be checked. + +If @var{upage} is not mapped, this function has no effect. +@end deftypefun + +@node Page Table Accessed and Dirty Bits +@subsection Accessed and Dirty Bits + +80@var{x}86 hardware provides some assistance for implementing page +replacement algorithms, through a pair of bits in the page table entry +(PTE) for each page. On any read or write to a page, the CPU sets the +@dfn{accessed bit} to 1 in the page's PTE, and on any write, the CPU +sets the @dfn{dirty bit} to 1. The CPU never resets these bits to 0, +but the OS may do so. + +Proper interpretation of these bits requires understanding of +@dfn{aliases}, that is, two (or more) pages that refer to the same +frame. When an aliased frame is accessed, the accessed and dirty bits +are updated in only one page table entry (the one for the page used for +access). The accessed and dirty bits for the other aliases are not +updated. + +@xref{Accessed and Dirty Bits}, on applying these bits in implementing +page replacement algorithms. + +@deftypefun bool pagedir_is_dirty (uint32_t *@var{pd}, const void *@var{page}) +@deftypefunx bool pagedir_is_accessed (uint32_t *@var{pd}, const void *@var{page}) +Returns true if page directory @var{pd} contains a page table entry for +@var{page} that is marked dirty (or accessed). Otherwise, +returns false. +@end deftypefun + +@deftypefun void pagedir_set_dirty (uint32_t *@var{pd}, const void *@var{page}, bool @var{value}) +@deftypefunx void pagedir_set_accessed (uint32_t *@var{pd}, const void *@var{page}, bool @var{value}) +If page directory @var{pd} has a page table entry for @var{page}, then +its dirty (or accessed) bit is set to @var{value}. +@end deftypefun + +@node Page Table Details +@subsection Page Table Details + +The functions provided with Pintos are sufficient to implement the +projects. However, you may still find it worthwhile to understand the +hardware page table format, so we'll go into a little detail in this +section. + +@menu +* Page Table Structure:: +* Page Table Entry Format:: +* Page Directory Entry Format:: +@end menu + +@node Page Table Structure +@subsubsection Structure + +The top-level paging data structure is a page called the ``page +directory'' (PD) arranged as an array of 1,024 32-bit page directory +entries (PDEs), each of which represents 4 MB of virtual memory. Each +PDE may point to the physical address of another page called a +``page table'' (PT) arranged, similarly, as an array of 1,024 +32-bit page table entries (PTEs), each of which translates a single 4 +kB virtual page to a physical page. + +Translation of a virtual address into a physical address follows +the three-step process illustrated in the diagram +below:@footnote{Actually, virtual to physical translation on the +80@var{x}86 architecture occurs via an intermediate ``linear +address,'' but Pintos (and most modern 80@var{x}86 OSes) set up the CPU +so that linear and virtual addresses are one and the same. Thus, you +can effectively ignore this CPU feature.} + +@enumerate 1 +@item +The most-significant 10 bits of the virtual address (bits 22@dots{}31) +index the page directory. If the PDE is marked ``present,'' the +physical address of a page table is read from the PDE thus obtained. +If the PDE is marked ``not present'' then a page fault occurs. + +@item +The next 10 bits of the virtual address (bits 12@dots{}21) index +the page table. If the PTE is marked ``present,'' the physical +address of a data page is read from the PTE thus obtained. If the PTE +is marked ``not present'' then a page fault occurs. + +@item +The least-significant 12 bits of the virtual address (bits 0@dots{}11) +are added to the data page's physical base address, yielding the final +physical address. +@end enumerate + +@example +@group + 31 22 21 12 11 0 ++----------------------+----------------------+----------------------+ +| Page Directory Index | Page Table Index | Page Offset | ++----------------------+----------------------+----------------------+ + | | | + _______/ _______/ _____/ + / / / + / Page Directory / Page Table / Data Page + / .____________. / .____________. / .____________. + |1,023|____________| |1,023|____________| | |____________| + |1,022|____________| |1,022|____________| | |____________| + |1,021|____________| |1,021|____________| \__\|____________| + |1,020|____________| |1,020|____________| /|____________| + | | | | | | | | + | | | \____\| |_ | | + | | . | /| . | \ | . | + \____\| . |_ | . | | | . | + /| . | \ | . | | | . | + | . | | | . | | | . | + | | | | | | | | + |____________| | |____________| | |____________| + 4|____________| | 4|____________| | |____________| + 3|____________| | 3|____________| | |____________| + 2|____________| | 2|____________| | |____________| + 1|____________| | 1|____________| | |____________| + 0|____________| \__\0|____________| \____\|____________| + / / +@end group +@end example + +Pintos provides some macros and functions that are useful for working +with raw page tables: + +@defmac PTSHIFT +@defmacx PTBITS +The bit index (12) and number of bits (10), respectively, in a page table +index within a virtual address. +@end defmac + +@defmac PTMASK +A bit mask with the bits in the page table index set to 1 and other bits +set to 0. +@end defmac + +@defmac PTSPAN +The number of bytes of virtual address space that a single page table +page covers (4,194,304 bytes, or 4 MB). +@end defmac + +@defmac PDSHIFT +@defmacx PDBITS +The bit index (22) and number of bits (10), respectively, in a page +directory index within a virtual address. +@end defmac + +@defmac PDMASK +A bit mask with the bits in the page directory index set to 1 and other +bits set to 0. +@end defmac + +@deftypefun uintptr_t pd_no (const void *@var{va}) +@deftypefunx uintptr_t pt_no (const void *@var{va}) +Returns the page directory index or page table index, respectively, for +virtual address @var{va}. These functions are defined in +@file{threads/pte.h}. +@end deftypefun + +@deftypefun unsigned pg_ofs (const void *@var{va}) +Returns the page offset for virtual address @var{va}. This function is +defined in @file{threads/vaddr.h}. +@end deftypefun + +@node Page Table Entry Format +@subsubsection Page Table Entry Format + +You do not need to understand the PTE format to do the Pintos +projects, unless you wish to incorporate the page table into your +segment table (@pxref{Managing the Segment Table}). + +The actual format of a page table entry is summarized below. For +complete information, refer to section 3.7, ``Page Translation Using +32-Bit Physical Addressing,'' in @bibref{IA32-v3a}. + +@example +@group + 31 12 11 9 6 5 2 1 0 ++---------------------------------------+----+----+-+-+---+-+-+-+ +| Physical Address | AVL| |D|A| |U|W|P| ++---------------------------------------+----+----+-+-+---+-+-+-+ +@end group +@end example + +Some more information on each bit is given below. The names are +@file{threads/pte.h} macros that represent the bits' values: + +@defmac PTE_P +Bit 0, the ``present'' bit. When this bit is 1, the +other bits are interpreted as described below. When this bit is 0, any +attempt to access the page will page fault. The remaining bits are then +not used by the CPU and may be used by the OS for any purpose. +@end defmac + +@defmac PTE_W +Bit 1, the ``read/write'' bit. When it is 1, the page +is writable. When it is 0, write attempts will page fault. +@end defmac + +@defmac PTE_U +Bit 2, the ``user/supervisor'' bit. When it is 1, user +processes may access the page. When it is 0, only the kernel may access +the page (user accesses will page fault). + +Pintos clears this bit in PTEs for kernel virtual memory, to prevent +user processes from accessing them. +@end defmac + +@defmac PTE_A +Bit 5, the ``accessed'' bit. @xref{Page Table Accessed +and Dirty Bits}. +@end defmac + +@defmac PTE_D +Bit 6, the ``dirty'' bit. @xref{Page Table Accessed and +Dirty Bits}. +@end defmac + +@defmac PTE_AVL +Bits 9@dots{}11, available for operating system use. +Pintos, as provided, does not use them and sets them to 0. +@end defmac + +@defmac PTE_ADDR +Bits 12@dots{}31, the top 20 bits of the physical address of a frame. +The low 12 bits of the frame's address are always 0. +@end defmac + +Other bits are either reserved or uninteresting in a Pintos context and +should be set to@tie{}0. + +Header @file{threads/pte.h} defines three functions for working with +page table entries: + +@deftypefun uint32_t pte_create_kernel (uint32_t *@var{page}, bool @var{writable}) +Returns a page table entry that points to @var{page}, which should be a +kernel virtual address. The PTE's present bit will be set. It will be +marked for kernel-only access. If @var{writable} is true, the PTE will +also be marked read/write; otherwise, it will be read-only. +@end deftypefun + +@deftypefun uint32_t pte_create_user (uint32_t *@var{page}, bool @var{writable}) +Returns a page table entry that points to @var{page}, which should be +the kernel virtual address of a frame in the user pool (@pxref{Why +PAL_USER?}). The PTE's present bit will be set and it will be marked to +allow user-mode access. If @var{writable} is true, the PTE will also be +marked read/write; otherwise, it will be read-only. +@end deftypefun + +@deftypefun {void *} pte_get_page (uint32_t @var{pte}) +Returns the kernel virtual address for the frame that @var{pte} points +to. The @var{pte} may be present or not-present; if it is not-present +then the pointer return is only meaningful if the proper bits in the PTE +actually represent a physical address. +@end deftypefun + +@node Page Directory Entry Format +@subsubsection Page Directory Entry Format + +Page directory entries have the same format as PTEs, except that the +physical address points to a page table page instead of a frame. Header +@file{threads/pte.h} defines two functions for working with page +directory entries: + +@deftypefun uint32_t pde_create (uint32_t *@var{pt}) +Returns a page directory that points to @var{page}, which should be the +kernel virtual address of a page table page. The PDE's present bit will +be set, it will be marked to allow user-mode access, and it will be +marked read/write. +@end deftypefun + +@deftypefun {uint32_t *} pde_get_pt (uint32_t @var{pde}) +Returns the kernel virtual address for the page table page that +@var{pde} points to. The @var{pde} must be marked present. +@end deftypefun + +@node Hash Table +@section Hash Table Pintos provides a hash table data structure in @file{lib/kernel/hash.c}. To use it you will need to manually include its header file, @@ -1313,6 +1732,10 @@ no code provided with Pintos uses the hash table, which means that you are free to use it as is, modify its implementation for your own purposes, or ignore it, as you wish. +Most implementations of the virtual memory project use a hash table to +translate pages to frames. You may find other uses for hash tables as +well. + @menu * Hash Data Types:: * Basic Hash Functions:: @@ -1324,20 +1747,20 @@ purposes, or ignore it, as you wish. @end menu @node Hash Data Types -@subsubsection Data Types +@subsection Data Types A hash table is represented by @struct{hash}. -@deftp {Type} {@struct{hash}} +@deftp {Type} {struct hash} Represents an entire hash table. The actual members of @struct{hash} are ``opaque.'' That is, code that uses a hash table should not access @struct{hash} members directly, nor should it need to. Instead, use hash table functions and macros. @end deftp -The hash table operates on elements of type @struct{hash_elem}. +The hash table operates on elements of type @struct{hash_elem}. -@deftp {Type} {@struct{hash_elem}} +@deftp {Type} {struct hash_elem} Embed a @struct{hash_elem} member in the structure you want to include in a hash table. Like @struct{hash}, @struct{hash_elem} is opaque. All functions for operating on hash table elements actually take and @@ -1392,7 +1815,7 @@ hash} for 32-bit words. Returns a hash of null-terminated string @var{s}. @end deftypefun -@deftypefun unsigned hash_int (int @var{i}) +@deftypefun unsigned hash_int (int @var{i}) Returns a hash of integer @var{i}. @end deftypefun @@ -1428,7 +1851,7 @@ Performs some kind of action, chosen by the caller, on @var{element}. @end deftp @node Basic Hash Functions -@subsubsection Basic Functions +@subsection Basic Functions These functions create and destroy hash tables and obtain basic information about their contents. @@ -1476,7 +1899,7 @@ false if @var{hash} contains at least one element. @end deftypefun @node Hash Search Functions -@subsubsection Search Functions +@subsection Search Functions Each of these functions searches a hash table for an element that compares equal to one provided. Based on the success of the search, @@ -1530,7 +1953,7 @@ must @func{free} the element after it is no longer needed. @end deftypefun @node Hash Iteration Functions -@subsubsection Iteration Functions +@subsection Iteration Functions These functions allow iterating through the elements in a hash table. Two interfaces are supplied. The first requires writing and supplying a @@ -1558,7 +1981,7 @@ while (hash_next (&i)) @} @end example -@deftp {Type} {@struct{hash_iterator}} +@deftp {Type} {struct hash_iterator} Represents a position within a hash table. Calling any function that may modify a hash table, such as @func{hash_insert} or @func{hash_delete}, invalidates all iterators within that hash table. @@ -1586,7 +2009,7 @@ called for the first time. @end deftypefun @node Hash Table Example -@subsubsection Hash Table Example +@subsection Hash Table Example Suppose you have a structure, called @struct{page}, that you want to put into a hash table. First, define @struct{page} to include a @@ -1608,7 +2031,7 @@ operator works fine for comparing pointers: @example /* @r{Returns a hash value for page @var{p}.} */ unsigned -page_hash (const struct hash_elem *p_, void *aux UNUSED) +page_hash (const struct hash_elem *p_, void *aux UNUSED) @{ const struct page *p = hash_entry (p_, struct page, hash_elem); return hash_bytes (&p->addr, sizeof p->addr); @@ -1617,11 +2040,11 @@ page_hash (const struct hash_elem *p_, void *aux UNUSED) /* @r{Returns true if page @var{a} precedes page @var{b}.} */ bool page_less (const struct hash_elem *a_, const struct hash_elem *b_, - void *aux UNUSED) + void *aux UNUSED) @{ const struct page *a = hash_entry (a_, struct page, hash_elem); const struct page *b = hash_entry (b_, struct page, hash_elem); - + return a->addr < b->addr; @} @end example @@ -1662,7 +2085,7 @@ scope: /* @r{Returns the page containing the given virtual @var{address}, or a null pointer if no such page exists.} */ struct page * -page_lookup (const void *address) +page_lookup (const void *address) @{ struct page p; struct hash_elem *e; @@ -1682,7 +2105,7 @@ A similar function could delete a page by address using @func{hash_delete}. @node Hash Auxiliary Data -@subsubsection Auxiliary Data +@subsection Auxiliary Data In simple cases like the example above, there's no need for the @var{aux} parameters. In these cases, just pass a null pointer to @@ -1700,7 +2123,7 @@ themselves don't indicate what that fixed length is, you could pass the length as an @var{aux} parameter. @node Hash Synchronization -@subsubsection Synchronization +@subsection Synchronization The hash table does not do any internal synchronization. It is the caller's responsibility to synchronize calls to hash table functions. @@ -1715,7 +2138,3 @@ It is also the caller's responsibility to synchronize access to data in hash table elements. How to synchronize access to this data depends on how it is designed and organized, as with any other data structure. -@node File Systems Tour -@section File Systems Project - -No tour for this project is available. diff --git a/doc/standards.texi b/doc/standards.texi index 8e0f0e0..b1296c7 100644 --- a/doc/standards.texi +++ b/doc/standards.texi @@ -1,4 +1,4 @@ -@node Coding Standards, Project Documentation, 4.4BSD Scheduler, Top +@node Coding Standards @appendix Coding Standards All of you should have taken a class like CS 107, so we expect you to be @@ -45,7 +45,7 @@ Please limit C source file lines to at most 79 characters long. Pintos comments sometimes refer to external standards or specifications by writing a name inside square brackets, like this: @code{[IA32-v3a]}. These names refer to the reference names used in -this documentation (@pxref{References}). +this documentation (@pxref{Bibliography}). If you remove existing Pintos code, please delete it from your source file entirely. Don't just put it into a comment or a conditional diff --git a/doc/threads.texi b/doc/threads.texi index a28d7a4..d0d34e4 100644 --- a/doc/threads.texi +++ b/doc/threads.texi @@ -1,4 +1,4 @@ -@node Project 1--Threads, Project 2--User Programs, Pintos Tour, Top +@node Project 1--Threads @chapter Project 1: Threads In this assignment, we give you a minimally functional thread system. @@ -12,9 +12,9 @@ side. Compilation should be done in the @file{threads} directory. Before you read the description of this project, you should read all of the following sections: @ref{Introduction}, @ref{Coding Standards}, @ref{Debugging Tools}, and @ref{Development Tools}. You should at least -skim the material in @ref{Threads Tour} and especially -@ref{Synchronization}. To complete this project you will also need to -read @ref{4.4BSD Scheduler}. +skim the material from @ref{Pintos Loading} through @ref{Memory +Allocation}, especially @ref{Synchronization}. To complete this project +you will also need to read @ref{4.4BSD Scheduler}. @menu * Project 1 Background:: @@ -137,10 +137,10 @@ here. @xref{Kernel Initialization}, for details. @item thread.c @itemx thread.h -Basic thread support. Much of your work will take place in these -files. @file{thread.h} defines @struct{thread}, which you are likely -to modify in all four projects. See @ref{struct thread} and @ref{Thread -Support} for more information. +Basic thread support. Much of your work will take place in these files. +@file{thread.h} defines @struct{thread}, which you are likely to modify +in all four projects. See @ref{struct thread} and @ref{Threads} for +more information. @item switch.S @itemx switch.h @@ -178,10 +178,11 @@ four projects. @xref{Synchronization}, for more information. Functions for I/O port access. This is mostly used by source code in the @file{devices} directory that you won't have to touch. -@item mmu.h -Functions and macros related to memory management, including page -directories and page tables. This will be more important to you in -project 3. For now, you can ignore it. +@item vaddr.h +@itemx pte.h +Functions and macros for working with virtual addresses and page table +entries. These will be more important to you in project 3. For now, +you can ignore them. @item flags.h Macros that define a few bits in the 80@var{x}86 ``flags'' register. diff --git a/doc/userprog.texi b/doc/userprog.texi index 1a6a77c..a9a84b2 100644 --- a/doc/userprog.texi +++ b/doc/userprog.texi @@ -1,4 +1,4 @@ -@node Project 2--User Programs, Project 3--Virtual Memory, Project 1--Threads, Top +@node Project 2--User Programs @chapter Project 2: User Programs Now that you've worked with Pintos and are becoming familiar with its @@ -10,12 +10,12 @@ is possible. In this project, you will enable programs to interact with the OS via system calls. You will be working out of the @file{userprog} directory for this -assignment. However, you will also be interacting with almost every -other part of the code for this assignment. We will describe the +assignment, but you will also be interacting with almost every +other part of Pintos. We will describe the relevant parts below. You can build project 2 on top of your project 1 submission or you can -start with a fresh copy. No code from project 1 is required for this +start fresh. No code from project 1 is required for this assignment. The ``alarm clock'' functionality may be useful in projects 3 and 4, but it is not strictly required. @@ -41,7 +41,7 @@ of the operating system kernel. This means, for example, that all the test code from the last assignment ran as part of the kernel, with full access to privileged parts of the system. Once we start running user programs on top of the operating system, this is no longer true. -This project deals with consequences of the change. +This project deals with the consequences. We allow more than one process to run at a time. Each process has one thread (multithreaded processes are not supported). User programs are @@ -81,9 +81,10 @@ Loads ELF binaries and starts processes. @item pagedir.c @itemx pagedir.h -A simple manager for 80@var{x}86 page directories and page tables. +A simple manager for 80@var{x}86 hardware page tables. Although you probably won't want to modify this code for this project, you may want to call some of its functions. +@xref{Page Tables}, for more information. @item syscall.c @itemx syscall.h @@ -97,9 +98,9 @@ else needed by system calls. @itemx exception.h When a user process performs a privileged or prohibited operation, it traps into the kernel as an ``exception'' or ``fault.''@footnote{We -will treat these terms as synonymous. There is no standard -distinction between them, although Intel processor manuals define -them slightly differently on 80@var{x}86.} These files handle +will treat these terms as synonyms. There is no standard +distinction between them, although Intel processor manuals make +a minor distinction between them on 80@var{x}86.} These files handle exceptions. Currently all exceptions simply print a message and terminate the process. Some, but not all, solutions to project 2 require modifying @func{page_fault} in this file. @@ -108,16 +109,16 @@ require modifying @func{page_fault} in this file. @itemx gdt.h The 80@var{x}86 is a segmented architecture. The Global Descriptor Table (GDT) is a table that describes the segments in use. These -files set up the GDT. @strong{You should not need to modify these -files for any of the projects.} You can read the code if +files set up the GDT. You should not need to modify these +files for any of the projects. You can read the code if you're interested in how the GDT works. @item tss.c @itemx tss.h The Task-State Segment (TSS) is used for 80@var{x}86 architectural task switching. Pintos uses the TSS only for switching stacks when a -user process enters an interrupt handler, as does Linux. @strong{You -should not need to modify these files for any of the projects.} +user process enters an interrupt handler, as does Linux. You +should not need to modify these files for any of the projects. You can read the code if you're interested in how the TSS works. @end table @@ -125,11 +126,13 @@ works. @node Using the File System @subsection Using the File System -You will need to use some file system code for this project. First, -user programs are loaded from the file system. Second, many of the +You will need to interface to the file system code for this project, +because +user programs are loaded from the file system and many of the system calls you must implement deal with the file system. However, -the focus of this project is not on the file system code, so we have -provided a simple file system in the @file{filesys} directory. You +the focus of this project is not the file system, so we have +provided a simple but complete file system in the @file{filesys} +directory. You will want to look over the @file{filesys.h} and @file{file.h} interfaces to understand how to use the file system, and especially its many limitations. @@ -140,13 +143,13 @@ distract you from this project's focus. Proper use of the file system routines now will make life much easier for project 4, when you improve the file -system implementation. Until then, you will have to put up with the +system implementation. Until then, you will have to tolerate the following limitations: @itemize @bullet @item -No synchronization. Concurrent accesses will interfere with one -another. You should use a lock to ensure that only one process at a +No internal synchronization. Concurrent accesses will interfere with one +another. You should use synchronization to ensure that only one process at a time is executing file system code. @item @@ -179,13 +182,13 @@ One important feature is included: Unix-like semantics for @func{filesys_remove} are implemented. That is, if a file is open when it is removed, its blocks are not deallocated and it may still be accessed by any -threads that have it open until the last one closes it. @xref{Removing +threads that have it open, until the last one closes it. @xref{Removing an Open File}, for more information. @end itemize You need to be able to create simulated disks. The @command{pintos-mkdisk} program provides this functionality. From the -@file{userprog/build} directory, execute @code{pintos-mkdisk fs.dsk 2}. +@file{userprog/build} directory, execute @code{pintos-mkdisk fs.dsk@tie{}2}. This command creates a 2 MB simulated disk named @file{fs.dsk}. Then format the disk by passing @option{-f -q} on the kernel's command line: @code{pintos -f -q}. The @option{-f} option causes the disk to be @@ -206,13 +209,13 @@ commands for copying files out of a VM are similar, but substitute Incidentally, these commands work by passing special commands @command{put} and @command{get} on the kernel's command line and copying to and from a special simulated ``scratch'' disk. If you're very -curious, you can look at the @command{pintos} program as well as +curious, you can look at the @command{pintos} script as well as @file{filesys/fsutil.c} to learn the implementation details. Here's a summary of how to create and format a disk, copy the @command{echo} program into the new disk, and then run @command{echo}, passing argument @code{x}. (Argument passing won't work until -you've implemented it.) It assumes +you implemented it.) It assumes that you've already built the examples in @file{examples} and that the current directory is @file{userprog/build}: @@ -250,8 +253,7 @@ You can delete a file from the Pintos file system using the @code{rm @node How User Programs Work @subsection How User Programs Work -Pintos can run normal C programs. In fact, Pintos can run any program -you want, as long as it's compiled into the proper file format and uses +Pintos can run normal C programs, as long as they fit into memory and use only the system calls you implement. Notably, @func{malloc} cannot be implemented because none of the system calls required for this project allow for memory allocation. Pintos also can't run programs that use @@ -261,7 +263,8 @@ processor's floating-point unit when switching threads. The @file{src/examples} directory contains a few sample user programs. The @file{Makefile} in this directory compiles the provided examples, and you can edit it -compile your own programs as well. +compile your own programs as well. Some of the example programs will +only work once projects 3 or 4 have been implemented. Pintos can load @dfn{ELF} executables with the loader provided for you in @file{userprog/process.c}. ELF is a file format used by Linux, @@ -272,7 +275,7 @@ for Pintos. (We've provided compilers and linkers that should do just fine.) You should realize immediately that, until you copy a -test program to the emulated disk, Pintos will be unable to do +test program to the simulated disk, Pintos will be unable to do useful work. You won't be able to do interesting things until you copy a variety of programs to the disk. You might want to create a clean reference disk and copy that @@ -285,7 +288,7 @@ which may happen occasionally while debugging. Virtual memory in Pintos is divided into two regions: user virtual memory and kernel virtual memory. User virtual memory ranges from virtual address 0 up to @code{PHYS_BASE}, which is defined in -@file{threads/mmu.h} and defaults to @t{0xc0000000} (3 GB). Kernel +@file{threads/vaddr.h} and defaults to @t{0xc0000000} (3 GB). Kernel virtual memory occupies the rest of the virtual address space, from @code{PHYS_BASE} up to 4 GB. @@ -294,7 +297,7 @@ When the kernel switches from one process to another, it also switches user virtual address spaces by changing the processor's page directory base register (see @func{pagedir_activate} in @file{userprog/pagedir.c}). @struct{thread} contains a pointer to a -process's page directory. +process's page table. Kernel virtual memory is global. It is always mapped the same way, regardless of what user process or kernel thread is running. In @@ -311,12 +314,13 @@ access kernel virtual memory causes a page fault, handled by will be terminated. Kernel threads can access both kernel virtual memory and, if a user process is running, the user virtual memory of the running process. However, even in the kernel, an attempt to -access memory at a user virtual address that doesn't have a page -mapped into it will cause a page fault. +access memory at an unmapped user virtual address +will cause a page fault. You must handle memory fragmentation gracefully, that is, a process that needs @var{N} pages of user virtual memory must not require those pages -to be contiguous in kernel virtual memory. +to be contiguous in physical memory (or, equivalently, in kernel virtual +memory). @menu * Typical Memory Layout:: @@ -403,7 +407,7 @@ There are at least two reasonable ways to do this correctly. The first method is to verify the validity of a user-provided pointer, then dereference it. If you choose this route, you'll want to look at the functions in -@file{userprog/pagedir.c} and in @file{threads/mmu.h}. This is the +@file{userprog/pagedir.c} and in @file{threads/vaddr.h}. This is the simplest way to handle user memory access. The second method is to check only that a user @@ -416,7 +420,7 @@ used in real kernels (including Linux). In either case, you need to make sure not to ``leak'' resources. For example, suppose that your system call has acquired a lock or -allocated a page of memory. If you encounter an invalid user pointer +allocated memory with @func{malloc}. If you encounter an invalid user pointer afterward, you must still be sure to release the lock or free the page of memory. If you choose to verify user pointers before dereferencing them, this should be straightforward. It's more difficult to handle @@ -585,11 +589,11 @@ need to set up the stack. Implement the system call handler in @file{userprog/syscall.c}. The skeleton implementation we provide ``handles'' system calls by terminating the process. It will need to retrieve the system call -number, then any system call arguments, and carry appropriate actions. +number, then any system call arguments, and carry out appropriate actions. Implement the following system calls. The prototypes listed are those seen by a user program that includes @file{lib/user/syscall.h}. (This -header and all other files in @file{lib/user} are for use by user +header, and all others in @file{lib/user}, are for use by user programs only.) System call numbers for each system call are defined in @file{lib/syscall-nr.h}: @@ -642,20 +646,19 @@ when called from @var{A}, even if @var{B} is dead. Consider all the ways a wait can occur: nested waits (@var{A} waits for @var{B}, then @var{B} waits for @var{C}), multiple waits (@var{A} waits for @var{B}, then @var{A} waits for @var{C}), and so on. + +Implementing this system call requires considerably more work than any +of the rest. @end deftypefn @deftypefn {System Call} bool create (const char *@var{file}, unsigned @var{initial_size}) Creates a new file called @var{file} initially @var{initial_size} bytes in size. Returns true if successful, false otherwise. - -Consider implementing this function in terms of @func{filesys_create}. @end deftypefn @deftypefn {System Call} bool remove (const char *@var{file}) Deletes the file called @var{file}. Returns true if successful, false otherwise. - -Consider implementing this function in terms of @func{filesys_remove}. @end deftypefn @deftypefn {System Call} int open (const char *@var{file}) @@ -664,21 +667,17 @@ called a ``file descriptor'' (fd), or -1 if the file could not be opened. File descriptors numbered 0 and 1 are reserved for the console: fd 0 -is standard input (@code{stdin}), fd 1 is standard output -(@code{stdout}). The @code{open} system call will never return either +(@code{STDIN_FILENO}) is standard input, fd 1 (@code{STDOUT_FILENO}) is +standard output. The @code{open} system call will never return either of these file descriptors, which are valid as system call arguments only as explicitly described below. Each process has an independent set of file descriptors. File descriptors are not inherited by child processes. - -Consider implementing this function in terms of @func{filesys_open}. @end deftypefn @deftypefn {System Call} int filesize (int @var{fd}) Returns the size, in bytes, of the file open as @var{fd}. - -Consider implementing this function in terms of @func{file_length}. @end deftypefn @deftypefn {System Call} int read (int @var{fd}, void *@var{buffer}, unsigned @var{size}) @@ -688,8 +687,6 @@ file), or -1 if the file could not be read (due to a condition other than end of file). Fd 0 reads from the keyboard using @func{kbd_getc}. (Keyboard input will not work if you pass the @option{-v} option to @command{pintos}.) - -Consider implementing this function in terms of @func{file_read}. @end deftypefn @deftypefn {System Call} int write (int @var{fd}, const void *@var{buffer}, unsigned @var{size}) @@ -708,8 +705,6 @@ long as @var{size} is not bigger than a few hundred bytes. (It is reasonable to break up larger buffers.) Otherwise, lines of text output by different processes may end up interleaved on the console, confusing both human readers and our grading scripts. - -Consider implementing this function in terms of @func{file_write}. @end deftypefn @deftypefn {System Call} void seek (int @var{fd}, unsigned @var{position}) @@ -724,23 +719,17 @@ have a fixed length until project 4 is complete, so writes past end of file will return an error.) These semantics are implemented in the file system and do not require any special effort in system call implementation. - -Consider implementing this function in terms of @func{file_seek}. @end deftypefn @deftypefn {System Call} unsigned tell (int @var{fd}) Returns the position of the next byte to be read or written in open file @var{fd}, expressed in bytes from the beginning of the file. - -Consider implementing this function in terms of @func{file_tell}. @end deftypefn @deftypefn {System Call} void close (int @var{fd}) Closes file descriptor @var{fd}. Exiting or terminating a process implicitly closes all its open file descriptors, as if by calling this function for each one. - -Consider implementing this function in terms of @func{file_close}. @end deftypefn The file defines other syscalls. Ignore them for now. You will @@ -757,17 +746,16 @@ pointer, a pointer into kernel memory, or a block partially in one of those regions? You should handle these cases by terminating the user process. We recommend writing and testing this code before implementing any other system -call functionality. +call functionality. @xref{Accessing User Memory}, for more information. You must synchronize system calls so that any number of user processes can make them at once. In particular, it is not safe to call into the file system code provided in the -@file{filesys} directory from multiple threads at once. For now, we -recommend adding a single lock that controls access to the file system -code. You should acquire this lock before calling any functions in -the @file{filesys} directory, and release it afterward. Don't forget -that @func{process_execute} also accesses files. @strong{For now, we -recommend against modifying code in the @file{filesys} directory.} +@file{filesys} directory from multiple threads at once. Your system +call implementation must treat the file system code as a critical +section. Don't forget +that @func{process_execute} also accesses files. For now, we +recommend against modifying code in the @file{filesys} directory. We have provided you a user-level function for each system call in @file{lib/user/syscall.c}. These provide a way for user processes to @@ -801,7 +789,7 @@ hurt even now. You can use @func{file_deny_write} to prevent writes to an open file. Calling @func{file_allow_write} on the file will re-enable them (unless the file is denied writes by another opener). Closing a file will also -re-enable writes. Thus, to deny writes to process's executable, you +re-enable writes. Thus, to deny writes to a process's executable, you must keep it open as long as the process is still running. @node Project 2 FAQ @@ -880,6 +868,7 @@ programs or object files. Invoke it as @code{objdump -d (@pxref{GDB}). @item Why do many C include files not work in Pintos programs? +@itemx Can I use lib@var{foo} in my Pintos programs? The C library we provide is very limited. It does not include many of the features that are expected of a real operating system's C library. @@ -888,9 +877,7 @@ architecture), since it must make system calls for I/O and memory allocation. (Not all functions do, of course, but usually the library is compiled as a unit.) -@item Can I use lib@var{foo} in my Pintos programs? - -The chances are good that lib@var{foo} uses parts of the C library +The chances are good that the library you want uses parts of the C library that Pintos doesn't implement. It will probably take at least some porting effort to make it work under Pintos. Notably, the Pintos user program C library does not have a @func{malloc} implementation. @@ -919,9 +906,10 @@ You can choose whatever suitable types you like for @code{tid_t} and a one-to-one mapping, so that the same values in both identify the same process, or you can use a more complex mapping. It's up to you. -@item Keyboard input doesn't work with @command{pintos} option @option{-v}. +@item Keyboard input doesn't work. -Serial input isn't implemented. Don't use @option{-v} if you +You are probably passing @option{-v} to @command{pintos}, but +serial input isn't implemented. Don't use @option{-v} if you want to use the shell or otherwise need keyboard input. @end table @@ -934,12 +922,12 @@ want to use the shell or otherwise need keyboard input. @subsection Argument Passing FAQ @table @asis -@item Isn't the top of stack off the top of user virtual memory? +@item Isn't the top of stack in kernel virtual memory? The top of stack is at @code{PHYS_BASE}, typically @t{0xc0000000}, which is also where kernel virtual memory starts. -But when the processor pushes data on the stack, it decrements the stack -pointer first. Thus, the first (4-byte) value pushed on the stack +But before the processor pushes data on the stack, it decrements the stack +pointer. Thus, the first (4-byte) value pushed on the stack will be at address @t{0xbffffffc}. @item Is @code{PHYS_BASE} fixed? @@ -991,9 +979,9 @@ better solution. This section summarizes important points of the convention used for normal function calls on 32-bit 80@var{x}86 implementations of Unix. Some details are omitted for brevity. If you do want all the details, -you can refer to @bibref{SysV-i386}. +refer to @bibref{SysV-i386}. -The basic calling convention works like this: +The calling convention works like this: @enumerate 1 @item @@ -1001,6 +989,10 @@ The caller pushes each of the function's arguments on the stack one by one, normally using the @code{PUSH} assembly language instruction. Arguments are pushed in right-to-left order. +The stack grows downward: each push decrements the stack pointer, then +stores into the location it now points to, like the C expression +@samp{*--sp = @var{value}}. + @item The caller pushes the address of its next instruction (the @dfn{return address}) on the stack and jumps to the first instruction of the callee. @@ -1026,7 +1018,7 @@ The caller pops the arguments off the stack. Consider a function @func{f} that takes three @code{int} arguments. This diagram shows a sample stack frame as seen by the callee at the beginning of step 3 above, supposing that @func{f} is invoked as -@code{f(1, 2, 3)}. The stack addresses are arbitrary: +@code{f(1, 2, 3)}. The initial stack address is arbitrary: @html
@@ -1034,11 +1026,8 @@ beginning of step 3 above, supposing that @func{f} is invoked as @example +----------------+ 0xbffffe7c | 3 | - +----------------+ 0xbffffe78 | 2 | - +----------------+ 0xbffffe74 | 1 | - +----------------+ stack pointer --> 0xbffffe70 | return address | +----------------+ @end example @@ -1067,14 +1056,15 @@ _start (int argc, char *argv[]) @} @end example -The kernel is responsible for setting up the arguments for the initial -function on the stack, in accordance with the calling convention -explained in the preceding section, before it allows the user program to -begin executing. +The kernel must put the arguments for the initial function on the stack +before it allows the user program to begin executing. The arguments are +passed in the same way as the normal calling convention (@pxref{80x86 +Calling Convention}). -Consider the following example command: @samp{/bin/ls -l foo bar}. -First, the kernel must break the command into words, as @samp{/bin/ls}, -@samp{-l}, @samp{foo}, and @samp{bar}, and place them at the top of the +Consider how to handle arguments for the following example command: +@samp{/bin/ls -l foo bar}. +First, break the command into words: @samp{/bin/ls}, +@samp{-l}, @samp{foo}, @samp{bar}. Place the words at the top of the stack. Order doesn't matter, because they will be referenced through pointers. @@ -1122,7 +1112,7 @@ In this example, the stack pointer would be initialized to As shown above, your code should start the stack at the very top of the user virtual address space, in the page just below virtual address -@code{PHYS_BASE} (defined in @file{threads/mmu.h}). +@code{PHYS_BASE} (defined in @file{threads/vaddr.h}). You may find the non-standard @func{hex_dump} function, declared in @file{}, useful for debugging your argument passing code. @@ -1155,7 +1145,7 @@ is handled in the same way as other software exceptions. In Pintos, user programs invoke @samp{int $0x30} to make a system call. The system call number and any additional arguments are expected to be pushed on the stack in the normal fashion before invoking the -interrupt. +interrupt (@pxref{80x86 Calling Convention}). Thus, when the system call handler @func{syscall_handler} gets control, the system call number is in the 32-bit word at the caller's stack diff --git a/doc/vm.texi b/doc/vm.texi index 2fe6653..de90105 100644 --- a/doc/vm.texi +++ b/doc/vm.texi @@ -1,22 +1,25 @@ -@node Project 3--Virtual Memory, Project 4--File Systems, Project 2--User Programs, Top +@node Project 3--Virtual Memory @chapter Project 3: Virtual Memory -By now you should be familiar with the inner workings of Pintos. Your +By now you should have some familiarity with the inner workings of +Pintos. Your OS can properly handle multiple threads of execution with proper synchronization, and can load multiple user programs at once. However, the number and size of programs that can run is limited by the machine's main memory size. In this assignment, you will remove that limitation. -You will build this assignment on top of the last one. It will benefit -you to get your project 2 in good working order before this assignment -so those bugs don't keep haunting you. Test programs from the previous -project should also work with this project. +You will build this assignment on top of the last one. Test programs +from project 2 should also work with project 3. You should take care to +fix any bugs in your project 2 submission before you start work on +project 3, because those bugs will most likely cause the same problems +in project 3. You will continue to handle Pintos disks and file systems the same way you did in the previous assignment (@pxref{Using the File System}). @menu * Project 3 Background:: +* Project 3 Suggested Order of Implementation:: * Project 3 Requirements:: * Project 3 FAQ:: @end menu @@ -26,9 +29,12 @@ you did in the previous assignment (@pxref{Using the File System}). @menu * Project 3 Source Files:: -* Page Faults:: -* Disk as Backing Store:: -* Memory Mapped Files Background:: +* Memory Terminology:: +* Resource Management Overview:: +* Managing the Segment Table:: +* Managing the Frame Table:: +* Managing the Swap Table:: +* Managing Memory Mapped Files Back:: @end menu @node Project 3 Source Files @@ -49,230 +55,424 @@ Provides access to the physical disk, abstracting away the rather awful IDE interface. You will use this interface to access the swap disk. @end table +@node Memory Terminology +@subsection Memory Terminology + +Careful definitions are needed to keep discussion of virtual memory from +being confusing. Thus, we begin by presenting some terminology for +memory and storage. Some of these terms should be familiar from project +2 (@pxref{Virtual Memory Layout}), but much of it is new. + @menu -* Page Faults:: -* Disk as Backing Store:: -* Memory Mapped Files:: +* Pages:: +* Frames:: +* Page Tables:: +* Swap Slots:: @end menu -@node Page Faults -@subsection Page Faults +@node Pages +@subsubsection Pages + +A @dfn{page}, sometimes called a @dfn{virtual page}, is a contiguous +region of virtual memory 4,096 bytes (the @dfn{page size}) in length. A +page must be @dfn{page-aligned}, that is, start on a virtual address +evenly divisible by the page size. Thus, a 32-bit virtual address can +be divided into a 20-bit @dfn{page number} and a 12-bit @dfn{page +offset} (or just @dfn{offset}), like this: -For the last assignment, whenever a context switch occurred, the new -process installed its own page table into the machine. The new page -table contained all the virtual-to-physical translations for the -process. Whenever the processor needed to look up a translation, it -consulted the page table. As long as the process only accessed -memory that it owned, all was well. If the process accessed -memory it didn't own, it ``page faulted'' and @func{page_fault} -terminated the process. +@example +@group + 31 12 11 0 + +-------------------+-----------+ + | Page Number | Offset | + +-------------------+-----------+ + Virtual Address +@end group +@end example -When we implement virtual memory, the rules have to change. A page -fault is no longer necessarily an error, since it might only indicate -that the page must be brought in from a disk file or from swap. You -will have to implement a more sophisticated page fault handler to -handle these cases. +Each process has an independent set of @dfn{user (virtual) pages}, which +are those pages below virtual address @code{PHYS_BASE}, typically +@t{0xc0000000} (3 GB). The set of @dfn{kernel (virtual) pages}, on the +other hand, is global, remaining the same regardless of what thread or +process is active. The kernel may access both user and kernel pages, +but a user process may access only its own user pages. @xref{Virtual +Memory Layout}, for more information. -@menu -* Page Table Structure:: -* Working with Virtual Addresses:: -* Accessed and Dirty Bits:: -@end menu +Pintos provides several useful functions for working with virtual +addresses. @xref{Virtual Addresses}, for details. + +@node Frames +@subsubsection Frames + +A @dfn{frame}, sometimes called a @dfn{physical frame} or a @dfn{page +frame}, is a contiguous region of physical memory. Like pages, frames +must be page-size and page-aligned. Thus, a 32-bit physical address can +be divided into a 20-bit @dfn{frame number} and a 12-bit @dfn{frame +offset} (or just @dfn{offset}), like this: + +@example +@group + 31 12 11 0 + +-------------------+-----------+ + | Frame Number | Offset | + +-------------------+-----------+ + Physical Address +@end group +@end example + +The 80@var{x}86 doesn't provide any way to directly access memory at a +physical address. Pintos works around this by mapping kernel virtual +memory directly to physical memory: the first page of kernel virtual +memory is mapped to the first frame of physical memory, the second page +to the second frame, and so on. Thus, frames can be accessed through +kernel virtual memory. + +Pintos provides functions for translating between physical addresses and +kernel virtual addresses. @xref{Virtual Addresses}, for details. + +@node Page Tables +@subsubsection Page Tables -@node Page Table Structure -@subsubsection Page Table Structure - -On the 80@var{x}86, the page table format is fixed by hardware. We -have provided code for managing page tables for you to use in -@file{userprog/pagedir.c}. The functions in there provide an -abstract interface to all the page table functionality that you should need -to complete the project. However, you may still find it worthwhile to -understand a little about the hardware page table format, so we'll go -into a little of detail about that in this section. - -The top-level paging data structure is a page called the ``page -directory'' (PD) arranged as an array of 1,024 32-bit page directory -entries (PDEs), each of which represents 4 MB of virtual memory. Each -PDE may point to the physical address of another page called a -``page table'' (PT) arranged, similarly, as an array of 1,024 -32-bit page table entries (PTEs), each of which translates a single 4 -kB virtual page to a physical page. - -Translation of a virtual address into a physical address follows -the three-step process illustrated in the diagram -below:@footnote{Actually, virtual to physical translation on the -80@var{x}86 architecture occurs via an intermediate ``linear -address,'' but Pintos (and most other 80@var{x}86 OSes) set up the CPU -so that linear and virtual addresses are one and the same. Thus, you -can effectively ignore this CPU feature.} +In Pintos, a @dfn{page table} is a data structure that the CPU uses to +translate a virtual address to a physical address, that is, from a page +to a frame. The page table format is dictated by the 80@var{x}86 +architecture. Pintos provides page table management code in +@file{pagedir.c} (@pxref{Page Table}). + +The diagram below illustrates the relationship between pages and frames. +The virtual address, on the left, consists of a page number and an +offset. The page table translates the page number into a frame number, +which is combined with the unmodified offset to obtain the physical +address, on the right. + +@example +@group + +----------+ + .--------------->|Page Table|-----------. + / +----------+ | + 0 | 12 11 0 0 V 12 11 0 + +---------+----+ +---------+----+ + |Page Nr | Ofs| |Frame Nr | Ofs| + +---------+----+ +---------+----+ + Virt Addr | Phys Addr ^ + \_______________________________________/ +@end group +@end example + +@node Swap Slots +@subsubsection Swap Slots + +A @dfn{swap slot} is a contiguous, page-size region of disk space on the +swap disk. Although hardware limitations dictating the placement of +slots are looser than for pages and frames, swap slots should be +page-aligned because there is no downside in doing so. + +@node Resource Management Overview +@subsection Resource Management Overview + +You will need to design the following data structures: + +@table @asis +@item Segment table + +Enables page fault handling by supplementing the page table. +@xref{Managing the Segment Table}. + +@item Frame table + +Allows efficient implementation of eviction policy. +@xref{Managing the Frame Table}. + +@item Swap table + +Tracks usage of swap slots. +@xref{Managing the Swap Table}. + +@item Table of file mappings + +Processes may map files into their virtual memory space. You need a +table to track which files are mapped into which pages. +@end table + +You do not necessarily need to implement four completely distinct data +structures: it may be convenient to wholly or partially merge related +resources into a unified data structure. + +For each data structure, you need to determine what information each +element should contain. You also need to decide on the data structure's +scope, either local (per-process) or global (applying to the whole +system), and how many instances are required within its scope. + +To simplify your design, you may store these data structures in +non-pageable memory. That means that you can be sure that pointers +among them will remain valid. + +Possible choices of data structures include arrays, lists, bitmaps, and +hash tables. An array is often the simplest approach, but a sparsely +populated array wastes memory. Lists are also simple, but traversing a +long list to find a particular position wastes time. Both arrays and +lists can be resized, but lists more efficiently support insertion and +deletion in the middle. + +Pintos includes a bitmap data structure in @file{lib/kernel/bitmap.c} +and @file{lib/kernel/bitmap.h}. A bitmap is an array of bits, each of +which can be true or false. Bitmaps are typically used to track usage +in a set of (identical) resources: if resource @var{n} is in use, then +bit @var{n} of the bitmap is true. Pintos bitmaps are fixed in size, +although you could extend their implementation to support resizing. + +Pintos also includes a hash table data structure (@pxref{Hash Table}). +Pintos hash tables efficiently support insertions and deletions over a +wide range of table sizes. + +Although more complex data structures may yield performance or other +benefits, they may also needlessly complicate your implementation. +Thus, we do not recommend implementing any advanced data structure +(e.g.@: a balanced binary tree) as part of your design. + +@node Managing the Segment Table +@subsection Managing the Segment Table + +The @dfn{segment table} supplements the page table with additional data +about each page. It is required because of the limitations imposed by +the page table's format. Such a supplementary data structure is often +called a ``page table'' also; we call it a segment table to avoid +confusion. + +The segment table is used for at least two purposes. Most importantly, +on a page fault, the kernel looks up the virtual page that faulted in +the segment table to find out what data should be there. Second, the +kernel consults the segment table when a process terminates, to decide +what resources to free. + +You may organize the segment table as you wish. There are at least two +basic approaches to its organization: in terms of segments or in terms +of pages. Optionally, you may use the page table as part of your +segment table design. You will have to modify the Pintos page table +implementation in @file{pagedir.c} to do so. We recommend this approach +for advanced students only. @xref{Page Table Entry Format}, for more +information. + +The most important user of the segment table is the page fault handler. +In project 2, a page fault always indicated a bug in the kernel or a +user program. In project 3, this is no longer true. Now, a page fault +might only indicate that the page must be brought in from a file or +swap. You will have to implement a more sophisticated page fault +handler to handle these cases. Your page fault handler, which you +should implement by modifying @func{page_fault} in +@file{threads/exception.c}, needs to do roughly the following: @enumerate 1 @item -The most-significant 10 bits of the virtual address (bits 22@dots{}32) -index the page directory. If the PDE is marked ``present,'' the -physical address of a page table is read from the PDE thus obtained. -If the PDE is marked ``not present'' then a page fault occurs. +Locate the page that faulted in the segment table. If the memory +reference is valid, use the segment table entry to locate the data that +goes in the page, which might be in the file system, or in a swap slot, +or it might simply be an all-zero page. If you implement sharing, the +page's data might even already be in a page frame, but not in the page +table. + +If the page is unmapped, that is, if there's no data there, or if the +page lies within kernel virtual memory, or if the access is an attempt +to write to a read-only page, then the access is invalid. Any invalid +access terminates the process and thereby frees all of its resources. + +@item +Obtain a frame to store the page. @xref{Managing the Frame Table}, for +details. + +If you implement sharing, the data you need may already be in a frame, +in which case you must be able to locate that frame. @item -The next 10 bits of the virtual address (bits 12@dots{}21) index -the page table. If the PTE is marked ``present,'' the physical -address of a data page is read from the PTE thus obtained. If the PTE -is marked ``not present'' then a page fault occurs. +Fetch the data into the frame, by reading it from the file system or +swap, zeroing it, etc. + +If you implement sharing, the page you need may already be in a frame, +in which case no action is necessary in this step. @item -The least-significant 12 bits of the virtual address (bits 0@dots{}11) -are added to the data page's physical base address, yielding the final -physical address. +Point the page table entry for the faulting virtual address to the +physical page. You can use the functions in @file{userprog/pagedir.c}. @end enumerate -@example -@group - 31 22 21 12 11 0 -+----------------------+----------------------+----------------------+ -| Page Directory Index | Page Table Index | Page Offset | -+----------------------+----------------------+----------------------+ - | | | - _______/ _______/ _____/ - / / / - / Page Directory / Page Table / Data Page - / .____________. / .____________. / .____________. - |1,023|____________| |1,023|____________| | |____________| - |1,022|____________| |1,022|____________| | |____________| - |1,021|____________| |1,021|____________| \__\|____________| - |1,020|____________| |1,020|____________| /|____________| - | | | | | | | | - | | | \____\| |_ | | - | | . | /| . | \ | . | - \____\| . |_ | . | | | . | - /| . | \ | . | | | . | - | . | | | . | | | . | - | | | | | | | | - |____________| | |____________| | |____________| - 4|____________| | 4|____________| | |____________| - 3|____________| | 3|____________| | |____________| - 2|____________| | 2|____________| | |____________| - 1|____________| | 1|____________| | |____________| - 0|____________| \__\0|____________| \____\|____________| - / / -@end group -@end example +@node Managing the Frame Table +@subsection Managing the Frame Table + +The @dfn{frame table} contains one entry for each frame that contains a +user page. Each entry in the frame table contains a pointer to the +page, if any, that currently occupies it, and other data of your choice. +The frame table allows Pintos to efficiently implement an eviction +policy, by choosing a page to evict when no frames are free. -@node Working with Virtual Addresses -@subsubsection Working with Virtual Addresses +The frames used for user pages should be obtained from the ``user +pool,'' by calling @code{palloc_get_page(PAL_USER)}. You must use +@code{PAL_USER} to avoid allocating from the ``kernel pool,'' which +could cause some test cases to fail unexpectedly (@pxref{Why +PAL_USER?}). If you modify @file{palloc.c} as part of your frame table +implementation, be sure to retain the distinction between the two pools. -Header @file{threads/mmu.h} has useful functions for various -operations on virtual addresses. You should look over the header -yourself. The most important functions are described below. +The most important operation on the frame table is obtaining an unused +frame. This is easy when a frame is free. When none is free, a frame +must be made free by evicting some page from its frame. If no frame can +be evicted without allocating a swap slot, but swap is full, some +process must be killed to free memory (the choice of process to kill is +up to you). -@deftypefun uintptr_t pd_no (const void *@var{va}) -Returns the page directory index for virtual address @var{va}. -@end deftypefun +The process of eviction comprises roughly the following steps: -@deftypefun uintptr_t pt_no (const void *@var{va}) -Returns the page table index for virtual address @var{va}. -@end deftypefun +@enumerate 1 +@item +Choose a frame to evict, using your page replacement algorithm. The +``accessed'' and ``dirty'' bits in the page table, described below, will +come in handy. -@deftypefun unsigned pg_ofs (const void *@var{va}) -Returns the page offset of virtual address @var{va}. -@end deftypefun +@item +Remove references to the frame from any page table that refers to it. + +Unless you have implemented sharing, only a single page should refer to +a frame at any given time. + +@item +If necessary, write the page to the file system or to swap. +@end enumerate -@deftypefun {void *} pg_round_down (const void *@var{va}) -Returns @var{va} rounded down to the nearest page boundary, that is, -@var{va} with its page offset set to 0. -@end deftypefun +The evicted frame may then be used to store a different page. -@deftypefun {void *} pg_round_up (const void *@var{va}) -Returns @var{va} rounded up to the nearest page boundary. -@end deftypefun +@menu +* Accessed and Dirty Bits:: +@end menu @node Accessed and Dirty Bits @subsubsection Accessed and Dirty Bits -Most of the page table is under the control of the operating system, but -two bits in each page table entry are also manipulated by the CPU. On -any read or write to the page referenced by a PTE, the CPU sets the -PTE's @dfn{accessed bit} to 1; on any write, the CPU sets the @dfn{dirty -bit} to 1. The CPU never resets these bits to 0, but the OS may do so. - -You will need to use the accessed and dirty bits in your submission to -choose which pages to evict from memory and to decide whether evicted -pages need to be written to disk. The page table code in -@file{userprog/pagedir.c} provides functions for checking and setting -these bits. These functions are described at the end of this section. - -You need to watch out for @dfn{aliases}, that is, two (or more) -different virtual pages that refer to the same physical page frame. -When an aliased page is accessed, the accessed and dirty bits are -updated in only one page table entry (the one for the virtual address -used to access the page). The accessed and dirty bits for the other -aliased virtual addresses are not updated. +80@var{x}86 hardware provides some assistance for implementing page +replacement algorithms, through a pair of bits in the page table entry +(PTE) for each page. On any read or write to a page, the CPU sets the +@dfn{accessed bit} to 1 in the page's PTE, and on any write, the CPU +sets the @dfn{dirty bit} to 1. The CPU never resets these bits to 0, +but the OS may do so. + +You need to be aware of @dfn{aliases}, that is, two (or more) pages that +refer to the same frame. When an aliased frame is accessed, the +accessed and dirty bits are updated in only one page table entry (the +one for the page used for access). The accessed and dirty bits for the +other aliases are not updated. In Pintos, every user virtual page is aliased to its kernel virtual -address. You must manage these aliases somehow. For example, your code +page. You must manage these aliases somehow. For example, your code could check and update the accessed and dirty bits for both addresses. Alternatively, the kernel could avoid the problem by only accessing user data through the user virtual address. -Other aliases should only arise if you implement sharing, as extra -credit (@pxref{VM Extra Credit}), or as bugs elsewhere in your code. - -@deftypefun bool pagedir_is_dirty (uint32_t *@var{pd}, const void *@var{vpage}) -@deftypefunx bool pagedir_is_accessed (uint32_t *@var{pd}, const void *@var{vpage}) -Returns true if page directory @var{pd} contains a page table entry for -virtual page @var{vpage} that is marked dirty (or accessed). Otherwise, -returns false. -@end deftypefun - -@deftypefun void pagedir_set_dirty (uint32_t *@var{pd}, const void *@var{vpage}, bool @var{value}) -@deftypefunx void pagedir_set_accessed (uint32_t *@var{pd}, const void *@var{vpage}, bool @var{value}) -If page directory @var{pd} has a page table entry for @var{vpage}, then -its dirty (or accessed) bit is set to @var{value}. -@end deftypefun - -@node Disk as Backing Store -@subsection Disk as Backing Store - -VM systems effectively use memory as a cache for disk. From another -perspective, disk is a ``backing store'' for memory. This provides the -abstraction of an (almost) unlimited virtual memory size. You must -implement such a system, with the additional constraint that performance -should be close to that provided by physical memory. You can use dirty -bits to tell whether pages need to be written back to disk when they're -evicted from main memory, and the accessed bits for page replacement -algorithms (@pxref{Accessed and Dirty Bits}). - -As with any caching system, performance depends on the policy used to -decide what to keep in the cache and what to evict. On a page fault, -the kernel must decide which page to replace. Ideally, it will throw -out a page that will not be referenced for a long time, keeping in -memory those pages that are soon to be referenced. Another -consideration is that if the replaced page has been modified, the page -must be first written to disk before the needed page can be brought in. -Many virtual memory systems avoid this extra overhead by writing -modified pages to disk in advance, so that later page faults can be -completed more quickly (but you do not have to implement this -optimization). - -@node Memory Mapped Files Background -@subsection Memory Mapped Files +Other aliases should only arise if you implement sharing for extra +credit (@pxref{VM Extra Credit}), or if there is a bug in your code. + +@xref{Page Table Accessed and Dirty Bits}, for details of the functions +to work with accessed and dirty bits. + +@node Managing the Swap Table +@subsection Managing the Swap Table + +The swap table tracks in-use and free swap slots. It should allow +picking an unused swap slot for evicting a page from its frame to the +swap disk. It should allow freeing a swap slot when its page is read +back or the process whose page was swapped is terminated. + +You may use the disk on interface @code{hd1:1} as the swap disk, using +the disk interface prototyped in @code{devices/disk.h}. From the +@file{vm/build} directory, use the command @code{pintos-mkdisk swap.dsk +@var{n}} to create an @var{n} MB swap disk named @file{swap.dsk}. +Afterward, @file{swap.dsk} will automatically be attached as +@code{hd1:1} when you run @command{pintos}. Alternatively, you can tell +@command{pintos} to use a temporary @var{n}-MB swap disk for a single +run with @option{--swap-disk=@var{n}}. + +Swap slots should be allocated lazily, that is, only when they are +actually required by eviction. Reading data pages from the executable +and writing them to swap immediately at process startup is not lazy. +Swap slots should not be reserved to store particular pages. + +Free a swap slot when its contents are read back into a frame. + +@node Managing Memory Mapped Files Back +@subsection Managing Memory Mapped Files The file system is most commonly accessed with @code{read} and -@code{write} system calls. A secondary interface is to ``map'' -the file into the virtual address space. The program can then use load -and store instructions directly on the file data. An alternative view -is to see the file system is as ``durable memory'': files just store -data structures, so if you access ordinary data structures using normal -program instructions, why not access durable data structures the same -way? +@code{write} system calls. A secondary interface is to ``map'' the file +into virtual pages, using the @code{mmap} system call. The program can +then use memory instructions directly on the file data. Suppose file @file{foo} is @t{0x1000} bytes (4 kB, or one page) long. If @file{foo} is mapped into memory starting at address @t{0x5000}, then any memory accesses to locations @t{0x5000}@dots{}@t{0x5fff} will access the corresponding bytes of @file{foo}. -A consequence of memory mapped files is that address spaces are -sparsely populated with lots of segments, one for each memory mapped -file (plus one each for code, data, and stack). +Here's a program that uses @code{mmap} to print a file to the console. +It opens the file specified on the command line, maps it at virtual +address @t{0x10000000}, writes the mapped data to the console (fd 1), +and unmaps the file. + +@example +#include +#include +int main (int argc UNUSED, char *argv[]) +@{ + void *data = (void *) 0x10000000; /* @r{Address at which to map.} */ + + int fd = open (argv[1]); /* @r{Open file.} */ + mapid_t map = mmap (fd, data); /* @r{Map file.} */ + write (1, data, filesize (fd)); /* @r{Write file to console.} */ + munmap (map); /* @r{Unmap file (optional).} */ + return 0; +@} +@end example + +A similar program with full error handling is included as @file{mcat.c} +in the @file{examples} directory, which also contains @file{mcp.c} as a +second example of @code{mmap}. + +Your submission must be able to track what memory is used by memory +mapped files. This is necessary to properly handle page faults in the +mapped regions and to ensure that mapped files do not overlap any other +segments within the process. + +@node Project 3 Suggested Order of Implementation +@section Suggested Order of Implementation + +We suggest the following initial order of implementation: + +@enumerate 1 +@item +Frame table (@pxref{Managing the Frame Table}). Change @file{process.c} +to use your frame table allocator. + +Do not implement swapping yet. If you run out of frames, fail the +allocator or panic the kernel. + +After this step, your kernel should still pass all the project 2 test +cases. + +@item +Segment table and page fault handler (@pxref{Managing the Segment +Table}). Change @file{process.c} to record the necessary information in +the segment table when loading an executable and setting up its stack. +Implement loading of code and data segments in the page fault handler. +For now, consider only valid accesses. + +After this step, your kernel should pass all of the project 2 +functionality test cases, but only some of the robustness tests. +@end enumerate + +From here, you can implement stack growth, mapped files, and page +reclamation on process exit in parallel. + +The next step is to implement eviction (@pxref{Managing the Frame +Table}). Initially you could choose the page to evict randomly. At +this point, you need to consider how to manage accessed and dirty bits +and aliasing of user and kernel pages. Synchronization is also a +concern: how do you deal with it if process A faults on a page whose +frame process B is in the process of evicting? Finally, implement a +eviction strategy such as the clock algorithm. @node Project 3 Requirements @section Requirements @@ -286,9 +486,7 @@ disk, how to implement paging, etc. @menu * Project 3 Design Document:: -* Page Table Management:: -* Paging To and From Disk:: -* Lazy Loading:: +* Paging:: * Stack Growth:: * Memory Mapped Files:: @end menu @@ -303,148 +501,32 @@ read the design document template before you start working on the project. @xref{Project Documentation}, for a sample design document that goes along with a fictitious project. -@node Page Table Management -@subsection Page Table Management - -Implement page directory and page table management to support virtual -memory. You will need data structures to accomplish the following -tasks: - -@itemize @bullet -@item -Some way of translating in software from virtual page frames to -physical page frames. Pintos provides a hash table that you may find -useful for this purpose (@pxref{Hash Table}). - -You don't strictly need a new data structure for this. You could -instead modify the code in @file{userprog/pagedir.c}. If you do that -you'll need to thoroughly understand how 80@var{x}86 page tables work -by, e.g.,@: studying section 3.7, ``Page Translation Using 32-Bit -Physical Addressing,'' in @bibref{IA32-v3a}. In practice, most groups -use a separate data structure. - -@item -Some way of finding a page on disk (in a file or in swap) if it is not -in memory. - -You can generalize the virtual-to-physical page table, so that it allows -you to locate a page wherever it is in physical memory or on disk, or -you can make this a separate table. - -@item -Some way of translating from physical page frames back to virtual page -frames, so that when you evict a physical page from its frame, you can -invalidate its page table entry (or entries). -@end itemize - -The page fault handler, @func{page_fault} in -@file{threads/exception.c}, needs to do roughly the following: +@node Paging +@subsection Paging -@enumerate 1 -@item -Locate the page backing the virtual -address that faulted. It might be in the file system, in swap, -or it might be an invalid virtual address. -If you implement sharing, it might even -already be in physical memory, but not in the page table. - -If the virtual address is invalid, that is, if there's nothing -assigned to go there, or if the virtual address is above -@code{PHYS_BASE}, meaning that it belongs to the kernel instead of the -user, then the process's memory access must be disallowed. -In this case, terminate the process and free all of its resources. - -@item -If the page is not in physical memory, fetch it by appropriate means. -If necessary to make room, first evict some other page from memory. -(When you do that you need to first remove references to the page from -any page table that refers to it.) - -@item -Point the page table entry for the faulting virtual address to the -physical page. You can use the functions in @file{userprog/pagedir.c}. -@end enumerate +Implement paging for segments loaded from executables. All of these +pages should be loaded lazily, that is, only as the kernel intercepts +page faults for them. Upon eviction, pages modified since load (e.g.@: +as indicated by the ``dirty bit'') should be written to swap. +Unmodified pages, including read-only pages, should never be written to +swap because they can always be read back from the executable. -You'll need to modify the ELF loader in @file{userprog/process.c} to -do page table management according to your new design. As supplied, -it reads all the process's pages from disk and initializes the page -tables for them at the same time. As a first step, you might -want to leave the code that reads the pages from disk, but -use your new page table management code to construct the page tables -only as page faults occur for them. - -You should use the @func{palloc_get_page} function to get the page -frames that you use for storing user virtual pages. Be sure to pass -the @code{PAL_USER} flag to this function when you do so, because that -allocates pages from a ``user pool'' separate from the ``kernel pool'' -that other calls to @func{palloc_get_page} make (@pxref{Why PAL_USER?}). - -You might find the Pintos bitmap code, in @file{lib/kernel/bitmap.c} and -@file{lib/kernel/bitmap.h}, useful for tracking pages. A bitmap is an -array of bits, each of which can be true or false. Bitmaps are -typically used to track usage in a set of (identical) resources: if -resource @var{n} is in use, then bit @var{n} of the bitmap is true. - -There are many possible ways to implement virtual memory. The above -is simply an outline of our suggested implementation. - -@node Paging To and From Disk -@subsection Paging To and From Disk - -Implement paging to and from files and the swap disk. You may use the -disk on interface @code{hd1:1} as the swap disk, using the disk -interface prototyped in @code{devices/disk.h}. From the @file{vm/build} -directory, use the command @code{pintos-mkdisk swap.dsk @var{n}} to -create an @var{n} MB swap disk named @file{swap.dsk}. Afterward, -@file{swap.dsk} will automatically be attached as @code{hd1:1} when you run -@command{pintos}. Alternatively, you can tell @command{pintos} to -use a temporary @var{n}-MB swap disk for a single run with -@option{--swap-disk=@var{n}}. - -You will need routines to move a page from memory to disk and from -disk to memory, where ``disk'' is either a file or the swap disk. If -you do a good job, your VM should still work when you -implement your own file system for the next assignment. - -To fully handle page faults, you will need a way to track pages that -are used by a process but which are not in physical memory. Pages in -swap should not be constrained to any particular ordering. You will -also need a way to track physical page frames, to find an unused one -when needed, or to evict a page when memory is needed but no empty pages -are available. The page table data structure that you designed should -do most of the work for you. - -Implement a global page replacement algorithm. You should be able to -use the ``accessed'' and ``dirty'' bits (@pxref{Accessed and Dirty -Bits}) to implement an approximation to LRU. Your algorithm should -perform at least as well as the ``second chance'' or ``clock'' -algorithm. +Implement a global page replacement algorithm that approximates LRU. +Your algorithm should perform at least as well as the ``second chance'' +or ``clock'' algorithm. Your design should allow for parallelism. If one page fault requires I/O, in the meantime processes that do not fault should continue executing and other page faults that do not require I/O should be able -to complete. These criteria require some synchronization effort. - -@node Lazy Loading -@subsection Lazy Loading - -Since you will already be paging from disk, you should implement a -``lazy'' loading scheme for new processes. When a process is created, -it will not need all of its resources immediately, so it doesn't make -sense to load all its code, data, and stack into memory when the process -is created. Instead, bring pages in from -the executable only as needed. Use the -executable file itself as backing store for read-only segments, since -these segments won't change. This means that read-only pages should not -be written to swap. - -The core of the program loader is the loop in @func{load_segment} in -@file{userprog/process.c}. -Each time around the loop, @code{page_read_bytes} receives the number of -bytes to read from the executable file and @code{page_zero_bytes} receives -the number of bytes to initialize to zero following the bytes read. The -two always sum to @code{PGSIZE} (4,096). The handling of a page depends -on these variables' values: +to complete. This will require some synchronization effort. + +You'll need to modify the core of the program loader, which is the loop +in @func{load_segment} in @file{userprog/process.c}. Each time around +the loop, @code{page_read_bytes} receives the number of bytes to read +from the executable file and @code{page_zero_bytes} receives the number +of bytes to initialize to zero following the bytes read. The two always +sum to @code{PGSIZE} (4,096). The handling of a page depends on these +variables' values: @itemize @bullet @item @@ -458,21 +540,11 @@ such pages by creating a new page consisting of all zeroes at the first page fault. @item -If neither @code{page_read_bytes} nor @code{page_zero_bytes} equals -@code{PGSIZE}, then part of the page is to be read from disk and the -remainder zeroed. This is a special case. You are allowed to handle -it by reading the partial page from disk at executable load time and -zeroing the rest of the page. This is the only case in which we will -allow you to load a page in a non-``lazy'' fashion. Many real OSes -such as Linux do not load partial pages lazily. +Otherwise, neither @code{page_read_bytes} nor @code{page_zero_bytes} +equals @code{PGSIZE}. In this case, an initial part of the page is to +be read from disk and the remainder zeroed. @end itemize -Incidentally, if you have trouble handling the third case above, you -can eliminate it temporarily by linking the test programs with a -special ``linker script.'' Read @file{Makefile.userprog} for -details. We will not test your submission with this special linker -script, so the code you turn in must properly handle all cases. - @node Stack Growth @subsection Stack Growth @@ -524,6 +596,9 @@ for it to be faulted in. (Even if you did wait, the very first instruction in the user program is likely to be one that faults in the page.) +All stack pages should be candidates for eviction. An evicted stack +page should be written to swap. + @node Memory Mapped Files @subsection Memory Mapped Files @@ -543,8 +618,6 @@ If the file's length is not a multiple of @code{PGSIZE}, then some bytes in the final mapped page ``stick out'' beyond the end of the file. Set these bytes to zero when the page is faulted in from disk, and discard them when the page is written back to disk. -A partial page need not be lazily loaded, as in the case of a partial -page in an executable (@pxref{Lazy Loading}). If successful, this function returns a ``mapping ID'' that uniquely identifies the mapping within the process. On failure, @@ -604,23 +677,23 @@ files modified by the reference solution, and some may modify files not modified by the reference solution. @verbatim - Makefile.build | 4 + Makefile.build | 4 devices/timer.c | 42 ++ - threads/init.c | 5 - threads/interrupt.c | 2 + threads/init.c | 5 + threads/interrupt.c | 2 threads/thread.c | 31 + threads/thread.h | 37 +- - userprog/exception.c | 12 - userprog/pagedir.c | 10 + userprog/exception.c | 12 + userprog/pagedir.c | 10 userprog/process.c | 319 +++++++++++++----- userprog/syscall.c | 545 ++++++++++++++++++++++++++++++- - userprog/syscall.h | 1 + userprog/syscall.h | 1 vm/frame.c | 162 +++++++++ vm/frame.h | 23 + vm/page.c | 297 ++++++++++++++++ vm/page.h | 50 ++ vm/swap.c | 85 ++++ - vm/swap.h | 11 + vm/swap.h | 11 17 files changed, 1532 insertions(+), 104 deletions(-) @end verbatim @@ -638,17 +711,6 @@ process. If you carefully designed your page table data structures, sharing of read-only pages should not make this part significantly harder. -@end table - -@menu -* Page Table and Paging FAQ:: -* Memory Mapped File FAQ:: -@end menu - -@node Page Table and Paging FAQ -@subsection Page Table and Paging FAQ - -@table @b @item Does the virtual memory system need to support data segment growth? No. The size of the data segment is determined by the linker. We still @@ -671,63 +733,4 @@ you like, but it should be the underlying mechanism. Also, you can use the @option{-ul} option to @command{pintos} to limit the size of the user pool, which makes it easy to test your VM implementation with various user memory sizes. - -@item Data pages might need swap space. Can I swap them out at process load? - -No. Reading data pages from the executable and writing them to swap -immediately at program startup is not demand paging. You need to demand -page everything (except partial pages). -@end table - -@node Memory Mapped File FAQ -@subsection Memory Mapped File FAQ - -@table @b -@item How do we interact with memory-mapped files? - -Let's say you want to map a file called @file{foo} into your address -space at address @t{0x10000000}. You open the file then use @code{mmap}: - -@example -#include -#include - -int main (void) -@{ - void *addr = (void *) 0x10000000; - int fd = open ("foo"); - mapid_t map = mmap (fd, addr); - if (map != -1) - printf ("success!\n"); -@} -@end example - -Suppose @file{foo} is a text file and you want to print the first 64 -bytes on the screen (assuming, of course, that the length of the file -is at least 64). Without @code{mmap}, you'd need to allocate a -buffer, use @code{read} to get the data from the file into the buffer, -and finally use @code{write} to put the buffer out to the display. But -with the file mapped into your address space, you can directly address -it like so: - -@example -write (STDOUT_FILENO, addr, 64); -@end example - -Similarly, if you wanted to replace the first byte of the file, -all you need to do is: - -@example -addr[0] = 'b'; -@end example - -When you're done using the memory-mapped file, you simply unmap -it: - -@example -munmap (map); -@end example - -The @command{mcp} program in @file{src/examples} shows how to copy a -file using memory-mapped I/O. @end table diff --git a/doc/vm.tmpl b/doc/vm.tmpl index 85e9d83..aef4fb8 100644 --- a/doc/vm.tmpl +++ b/doc/vm.tmpl @@ -32,17 +32,17 @@ FirstName LastName ---- ALGORITHMS ---- ->> Describe your code for locating the physical page, if any, that ->> contains the data of a given virtual page. +>> Describe your code for locating the frame, if any, that contains +>> the data of a given page. ->> How does your code coordinate accessed and dirty bits between kernel ->> and user virtual addresses that alias a single physical page, or +>> How does your code coordinate accessed and dirty bits between +>> kernel and user virtual addresses that alias a single frame, or >> alternatively how do you avoid the issue? ---- SYNCHRONIZATION ---- ->> When two user processes both need a new page frame at the same time, ->> how are races avoided? +>> When two user processes both need a new frame at the same time, how +>> are races avoided? ---- RATIONALE ---- @@ -60,13 +60,12 @@ FirstName LastName ---- ALGORITHMS ---- ->> When a physical page frame is required but none is free, some page ->> frame must be evicted. Describe your code for choosing a frame to ->> evict. +>> When a frame is required but none is free, some frame must be +>> evicted. Describe your code for choosing a frame to evict. ->> When a process P obtains a physical frame that was previously used ->> by a process Q, how do you adjust the page table (and any other ->> data structures) to reflect the physical frame Q no longer has? +>> When a process P obtains a frame that was previously used by a +>> process Q, how do you adjust the page table (and any other data +>> structures) to reflect the frame Q no longer has? >> Explain your heuristic for deciding whether a page fault for an >> invalid virtual address should cause the stack to be extended into the @@ -78,18 +77,19 @@ FirstName LastName >> explain how it prevents deadlock. (Refer to the textbook for an >> explanation of the necessary conditions for deadlock.) ->> A page fault in process P can cause another process Q's page frame to ->> be evicted. How do you ensure that Q cannot access or modify the page +>> A page fault in process P can cause another process Q's frame to be +>> evicted. How do you ensure that Q cannot access or modify the page >> during the eviction process? How do you avoid a race between P ->> evicting Q's page frame and Q faulting the page back in? +>> evicting Q's frame and Q faulting the page back in? ->> Suppose a page fault in process P causes a page to be read from disk. ->> How do you ensure that a second process Q cannot interfere by e.g. ->> attempting to evict the page while it is still being read in? +>> Suppose a page fault in process P causes a page to be read from the +>> file system or swap. How do you ensure that a second process Q +>> cannot interfere by e.g. attempting to evict the frame while it is +>> still being read in? >> Explain how you handle access to paged-out pages that occur during >> system calls. Do you use page faults to bring in pages (as in user ->> programs), or do you have a mechanism for "locking" pages into +>> programs), or do you have a mechanism for "locking" frames into >> physical memory, or do you use some other design? How do you >> gracefully handle attempted accesses to invalid virtual addresses? @@ -114,7 +114,7 @@ FirstName LastName >> Describe how memory mapped files integrate into your virtual memory >> subsystem. Explain how the page fault and eviction processes differ ->> for swap pages and other pages. +>> between swap pages and other pages. >> Explain how you determine whether a new file mapping overlaps any >> existing segment. diff --git a/solutions/p1.patch b/solutions/p1.patch index ce21107..fbfb405 100644 --- a/solutions/p1.patch +++ b/solutions/p1.patch @@ -364,11 +364,11 @@ diff -u src/threads/thread.c~ src/threads/thread.c +#include "threads/init.h" #include "threads/interrupt.h" #include "threads/intr-stubs.h" - #include "threads/mmu.h" #include "threads/palloc.h" #include "threads/switch.h" #include "threads/synch.h" +#include "devices/timer.h" + #include "threads/vaddr.h" #ifdef USERPROG #include "userprog/process.h" #endif diff --git a/solutions/p2.patch b/solutions/p2.patch index 9022ac9..90faed7 100644 --- a/solutions/p2.patch +++ b/solutions/p2.patch @@ -122,10 +122,10 @@ diff -u src/userprog/process.c~ src/userprog/process.c @@ -14,11 +14,23 @@ #include "threads/init.h" #include "threads/interrupt.h" - #include "threads/mmu.h" +#include "threads/malloc.h" #include "threads/palloc.h" #include "threads/thread.h" + #include "threads/vaddr.h" static thread_func execute_thread NO_RETURN; -static bool load (const char *cmdline, void (**eip) (void), void **esp); @@ -504,9 +504,9 @@ diff -u src/userprog/syscall.c~ src/userprog/syscall.c +#include "threads/init.h" #include "threads/interrupt.h" +#include "threads/malloc.h" -+#include "threads/mmu.h" +#include "threads/palloc.h" #include "threads/thread.h" ++#include "threads/vaddr.h" - + + diff --git a/solutions/p3.patch b/solutions/p3.patch index 754d868..69393df 100644 --- a/solutions/p3.patch +++ b/solutions/p3.patch @@ -329,13 +329,13 @@ diff -u src/userprog/pagedir.c~ src/userprog/pagedir.c @@ -35,15 +35,7 @@ pagedir_destroy (uint32_t *pd) ASSERT (pd != base_page_dir); for (pde = pd; pde < pd + pd_no (PHYS_BASE); pde++) - if (*pde & PG_P) + if (*pde & PTE_P) - { - uint32_t *pt = pde_get_pt (*pde); - uint32_t *pte; - - for (pte = pt; pte < pt + PGSIZE / sizeof *pte; pte++) -- if (*pte & PG_P) +- if (*pte & PTE_P) - palloc_free_page (pte_get_page (*pte)); - palloc_free_page (pt); - } @@ -350,10 +350,10 @@ diff -u src/userprog/process.c~ src/userprog/process.c @@ -15,11 +15,25 @@ #include "threads/init.h" #include "threads/interrupt.h" - #include "threads/mmu.h" +#include "threads/malloc.h" #include "threads/palloc.h" #include "threads/thread.h" + #include "threads/vaddr.h" +#include "vm/page.h" +#include "vm/frame.h" @@ -823,9 +823,9 @@ diff -u src/userprog/syscall.c~ src/userprog/syscall.c +#include "threads/init.h" #include "threads/interrupt.h" +#include "threads/malloc.h" -+#include "threads/mmu.h" +#include "threads/palloc.h" #include "threads/thread.h" ++#include "threads/vaddr.h" - +#include "vm/page.h" + @@ -1393,9 +1393,9 @@ diff -u src/vm/frame.c~ src/vm/frame.c +#include "devices/timer.h" +#include "threads/init.h" +#include "threads/malloc.h" -+#include "threads/mmu.h" +#include "threads/palloc.h" +#include "threads/synch.h" ++#include "threads/vaddr.h" + +static struct frame *frames; +static size_t frame_cnt; @@ -1589,9 +1589,9 @@ diff -u src/vm/page.c~ src/vm/page.c +#include "vm/swap.h" +#include "filesys/file.h" +#include "threads/malloc.h" -+#include "threads/mmu.h" +#include "threads/thread.h" +#include "userprog/pagedir.h" ++#include "threads/vaddr.h" + +/* Maximum size of process stack, in bytes. */ +#define STACK_MAX (1024 * 1024) @@ -1942,8 +1942,8 @@ diff -u src/vm/swap.c~ src/vm/swap.c +#include "vm/frame.h" +#include "vm/page.h" +#include "devices/disk.h" -+#include "threads/mmu.h" +#include "threads/synch.h" ++#include "threads/vaddr.h" + +/* The swap disk. */ +static struct disk *swap_disk; diff --git a/solutions/p4.patch b/solutions/p4.patch index c6cbc4d..9e11e25 100644 --- a/solutions/p4.patch +++ b/solutions/p4.patch @@ -2261,13 +2261,13 @@ diff -u src/userprog/pagedir.c~ src/userprog/pagedir.c @@ -35,15 +35,7 @@ pagedir_destroy (uint32_t *pd) ASSERT (pd != base_page_dir); for (pde = pd; pde < pd + pd_no (PHYS_BASE); pde++) - if (*pde & PG_P) + if (*pde & PTE_P) - { - uint32_t *pt = pde_get_pt (*pde); - uint32_t *pte; - - for (pte = pt; pte < pt + PGSIZE / sizeof *pte; pte++) -- if (*pte & PG_P) +- if (*pte & PTE_P) - palloc_free_page (pte_get_page (*pte)); - palloc_free_page (pt); - } @@ -2282,10 +2282,10 @@ diff -u src/userprog/process.c~ src/userprog/process.c @@ -15,11 +15,26 @@ #include "threads/init.h" #include "threads/interrupt.h" - #include "threads/mmu.h" +#include "threads/malloc.h" #include "threads/palloc.h" #include "threads/thread.h" + #include "threads/vaddr.h" +#include "vm/page.h" +#include "vm/frame.h" @@ -2771,9 +2771,9 @@ diff -u src/userprog/syscall.c~ src/userprog/syscall.c +#include "threads/init.h" #include "threads/interrupt.h" +#include "threads/malloc.h" -+#include "threads/mmu.h" +#include "threads/palloc.h" #include "threads/thread.h" ++#include "threads/vaddr.h" - +#include "vm/page.h" + @@ -3378,9 +3378,9 @@ diff -u src/vm/frame.c~ src/vm/frame.c +#include "devices/timer.h" +#include "threads/init.h" +#include "threads/malloc.h" -+#include "threads/mmu.h" +#include "threads/palloc.h" +#include "threads/synch.h" ++#include "threads/vaddr.h" + +static struct frame *frames; +static size_t frame_cnt; @@ -3573,9 +3573,9 @@ diff -u src/vm/page.c~ src/vm/page.c +#include "vm/swap.h" +#include "filesys/file.h" +#include "threads/malloc.h" -+#include "threads/mmu.h" +#include "threads/thread.h" +#include "userprog/pagedir.h" ++#include "threads/vaddr.h" + +/* Maximum size of process stack, in bytes. */ +#define STACK_MAX (1024 * 1024) @@ -3927,8 +3927,8 @@ diff -u src/vm/swap.c~ src/vm/swap.c +#include "vm/frame.h" +#include "vm/page.h" +#include "devices/disk.h" -+#include "threads/mmu.h" +#include "threads/synch.h" ++#include "threads/vaddr.h" + +/* The swap disk. */ +static struct disk *swap_disk; diff --git a/src/Makefile.userprog b/src/Makefile.userprog index 870aae4..447088e 100644 --- a/src/Makefile.userprog +++ b/src/Makefile.userprog @@ -5,10 +5,7 @@ $(PROGS): CPPFLAGS += -I$(SRCDIR)/lib/user -I. # Linker flags. $(PROGS): LDFLAGS = -nostdlib -static -Wl,-T,$(LDSCRIPT) $(PROGS): LDLIBS = $(shell $(CC) -print-libgcc-file-name) -$(PROGS): LDSCRIPT = $(SRCDIR)/lib/user/normal.lds -# Uncomment the following line to round up section sizes -# to full pages (for debugging only). -#$(PROGS): LDSCRIPT = $(SRCDIR)/lib/user/fullpage.lds +$(PROGS): LDSCRIPT = $(SRCDIR)/lib/user/user.lds # Library code shared between kernel and user programs. lib_SRC = lib/debug.c # Debug code. diff --git a/src/devices/vga.c b/src/devices/vga.c index 8ab08f6..949dd97 100644 --- a/src/devices/vga.c +++ b/src/devices/vga.c @@ -4,8 +4,8 @@ #include #include #include "threads/io.h" -#include "threads/mmu.h" #include "threads/interrupt.h" +#include "threads/vaddr.h" /* VGA text screen support. See [FREEVGA] for more information. */ diff --git a/src/examples/Makefile b/src/examples/Makefile index e15f824..66bc626 100644 --- a/src/examples/Makefile +++ b/src/examples/Makefile @@ -3,7 +3,7 @@ SRCDIR = .. # Test programs to compile, and a list of sources for each. # To add a new test, put its name on the PROGS list # and then add a name_SRC line that lists its source files. -PROGS = cat cmp cp echo halt hex-dump ls mcp mkdir rm shell \ +PROGS = cat cmp cp echo halt hex-dump ls mcat mcp mkdir rm shell \ bubsort insult lineup matmult recursor cat_SRC = cat.c @@ -13,6 +13,7 @@ echo_SRC = echo.c halt_SRC = halt.c hex-dump_SRC = hex-dump.c ls_SRC = ls.c +mcat_SRC = mcat.c mcp_SRC = mcp.c mkdir_SRC = mkdir.c rm_SRC = rm.c diff --git a/src/examples/mcat.c b/src/examples/mcat.c new file mode 100644 index 0000000..4d98db5 --- /dev/null +++ b/src/examples/mcat.c @@ -0,0 +1,45 @@ +/* mcat.c + + Prints files specified on command line to the console, using + mmap. */ + +#include +#include + +int +main (int argc, char *argv[]) +{ + int i; + + for (i = 1; i < argc; i++) + { + int fd; + mapid_t map; + void *data = (void *) 0x10000000; + int size; + + /* Open input file. */ + fd = open (argv[i]); + if (fd < 0) + { + printf ("%s: open failed\n", argv[i]); + return 1; + } + size = filesize (fd); + + /* Map files. */ + map = mmap (fd, data); + if (map == MAP_FAILED) + { + printf ("%s: mmap failed\n", argv[i]); + return 1; + } + + /* Write file to console. */ + write (STDOUT_FILENO, data, size); + + /* Unmap files (optional). */ + munmap (map); + } + return 0; +} diff --git a/src/filesys/fsutil.c b/src/filesys/fsutil.c index 8ef5d98..9c81fb0 100644 --- a/src/filesys/fsutil.c +++ b/src/filesys/fsutil.c @@ -6,9 +6,9 @@ #include "filesys/file.h" #include "filesys/filesys.h" #include "devices/disk.h" -#include "threads/mmu.h" #include "threads/malloc.h" #include "threads/palloc.h" +#include "threads/vaddr.h" /* List files in the root directory. */ void diff --git a/src/lib/user/fullpage.lds b/src/lib/user/fullpage.lds deleted file mode 100644 index b438695..0000000 --- a/src/lib/user/fullpage.lds +++ /dev/null @@ -1,50 +0,0 @@ -OUTPUT_FORMAT("elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) - -SECTIONS -{ - /* Read-only sections, merged into text segment: */ - . = 0x08048000 + SIZEOF_HEADERS; - .text : { *(.text) . = ALIGN(0x1000); } = 0x90 - .rodata : { *(.rodata) . = ALIGN(0x1000); } - .data : { *(.data) . = ALIGN(0x1000); } - .bss : { *(.bss) . = ALIGN(0x1000); } - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to the beginning - of the section so we begin them at 0. */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /DISCARD/ : { *(.note.GNU-stack) } - /DISCARD/ : { *(.eh_frame) } -} diff --git a/src/lib/user/normal.lds b/src/lib/user/user.lds similarity index 100% rename from src/lib/user/normal.lds rename to src/lib/user/user.lds diff --git a/src/threads/init.c b/src/threads/init.c index cb4edf9..57797be 100644 --- a/src/threads/init.c +++ b/src/threads/init.c @@ -16,8 +16,8 @@ #include "threads/io.h" #include "threads/loader.h" #include "threads/malloc.h" -#include "threads/mmu.h" #include "threads/palloc.h" +#include "threads/pte.h" #include "threads/thread.h" #ifdef USERPROG #include "userprog/process.h" diff --git a/src/threads/interrupt.c b/src/threads/interrupt.c index 29aea87..6d3f6bb 100644 --- a/src/threads/interrupt.c +++ b/src/threads/interrupt.c @@ -6,8 +6,8 @@ #include "threads/flags.h" #include "threads/intr-stubs.h" #include "threads/io.h" -#include "threads/mmu.h" #include "threads/thread.h" +#include "threads/vaddr.h" #include "devices/timer.h" /* Number of x86 interrupts. */ diff --git a/src/threads/malloc.c b/src/threads/malloc.c index 2bb5571..8e5861c 100644 --- a/src/threads/malloc.c +++ b/src/threads/malloc.c @@ -5,9 +5,9 @@ #include #include #include -#include "threads/mmu.h" #include "threads/palloc.h" #include "threads/synch.h" +#include "threads/vaddr.h" /* A simple implementation of malloc(). diff --git a/src/threads/mmu.h b/src/threads/mmu.h deleted file mode 100644 index 9f71d4a..0000000 --- a/src/threads/mmu.h +++ /dev/null @@ -1,214 +0,0 @@ -#ifndef THREADS_MMU_H -#define THREADS_MMU_H - -#include -#include -#include - -#include "threads/loader.h" - -/* Virtual to physical translation works like this on an x86: - - - The top 10 bits of the virtual address (bits 22:31) are used - to index into the page directory. If the PDE is marked - "present," the physical address of a page table is read from - the PDE thus obtained. If the PDE is marked "not present" - then a page fault occurs. - - - The next 10 bits of the virtual address (bits 12:21) are - used to index into the page table. If the PTE is marked - "present," the physical address of a data page is read from - the PTE thus obtained. If the PTE is marked "not present" - then a page fault occurs. - - - The bottom 12 bits of the virtual address (bits 0:11) are - added to the data page's physical base address, producing - the final physical address. - - - 31 22 21 12 11 0 - +--------------------------------------------------------------------+ - | Page Directory Index | Page Table Index | Page Offset | - +--------------------------------------------------------------------+ - | | | - _______/ _______/ _____/ - / / / - / Page Directory / Page Table / Data Page - / .____________. / .____________. / .____________. - |1,023|____________| |1,023|____________| | |____________| - |1,022|____________| |1,022|____________| | |____________| - |1,021|____________| |1,021|____________| \__\|____________| - |1,020|____________| |1,020|____________| /|____________| - | | | | | | | | - | | | \____\| |_ | | - | | . | /| . | \ | . | - \____\| . |_ | . | | | . | - /| . | \ | . | | | . | - | . | | | . | | | . | - | | | | | | | | - |____________| | |____________| | |____________| - 4|____________| | 4|____________| | |____________| - 3|____________| | 3|____________| | |____________| - 2|____________| | 2|____________| | |____________| - 1|____________| | 1|____________| | |____________| - 0|____________| \__\0|____________| \____\|____________| - / / -*/ - -#define MASK(SHIFT, CNT) (((1ul << (CNT)) - 1) << (SHIFT)) - -/* Page offset (bits 0:12). */ -#define PGSHIFT 0 /* Index of first offset bit. */ -#define PGBITS 12 /* Number of offset bits. */ -#define PGMASK MASK(PGSHIFT, PGBITS) /* Page offset bits (0:12). */ -#define PGSIZE (1 << PGBITS) /* Bytes in a page. */ - -/* Page table (bits 12:22). */ -#define PTSHIFT PGBITS /* Index of first page table bit. */ -#define PTBITS 10 /* Number of page table bits. */ -#define PTMASK MASK(PTSHIFT, PTBITS) /* Page table bits (12:22). */ -#define PTSPAN (1 << PTBITS << PGBITS) /* Bytes covered by a page table. */ - -/* Page directory (bits 22:32). */ -#define PDSHIFT (PTSHIFT + PTBITS) /* First page dir bit. */ -#define PDBITS 10 /* Number of page dir bits. */ -#define PDMASK MASK(PDSHIFT, PDBITS) /* Page directory bits (22:32). */ - -/* Offset within a page. */ -static inline unsigned pg_ofs (const void *va) { - return (uintptr_t) va & PGMASK; -} - -/* Virtual page number. */ -static inline uintptr_t pg_no (const void *va) { - return (uintptr_t) va >> PTSHIFT; -} - -/* Round up to nearest page boundary. */ -static inline void *pg_round_up (const void *va) { - return (void *) (((uintptr_t) va + PGSIZE - 1) & ~PGMASK); -} - -/* Round down to nearest page boundary. */ -static inline void *pg_round_down (const void *va) { - return (void *) ((uintptr_t) va & ~PGMASK); -} - -/* Base address of the 1:1 physical-to-virtual mapping. Physical - memory is mapped starting at this virtual address. Thus, - physical address 0 is accessible at PHYS_BASE, physical - address address 0x1234 at (uint8_t *) PHYS_BASE + 0x1234, and - so on. - - This address also marks the end of user programs' address - space. Up to this point in memory, user programs are allowed - to map whatever they like. At this point and above, the - virtual address space belongs to the kernel. */ -#define PHYS_BASE ((void *) LOADER_PHYS_BASE) - -/* Returns true if VADDR is a user virtual address. */ -static inline bool -is_user_vaddr (const void *vaddr) -{ - return vaddr < PHYS_BASE; -} - -/* Returns true if VADDR is a kernel virtual address. */ -static inline bool -is_kernel_vaddr (const void *vaddr) -{ - return vaddr >= PHYS_BASE; -} - -/* Returns kernel virtual address at which physical address PADDR - is mapped. */ -static inline void * -ptov (uintptr_t paddr) -{ - ASSERT ((void *) paddr < PHYS_BASE); - - return (void *) (paddr + PHYS_BASE); -} - -/* Returns physical address at which kernel virtual address VADDR - is mapped. */ -static inline uintptr_t -vtop (const void *vaddr) -{ - ASSERT (is_kernel_vaddr (vaddr)); - - return (uintptr_t) vaddr - (uintptr_t) PHYS_BASE; -} - -/* Page directories and page tables. - - For more information see [IA32-v3a] 3.7.6 "Page-Directory and - Page-Table Entries". - - PDEs and PTEs share a common format: - - 31 12 11 0 - +------------------------------------+------------------------+ - | Physical Address | Flags | - +------------------------------------+------------------------+ - - In a PDE, the physical address points to a page table. - In a PTE, the physical address points to a data or code page. - The important flags are listed below. - When a PDE or PTE is not "present", the other flags are - ignored. - A PDE or PTE that is initialized to 0 will be interpreted as - "not present", which is just fine. */ -#define PG_P 0x1 /* 1=present, 0=not present. */ -#define PG_W 0x2 /* 1=read/write, 0=read-only. */ -#define PG_U 0x4 /* 1=user/kernel, 0=kernel only. */ -#define PG_A 0x20 /* 1=accessed, 0=not acccessed. */ -#define PG_D 0x40 /* 1=dirty, 0=not dirty (PTEs only). */ - -/* Obtains page directory index from a virtual address. */ -static inline uintptr_t pd_no (const void *va) { - return (uintptr_t) va >> PDSHIFT; -} - -/* Returns a PDE that points to page table PT. */ -static inline uint32_t pde_create (uint32_t *pt) { - ASSERT (pg_ofs (pt) == 0); - return vtop (pt) | PG_U | PG_P | PG_W; -} - -/* Returns a pointer to the page table that page directory entry - PDE, which must "present", points to. */ -static inline uint32_t *pde_get_pt (uint32_t pde) { - ASSERT (pde & PG_P); - return ptov (pde & ~PGMASK); -} - -/* Obtains page table index from a virtual address. */ -static inline unsigned pt_no (const void *va) { - return ((uintptr_t) va & PTMASK) >> PTSHIFT; -} - -/* Returns a PTE that points to PAGE. - The PTE's page is readable. - If WRITABLE is true then it will be writable as well. - The page will be usable only by ring 0 code (the kernel). */ -static inline uint32_t pte_create_kernel (uint32_t *page, bool writable) { - ASSERT (pg_ofs (page) == 0); - return vtop (page) | PG_P | (writable ? PG_W : 0); -} - -/* Returns a PTE that points to PAGE. - The PTE's page is readable. - If WRITABLE is true then it will be writable as well. - The page will be usable by both user and kernel code. */ -static inline uint32_t pte_create_user (uint32_t *page, bool writable) { - return pte_create_kernel (page, writable) | PG_U; -} - -/* Returns a pointer to the page that page table entry PTE points - to. */ -static inline void *pte_get_page (uint32_t pte) { - return ptov (pte & ~PGMASK); -} - -#endif /* threads/mmu.h */ diff --git a/src/threads/palloc.c b/src/threads/palloc.c index 22a6b15..eab41e4 100644 --- a/src/threads/palloc.c +++ b/src/threads/palloc.c @@ -9,8 +9,8 @@ #include #include "threads/init.h" #include "threads/loader.h" -#include "threads/mmu.h" #include "threads/synch.h" +#include "threads/vaddr.h" /* Page allocator. Hands out memory in page-size (or page-multiple) chunks. See malloc.h for an allocator that diff --git a/src/threads/pte.h b/src/threads/pte.h new file mode 100644 index 0000000..74a52f4 --- /dev/null +++ b/src/threads/pte.h @@ -0,0 +1,107 @@ +#ifndef THREADS_PTE_H +#define THREADS_PTE_H + +#include "threads/vaddr.h" + +/* Functions and macros for working with x86 hardware page + tables. + + See vaddr.h for more generic functions and macros for virtual + addresses. + + Virtual addresses are structured as follows: + + 31 22 21 12 11 0 + +----------------------+----------------------+----------------------+ + | Page Directory Index | Page Table Index | Page Offset | + +----------------------+----------------------+----------------------+ +*/ + +/* Page table index (bits 12:21). */ +#define PTSHIFT PGBITS /* First page table bit. */ +#define PTBITS 10 /* Number of page table bits. */ +#define PTSPAN (1 << PTBITS << PGBITS) /* Bytes covered by a page table. */ +#define PTMASK BITMASK(PTSHIFT, PTBITS) /* Page table bits (12:21). */ + +/* Page directory index (bits 22:31). */ +#define PDSHIFT (PTSHIFT + PTBITS) /* First page directory bit. */ +#define PDBITS 10 /* Number of page dir bits. */ +#define PDMASK BITMASK(PDSHIFT, PDBITS) /* Page directory bits (22:31). */ + +/* Obtains page table index from a virtual address. */ +static inline unsigned pt_no (const void *va) { + return ((uintptr_t) va & PTMASK) >> PTSHIFT; +} + +/* Obtains page directory index from a virtual address. */ +static inline uintptr_t pd_no (const void *va) { + return (uintptr_t) va >> PDSHIFT; +} + +/* Page directory and page table entries. + + For more information see the section on page tables in the + Pintos reference guide chapter, or [IA32-v3a] 3.7.6 + "Page-Directory and Page-Table Entries". + + PDEs and PTEs share a common format: + + 31 12 11 0 + +------------------------------------+------------------------+ + | Physical Address | Flags | + +------------------------------------+------------------------+ + + In a PDE, the physical address points to a page table. + In a PTE, the physical address points to a data or code page. + The important flags are listed below. + When a PDE or PTE is not "present", the other flags are + ignored. + A PDE or PTE that is initialized to 0 will be interpreted as + "not present", which is just fine. */ +#define PTE_FLAGS 0x00000fff /* Flag bits. */ +#define PTE_ADDR 0xfffff000 /* Address bits. */ +#define PTE_AVL 0x00000e00 /* Bits available for OS use. */ +#define PTE_P 0x1 /* 1=present, 0=not present. */ +#define PTE_W 0x2 /* 1=read/write, 0=read-only. */ +#define PTE_U 0x4 /* 1=user/kernel, 0=kernel only. */ +#define PTE_A 0x20 /* 1=accessed, 0=not acccessed. */ +#define PTE_D 0x40 /* 1=dirty, 0=not dirty (PTEs only). */ + +/* Returns a PDE that points to page table PT. */ +static inline uint32_t pde_create (uint32_t *pt) { + ASSERT (pg_ofs (pt) == 0); + return vtop (pt) | PTE_U | PTE_P | PTE_W; +} + +/* Returns a pointer to the page table that page directory entry + PDE, which must "present", points to. */ +static inline uint32_t *pde_get_pt (uint32_t pde) { + ASSERT (pde & PTE_P); + return ptov (pde & PTE_ADDR); +} + +/* Returns a PTE that points to PAGE. + The PTE's page is readable. + If WRITABLE is true then it will be writable as well. + The page will be usable only by ring 0 code (the kernel). */ +static inline uint32_t pte_create_kernel (uint32_t *page, bool writable) { + ASSERT (pg_ofs (page) == 0); + return vtop (page) | PTE_P | (writable ? PTE_W : 0); +} + +/* Returns a PTE that points to PAGE. + The PTE's page is readable. + If WRITABLE is true then it will be writable as well. + The page will be usable by both user and kernel code. */ +static inline uint32_t pte_create_user (uint32_t *page, bool writable) { + return pte_create_kernel (page, writable) | PTE_U; +} + +/* Returns a pointer to the page that page table entry PTE points + to. */ +static inline void *pte_get_page (uint32_t pte) { + return ptov (pte & PTE_ADDR); +} + +#endif /* threads/pte.h */ + diff --git a/src/threads/thread.c b/src/threads/thread.c index ca8a8b0..8d9f335 100644 --- a/src/threads/thread.c +++ b/src/threads/thread.c @@ -7,10 +7,10 @@ #include "threads/flags.h" #include "threads/interrupt.h" #include "threads/intr-stubs.h" -#include "threads/mmu.h" #include "threads/palloc.h" #include "threads/switch.h" #include "threads/synch.h" +#include "threads/vaddr.h" #ifdef USERPROG #include "userprog/process.h" #endif diff --git a/src/threads/vaddr.h b/src/threads/vaddr.h new file mode 100644 index 0000000..184c824 --- /dev/null +++ b/src/threads/vaddr.h @@ -0,0 +1,89 @@ +#ifndef THREADS_VADDR_H +#define THREADS_VADDR_H + +#include +#include +#include + +#include "threads/loader.h" + +/* Functions and macros for working with virtual addresses. + + See pte.h for functions and macros specifically for x86 + hardware page tables. */ + +#define BITMASK(SHIFT, CNT) (((1ul << (CNT)) - 1) << (SHIFT)) + +/* Page offset (bits 0:12). */ +#define PGSHIFT 0 /* Index of first offset bit. */ +#define PGBITS 12 /* Number of offset bits. */ +#define PGSIZE (1 << PGBITS) /* Bytes in a page. */ +#define PGMASK BITMASK(PGSHIFT, PGBITS) /* Page offset bits (0:12). */ + +/* Offset within a page. */ +static inline unsigned pg_ofs (const void *va) { + return (uintptr_t) va & PGMASK; +} + +/* Virtual page number. */ +static inline uintptr_t pg_no (const void *va) { + return (uintptr_t) va >> PGBITS; +} + +/* Round up to nearest page boundary. */ +static inline void *pg_round_up (const void *va) { + return (void *) (((uintptr_t) va + PGSIZE - 1) & ~PGMASK); +} + +/* Round down to nearest page boundary. */ +static inline void *pg_round_down (const void *va) { + return (void *) ((uintptr_t) va & ~PGMASK); +} + +/* Base address of the 1:1 physical-to-virtual mapping. Physical + memory is mapped starting at this virtual address. Thus, + physical address 0 is accessible at PHYS_BASE, physical + address address 0x1234 at (uint8_t *) PHYS_BASE + 0x1234, and + so on. + + This address also marks the end of user programs' address + space. Up to this point in memory, user programs are allowed + to map whatever they like. At this point and above, the + virtual address space belongs to the kernel. */ +#define PHYS_BASE ((void *) LOADER_PHYS_BASE) + +/* Returns true if VADDR is a user virtual address. */ +static inline bool +is_user_vaddr (const void *vaddr) +{ + return vaddr < PHYS_BASE; +} + +/* Returns true if VADDR is a kernel virtual address. */ +static inline bool +is_kernel_vaddr (const void *vaddr) +{ + return vaddr >= PHYS_BASE; +} + +/* Returns kernel virtual address at which physical address PADDR + is mapped. */ +static inline void * +ptov (uintptr_t paddr) +{ + ASSERT ((void *) paddr < PHYS_BASE); + + return (void *) (paddr + PHYS_BASE); +} + +/* Returns physical address at which kernel virtual address VADDR + is mapped. */ +static inline uintptr_t +vtop (const void *vaddr) +{ + ASSERT (is_kernel_vaddr (vaddr)); + + return (uintptr_t) vaddr - (uintptr_t) PHYS_BASE; +} + +#endif /* threads/vaddr.h */ diff --git a/src/userprog/gdt.c b/src/userprog/gdt.c index 0e393ec..bed5d58 100644 --- a/src/userprog/gdt.c +++ b/src/userprog/gdt.c @@ -1,8 +1,8 @@ #include "userprog/gdt.h" #include #include "userprog/tss.h" -#include "threads/mmu.h" #include "threads/palloc.h" +#include "threads/vaddr.h" /* The Global Descriptor Table (GDT). diff --git a/src/userprog/pagedir.c b/src/userprog/pagedir.c index 013f8aa..7d99375 100644 --- a/src/userprog/pagedir.c +++ b/src/userprog/pagedir.c @@ -3,7 +3,7 @@ #include #include #include "threads/init.h" -#include "threads/mmu.h" +#include "threads/pte.h" #include "threads/palloc.h" static uint32_t *active_pd (void); @@ -34,13 +34,13 @@ pagedir_destroy (uint32_t *pd) ASSERT (pd != base_page_dir); for (pde = pd; pde < pd + pd_no (PHYS_BASE); pde++) - if (*pde & PG_P) + if (*pde & PTE_P) { uint32_t *pt = pde_get_pt (*pde); uint32_t *pte; for (pte = pt; pte < pt + PGSIZE / sizeof *pte; pte++) - if (*pte & PG_P) + if (*pte & PTE_P) palloc_free_page (pte_get_page (*pte)); palloc_free_page (pt); } @@ -110,7 +110,7 @@ pagedir_set_page (uint32_t *pd, void *upage, void *kpage, bool writable) if (pte != NULL) { - ASSERT ((*pte & PG_P) == 0); + ASSERT ((*pte & PTE_P) == 0); *pte = pte_create_user (kpage, writable); return true; } @@ -130,7 +130,7 @@ pagedir_get_page (uint32_t *pd, const void *uaddr) ASSERT (is_user_vaddr (uaddr)); pte = lookup_page (pd, uaddr, false); - if (pte != NULL && (*pte & PG_P) != 0) + if (pte != NULL && (*pte & PTE_P) != 0) return pte_get_page (*pte) + pg_ofs (uaddr); else return NULL; @@ -149,9 +149,9 @@ pagedir_clear_page (uint32_t *pd, void *upage) ASSERT (is_user_vaddr (upage)); pte = lookup_page (pd, upage, false); - if (pte != NULL && (*pte & PG_P) != 0) + if (pte != NULL && (*pte & PTE_P) != 0) { - *pte &= ~PG_P; + *pte &= ~PTE_P; invalidate_pagedir (pd); } } @@ -164,7 +164,7 @@ bool pagedir_is_dirty (uint32_t *pd, const void *vpage) { uint32_t *pte = lookup_page (pd, vpage, false); - return pte != NULL && (*pte & PG_D) != 0; + return pte != NULL && (*pte & PTE_D) != 0; } /* Set the dirty bit to DIRTY in the PTE for virtual page VPAGE @@ -176,10 +176,10 @@ pagedir_set_dirty (uint32_t *pd, const void *vpage, bool dirty) if (pte != NULL) { if (dirty) - *pte |= PG_D; + *pte |= PTE_D; else { - *pte &= ~(uint32_t) PG_D; + *pte &= ~(uint32_t) PTE_D; invalidate_pagedir (pd); } } @@ -193,7 +193,7 @@ bool pagedir_is_accessed (uint32_t *pd, const void *vpage) { uint32_t *pte = lookup_page (pd, vpage, false); - return pte != NULL && (*pte & PG_A) != 0; + return pte != NULL && (*pte & PTE_A) != 0; } /* Sets the accessed bit to ACCESSED in the PTE for virtual page @@ -205,10 +205,10 @@ pagedir_set_accessed (uint32_t *pd, const void *vpage, bool accessed) if (pte != NULL) { if (accessed) - *pte |= PG_A; + *pte |= PTE_A; else { - *pte &= ~(uint32_t) PG_A; + *pte &= ~(uint32_t) PTE_A; invalidate_pagedir (pd); } } diff --git a/src/userprog/process.c b/src/userprog/process.c index a526461..2207a3d 100644 --- a/src/userprog/process.c +++ b/src/userprog/process.c @@ -14,9 +14,9 @@ #include "threads/flags.h" #include "threads/init.h" #include "threads/interrupt.h" -#include "threads/mmu.h" #include "threads/palloc.h" #include "threads/thread.h" +#include "threads/vaddr.h" static thread_func execute_thread NO_RETURN; static bool load (const char *cmdline, void (**eip) (void), void **esp); diff --git a/src/userprog/tss.c b/src/userprog/tss.c index dfda3b9..f103823 100644 --- a/src/userprog/tss.c +++ b/src/userprog/tss.c @@ -2,8 +2,8 @@ #include #include #include "userprog/gdt.h" -#include "threads/mmu.h" #include "threads/palloc.h" +#include "threads/vaddr.h" /* The Task-State Segment (TSS). -- 2.30.2