Make it a little less broken.

[pintos-anon] / doc / userprog.texi
diff --git a/doc/userprog.texi b/doc/userprog.texi

index dd3bf12b0a50ced384546a39dd1b0692fd443000..db1ad33bbf3e458ea2657a3b03e3f0eab270790d 100644 (file)
--- a/doc/userprog.texi
+++ b/doc/userprog.texi
@@ -125,7 +125,48 @@ interfaces to understand how to use the file system, and especially
  its many limitations.  @strong{You should not modify the file system
  code for this project}.  Proper use of the file system routines now
  will make life much easier for project 4, when you improve the file
-system implementation.
+system implementation.  Until then, you will have to put up with the
+following limitations:
+
+@itemize @bullet
+@item
+No synchronization.  Concurrent accesses will interfere with one
+another, so external synchronization is needed.  @xref{Synchronizing
+File Access}, for more details.
+
+@item
+File size is fixed at creation time.  Because the root directory is
+represented as a file, the number of files that may be created is also
+limited.
+
+@item
+File data is allocated as a single extent, that is, data in a single
+file must occupy a contiguous range of sectors on disk.  External
+fragmentation can therefore become a serious problem as a file system is
+used over time.
+
+@item
+No subdirectories.
+
+@item
+File names are limited to 14 characters.
+
+@item
+A system crash mid-operation may corrupt the disk in a way
+that cannot be repaired automatically.  No @command{fsck} tool is
+provided in any case.
+@end itemize
+
+However, one important feature is included:
+
+@itemize @bullet
+@item
+Unix-like semantics for @func{filesys_remove} are implemented.
+That is, if a file is open when it is removed, its blocks
+are not deallocated and it may still be accessed by the
+threads that have it open until the last one closes it.  @xref{Removing
+an Open File}, for more information.
+@end itemize
  
  You need to be able to create and format simulated disks.  The
  @command{pintos} program provides this functionality with its
@@ -191,14 +232,13 @@ directory.  By default, the @file{Makefile} in this directory will
  compile the test programs we provide.  You can edit the
  @file{Makefile} to compile your own test programs as well.
  
-One thing you should realize immediately is that, until you use the
-above operation to copy a test program to the emulated disk, Pintos
-will be unable to do very much useful work.  You will also find that
-you won't be able to do interesting things until you copy a variety of
-programs to the disk.  A useful technique is to create a clean
-reference disk and copy that over whenever you trash your
-@file{fs.dsk} beyond a useful state, which may happen occasionally
-while debugging.
+One thing you should realize immediately is that, until you copy a
+test program to the emulated disk, Pintos will be unable to do very
+much useful work.  You will also find that you won't be able to do
+interesting things until you copy a variety of programs to the disk.
+A useful technique is to create a clean reference disk and copy that
+over whenever you trash your @file{fs.dsk} beyond a useful state,
+which may happen occasionally while debugging.
  
  @node Virtual Memory Layout
  @section Virtual Memory Layout
@@ -248,11 +288,32 @@ requirements:
  
  @itemize @bullet
  @item
-The kernel should print out the program's name and exit status
-whenever a process exits, e.g.@: @code{shell: exit(-1)}.  The name
-printed should be the full name passed to @func{process_execute},
-except that it is acceptable to truncate it to 15 characters to allow
-for the limited space in @struct{thread}.
+The kernel should print out the program's name and exit status whenever
+a process terminates, whether termination is caused by the @code{exit}
+system call or for another reason.
+
+@itemize @minus
+@item
+The message must be formatted exactly as if it were printed with
+@code{printf ("%s: exit(%d)\n", @dots{});} given appropriate arguments.
+
+@item
+The name printed should be the full name passed to
+@func{process_execute}, except that it is acceptable to truncate it to
+15 characters to allow for the limited space in @struct{thread}.  The
+name printed need not include arguments.
+
+@item
+Do not print a message when a kernel thread that is not a process
+terminates.
+
+@item
+Do not print messages about process termination for the @code{halt}
+system call.
+
+@item
+No message need be printed when a process fails to load.
+@end itemize
  
  @item
  Aside from this, the kernel should print out no other messages that
@@ -342,10 +403,11 @@ a successful exit.  Other values may be used to indicate user-defined
  conditions (usually errors).
  
  @item SYS_exec
-@itemx pid_t exec (const char *@var{file})
-Run the executable in @var{file} and return the new process's program
-id (pid).  If there is an error loading this program, returns pid -1,
-which otherwise should not be a valid id number.
+@itemx pid_t exec (const char *@var{cmd_line})
+Runs the executable whose name is given in @var{cmd_line}, passing any
+given arguments, and returns the new process's program id (pid).  Must
+return pid -1, which otherwise should not be a valid program id, if
+there is an error loading this program.
  
  @item SYS_join
  @itemx int join (pid_t @var{pid})
@@ -359,11 +421,12 @@ be disrupted).
  @item SYS_create
  @itemx bool create (const char *@var{file}, unsigned @var{initial_size})
  Create a new file called @var{file} initially @var{initial_size} bytes
-in size.  Returns -1 if failed, 0 if OK.
+in size.  Returns true if successful, false otherwise.
  
  @item SYS_remove
  @itemx bool remove (const char *@var{file})
-Delete the file called @var{file}.  Returns -1 if failed, 0 if OK.
+Delete the file called @var{file}.  Returns true if successful, false
+otherwise.
  
  @item SYS_open
  @itemx int open (const char *@var{file})
@@ -384,15 +447,22 @@ Returns the size, in bytes, of the file open as @var{fd}.
  @item SYS_read
  @itemx int read (int @var{fd}, void *@var{buffer}, unsigned @var{size})
  Read @var{size} bytes from the file open as @var{fd} into
-@var{buffer}.  Returns the number of bytes actually read, or -1 if the
-file could not be read.  Fd 0 reads from the keyboard using
+@var{buffer}.  Returns the number of bytes actually read (0 at end of
+file), or -1 if the file could not be read (due to a condition other
+than end of file).  Fd 0 reads from the keyboard using
  @func{kbd_getc}.
  
  @item SYS_write
  @itemx int write (int @var{fd}, const void *@var{buffer}, unsigned @var{size})
  Write @var{size} bytes from @var{buffer} to the open file @var{fd}.
  Returns the number of bytes actually written, or -1 if the file could
-not be written.   Fd 1 writes to the console.
+not be written.   
+
+Fd 1 writes to the console.  Your code to write to the console should
+write all of @var{buffer} in one call to @func{putbuf}, at least as
+long as @var{size} is not bigger than a few hundred bytes.  Otherwise,
+lines of text output by different processes may end up interleaved on
+the console, confusing both human readers and our grading scripts.
  
  @item SYS_seek
  @itemx void seek (int @var{fd}, unsigned @var{position})
@@ -400,6 +470,14 @@ Changes the next byte to be read or written in open file @var{fd} to
  @var{position}, expressed in bytes from the beginning of the file.
  (Thus, a @var{position} of 0 is the file's start.)
  
+A seek past the current end of a file is not an error.  A later read
+obtains 0 bytes, indicating end of file.  A later write extends the
+file, filling any unwritten gap with zeros.  (However, in Pintos, files
+have a fixed length until project 4 is complete, so writes past end of
+file will return an error.)  These semantics are implemented in the
+file system and do not require any special effort in system call
+implementation.
+
  @item SYS_tell
  @itemx unsigned tell (int @var{fd})
  Returns the position of the next byte to be read or written in open
@@ -425,26 +503,30 @@ on the user's stack in the user's virtual address space.  We recommend
  writing and testing this code before implementing any other system
  call functionality.
  
+@anchor{Synchronizing File Access}
  You must make sure that system calls are properly synchronized so that
  any number of user processes can make them at once.  In particular, it
-is not safe to call into the filesystem code provided in the
+is not safe to call into the file system code provided in the
  @file{filesys} directory from multiple threads at once.  For now, we
-recommend adding a single lock that controls access to the filesystem
+recommend adding a single lock that controls access to the file system
  code.  You should acquire this lock before calling any functions in
  the @file{filesys} directory, and release it afterward.  Don't forget
  that @func{process_execute} also accesses files.  @strong{For now, we
  recommend against modifying code in the @file{filesys} directory.}
  
-We have provided you a function for each system call in
+We have provided you a user-level function for each system call in
  @file{lib/user/syscall.c}.  These provide a way for user processes to
-invoke each system call from a C program.  Each of them calls an
-assembly language routine in @file{lib/user/syscall-stub.S}, which in
-turn invokes the system call interrupt and returns.
+invoke each system call from a C program.  Each uses a little inline
+assembly code to invoke the system call and (if appropriate) returns the
+system call's return value.
  
  When you're done with this part, and forevermore, Pintos should be
  bulletproof.  Nothing that a user program can do should ever cause the
-OS to crash, halt, assert fail, or otherwise stop running.  The sole
-exception is a call to the @code{halt} system call.
+OS to crash, halt, assert fail, or otherwise stop running.  It is
+important to emphasize this point: our tests will try to break your
+system calls in many, many ways.  You need to think of all the corner
+cases and handle them.  The sole way a user program should be able to
+cause the OS to halt is by invoking the @code{halt} system call.
  
  If a system call is passed an invalid argument, acceptable options
  include returning an error value (for those calls that return a
@@ -473,14 +555,22 @@ Here are the most common causes:
  The disk hasn't yet been formatted (with @samp{pintos run -f}).
  
  @item
-The filename specified is too long.  The file system limits file names
+The file name specified is too long.  The file system limits file names
  to 14 characters.  If you're using a command like @samp{pintos put
  ../../tests/userprog/echo}, that overflows the limit.  Use
  @samp{pintos put ../../tests/userprog/echo echo} to put the file under
  the name @file{echo} instead.
  
  @item
-The file is too big.  The file system has a 63 kB limit.
+The file system is full.
+
+@item
+The file system already contains 10 files.  (There's a 10-file limit for
+the base Pintos file system.)
+
+@item
+The file system is so fragmented that there's not enough contiguous
+space for your file.
  @end itemize
  
  @item
@@ -496,7 +586,8 @@ isn't properly set up yet, this causes a page fault.
  @samp{system call!}.}
  
  Every reasonable program tries to make at least one system call
-(@func{exit}) and most programs make more than that.  The default
+(@func{exit}) and most programs make more than that.  Notably,
+@func{printf} invokes the @code{write} system call.  The default
  system call handler just prints @samp{system call!} and terminates the
  program.  You'll have to implement 2-2 before you see anything more
  interesting.  Until then, you can use @func{hex_dump} to convince
@@ -595,7 +686,7 @@ provide a little bit of helpful code:
     Returns true if successful, false if USRC is invalid. */
  static inline bool get_user (uint8_t *dst, const uint8_t *usrc) {
    int eax;
-  asm ("movl $1f, %%eax; movb %2, %%al; movb %%al, %0; 1:"
+  asm ("mov %%eax, offset 1f; mov %%al, %2; mov %0, %%al; 1:"
         : "=m" (*dst), "=&a" (eax) : "m" (*usrc));
    return eax != 0;
  }
@@ -604,7 +695,7 @@ static inline bool get_user (uint8_t *dst, const uint8_t *usrc) {
     Returns true if successful, false if UDST is invalid. */
  static inline bool put_user (uint8_t *udst, uint8_t byte) {
    int eax;
-  asm ("movl $1f, %%eax; movb %b2, %0; 1:"
+  asm ("mov %%eax, offset 1f; mov %0, %b2; 1:"
         : "=m" (*udst), "=&a" (eax) : "r" (byte));
    return eax != 0;
  }
@@ -807,7 +898,7 @@ After we push all of the strings onto the stack, we adjust the stack
  pointer so that it is word-aligned: that is, we move it down to the
  next 4-byte boundary.  This is required because we will next be
  placing several words of data on the stack, and they must be aligned
-in order to be read correctly.  In our example, as you'll see below,
+to be read correctly.  In our example, as you'll see below,
  the strings start at address @t{0xffed}.  One word below that would be
  at @t{0xffe9}, so we could in theory put the next word on the stack
  there.  However, since the stack pointer should always be
@@ -939,3 +1030,9 @@ In this example, the caller's stack pointer would be at
  The 80@var{x}86 convention for function return values is to place them
  in the @samp{EAX} register.  System calls that return a value can do
  so by modifying the @samp{eax} member of @struct{intr_frame}.
+
+You should try to avoid writing large amounts of repetitive code for
+implementing system calls.  Each system call argument, whether an
+integer or a pointer, takes up 4 bytes on the stack.  You should be able
+to take advantage of this to avoid writing much near-identical code for
+retrieving each system call's arguments from the stack.