From: Ben Pfaff Date: Mon, 20 Jun 2005 20:24:19 +0000 (+0000) Subject: Add grading system. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a705e9ae16e14e24d313916a0c5402e175f8cce5;p=pintos-anon Add grading system. --- diff --git a/src/filesys/Make.vars b/src/filesys/Make.vars index 5a010be..7fec172 100644 --- a/src/filesys/Make.vars +++ b/src/filesys/Make.vars @@ -1,11 +1,12 @@ # -*- makefile -*- os.dsk: DEFINES = -DUSERPROG -DFILESYS - KERNEL_SUBDIRS = threads devices lib lib/kernel userprog filesys TEST_SUBDIRS = tests/userprog tests/filesys/base tests/filesys/extended +GRADING_FILE = $(SRCDIR)/tests/filesys/Grading.no-vm # Uncomment the lines below to enable VM. #os.dsk: DEFINES += -DVM #KERNEL_SUBDIRS += vm #TEST_SUBDIRS += tests/vm +#GRADING_FILE = $(SRCDIR)/tests/filesys/Grading.with-vm diff --git a/src/tests/Make.tests b/src/tests/Make.tests index b7c1432..b60243f 100644 --- a/src/tests/Make.tests +++ b/src/tests/Make.tests @@ -18,36 +18,29 @@ RESULTS = $(addsuffix .result,$(TESTS)) clean:: rm -f $(OUTPUTS) $(ERRORS) $(RESULTS) -grade:: ../rubric.txt results - @pass=; \ - for d in $(TESTS); do \ - if echo PASS | cmp -s $$d.result -; then \ - pass="$$pass $$d"; \ - fi \ - done; \ - $(SRCDIR)/tests/make-grade $< $$pass +grade:: results + $(SRCDIR)/tests/make-grade $(SRCDIR) $< $(GRADING_FILE) | tee $@ check:: results - @f=0; \ - n=0; \ - echo "Test summary:"; \ - for d in $(TESTS); do \ + @cat $< + @COUNT="`egrep '^(pass|FAIL) ' $< | wc -l`"; \ + FAILURES="`egrep '^FAIL ' $< | wc -l`"; \ + if [ "$$FAILURES" = 0 ]; then \ + echo "All $$COUNT tests passed."; \ + else \ + echo "$$FAILURES of $$COUNT tests failed."; \ + exit 1; \ + fi + +results: $(RESULTS) + @for d in $(TESTS); do \ if echo PASS | cmp -s $$d.result -; then \ echo "pass $$d"; \ else \ echo "FAIL $$d"; \ - f=`expr $$f + 1`; \ fi; \ - n=`expr $$n + 1`; \ - done; \ - if [ $$f = 0 ]; then \ - echo "All $$n tests passed."; \ - else \ - echo "$$f of $$n tests failed."; \ - exit 
1; \ - fi + done > $@ -results:: $(RESULTS) outputs:: $(OUTPUTS) $(foreach prog,$(PROGS),$(eval $(prog).output: $(prog))) diff --git a/src/tests/filesys/Grading.no-vm b/src/tests/filesys/Grading.no-vm new file mode 100644 index 0000000..c116fbd --- /dev/null +++ b/src/tests/filesys/Grading.no-vm @@ -0,0 +1,15 @@ +# Percentage of the testing point total designated for each set of +# tests. + +# This project is primarily about implementing the file system, but +# all the previous functionality should work too. It's not too easy +# to screw it up, thus the emphasis. + +# 75% for file system. +33.3% tests/filesys/extended/Rubric.functionality +16.7% tests/filesys/extended/Rubric.robustness +25% tests/filesys/base/Rubric + +# 25% for the rest. +12.5% tests/userprog/Rubric.functionality +12.5% tests/userprog/Rubric.robustness diff --git a/src/tests/filesys/Grading.with-vm b/src/tests/filesys/Grading.with-vm new file mode 100644 index 0000000..8a440e6 --- /dev/null +++ b/src/tests/filesys/Grading.with-vm @@ -0,0 +1,19 @@ +# Percentage of the testing point total designated for each set of +# tests. + +# This project is primarily about implementing the file system, but +# all the previous functionality should work too. It's not too easy +# to screw it up, thus the emphasis. + +# 75% for file system. +33.3% tests/filesys/extended/Rubric.functionality +16.7% tests/filesys/extended/Rubric.robustness +25% tests/filesys/base/Rubric + +# 25% for the rest. +12.5% tests/userprog/Rubric.functionality +12.5% tests/userprog/Rubric.robustness + +# Up to 5% bonus for working VM functionality. +2.5% tests/vm/Rubric.functionality +2.5% tests/vm/Rubric.robustness diff --git a/src/tests/filesys/base/Rubric b/src/tests/filesys/base/Rubric new file mode 100644 index 0000000..49a9d15 --- /dev/null +++ b/src/tests/filesys/base/Rubric @@ -0,0 +1,19 @@ +Functionality of base file system: +- Test basic support for small files. 
+1 sm-create +2 sm-full +2 sm-random +2 sm-seq-block +3 sm-seq-random + +- Test basic support for large files. +1 lg-create +2 lg-full +2 lg-random +2 lg-seq-block +3 lg-seq-random + +- Test synchronized multiprogram access to files. +4 syn-read +4 syn-write +2 syn-remove diff --git a/src/tests/filesys/extended/Rubric.functionality b/src/tests/filesys/extended/Rubric.functionality new file mode 100644 index 0000000..83e6e5f --- /dev/null +++ b/src/tests/filesys/extended/Rubric.functionality @@ -0,0 +1,33 @@ +Functionality of extended file system: + +- Test directory support. + +1 dir-mkdir +2 dir-mk-vine +3 dir-mk-tree + +1 dir-rmdir +2 dir-rm-vine +3 dir-rm-tree + +1 dir-lsdir + +- Test file growth. + +1 grow-create +1 grow-seq-sm +2 grow-seq-lg +2 grow-sparse +3 grow-two-files +1 grow-tell +1 grow-file-size + +- Test directory growth. + +1 grow-dir-lg +1 grow-root-sm +1 grow-root-lg + +- Test writing from multiple processes. + +3 syn-rw diff --git a/src/tests/filesys/extended/Rubric.robustness b/src/tests/filesys/extended/Rubric.robustness new file mode 100644 index 0000000..9ac7ae0 --- /dev/null +++ b/src/tests/filesys/extended/Rubric.robustness @@ -0,0 +1,13 @@ +Robustness of file system: + +1 dir-empty-name +1 dir-open +1 dir-over-file +1 dir-under-file + +1 dir-rm-cd +2 dir-rm-cwd-cd +2 dir-rm-parent +1 dir-rm-root + +1 grow-too-big diff --git a/src/tests/make-grade b/src/tests/make-grade index adb0fdd..20e0cd9 100755 --- a/src/tests/make-grade +++ b/src/tests/make-grade @@ -3,51 +3,141 @@ use strict; use warnings; -my (@rubric) = read_text_file (shift); -my (@pass) = @ARGV; - -my (@grade); - -our ($possible_overall, $score_overall) = (0, 0); -our ($possible, $score) = (0, 0); -for my $i (0...$#rubric) { - local ($_) = $rubric[$i]; - if (/^\S/ || /^\s*$/) { - end_section (); - push (@grade, $_); - } elsif (my ($value, $name, $desc) = /^\s+(\d+)\s+(\S+):\s+(.*)$/) { - $possible += $value; - my ($marker); - if (grep ($_ eq $name, @pass)) { - $score += $value; - 
$marker = ' '; - } else { - $marker = '-'; - } - push (@grade, " $marker$value $name: $desc"); - } else { - die; +@ARGV == 3 || die; +my ($src_dir, $results_file, $grading_file) = @ARGV; + +# Read pass/fail verdicts from $results_file. +open (RESULTS, '<', $results_file) || die "$results_file: open: $!\n"; +my (%verdicts, %verdict_counts); +while (<RESULTS>) { + my ($verdict, $test) = /^(pass|FAIL) (.*)$/ or die; + $verdicts{$test} = $verdict eq 'pass'; +} +close RESULTS; + +my (@failures); +my (@overall, @rubrics, @summary); +my ($pct_actual, $pct_possible) = (0, 0); + +# Read grading file. +my (@items); +open (GRADING, '<', $grading_file) || die "$grading_file: open: $!\n"; +while (<GRADING>) { + s/#.*//; + next if /^\s*$/; + my ($max_pct, $rubric_suffix) = /^\s*(\d+)%\t(.*)/ or die; + my ($dir) = $rubric_suffix =~ /^(.*)\//; + my ($rubric_file) = "$src_dir/$rubric_suffix"; + open (RUBRIC, '<', $rubric_file) or die "$rubric_file: open: $!\n"; + + # Rubric file must begin with title line. + my $title = <RUBRIC>; + chomp $title; + $title =~ s/:$// or die; + $title .= " ($rubric_suffix):"; + push (@rubrics, $title); + + my ($score, $possible) = (0, 0); + my ($cnt, $passed) = (0, 0); + my ($was_score) = 0; + while (<RUBRIC>) { + chomp; + push (@rubrics, "\t$_"), next if /^-/; + push (@rubrics, ""), next if /^\s*$/; + my ($poss, $name) = /^(\d+)\t(.*)$/ or die; + my ($test) = "$dir/$name"; + my ($points) = 0; + if (!defined $verdicts{$test}) { + push (@overall, "warning: $test not tested, assuming failure"); + } elsif ($verdicts{$test}) { + $points = $poss; + $passed++; + } + push (@failures, $test) if !$points; + $verdict_counts{$test}++; + push (@rubrics, sprintf ("\t%4s%2d/%2d %s", + $points ? 
'' : '**', $points, $poss, $test)); + $score += $points; + $possible += $poss; + $cnt++; } + close (RUBRIC); + + push (@rubrics, ""); + push (@rubrics, "\t- Section summary."); + push (@rubrics, sprintf ("\t%4s%3d/%3d %s", + '', $passed, $cnt, 'tests passed')); + push (@rubrics, sprintf ("\t%4s%3d/%3d %s", + '', $score, $possible, 'points subtotal')); + push (@rubrics, ''); + + my ($pct) = ($score / $possible) * $max_pct; + push (@summary, sprintf ("%-40s %3d/%3d %5.1f%%/%5.1f%%", + $rubric_suffix, + $score, $possible, + $pct, $max_pct)); + $pct_actual += $pct; + $pct_possible += $max_pct; } -end_section (); +close GRADING; -push (@grade, "", "TESTING TOTAL: $score_overall of $possible_overall points"); +my ($sum_line) + = "---------------------------------------- --- --- ------ ------"; +unshift (@summary, + "SUMMARY BY TEST SET", + '', + sprintf ("%-40s %3s %3s %6s %6s", + "Test Set", "Pts", "Max", "% Ttl", "% Max"), + $sum_line); +push (@summary, + $sum_line, + sprintf ("%-40s %3s %3s %5.1f%%/%5.1f%%", + 'Total', '', '', $pct_actual, $pct_possible)); -print map ("$_\n", @grade); +unshift (@rubrics, + "SUMMARY OF INDIVIDUAL TESTS", + ''); -sub end_section { - return if !$possible; - push (@grade, "Subtotal: $score of $possible points"); - $possible_overall += $possible; - $score_overall += $score; - $possible = $score = 0; +foreach my $name (keys (%verdicts)) { + my ($count) = $verdict_counts{$name}; + if (!defined ($count) || $count != 1) { + if (!defined ($count) || !$count) { + push (@overall, "warning: test $name doesn't count for grading"); + } else { + push (@overall, + "warning: test $name counted $count times in grading"); + } + } } +push (@overall, sprintf ("TOTAL TESTING SCORE: %.1f%%", $pct_actual)); + +my (@divider) = ('', '- ' x 38, ''); -sub read_text_file { - my ($file_name) = @_; - open (FILE, '<', $file_name) or die "$file_name: open: $!\n"; - my (@content) = <FILE>; - chomp (@content); - close (FILE); - return @content; +print map ("$_\n", @overall, 
@divider, @summary, @divider, @rubrics); + +for my $test (@failures) { + open (RESULT, '<', "$test.result") or next; + print map ("$_\n", @divider); + print "DETAILS OF $test FAILURE:\n\n"; + my $first_line = <RESULT>; + my ($cnt) = 0; + while (<RESULT>) { + print; + $cnt++; + } + close (RESULT); + + if ($cnt == 0) { + open (OUTPUT, '<', "$test.output") or next; + my ($panics) = 0; + while (<OUTPUT>) { + if (/PANIC/ && ++$panics > 2) { + print "[...details of additional panic(s) omitted...]\n"; + last; + } + print; + } + close (OUTPUT); + } } + diff --git a/src/tests/threads/Grading b/src/tests/threads/Grading new file mode 100644 index 0000000..88a4923 --- /dev/null +++ b/src/tests/threads/Grading @@ -0,0 +1,6 @@ +# Percentage of the testing point total designated for each set of +# tests. + +33.3% tests/threads/Rubric.alarm +33.3% tests/threads/Rubric.priority +33.4% tests/threads/Rubric.mlfqs diff --git a/src/tests/threads/Rubric.alarm b/src/tests/threads/Rubric.alarm new file mode 100644 index 0000000..a790833 --- /dev/null +++ b/src/tests/threads/Rubric.alarm @@ -0,0 +1,8 @@ +Functionality and robustness of alarm clock: + +5 alarm-single +5 alarm-multiple +5 alarm-priority + +1 alarm-zero +1 alarm-negative diff --git a/src/tests/threads/Rubric.mlfqs b/src/tests/threads/Rubric.mlfqs new file mode 100644 index 0000000..2a095a7 --- /dev/null +++ b/src/tests/threads/Rubric.mlfqs @@ -0,0 +1,13 @@ +Functionality of advanced scheduler: + +5 mlfqs-load-1 +5 mlfqs-load-60 +3 mlfqs-load-avg + +5 mlfqs-recent-1 + +5 mlfqs-fair-2 +3 mlfqs-fair-20 + +4 mlfqs-nice-2 +2 mlfqs-nice-10 diff --git a/src/tests/threads/Rubric.priority b/src/tests/threads/Rubric.priority new file mode 100644 index 0000000..4f32254 --- /dev/null +++ b/src/tests/threads/Rubric.priority @@ -0,0 +1,10 @@ +Functionality of priority scheduler: + +5 priority-preempt +5 priority-donate-one +5 priority-donate-multiple +5 priority-change + +3 priority-fifo +3 priority-sema +3 priority-condvar diff --git 
a/src/tests/userprog/Grading b/src/tests/userprog/Grading new file mode 100644 index 0000000..3746aae --- /dev/null +++ b/src/tests/userprog/Grading @@ -0,0 +1,11 @@ +# Percentage of the testing point total designated for each set of +# tests. + +# This project is primarily about implementing system calls. +# If you do so properly, the base file system functionality +# should come "for free". Thus, the points emphasis below. + +35% tests/userprog/Rubric.functionality +35% tests/userprog/Rubric.robustness + 5% tests/userprog/no-vm/Rubric +25% tests/filesys/base/Rubric diff --git a/src/tests/userprog/Rubric.functionality b/src/tests/userprog/Rubric.functionality new file mode 100644 index 0000000..d80d89f --- /dev/null +++ b/src/tests/userprog/Rubric.functionality @@ -0,0 +1,65 @@ +Functionality of system calls: + +- Test argument passing on Pintos command line. + +5 args-none +5 args-single +5 args-multiple +3 args-many +3 args-dbl-space + +- Test "create" system call. + +5 create-empty +5 create-long +5 create-normal +5 create-exists + +- Test "open" system call. + +5 open-missing +5 open-normal +5 open-twice + +- Test "read" system call. + +5 read-normal +5 read-zero + +- Test "write" system call. + +5 write-normal +5 write-zero + +- Test "close" system call. + +5 close-normal + +- Test "exec" system call. + +5 exec-once +5 exec-multiple +5 exec-arg + +- Test "wait" system call. + +5 wait-simple +5 wait-twice + +- Test "exit" system call. + +5 exit + +- Test "halt" system call. + +5 halt + +- Test recursive execution of user programs. + +15 multi-recurse + +- Test read-only executable feature. + +5 rox-simple +3 rox-child +3 rox-multichild diff --git a/src/tests/userprog/Rubric.robustness b/src/tests/userprog/Rubric.robustness new file mode 100644 index 0000000..6a2a51a --- /dev/null +++ b/src/tests/userprog/Rubric.robustness @@ -0,0 +1,48 @@ +Robustness of system calls: + +- Test robustness of file descriptor handling. 
+ +2 close-stdin +2 close-stdout +2 close-bad-fd +2 close-twice +2 read-bad-fd +2 read-stdout +2 write-bad-fd +2 write-stdin +2 multi-child-fd + +- Test robustness of pointer handling. + +5 create-bad-ptr +5 exec-bad-ptr +5 open-bad-ptr +5 read-bad-ptr +5 write-bad-ptr + +- Test robustness of buffer copying across page boundaries. + +3 create-bound +3 open-boundary +3 read-boundary +3 write-boundary + +- Test handling of null pointer and empty strings. + +2 create-null +2 open-null +2 open-empty + +- Test robustness of system call implementation. + +5 sc-bad-arg +5 sc-bad-sp +5 sc-boundary +5 sc-boundary-2 + +- Test robustness of "exec" and "wait" system calls. + +5 exec-missing +5 wait-bad-pid +5 wait-killed + diff --git a/src/tests/userprog/no-vm/Rubric b/src/tests/userprog/no-vm/Rubric new file mode 100644 index 0000000..c3816c6 --- /dev/null +++ b/src/tests/userprog/no-vm/Rubric @@ -0,0 +1,3 @@ +Functionality of features that VM might break: + +1 multi-oom diff --git a/src/tests/vm/Grading b/src/tests/vm/Grading new file mode 100644 index 0000000..579b38e --- /dev/null +++ b/src/tests/vm/Grading @@ -0,0 +1,12 @@ +# Percentage of the testing point total designated for each set of +# tests. + +# This project is primarily about virtual memory, but all the previous +# functionality should work too, and it's easy to screw it up, thus +# the equal weight placed on each. + +25% tests/vm/Rubric.functionality +25% tests/vm/Rubric.robustness +12.5% tests/userprog/Rubric.functionality +12.5% tests/userprog/Rubric.robustness +25% tests/filesys/base diff --git a/src/tests/vm/Rubric.functionality b/src/tests/vm/Rubric.functionality new file mode 100644 index 0000000..4e8a0ee --- /dev/null +++ b/src/tests/vm/Rubric.functionality @@ -0,0 +1,31 @@ +Functionality of virtual memory subsystem: + +- Test page table. + +6 pt-grow-stack +6 pt-big-stk-obj +6 pt-grow-pusha + +- Test paging behavior. 
+ +2 page-linear +3 page-parallel +3 page-shuffle +5 page-merge-seq +5 page-merge-par + +- Test "mmap" system call. + +2 mmap-read +2 mmap-write +2 mmap-shuffle + +2 mmap-twice + +2 mmap-unmap +1 mmap-exit + +3 mmap-clean + +2 mmap-close +2 mmap-remove diff --git a/src/tests/vm/Rubric.robustness b/src/tests/vm/Rubric.robustness new file mode 100644 index 0000000..52da99f --- /dev/null +++ b/src/tests/vm/Rubric.robustness @@ -0,0 +1,24 @@ +Robustness of virtual memory subsystem: + +- Test robustness of page table support. + +2 pt-bad-addr +3 pt-bad-read +2 pt-write-code +3 pt-write-code2 +4 pt-grow-bad + +- Test robustness of "mmap" system call. + +1 mmap-bad-fd +1 mmap-inherit +1 mmap-null +1 mmap-zero + +2 mmap-misalign + +2 mmap-over-code +2 mmap-over-data +2 mmap-over-stk +2 mmap-overlap + diff --git a/src/threads/Make.vars b/src/threads/Make.vars index 24e7762..1c90f59 100644 --- a/src/threads/Make.vars +++ b/src/threads/Make.vars @@ -3,3 +3,4 @@ os.dsk: DEFINES = KERNEL_SUBDIRS = threads devices lib lib/kernel $(TEST_SUBDIRS) TEST_SUBDIRS = tests/threads +GRADING_FILE = $(SRCDIR)/tests/threads/Grading diff --git a/src/userprog/Make.vars b/src/userprog/Make.vars index 5d09807..711b091 100644 --- a/src/userprog/Make.vars +++ b/src/userprog/Make.vars @@ -3,3 +3,4 @@ os.dsk: DEFINES = -DUSERPROG -DFILESYS KERNEL_SUBDIRS = threads devices lib lib/kernel userprog filesys TEST_SUBDIRS = tests/userprog tests/userprog/no-vm tests/filesys/base +GRADING_FILE = $(SRCDIR)/tests/userprog/Grading diff --git a/src/vm/Make.vars b/src/vm/Make.vars index 525f2b5..a65d137 100644 --- a/src/vm/Make.vars +++ b/src/vm/Make.vars @@ -3,3 +3,4 @@ os.dsk: DEFINES = -DUSERPROG -DFILESYS -DVM KERNEL_SUBDIRS = threads devices lib lib/kernel userprog filesys vm TEST_SUBDIRS = tests/userprog tests/vm tests/filesys/base +GRADING_FILE = $(SRCDIR)/tests/vm/Grading diff --git a/tests/Makefile b/tests/Makefile index be8b0d5..38751cd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -21,7 +21,7 
@@ cd $@/src && $(MAKE) clean $(SUBMAKEFLAGS) endef define run-tests -cd $@/src/$(PROJECT) && make check +cd $@/src/$(PROJECT) && make check && make grade endef define compile