From cbdfa35f7fb46948d1ee8aee7b7438cf1a5fd44c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 29 Oct 2006 00:01:52 +0000 Subject: [PATCH] Change some aspects of the PRINT, PRINT EJECT, and WRITE transformations to more closely match SPSS behavior: - PRINT puts spaces between variables, unless a format is specified explicitly. - WRITE doesn't put space between variables. - PRINT to an external file prefixes each line with a space. - PRINT EJECT to an external file indicates a formfeed by a "1" in the first column. - WRITE writes out spaces for system-missing values, not a period. - When no output is specified, an empty record is output. Also, update documentation and tests to match this behavior. --- doc/data-io.texi | 74 +++++++--- src/data/ChangeLog | 4 + src/data/format.c | 20 ++- src/data/format.h | 1 + src/language/data-io/ChangeLog | 24 ++++ src/language/data-io/data-writer.c | 58 +++++--- src/language/data-io/print.c | 99 +++++++++---- tests/command/print-strings.sh | 4 +- tests/command/print.sh | 218 +++++++++++++++-------------- tests/command/very-long-strings.sh | 6 +- 10 files changed, 326 insertions(+), 182 deletions(-) diff --git a/doc/data-io.texi b/doc/data-io.texi index 07e01e60..d7e18ae8 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -482,10 +482,11 @@ exception). By default, each tab is 4 characters wide, but an alternate width may be specified on TABWIDTH. A tab width of 0 suppresses tab expansion entirely. -In IMAGE mode, the data file is opened in ANSI C binary mode and records -are fixed in length. In IMAGE mode, LRECL specifies the record length in -bytes, with a default of 1024. Tab characters are never expanded to -spaces in binary mode. +In IMAGE mode, the data file is opened in ANSI C binary mode. Record +length is fixed, with output data truncated or padded with spaces to +the record length. LRECL specifies the record length in bytes, with a +default of 1024. Tab characters are never expanded to spaces in +binary mode. 
Records The NAME subcommand specifies the name of the file associated with the handle. It is required in CHARACTER and IMAGE modes. @@ -808,7 +809,7 @@ PRINT OUTFILE='file-name' RECORDS=n_lines @{NOTABLE,TABLE@} - /[line_no] arg@dots{} + [/[line_no] arg@dots{}] arg takes one of the following forms: 'string' [start-end] @@ -817,17 +818,20 @@ arg takes one of the following forms: var_list * @end display -The @cmd{PRINT} transformation writes variable data to an output file. -@cmd{PRINT} is executed when a procedure causes the data to be read. -Follow @cmd{PRINT} by @cmd{EXECUTE} to print variable data without -invoking a procedure (@pxref{EXECUTE}). +The @cmd{PRINT} transformation writes variable data to the listing +file or an output file. @cmd{PRINT} is executed when a procedure +causes the data to be read. Follow @cmd{PRINT} by @cmd{EXECUTE} to +print variable data without invoking a procedure (@pxref{EXECUTE}). -All @cmd{PRINT} subcommands are optional. +All @cmd{PRINT} subcommands are optional. If no strings or variables +are specified, PRINT outputs a single blank line. The OUTFILE subcommand specifies the file to receive the output. The file may be a file name as a string or a file handle (@pxref{File -Handles}). If OUTFILE is not present then output will be sent to PSPP's -output listing file. +Handles}). If OUTFILE is not present then output will be sent to +PSPP's output listing file. When OUTFILE is present, a space is +inserted at the beginning of each output line, even lines that otherwise +would be blank. The RECORDS subcommand specifies the number of lines to be output. The number of lines may optionally be surrounded by parentheses. @@ -880,8 +884,20 @@ arg takes one of the following forms: var_list * @end display -@cmd{PRINT EJECT} writes data to an output file. Before the data is -written, the current page in the listing file is ejected. +@cmd{PRINT EJECT} advances to the beginning of a new output page in +the listing file or output file. 
It can also output data in the same +way as @cmd{PRINT}. + +All @cmd{PRINT EJECT} subcommands are optional. + +Without OUTFILE, PRINT EJECT ejects the current page in +the listing file, then it produces other output, if any is specified. + +With OUTFILE, PRINT EJECT writes its output to the specified file. +The first line of output is written with @samp{1} inserted in the +first column. Commonly, this is the only line of output. If +additional lines of output are specified, these additional lines are +written with a space inserted in the first column, as with PRINT. @xref{PRINT}, for more information on syntax and usage. @@ -1034,11 +1050,29 @@ arg takes one of the following forms: @code{WRITE} writes text or binary data to an output file. -@xref{PRINT}, for more information on syntax and usage. The main -difference between @code{PRINT} and @code{WRITE} is that @cmd{WRITE} -uses write formats by default, where PRINT uses print formats. +@xref{PRINT}, for more information on syntax and usage. @cmd{PRINT} +and @cmd{WRITE} differ in only a few ways: + +@itemize @bullet +@item +@cmd{WRITE} uses write formats by default, whereas @cmd{PRINT} uses +print formats. -The sole additional difference is that if @cmd{WRITE} is used to send output -to a binary file, carriage control characters will not be output. -@xref{FILE HANDLE}, for information on how to declare a file as binary. +@item +@cmd{PRINT} inserts a space between variables unless a format is +explicitly specified, but @cmd{WRITE} never inserts space between +variables in output. + +@item +@cmd{PRINT} inserts a space at the beginning of each line that it +writes to an output file (and @cmd{PRINT EJECT} inserts @samp{1} at +the beginning of each line that should begin a new page), but +@cmd{WRITE} does not. + +@item +@cmd{PRINT} outputs the system-missing value according to its +specified output format, whereas @cmd{WRITE} outputs the +system-missing value as a field filled with spaces. 
Binary formats +are an exception. +@end itemize @setfilename ignored diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 4e15a8de..be7cf441 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,7 @@ +Sat Oct 28 11:56:50 2006 Ben Pfaff + + * format.c (fmt_is_binary): New function. + Thu Oct 19 22:59:56 WST 2006 John Darrington * procedure.c procedure.h: Encapsulated the static data into a single diff --git a/src/data/format.c b/src/data/format.c index 022b5b18..63f536ee 100644 --- a/src/data/format.c +++ b/src/data/format.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -388,6 +388,24 @@ make_output_format (int type, int w, int d) return f; } +/* Returns true if TYPE is a binary format, + false otherwise. */ +bool +fmt_is_binary (int type) +{ + switch (type) + { + case FMT_P: + case FMT_PK: + case FMT_IB: + case FMT_PIB: + case FMT_RB: + return true; + + default: + return false; + } +} bool measure_is_valid(enum measure m) diff --git a/src/data/format.h b/src/data/format.h index b5e7130a..b7200555 100644 --- a/src/data/format.h +++ b/src/data/format.h @@ -124,5 +124,6 @@ char *fmt_to_string (const struct fmt_spec *); void num_to_string (double v, char *s, int w, int d); struct fmt_spec make_input_format (int type, int w, int d); struct fmt_spec make_output_format (int type, int w, int d); +bool fmt_is_binary (int type); #endif /* !format_h */ diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index 24c932ab..bf87e21a 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,27 @@ +Sat Oct 28 11:57:19 2006 Ben Pfaff + + * data-writer.c (struct dfm_writer): Removed `bounce' member, and + all references to it. 
+ (dfm_put_record) Change semantics so that it adds formatting + itself, such as new-line characters, instead of putting that + responsibility on the caller. Also, pad binary records with + spaces instead of zeros, for compatibility. + + * print.c (struct prt_out_spec) New member `sysmis_as_spaces'. + (struct print_trns) Remove `omit_new_lines' and all references, + since dfm_put_record() is taking care of that. Add + `include_prefix'. + (internal_cmd_print) Allow an empty set of data to print. Set + include_prefix. + (parse_specs) Allow an empty set of data to print. + (parse_variable_argument) Only add space with PRINT command. Set + sysmis_as_spaces. + (print_trns_proc) Indent records if include_prefix is set, for + compatibility. Output SYSMIS as spaces if sysmis_as_spaces is + set. Put "1" in first column if PRINT EJECT is used with an + external output file. + (flush_records) Ditto. + Sat Oct 28 16:19:57 WST 2006 John Darrington * data-reader.c: Eliminated references to extern variable getl_buf diff --git a/src/language/data-io/data-writer.c b/src/language/data-io/data-writer.c index 23220cd1..2c8f38a7 100644 --- a/src/language/data-io/data-writer.c +++ b/src/language/data-io/data-writer.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-2004 Free Software Foundation, Inc. + Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -18,16 +18,22 @@ 02110-1301, USA. */ #include + #include + #include #include #include + +#include +#include #include +#include #include -#include -#include #include +#include "minmax.h" + #include "gettext.h" #define _(msgid) gettext (msgid) @@ -36,7 +42,6 @@ struct dfm_writer { struct file_handle *fh; /* File handle. */ FILE *file; /* Associated file. */ - char *bounce; /* Bounce buffer for fixed-size fields. */ }; /* Opens a file handle for writing as a data file. 
*/ @@ -55,7 +60,6 @@ dfm_open_writer (struct file_handle *fh) w = *aux = xmalloc (sizeof *w); w->fh = fh; w->file = fn_open (fh_get_file_name (w->fh), "wb"); - w->bounce = NULL; if (w->file == NULL) { @@ -79,9 +83,10 @@ dfm_write_error (const struct dfm_writer *writer) return ferror (writer->file); } -/* Writes record REC having length LEN to the file corresponding to - HANDLE. REC is not null-terminated. Returns true on success, - false on failure. */ +/* Writes record REC (which need not be null-terminated) having + length LEN to the file corresponding to HANDLE. Adds any + needed formatting, such as a trailing new-line. Returns true + on success, false on failure. */ bool dfm_put_record (struct dfm_writer *w, const char *rec, size_t len) { @@ -89,20 +94,34 @@ dfm_put_record (struct dfm_writer *w, const char *rec, size_t len) if (dfm_write_error (w)) return false; - - if (fh_get_mode (w->fh) == FH_MODE_BINARY - && len < fh_get_record_width (w->fh)) + + switch (fh_get_mode (w->fh)) { - size_t rec_width = fh_get_record_width (w->fh); - if (w->bounce == NULL) - w->bounce = xmalloc (rec_width); - memcpy (w->bounce, rec, len); - memset (&w->bounce[len], 0, rec_width - len); - rec = w->bounce; - len = rec_width; + case FH_MODE_TEXT: + fwrite (rec, len, 1, w->file); + putc ('\n', w->file); + break; + + case FH_MODE_BINARY: + { + size_t record_width = fh_get_record_width (w->fh); + size_t write_bytes = MIN (len, record_width); + size_t pad_bytes = record_width - write_bytes; + fwrite (rec, write_bytes, 1, w->file); + while (pad_bytes > 0) + { + static const char spaces[32] = " "; + size_t chunk = MIN (pad_bytes, sizeof spaces); + fwrite (spaces, chunk, 1, w->file); + pad_bytes -= chunk; + } + } + break; + + default: + NOT_REACHED (); } - fwrite (rec, len, 1, w->file); return !dfm_write_error (w); } @@ -130,7 +149,6 @@ dfm_close_writer (struct dfm_writer *w) if (!ok) msg (ME, _("I/O error occurred writing data file \"%s\"."), file_name); } - free (w->bounce); free (w); 
free (file_name); diff --git a/src/language/data-io/print.c b/src/language/data-io/print.c index c47189c9..3c36b98b 100644 --- a/src/language/data-io/print.c +++ b/src/language/data-io/print.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -65,6 +66,7 @@ struct prt_out_spec struct variable *var; /* Associated variable. */ struct fmt_spec format; /* Output spec. */ bool add_space; /* Add trailing space? */ + bool sysmis_as_spaces; /* Output SYSMIS as spaces? */ /* PRT_LITERAL only. */ struct string string; /* String to output. */ @@ -81,7 +83,7 @@ struct print_trns { struct pool *pool; /* Stores related data. */ bool eject; /* Eject page before printing? */ - bool omit_new_lines; /* Omit new-line characters? */ + bool include_prefix; /* Prefix lines with space? */ struct dfm_writer *writer; /* Output file, NULL=listing file. */ struct ll_list specs; /* List of struct prt_out_specs. */ size_t record_cnt; /* Number of records to write. */ @@ -147,7 +149,7 @@ internal_cmd_print (struct dataset *ds, tmp_pool = pool_create_subpool (trns->pool); /* Parse the command options. */ - while (token != '/') + while (token != '/' && token != '.') { if (lex_match_id ("OUTFILE")) { @@ -178,6 +180,10 @@ internal_cmd_print (struct dataset *ds, } } + /* When PRINT or PRINT EJECT writes to an external file, we + prefix each line with a space for compatibility. */ + trns->include_prefix = which_formats == PRINT && fh != NULL; + /* Parse variables and strings. 
*/ if (!parse_specs (tmp_pool, trns, dataset_dict (ds), which_formats)) goto error; @@ -190,9 +196,6 @@ internal_cmd_print (struct dataset *ds, trns->writer = dfm_open_writer (fh); if (trns->writer == NULL) goto error; - - trns->omit_new_lines = (which_formats == WRITE - && fh_get_mode (fh) == FH_MODE_BINARY); } /* Output the variable table if requested. */ @@ -230,6 +233,12 @@ parse_specs (struct pool *tmp_pool, struct print_trns *trns, int record = 0; int column = 1; + if (token == '.') + { + trns->record_cnt = 1; + return true; + } + while (token != '.') { bool ok; @@ -325,7 +334,7 @@ parse_variable_argument (const struct dictionary *dict, struct variable *v = vars[i]; formats[i] = which_formats == PRINT ? v->print : v->write; } - add_space = true; + add_space = which_formats == PRINT; } var_idx = 0; @@ -346,6 +355,15 @@ parse_variable_argument (const struct dictionary *dict, spec->var = var; spec->format = *f; spec->add_space = add_space; + + /* This is a completely bizarre twist for compatibility: + WRITE outputs the system-missing value as a field + filled with spaces, instead of using the normal format + that usually contains a period. */ + spec->sysmis_as_spaces = (which_formats == WRITE + && var->type == NUMERIC + && !fmt_is_binary (spec->format.type)); + ll_push_tail (&trns->specs, &spec->ll); *column += f->w + add_space; @@ -415,62 +433,83 @@ dump_table (struct print_trns *trns, const struct file_handle *fh) /* Transformation. */ -static void flush_records (struct print_trns *, - int target_record, int *record); +static void flush_records (struct print_trns *, int target_record, + bool *eject, int *record); /* Performs the transformation inside print_trns T on case C. 
*/ static int print_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED) { struct print_trns *trns = trns_; + bool eject = trns->eject; + int record = 1; struct prt_out_spec *spec; - int record; - - if (trns->eject) - som_eject_page (); - record = 1; ds_clear (&trns->line); + ds_put_char (&trns->line, ' '); ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) { - flush_records (trns, spec->record, &record); + flush_records (trns, spec->record, &eject, &record); - ds_set_length (&trns->line, spec->first_column - 1, ' '); + ds_set_length (&trns->line, spec->first_column, ' '); if (spec->type == PRT_VAR) { - data_out (ds_put_uninit (&trns->line, spec->format.w), - &spec->format, case_data (c, spec->var->fv)); + const union value *input = case_data (c, spec->var->fv); + char *output = ds_put_uninit (&trns->line, spec->format.w); + if (!spec->sysmis_as_spaces || input->f != SYSMIS) + data_out (output, &spec->format, input); + else + memset (output, ' ', spec->format.w); if (spec->add_space) ds_put_char (&trns->line, ' '); } else ds_put_substring (&trns->line, ds_ss (&spec->string)); } - flush_records (trns, trns->record_cnt + 1, &record); + flush_records (trns, trns->record_cnt + 1, &eject, &record); if (trns->writer != NULL && dfm_write_error (trns->writer)) return TRNS_ERROR; return TRNS_CONTINUE; } +/* Advance from *RECORD to TARGET_RECORD, outputting records + along the way. If *EJECT is true, then the first record + output is preceded by ejecting the page (and *EJECT is set + false). 
*/ static void -flush_records (struct print_trns *trns, int target_record, int *record) +flush_records (struct print_trns *trns, int target_record, + bool *eject, int *record) { - while (target_record > *record) + for (; target_record > *record; (*record)++) { + char *line = ds_cstr (&trns->line); + size_t length = ds_length (&trns->line); + char leader = ' '; + + if (*eject) + { + *eject = false; + if (trns->writer == NULL) + som_eject_page (); + else + leader = '1'; + } + line[0] = leader; + if (trns->writer == NULL) - tab_output_text (TAB_FIX | TAT_NOWRAP, ds_cstr (&trns->line)); + tab_output_text (TAB_FIX | TAT_NOWRAP, &line[1]); else { - if (!trns->omit_new_lines) - ds_put_char (&trns->line, '\n'); - - dfm_put_record (trns->writer, - ds_data (&trns->line), ds_length (&trns->line)); + if (!trns->include_prefix) + { + line++; + length--; + } + dfm_put_record (trns->writer, line, length); } - ds_clear (&trns->line); - - (*record)++; + + ds_truncate (&trns->line, 1); } } diff --git a/tests/command/print-strings.sh b/tests/command/print-strings.sh index cc51b457..383d0e72 100755 --- a/tests/command/print-strings.sh +++ b/tests/command/print-strings.sh @@ -82,8 +82,8 @@ if [ $? -ne 0 ] ; then no_result ; fi diff $TEMPDIR/foobar - < $TEMPDIR/print.stat << foobar -title 'Test PRINT transformation'. - -data list free table file='$TEMPDIR/data-list.data'/A B C D. -print outfile="foo" table/A(f8.2) '/' B(e8.2) '/' C(n10) '/'. -print space a. -print outfile="foo" /a b c d. -list. - -data list list table file='$TEMPDIR/data-list.data'/A B C D. -print table/A B C D. -list. - +data list notable /x y 1-2. +begin data. +12 +34 + 6 +7 +90 +end data. + +print /x y. +print eject /x y 1-2. +print /x '-' y. +print. + +print outfile='print.out' /x y. +print eject outfile='print.out' /x y (f1,f1). +print outfile='print.out' /x '-' y. +print outfile='print.out'. + +write outfile='write.out' /x y. +write outfile='write.out' /x y (2(f1)). +write outfile='write.out' /x '-' y. 
+write outfile='write.out'. + +execute. foobar if [ $? -ne 0 ] ; then no_result ; fi activity="run program" $SUPERVISOR $PSPP --testing-mode -o raw-ascii --testing-mode $TEMPDIR/print.stat > $TEMPDIR/errs -# Note vv --- there are errors in input. Therefore, the command must FAIL -if [ $? -eq 0 ] ; then fail ; fi - -activity="compare error messages" -diff -w $TEMPDIR/errs - < $TEMPDIR/data < $TESTFILE <