FILE HANDLE: Add new ENDS subcommand to control new-lines in output.

[pspp] / doc / data-io.texi
diff --git a/doc/data-io.texi b/doc/data-io.texi

index 2fffbb71feb83f4fa79441a82bbaa0a3182d9bd1..79deb4a3ac6a79cea71707e3791b19af71442797 100644 (file)
--- a/doc/data-io.texi
+++ b/doc/data-io.texi
@@ -1,3 +1,8 @@
+@c (modify-syntax-entry ?_ "w")
+@c (modify-syntax-entry ?' "'")
+@c (modify-syntax-entry ?@ "'")
+
+
  @node Data Input and Output
  @chapter Data Input and Output
  @cindex input
@@ -71,7 +76,7 @@ END DATA.
  @section CLOSE FILE HANDLE
  
  @display
-CLOSE FILE HANDLE handle_name.
+CLOSE FILE HANDLE @var{handle_name}.
  @end display
  
  @cmd{CLOSE FILE HANDLE} disassociates the name of a file handle with a
@@ -91,10 +96,10 @@ DATA} and @cmd{END DATA}, cannot be closed.  Attempts to close it with
  
  @display
  DATAFILE ATTRIBUTE
-         ATTRIBUTE=name('value') [name('value')]@dots{}
-         ATTRIBUTE=name@b{[}index@b{]}('value') [name@b{[}index@b{]}('value')]@dots{}
-         DELETE=name [name]@dots{}
-         DELETE=name@b{[}index@b{]} [name@b{[}index@b{]}]@dots{}
+         ATTRIBUTE=@var{name}('@var{value}') [@var{name}('@var{value}')]@dots{}
+         ATTRIBUTE=@var{name}@b{[}@var{index}@b{]}('@var{value}') [@var{name}@b{[}@var{index}@b{]}('@var{value}')]@dots{}
+         DELETE=@var{name} [@var{name}]@dots{}
+         DELETE=@var{name}@b{[}@var{index}@b{]} [@var{name}@b{[}@var{index}@b{]}]@dots{}
  @end display
  
  @cmd{DATAFILE ATTRIBUTE} adds, modifies, or removes user-defined
@@ -137,11 +142,11 @@ by conditional and looping structures such as @cmd{DO IF} or
  @vindex DATASET
  
  @display
-DATASET NAME name [WINDOW=@{ASIS,FRONT@}].
-DATASET ACTIVATE name [WINDOW=@{ASIS,FRONT@}].
-DATASET COPY name [WINDOW=@{MINIMIZED,HIDDEN,FRONT@}].
-DATASET DECLARE name [WINDOW=@{MINIMIZED,HIDDEN,FRONT@}].
-DATASET CLOSE @{name,*,ALL@}.
+DATASET NAME @var{name} [WINDOW=@{ASIS,FRONT@}].
+DATASET ACTIVATE @var{name} [WINDOW=@{ASIS,FRONT@}].
+DATASET COPY @var{name} [WINDOW=@{MINIMIZED,HIDDEN,FRONT@}].
+DATASET DECLARE @var{name} [WINDOW=@{MINIMIZED,HIDDEN,FRONT@}].
+DATASET CLOSE @{@var{name},*,ALL@}.
  DATASET DISPLAY.
  @end display
  
@@ -252,15 +257,15 @@ situations.
  @display
  DATA LIST [FIXED]
          @{TABLE,NOTABLE@}
-        [FILE='file-name' [ENCODING='encoding']]
-        [RECORDS=record_count]
-        [END=end_var]
-        [SKIP=record_count]
-        /[line_no] var_spec@dots{}
-
-where each var_spec takes one of the forms
-        var_list start-end [type_spec]
-        var_list (fortran_spec)
+        [FILE='@var{file_name}' [ENCODING='@var{encoding}']]
+        [RECORDS=@var{record_count}]
+        [END=@var{end_var}]
+        [SKIP=@var{record_count}]
+        /[@var{line_no}] @var{var_spec}@dots{}
+
+where each @var{var_spec} takes one of the forms
+        @var{var_list} @var{start}-@var{end} [@var{type_spec}]
+        @var{var_list} (@var{fortran_spec})
  @end display
  
  @cmd{DATA LIST FIXED} is used to read data files that have values at fixed
@@ -272,11 +277,13 @@ external file.  It may be used to specify a file name as a string or a
  file handle (@pxref{File Handles}).  If the @subcmd{FILE} subcommand is not used,
  then input is assumed to be specified within the command file using
  @cmd{BEGIN DATA}@dots{}@cmd{END DATA} (@pxref{BEGIN DATA}).
-The @subcmd{ENCODING} subcommand may only be used if the @subcmd{FILE} subcommand is also used.
-It specifies the character encoding of the file.
+The @subcmd{ENCODING} subcommand may only be used if the @subcmd{FILE}
+subcommand is also used.  It specifies the character encoding of the
+file.  @xref{INSERT}, for information on supported encodings.
  
  The optional @subcmd{RECORDS} subcommand, which takes a single integer as an
-argument, is used to specify the number of lines per record.  If RECORDS
+argument, is used to specify the number of lines per record.
+If @subcmd{RECORDS}
  is not specified, then the number of lines per record is calculated from
  the list of variable specifications later in @cmd{DATA LIST}.
  
@@ -288,8 +295,8 @@ the beginning of an input file.  It can be used to skip over a row
  that contains variable names, for example.
  
  @cmd{DATA LIST} can optionally output a table describing how the data file
-will be read.  The @subcmd{TABLE} subcommand enables this output, and @subcmd{NOTABLE}
-disables it.  The default is to output the table.
+will be read.  The @subcmd{TABLE} subcommand enables this output, and
+@subcmd{NOTABLE} disables it.  The default is to output the table.
  
  The list of variables to be read from the data list must come last.
  Each line in the data record is introduced by a slash (@samp{/}).
@@ -298,7 +305,7 @@ of variable specifications may be present.
  
  Each variable specification consists of a list of variable names
  followed by a description of their location on the input line.  Sets of
-variables may be specified using the @code{DATA LIST} TO convention
+variables may be specified using the @cmd{DATA LIST} @subcmd{TO} convention
  (@pxref{Sets of
  Variables}).  There are two ways to specify the location of the variable
  on the line: columnar style and FORTRAN style.
@@ -465,15 +472,15 @@ This example shows keywords abbreviated to their first 3 letters.
  
  @display
  DATA LIST FREE
-        [(@{TAB,'c'@}, @dots{})]
+        [(@{TAB,'@var{c}'@}, @dots{})]
          [@{NOTABLE,TABLE@}]
-        [FILE='file-name' [ENCODING='encoding']]
-        [SKIP=record_cnt]
-        /var_spec@dots{}
+        [FILE='@var{file_name}' [ENCODING='@var{encoding}']]
+        [SKIP=@var{record_cnt}]
+        /@var{var_spec}@dots{}
  
-where each var_spec takes one of the forms
-        var_list [(type_spec)]
-        var_list *
+where each @var{var_spec} takes one of the forms
+        @var{var_list} [(@var{type_spec})]
+        @var{var_list} *
  @end display
  
  In free format, the input data is, by default, structured as a series
@@ -497,7 +504,8 @@ of quoting is allowed.
  The @subcmd{NOTABLE} and @subcmd{TABLE} subcommands are as in @cmd{DATA LIST FIXED} above.
  @subcmd{NOTABLE} is the default.
  
-The @subcmd{FILE} and @subcmd{SKIP} subcommands are as in @cmd{DATA LIST FIXED} above.
+The @subcmd{FILE}, @subcmd{SKIP}, and @subcmd{ENCODING} subcommands
+are as in @cmd{DATA LIST FIXED} above.
  
  The variables to be parsed are given as a single list of variable names.
  This list must be introduced by a single slash (@samp{/}).  The set of
@@ -517,15 +525,15 @@ on field width apply, but they are honored on output.
  
  @display
  DATA LIST LIST
-        [(@{TAB,'c'@}, @dots{})]
+        [(@{TAB,'@var{c}'@}, @dots{})]
          [@{NOTABLE,TABLE@}]
-        [FILE='file-name' [ENCODING='encoding']]
-        [SKIP=record_count]
-        /var_spec@dots{}
+        [FILE='@var{file_name}' [ENCODING='@var{encoding}']]
+        [SKIP=@var{record_count}]
+        /@var{var_spec}@dots{}
  
-where each var_spec takes one of the forms
-        var_list [(type_spec)]
-        var_list *
+where each @var{var_spec} takes one of the forms
+        @var{var_list} [(@var{type_spec})]
+        @var{var_list} *
  @end display
  
  With one exception, @cmd{DATA LIST LIST} is syntactically and
@@ -562,29 +570,34 @@ the current input program.  @xref{INPUT PROGRAM}.
  
  @display
  For text files:
-        FILE HANDLE handle_name
-                /NAME='file-name'
+        FILE HANDLE @var{handle_name}
+                /NAME='@var{file_name}'
                  [/MODE=CHARACTER]
-                /TABWIDTH=tab_width
+                [/ENDS=@{CR,CRLF@}]
+                /TABWIDTH=@var{tab_width}
+                [/ENCODING='@var{encoding}']
  
  For binary files in native encoding with fixed-length records:
-        FILE HANDLE handle_name
-                /NAME='file-name'
+        FILE HANDLE @var{handle_name}
+                /NAME='@var{file_name}'
                  /MODE=IMAGE
-                [/LRECL=rec_len]
+                [/LRECL=@var{rec_len}]
+                [/ENCODING='@var{encoding}']
  
  For binary files in native encoding with variable-length records:
-        FILE HANDLE handle_name
-                /NAME='file-name'
+        FILE HANDLE @var{handle_name}
+                /NAME='@var{file_name}'
                  /MODE=BINARY
-                [/LRECL=rec_len]
+                [/LRECL=@var{rec_len}]
+                [/ENCODING='@var{encoding}']
  
  For binary files encoded in EBCDIC:
-        FILE HANDLE handle_name
-                /NAME='file-name'
+        FILE HANDLE @var{handle_name}
+                /NAME='@var{file_name}'
                  /MODE=360
                  /RECFORM=@{FIXED,VARIABLE,SPANNED@}
-                [/LRECL=rec_len]
+                [/LRECL=@var{rec_len}]
+                [/ENCODING='@var{encoding}']
  @end display
  
  Use @cmd{FILE HANDLE} to associate a file handle name with a file and
@@ -603,13 +616,12 @@ file handle name must not already have been used in a previous
  invocation of @cmd{FILE HANDLE}, unless it has been closed by an
  intervening command (@pxref{CLOSE FILE HANDLE}).
  
-The effect and syntax of FILE HANDLE depends on the selected MODE:
+The effect and syntax of @cmd{FILE HANDLE} depend on the selected MODE:
  
  @itemize
  @item
-In CHARACTER mode, the default, the data file is read as a text file,
-according to the local system's conventions, and each text line is
-read as one record.
+In CHARACTER mode, the default, the data file is read as a text file.
+Each text line is read as one record.
  
  In CHARACTER mode only, tabs are expanded to spaces by input programs,
  except by @cmd{DATA LIST FREE} with explicitly specified delimiters.
@@ -617,6 +629,11 @@ Each tab is 4 characters wide by default, but TABWIDTH (a @pspp{}
  extension) may be used to specify an alternate width.  Use a TABWIDTH
  of 0 to suppress tab expansion.
  
+By default, a file written in CHARACTER mode uses line feeds only at
+ends of lines, which is customary on Unix-like systems.  Specify ENDS
+as CR or CRLF to override the default.  @pspp{} reads files using either
+convention on any kind of system, regardless of ENDS.
+
  @item
  In IMAGE mode, the data file is treated as a series of fixed-length
  binary records.  LRECL should be used to specify the record length in
@@ -720,6 +737,14 @@ The @subcmd{NAME} subcommand specifies the name of the file associated with the
  handle.  It is required in all modes but SCRATCH mode, in which its
  use is forbidden.
  
+The @subcmd{ENCODING} subcommand specifies the encoding of text in the file.
+For reading text files in CHARACTER mode, all of the forms described
+for @subcmd{ENCODING} on the @cmd{INSERT} command are supported (@pxref{INSERT}).
+For reading in other file-based modes, encoding autodetection is not
+supported; if the specified encoding requests autodetection then the
+default encoding will be used.  This is also true when a file handle
+is used for writing a file in any mode.
+
  @node INPUT PROGRAM
  @section INPUT PROGRAM
  @vindex INPUT PROGRAM
@@ -887,8 +912,8 @@ random variates between 0 and 10.
  
  @display
  LIST
-        /VARIABLES=var_list
-        /CASES=FROM start_index TO end_index BY incr_index
+        /VARIABLES=@var{var_list}
+        /CASES=FROM @var{start_index} TO @var{end_index} BY @var{incr_index}
          /FORMAT=@{UNNUMBERED,NUMBERED@} @{WRAP,SINGLE@}
  @end display
  
@@ -900,14 +925,14 @@ printed.  Keyword VARIABLES is optional.  If @subcmd{VARIABLES} subcommand is no
  specified then all variables in the active dataset are printed.
  
  The @subcmd{CASES} subcommand can be used to specify a subset of cases to be
-printed.  Specify FROM and the case number of the first case to print,
-TO and the case number of the last case to print, and BY and the number
+printed.  Specify @subcmd{FROM} and the case number of the first case to print,
+@subcmd{TO} and the case number of the last case to print, and @subcmd{BY} and the number
  of cases to advance between printing cases, or any subset of those
-settings.  If CASES is not specified then all cases are printed.
+settings.  If @subcmd{CASES} is not specified then all cases are printed.
  
-The @subcmd{FORMAT} subcommand can be used to change the output format.  NUMBERED
-will print case numbers along with each case; UNNUMBERED, the default,
-causes the case numbers to be omitted.  The WRAP and SINGLE settings are
+The @subcmd{FORMAT} subcommand can be used to change the output format.  @subcmd{NUMBERED}
+will print case numbers along with each case; @subcmd{UNNUMBERED}, the default,
+causes the case numbers to be omitted.  The @subcmd{WRAP} and @subcmd{SINGLE} settings are
  currently not used.
  
  Case numbers start from 1.  They are counted after all transformations
@@ -936,16 +961,17 @@ active dataset.
  
  @display
  PRINT 
-        OUTFILE='file-name'
-        RECORDS=n_lines
-        @{NOTABLE,TABLE@}
-        [/[line_no] arg@dots{}]
-
-arg takes one of the following forms:
-        'string' [start-end]
-        var_list start-end [type_spec]
-        var_list (fortran_spec)
-        var_list *
+        [OUTFILE='@var{file_name}']
+        [RECORDS=@var{n_lines}]
+        [@{NOTABLE,TABLE@}]
+        [ENCODING='@var{encoding}']
+        [/[@var{line_no}] @var{arg}@dots{}]
+
+@var{arg} takes one of the following forms:
+        '@var{string}' [@var{start}]
+        @var{var_list} @var{start}-@var{end} [@var{type_spec}]
+        @var{var_list} (@var{fortran_spec})
+        @var{var_list} *
  @end display
  
  The @cmd{PRINT} transformation writes variable data to the listing
@@ -954,15 +980,20 @@ causes the data to be read.  Follow @cmd{PRINT} by @cmd{EXECUTE} to
  print variable data without invoking a procedure (@pxref{EXECUTE}).
  
  All @cmd{PRINT} subcommands are optional.  If no strings or variables
-are specified, PRINT outputs a single blank line.
+are specified, @cmd{PRINT} outputs a single blank line.
  
  The @subcmd{OUTFILE} subcommand specifies the file to receive the output.  The
  file may be a file name as a string or a file handle (@pxref{File
-Handles}).  If OUTFILE is not present then output will be sent to
-@pspp{}'s output listing file.  When OUTFILE is present, a space is
+Handles}).  If @subcmd{OUTFILE} is not present then output will be sent to
+@pspp{}'s output listing file.  When @subcmd{OUTFILE} is present, a space is
  inserted at beginning of each output line, even lines that otherwise
  would be blank.
  
+The @subcmd{ENCODING} subcommand may only be used if the
+@subcmd{OUTFILE} subcommand is also used.  It specifies the character
+encoding of the file.  @xref{INSERT}, for information on supported
+encodings.
+
  The @subcmd{RECORDS} subcommand specifies the number of lines to be output.  The
  number of lines may optionally be surrounded by parentheses.
  
@@ -977,11 +1008,10 @@ line number, the next line number will be specified.  Multiple lines may
  be specified using multiple slashes with the intended output for a line
  following its respective slash.
  
-Literal strings may be printed.  Specify the string itself.  Optionally
-the string may be followed by a column number or range of column
-numbers, specifying the location on the line for the string to be
-printed.  Otherwise, the string will be printed at the current position
-on the line.
+Literal strings may be printed.  Specify the string itself.
+Optionally the string may be followed by a column number, specifying
+the column on the line where the string should start.  Otherwise, the
+string will be printed at the current position on the line.
  
  Variables to be printed can be specified in the same ways as available
  for @cmd{DATA LIST FIXED} (@pxref{DATA LIST FIXED}).  In addition, a
@@ -1002,16 +1032,16 @@ again extend the line to that length.
  
  @display
  PRINT EJECT 
-        OUTFILE='file-name'
-        RECORDS=n_lines
+        OUTFILE='@var{file_name}'
+        RECORDS=@var{n_lines}
          @{NOTABLE,TABLE@}
-        /[line_no] arg@dots{}
+        /[@var{line_no}] @var{arg}@dots{}
  
-arg takes one of the following forms:
-        'string' [start-end]
-        var_list start-end [type_spec]
-        var_list (fortran_spec)
-        var_list *
+@var{arg} takes one of the following forms:
+        '@var{string}' [@var{start}-@var{end}]
+        @var{var_list} @var{start}-@var{end} [@var{type_spec}]
+        @var{var_list} (@var{fortran_spec})
+        @var{var_list} *
  @end display
  
  @cmd{PRINT EJECT} advances to the beginning of a new output page in
@@ -1027,7 +1057,7 @@ With @subcmd{OUTFILE}, @cmd{PRINT EJECT} writes its output to the specified file
  The first line of output is written with @samp{1} inserted in the
  first column.  Commonly, this is the only line of output.  If
  additional lines of output are specified, these additional lines are
-written with a space inserted in the first column, as with PRINT.
+written with a space inserted in the first column, as with @cmd{PRINT}.
  
  @xref{PRINT}, for more information on syntax and usage.
  
@@ -1036,7 +1066,7 @@ written with a space inserted in the first column, as with PRINT.
  @vindex PRINT SPACE
  
  @display
-PRINT SPACE OUTFILE='file-name' n_lines.
+PRINT SPACE [OUTFILE='@var{file_name}'] [ENCODING='@var{encoding}'] [@var{n_lines}].
  @end display
  
  @cmd{PRINT SPACE} prints one or more blank lines to an output file.
@@ -1046,6 +1076,10 @@ a file specified by file name as a string or file handle (@pxref{File
  Handles}).  If OUTFILE is not specified then output will be directed to
  the listing file.
  
+The @subcmd{ENCODING} subcommand may only be used if @subcmd{OUTFILE}
+is also used.  It specifies the character encoding of the file.
+@xref{INSERT}, for information on supported encodings.
+
  n_lines is also optional.  If present, it is an expression
  (@pxref{Expressions}) specifying the number of blank lines to be
  printed.  The expression must evaluate to a nonnegative value.
@@ -1055,7 +1089,7 @@ printed.  The expression must evaluate to a nonnegative value.
  @vindex REREAD
  
  @display
-REREAD FILE=handle COLUMN=column.
+REREAD [FILE=@var{handle}] [COLUMN=@var{column}] [ENCODING='@var{encoding}'].
  @end display
  
  The @cmd{REREAD} transformation allows the previous input line in a
@@ -1075,6 +1109,10 @@ re-reading.  Specify an expression (@pxref{Expressions}) evaluating to
  the first column that should be included in the re-read line.  Columns
  are numbered from 1 at the left margin.
  
+The @subcmd{ENCODING} subcommand may only be used if the @subcmd{FILE}
+subcommand is also used.  It specifies the character encoding of the
+file.  @xref{INSERT}, for information on supported encodings.
+
  Issuing @code{REREAD} multiple times will not back up in the data
  file.  Instead, it will re-read the same line multiple times.
  
@@ -1084,18 +1122,18 @@ file.  Instead, it will re-read the same line multiple times.
  
  @display
  REPEATING DATA
-        /STARTS=start-end
-        /OCCURS=n_occurs
-        /FILE='file-name'
-        /LENGTH=length
-        /CONTINUED[=cont_start-cont_end]
-        /ID=id_start-id_end=id_var
+        /STARTS=@var{start}-@var{end}
+        /OCCURS=@var{n_occurs}
+        /FILE='@var{file_name}'
+        /LENGTH=@var{length}
+        /CONTINUED[=@var{cont_start}-@var{cont_end}]
+        /ID=@var{id_start}-@var{id_end}=@var{id_var}
          /@{TABLE,NOTABLE@}
-        /DATA=var_spec@dots{}
+        /DATA=@var{var_spec}@dots{}
  
-where each var_spec takes one of the forms
-        var_list start-end [type_spec]
-        var_list (fortran_spec)
+where each @var{var_spec} takes one of the forms
+        @var{var_list} @var{start}-@var{end} [@var{type_spec}]
+        @var{var_list} (@var{fortran_spec})
  @end display
  
  @cmd{REPEATING DATA} parses groups of data repeating in
@@ -1166,16 +1204,16 @@ structure (@pxref{LOOP}).  Use @cmd{DATA LIST} before, not after,
  
  @display
  WRITE 
-        OUTFILE='file-name'
-        RECORDS=n_lines
+        OUTFILE='@var{file_name}'
+        RECORDS=@var{n_lines}
          @{NOTABLE,TABLE@}
-        /[line_no] arg@dots{}
+        /[@var{line_no}] @var{arg}@dots{}
  
-arg takes one of the following forms:
-        'string' [start-end]
-        var_list start-end [type_spec]
-        var_list (fortran_spec)
-        var_list *
+@var{arg} takes one of the following forms:
+        '@var{string}' [@var{start}-@var{end}]
+        @var{var_list} @var{start}-@var{end} [@var{type_spec}]
+        @var{var_list} (@var{fortran_spec})
+        @var{var_list} *
  @end display
  
  @code{WRITE} writes text or binary data to an output file.