From: Ben Pfaff Date: Fri, 22 Mar 2013 04:42:03 +0000 (-0700) Subject: FILE HANDLE: Add new ENDS subcommand to control new-lines in output. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=80788c833f111c2e9aef3e25bdb44d6a56423313;p=pspp FILE HANDLE: Add new ENDS subcommand to control new-lines in output. Requested by Ronald Crichton. --- diff --git a/NEWS b/NEWS index e0910fe6e6..7b20d4e9f7 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,5 @@ PSPP NEWS -- history of user-visible changes. -Copyright (C) 1996-2000, 2008-2012 Free Software Foundation, Inc. +Copyright (C) 1996-2000, 2008-2013 Free Software Foundation, Inc. See the end for copying conditions. Please send PSPP bug reports to bug-gnu-pspp@gnu.org. @@ -41,6 +41,8 @@ Changes from 0.6.2 to 0.7.9: contains an INCLUDE or INSERT command, substitutions are not applied to the included file. + - FILE HANDLE has a new ENDS subcommand to select LF or CRLF new-lines. + - HOST has been updated to use more modern syntax. - Most commands that work with data files now support a new diff --git a/doc/data-io.texi b/doc/data-io.texi index 88577a48e2..79deb4a3ac 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -573,6 +573,7 @@ For text files: FILE HANDLE @var{handle_name} /NAME='@var{file_name} [/MODE=CHARACTER] + [/ENDS=@{LF,CRLF@}] /TABWIDTH=@var{tab_width} [ENCODING='@var{encoding}'] @@ -619,9 +620,8 @@ The effect and syntax of @cmd{FILE HANDLE} depends on the selected MODE: @itemize @item -In CHARACTER mode, the default, the data file is read as a text file, -according to the local system's conventions, and each text line is -read as one record. +In CHARACTER mode, the default, the data file is read as a text file. +Each text line is read as one record. In CHARACTER mode only, tabs are expanded to spaces by input programs, except by @cmd{DATA LIST FREE} with explicitly specified delimiters. 
@@ -629,6 +629,11 @@ Each tab is 4 characters wide by default, but TABWIDTH (a @pspp{} extension) may be used to specify an alternate width. Use a TABWIDTH of 0 to suppress tab expansion. +By default, a file written in CHARACTER mode uses line feeds only at +ends of lines, which is customary on Unix-like systems. Specify ENDS +as CRLF to override the default, or as LF to request it explicitly. PSPP reads files using either +convention on any kind of system, regardless of ENDS. + @item In IMAGE mode, the data file is treated as a series of fixed-length binary records. LRECL should be used to specify the record length in diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c index 121a4909c4..78848c2019 100644 --- a/src/data/file-handle-def.c +++ b/src/data/file-handle-def.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -51,6 +51,7 @@ struct file_handle /* FH_REF_FILE only. */ char *file_name; /* File name as provided by user. */ enum fh_mode mode; /* File mode. */ + enum fh_line_ends line_ends; /* Line ends for text files. */ /* FH_REF_FILE and FH_REF_INLINE only. */ size_t record_width; /* Length of fixed-format records. 
*/ @@ -236,6 +237,7 @@ fh_create_file (const char *id, const char *file_name, handle = create_handle (id, handle_name, FH_REF_FILE, properties->encoding); handle->file_name = xstrdup (file_name); handle->mode = properties->mode; + handle->line_ends = properties->line_ends; handle->record_width = properties->record_width; handle->tab_width = properties->tab_width; return handle; @@ -264,7 +266,7 @@ const struct fh_properties * fh_default_properties (void) { static const struct fh_properties default_properties - = {FH_MODE_TEXT, 1024, 4, (char *) "Auto"}; + = {FH_MODE_TEXT, FH_END_LF, 1024, 4, (char *) "Auto"}; return &default_properties; } @@ -314,6 +316,15 @@ fh_get_mode (const struct file_handle *handle) return handle->mode; } +/* Returns the line ends of HANDLE, which must be a handle associated with a + file. */ +enum fh_line_ends +fh_get_line_ends (const struct file_handle *handle) +{ + assert (handle->referent == FH_REF_FILE); + return handle->line_ends; +} + /* Returns the width of a logical record on HANDLE. */ size_t fh_get_record_width (const struct file_handle *handle) diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h index 53e1bbfb6d..bd1fed7c1a 100644 --- a/src/data/file-handle-def.h +++ b/src/data/file-handle-def.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -49,10 +49,22 @@ enum fh_access FH_ACC_WRITE /* Write to it. */ }; +/* How a line ends. + + This affects only writing FH_MODE_TEXT files. Writing in other modes does + not use line ends, and reading in FH_MODE_TEXT mode accepts all forms of + line ends. */ +enum fh_line_ends + { + FH_END_LF, /* Unix line ends (\n). 
*/ + FH_END_CRLF /* MS-DOS line ends (\r\n). */ + }; + /* Properties of a file handle. */ struct fh_properties { enum fh_mode mode; /* File mode. */ + enum fh_line_ends line_ends; /* Line ends for text files. */ size_t record_width; /* Length of fixed-format records. */ size_t tab_width; /* Tab width, 0=do not expand tabs. */ const char *encoding; /* Charset for contents. */ @@ -87,6 +99,7 @@ const char *fh_get_encoding (const struct file_handle *); /* Properties of FH_REF_FILE file handles. */ const char *fh_get_file_name (const struct file_handle *); enum fh_mode fh_get_mode (const struct file_handle *) ; +enum fh_line_ends fh_get_line_ends (const struct file_handle *); /* Properties of FH_REF_FILE and FH_REF_INLINE file handles. */ size_t fh_get_record_width (const struct file_handle *); diff --git a/src/language/data-io/data-writer.c b/src/language/data-io/data-writer.c index 5270db0e81..5f87d0060a 100644 --- a/src/language/data-io/data-writer.c +++ b/src/language/data-io/data-writer.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-2004, 2006, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-2004, 2006, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -49,8 +49,10 @@ struct dfm_writer FILE *file; /* Associated file. */ struct replace_file *rf; /* Atomic file replacement support. */ char *encoding; /* Encoding. */ + enum fh_line_ends line_ends; /* Line ends for text files. */ int unit; /* Unit width, in bytes. */ + char cr[MAX_UNIT]; /* \r in encoding, 'unit' bytes long. */ char lf[MAX_UNIT]; /* \n in encoding, 'unit' bytes long. */ char spaces[32]; /* 32 bytes worth of ' ' in encoding. 
*/ }; @@ -93,7 +95,9 @@ dfm_open_writer (struct file_handle *fh, const char *encoding) w->rf = replace_file_start (fh_get_file_name (w->fh), "wb", 0666, &w->file, NULL); w->encoding = xstrdup (encoding); + w->line_ends = fh_get_line_ends (fh); w->unit = ei.unit; + memcpy (w->cr, ei.cr, sizeof w->cr); memcpy (w->lf, ei.lf, sizeof w->lf); for (ofs = 0; ofs + ei.unit <= sizeof w->spaces; ofs += ei.unit) memcpy (&w->spaces[ofs], ei.space, ei.unit); @@ -134,6 +138,8 @@ dfm_put_record (struct dfm_writer *w, const char *rec, size_t len) { case FH_MODE_TEXT: fwrite (rec, len, 1, w->file); + if (w->line_ends == FH_END_CRLF) + fwrite (w->cr, w->unit, 1, w->file); fwrite (w->lf, w->unit, 1, w->file); break; diff --git a/src/language/data-io/file-handle.q b/src/language/data-io/file-handle.q index 26dfc97e0c..313adc952b 100644 --- a/src/language/data-io/file-handle.q +++ b/src/language/data-io/file-handle.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,6 +47,7 @@ lrecl=integer; tabwidth=integer; mode=mode:!character/binary/image/360; + ends=ends:lf/crlf; recform=recform:fixed/f/variable/v/spanned/vs; encoding=string. 
*/ @@ -104,6 +105,10 @@ cmd_file_handle (struct lexer *lexer, struct dataset *ds) else msg (SE, _("%s must not be negative."), "TABWIDTH"); } + if (cmd.ends == FH_LF) + properties.line_ends = FH_END_LF; + else if (cmd.ends == FH_CRLF) + properties.line_ends = FH_END_CRLF; break; case FH_IMAGE: properties.mode = FH_MODE_FIXED; diff --git a/tests/language/data-io/print.at b/tests/language/data-io/print.at index 71259e0c68..04701f6ac7 100644 --- a/tests/language/data-io/print.at +++ b/tests/language/data-io/print.at @@ -302,3 +302,36 @@ AT_CHECK([cat foo2.out], [0], [dnl ls -l foo.out foo2.out AT_CHECK([test -c foo.out]) AT_CLEANUP + +AT_SETUP([PRINT with special line ends]) +AT_DATA([print.sps], [dnl +FILE HANDLE lf /NAME='lf.txt' /ENDS=LF. +FILE HANDLE crlf /NAME='crlf.txt' /ENDS=CRLF. +DATA LIST NOTABLE /x 1. +BEGIN DATA. +1 +2 +3 +4 +5 +END DATA. +PRINT OUTFILE=lf/x. +PRINT OUTFILE=crlf/x. +EXECUTE. +]) +AT_CHECK([pspp -O format=csv print.sps]) +AT_CHECK([cat lf.txt], [0], [dnl + 1 @&t@ + 2 @&t@ + 3 @&t@ + 4 @&t@ + 5 @&t@ +]) +AT_CHECK([tr '\r' R < crlf.txt], [0], [dnl + 1 R + 2 R + 3 R + 4 R + 5 R +]) +AT_CLEANUP