From c13375ebe886fc3ed5ce70a260bb2b01cebb4b1f Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 3 Nov 2007 03:43:10 +0000 Subject: [PATCH] Added support for reading Gnumeric spreadsheet files. Thanks to Ben for reviewing this code. --- configure.ac | 13 + doc/files.texi | 67 +++ po/de.po | 182 ++++++-- po/en_GB.po | 177 ++++++-- po/pspp.pot | 179 ++++++-- src/ChangeLog | 4 + src/data/automake.mk | 5 + src/data/gnumeric-reader.c | 701 +++++++++++++++++++++++++++++++ src/data/gnumeric-reader.h | 40 ++ src/language/data-io/ChangeLog | 7 + src/language/data-io/automake.mk | 2 + src/language/data-io/get-data.c | 159 +++++++ src/language/data-io/get-data.h | 26 ++ src/language/data-io/get.c | 7 + src/libpspp/ChangeLog | 5 + src/libpspp/i18n.c | 6 +- src/libpspp/i18n.h | 13 +- src/ui/terminal/automake.mk | 1 + tests/Book1.gnm.unzipped | 535 +++++++++++++++++++++++ tests/ChangeLog | 5 + tests/automake.mk | 2 + tests/command/get-data-gnm.sh | 236 +++++++++++ 22 files changed, 2256 insertions(+), 116 deletions(-) create mode 100644 src/data/gnumeric-reader.c create mode 100644 src/data/gnumeric-reader.h create mode 100644 src/language/data-io/get-data.c create mode 100644 src/language/data-io/get-data.h create mode 100644 tests/Book1.gnm.unzipped create mode 100755 tests/command/get-data-gnm.sh diff --git a/configure.ac b/configure.ac index f738c50b..4fa093d7 100644 --- a/configure.ac +++ b/configure.ac @@ -45,6 +45,19 @@ if test x"$with_gui" != x"no" ; then fi AM_CONDITIONAL(WITHGUI, test x"$with_gui" != x"no") + +dnl Checks needed for gnumeric reader +gnm_support=yes; +PKG_CHECK_MODULES(LIBXML2, libxml-2.0,, + [PSPP_OPTIONAL_PREREQ([libxml2]); gnm_support=no]); +AC_SEARCH_LIBS(gzopen,z,,[PSPP_OPTIONAL_PREREQ([zlib]); gnm_support=no;]) +AC_CHECK_HEADERS(zlib.h,,[PSPP_OPTIONAL_PREREQ([zlib]); gnm_support=no;]) + +if test x"$gnm_support" = x"yes" ; then + AC_DEFINE([GNM_SUPPORT], 1, + [Define to 1 if building in support for reading Gnumeric files.]) +fi + AC_ARG_WITH( 
gui_tools, [AS_HELP_STRING([--with-gui-tools], [build the gui developer tools])]) diff --git a/doc/files.texi b/doc/files.texi index 35bc7c98..0910d260 100644 --- a/doc/files.texi +++ b/doc/files.texi @@ -8,6 +8,7 @@ portable files. * APPLY DICTIONARY:: Apply system file dictionary to active file. * EXPORT:: Write to a portable file. * GET:: Read from a system file. +* GET DATA:: Read from foreign files. * IMPORT:: Read from a portable file. * MATCH FILES:: Merge system files. * SAVE:: Write to a system file. @@ -156,6 +157,72 @@ is read later, when a procedure is executed. Use of @cmd{GET} to read a portable file or scratch file is a PSPP extension. +@node GET DATA +@section GET DATA +@vindex GET DATA + +@display +GET DATA /TYPE=gnm + /FILE=@{'file-name'@} + + /SHEET=@{NAME 'sheet-name', INDEX n@} + /CELLRANGE=@{RANGE 'range', FULL@} + /READNAMES=@{ON, OFF@} + /ASSUMEDVARWIDTH=n. +@end display + +The @cmd{GET DATA} command is used to read files and other data sources +created by other applications. +When this command is executed, the current dictionary and active file are +replaced with variables and data read from the specified source. +The TYPE subcommand is mandatory and determines the type of the file or source to read. +Currently @samp{gnm} is the only supported type. + +@cindex Gnumeric +@cindex spreadsheet files +The @samp{gnm} type is used to read spreadsheet files created by +Gnumeric (@url{http://gnumeric.org}). +With this type, the FILE subcommand must be used, to specify the +spreadsheet file to read. +All other subcommands are optional. +The format of each variable is determined by the format of the spreadsheet +cell containing the first datum for the variable. +If this cell is of string (text) format, then the width of the variable is +determined from the length of the string it contains, unless the +ASSUMEDVARWIDTH subcommand is given. + +The SHEET subcommand specifies the sheet within the spreadsheet file to read. 
+There are two forms of the SHEET subcommand. +In the first form, +@samp{/SHEET=name @var{sheet-name}}, the string @var{sheet-name} is the +name of the sheet to read. +In the second form, @samp{/SHEET=index @var{idx}}, @var{idx} is an +integer which is the index of the sheet to read. +The first sheet has the index 1. +If the SHEET subcommand is omitted, then the command will read the +first sheet in the file. + +The CELLRANGE subcommand specifies the range of cells within the sheet to read. +If the subcommand is given as @samp{/CELLRANGE=FULL}, then the entire +sheet is read. +To read only part of a sheet, use the form +@samp{/CELLRANGE=range '@var{top-left-cell}:@var{bottom-right-cell}'}. +For example, the subcommand @samp{/CELLRANGE=range 'C3:P19'} reads +columns C--P, and rows 3--19 inclusive. +If no CELLRANGE subcommand is given, then the entire sheet is read. + +If @samp{/READNAMES=ON} is specified, then the contents of cells of +the first row are used as the names of the variables in which to store +the data from subsequent rows. +If the READNAMES subcommand is omitted, or if @samp{/READNAMES=OFF} is +used, then the variables receive automatically assigned names. + +The ASSUMEDVARWIDTH subcommand specifies the maximum width of string +variables read from the file. +If omitted, the default value is determined from the length of the +string in the first spreadsheet cell for each variable. 
+ + @node IMPORT @section IMPORT @vindex IMPORT diff --git a/po/de.po b/po/de.po index eedffd9e..6053433e 100644 --- a/po/de.po +++ b/po/de.po @@ -10,7 +10,7 @@ msgid "" msgstr "" "Project-Id-Version: PSPP 0.4.3\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2007-10-17 20:48+0800\n" +"POT-Creation-Date: 2007-11-03 12:36+0900\n" "PO-Revision-Date: 2006-07-28 19:32+0800\n" "Last-Translator: John Darrington \n" "Language-Team: German \n" @@ -192,23 +192,23 @@ msgstr "Spalten" msgid "%s field) " msgstr "" -#: src/data/data-out.c:466 +#: src/data/data-out.c:465 #, c-format msgid "Weekday number %f is not between 1 and 7." msgstr "" -#: src/data/data-out.c:487 +#: src/data/data-out.c:486 #, c-format msgid "Month number %f is not between 1 and 12." msgstr "" -#: src/data/dictionary.c:747 +#: src/data/dictionary.c:758 msgid "" "At least one case in the data file had a weight value that was user-missing, " "system-missing, zero, or negative. These case(s) were ignored." msgstr "" -#: src/data/dictionary.c:1048 +#: src/data/dictionary.c:1059 #, c-format msgid "Truncating document line to %d bytes." msgstr "" @@ -1210,21 +1210,21 @@ msgstr "" msgid "Handle for %s not allowed here." msgstr "" -#: src/language/data-io/get.c:98 +#: src/language/data-io/get.c:105 msgid "expecting COMM or TAPE" msgstr "" -#: src/language/data-io/get.c:269 src/language/data-io/get.c:283 -#: src/language/data-io/get.c:308 +#: src/language/data-io/get.c:276 src/language/data-io/get.c:290 +#: src/language/data-io/get.c:315 #, c-format msgid "expecting %s or %s" msgstr "" -#: src/language/data-io/get.c:501 src/language/data-io/print.c:176 +#: src/language/data-io/get.c:508 src/language/data-io/print.c:176 msgid "expecting a valid subcommand" msgstr "" -#: src/language/data-io/get.c:534 +#: src/language/data-io/get.c:541 #, c-format msgid "" "Cannot rename %s as %s because there already exists a variable named %s. 
To " @@ -1232,11 +1232,11 @@ msgid "" "as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, \"/RENAME (A B C=B C A)\"." msgstr "" -#: src/language/data-io/get.c:560 +#: src/language/data-io/get.c:567 msgid "`=' expected after variable list." msgstr "" -#: src/language/data-io/get.c:567 +#: src/language/data-io/get.c:574 #, c-format msgid "" "Number of variables on left side of `=' (%zu) does not match number of " @@ -1244,55 +1244,55 @@ msgid "" "subcommand." msgstr "" -#: src/language/data-io/get.c:580 +#: src/language/data-io/get.c:587 #, c-format msgid "Requested renaming duplicates variable name %s." msgstr "" -#: src/language/data-io/get.c:610 +#: src/language/data-io/get.c:617 msgid "Cannot DROP all variables from dictionary." msgstr "" -#: src/language/data-io/get.c:783 +#: src/language/data-io/get.c:790 msgid "Cannot specify the active file since no active file has been defined." msgstr "" -#: src/language/data-io/get.c:790 +#: src/language/data-io/get.c:797 msgid "" "MATCH FILES may not be used after TEMPORARY when the active file is an input " "source. Temporary transformations will be made permanent." msgstr "" -#: src/language/data-io/get.c:824 +#: src/language/data-io/get.c:831 msgid "Multiple IN subcommands for a single FILE or TABLE." msgstr "" -#: src/language/data-io/get.c:868 +#: src/language/data-io/get.c:875 #, c-format msgid "File %s lacks BY variable %s." msgstr "" -#: src/language/data-io/get.c:871 +#: src/language/data-io/get.c:878 #, c-format msgid "Active file lacks BY variable %s." msgstr "" -#: src/language/data-io/get.c:941 +#: src/language/data-io/get.c:948 msgid "BY is required when TABLE is specified." msgstr "" -#: src/language/data-io/get.c:946 +#: src/language/data-io/get.c:953 msgid "BY is required when IN is specified." msgstr "" -#: src/language/data-io/get.c:1051 +#: src/language/data-io/get.c:1058 #, c-format msgid "" "Variable name %s specified on %s subcommand duplicates an existing variable " "name." 
msgstr "" -#: src/language/data-io/get.c:1297 +#: src/language/data-io/get.c:1304 #, c-format msgid "" "Variable %s in file %s (%s) has different type or width from the same " @@ -4412,6 +4412,7 @@ msgid "Jump to Variable" msgstr "Variableansicht" #: src/ui/gui/data-editor.c:653 src/ui/gui/data-editor.glade:438 +#: src/ui/gui/oneway.glade:179 msgid "_Descriptives" msgstr "" @@ -4972,46 +4973,50 @@ msgstr "Unpassend Wert für Variable" msgid "Incorrect range specification" msgstr "Falshe Spannweitebeschreibung" -#: src/ui/gui/oneway-anova-dialog.c:317 +#: src/ui/gui/oneway-anova-dialog.c:337 #, c-format msgid "Contrast %d of %d" msgstr "" -#: src/ui/gui/oneway.glade:34 src/ui/gui/x.glade:126 +#: src/ui/gui/oneway.glade:30 msgid "_Factor:" msgstr "" -#: src/ui/gui/oneway.glade:70 src/ui/gui/x.glade:82 +#: src/ui/gui/oneway.glade:66 #, fuzzy -msgid "_Dependent Variable(s):" +msgid "Dependent _Variable(s):" msgstr "Variableansicht" -#: src/ui/gui/oneway.glade:172 -msgid "_Options..." +#: src/ui/gui/oneway.glade:190 +msgid "_Homogeneity" msgstr "" -#: src/ui/gui/oneway.glade:183 +#: src/ui/gui/oneway.glade:207 +msgid "Statistics" +msgstr "" + +#: src/ui/gui/oneway.glade:226 msgid "_Contrasts..." 
msgstr "" -#: src/ui/gui/oneway.glade:254 +#: src/ui/gui/oneway.glade:309 msgid "gtk-go-back" msgstr "" -#: src/ui/gui/oneway.glade:265 +#: src/ui/gui/oneway.glade:320 msgid "gtk-go-forward" msgstr "" -#: src/ui/gui/oneway.glade:288 +#: src/ui/gui/oneway.glade:343 #, fuzzy msgid "_Coefficients:" msgstr "Spalten" -#: src/ui/gui/oneway.glade:334 +#: src/ui/gui/oneway.glade:389 msgid "Coefficient Total: " msgstr "" -#: src/ui/gui/oneway.glade:367 +#: src/ui/gui/oneway.glade:422 msgid "Contrast 1 of 1" msgstr "" @@ -5101,7 +5106,7 @@ msgstr "Variableansicht" msgid "Current Status: " msgstr "" -#: src/ui/gui/psppire.glade:265 +#: src/ui/gui/psppire.glade:265 src/ui/gui/rank.glade:67 #, fuzzy msgid "Variable(s):" msgstr "Variableansicht" @@ -5339,6 +5344,111 @@ msgstr "Spezial" msgid "%d" msgstr "" +#: src/ui/gui/rank.glade:111 +msgid "By:" +msgstr "" + +#: src/ui/gui/rank.glade:197 +msgid "_Smallest Value" +msgstr "" + +#: src/ui/gui/rank.glade:209 +#, fuzzy +msgid "_Largest Value" +msgstr "Variableansicht" + +#: src/ui/gui/rank.glade:228 +msgid "Assign rank 1 to:" +msgstr "" + +#: src/ui/gui/rank.glade:246 +msgid "_Display summary tables" +msgstr "" + +#: src/ui/gui/rank.glade:262 +#, fuzzy +msgid "Rank T_ypes" +msgstr "_Stecken" + +#: src/ui/gui/rank.glade:273 +msgid "_Ties..." 
+msgstr "" + +#: src/ui/gui/rank.glade:343 +msgid "Ntiles" +msgstr "" + +#: src/ui/gui/rank.glade:376 +msgid "Rank" +msgstr "" + +#: src/ui/gui/rank.glade:386 +#, fuzzy +msgid "Savage score" +msgstr "Speichern unter" + +#: src/ui/gui/rank.glade:400 +msgid "Fractional rank" +msgstr "" + +#: src/ui/gui/rank.glade:414 +msgid "Fractional rank as %" +msgstr "" + +#: src/ui/gui/rank.glade:428 +msgid "Sum of case weights" +msgstr "" + +#: src/ui/gui/rank.glade:450 +msgid "Proportion Estimates" +msgstr "" + +#: src/ui/gui/rank.glade:460 +msgid "Normal Scores" +msgstr "" + +#: src/ui/gui/rank.glade:495 +msgid "Blom" +msgstr "" + +#: src/ui/gui/rank.glade:506 +msgid "Tukey" +msgstr "" + +#: src/ui/gui/rank.glade:520 +msgid "Rankit" +msgstr "" + +#: src/ui/gui/rank.glade:534 +msgid "Van der Wärden" +msgstr "" + +#: src/ui/gui/rank.glade:551 +msgid "Proportion Estimation Formula" +msgstr "" + +#: src/ui/gui/rank.glade:615 +msgid "_Mean" +msgstr "" + +#: src/ui/gui/rank.glade:627 +#, fuzzy +msgid "_Low" +msgstr "_Tief:" + +#: src/ui/gui/rank.glade:643 +#, fuzzy +msgid "_High" +msgstr "_Hoch:" + +#: src/ui/gui/rank.glade:661 +msgid "_Sequential ranks to unique values" +msgstr "" + +#: src/ui/gui/rank.glade:681 +msgid "Rank Assigned to Ties" +msgstr "" + #: src/ui/gui/select-cases-dialog.c:85 #, c-format msgid "Approximately %3d%% of all cases." diff --git a/po/en_GB.po b/po/en_GB.po index 8e8a78a3..d92a7d83 100644 --- a/po/en_GB.po +++ b/po/en_GB.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PSPP 0.4.3\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2007-10-17 20:48+0800\n" +"POT-Creation-Date: 2007-11-03 12:36+0900\n" "PO-Revision-Date: 2007-09-15 08:29+0800\n" "Last-Translator: John Darrington \n" "Language-Team: John Darrington \n" @@ -189,23 +189,23 @@ msgstr "" msgid "%s field) " msgstr "" -#: src/data/data-out.c:466 +#: src/data/data-out.c:465 #, c-format msgid "Weekday number %f is not between 1 and 7." 
msgstr "" -#: src/data/data-out.c:487 +#: src/data/data-out.c:486 #, c-format msgid "Month number %f is not between 1 and 12." msgstr "" -#: src/data/dictionary.c:747 +#: src/data/dictionary.c:758 msgid "" "At least one case in the data file had a weight value that was user-missing, " "system-missing, zero, or negative. These case(s) were ignored." msgstr "" -#: src/data/dictionary.c:1048 +#: src/data/dictionary.c:1059 #, c-format msgid "Truncating document line to %d bytes." msgstr "" @@ -1206,21 +1206,21 @@ msgstr "" msgid "Handle for %s not allowed here." msgstr "" -#: src/language/data-io/get.c:98 +#: src/language/data-io/get.c:105 msgid "expecting COMM or TAPE" msgstr "" -#: src/language/data-io/get.c:269 src/language/data-io/get.c:283 -#: src/language/data-io/get.c:308 +#: src/language/data-io/get.c:276 src/language/data-io/get.c:290 +#: src/language/data-io/get.c:315 #, c-format msgid "expecting %s or %s" msgstr "" -#: src/language/data-io/get.c:501 src/language/data-io/print.c:176 +#: src/language/data-io/get.c:508 src/language/data-io/print.c:176 msgid "expecting a valid subcommand" msgstr "" -#: src/language/data-io/get.c:534 +#: src/language/data-io/get.c:541 #, c-format msgid "" "Cannot rename %s as %s because there already exists a variable named %s. To " @@ -1228,11 +1228,11 @@ msgid "" "as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, \"/RENAME (A B C=B C A)\"." msgstr "" -#: src/language/data-io/get.c:560 +#: src/language/data-io/get.c:567 msgid "`=' expected after variable list." msgstr "" -#: src/language/data-io/get.c:567 +#: src/language/data-io/get.c:574 #, c-format msgid "" "Number of variables on left side of `=' (%zu) does not match number of " @@ -1240,55 +1240,55 @@ msgid "" "subcommand." msgstr "" -#: src/language/data-io/get.c:580 +#: src/language/data-io/get.c:587 #, c-format msgid "Requested renaming duplicates variable name %s." 
msgstr "" -#: src/language/data-io/get.c:610 +#: src/language/data-io/get.c:617 msgid "Cannot DROP all variables from dictionary." msgstr "" -#: src/language/data-io/get.c:783 +#: src/language/data-io/get.c:790 msgid "Cannot specify the active file since no active file has been defined." msgstr "" -#: src/language/data-io/get.c:790 +#: src/language/data-io/get.c:797 msgid "" "MATCH FILES may not be used after TEMPORARY when the active file is an input " "source. Temporary transformations will be made permanent." msgstr "" -#: src/language/data-io/get.c:824 +#: src/language/data-io/get.c:831 msgid "Multiple IN subcommands for a single FILE or TABLE." msgstr "" -#: src/language/data-io/get.c:868 +#: src/language/data-io/get.c:875 #, c-format msgid "File %s lacks BY variable %s." msgstr "" -#: src/language/data-io/get.c:871 +#: src/language/data-io/get.c:878 #, c-format msgid "Active file lacks BY variable %s." msgstr "" -#: src/language/data-io/get.c:941 +#: src/language/data-io/get.c:948 msgid "BY is required when TABLE is specified." msgstr "" -#: src/language/data-io/get.c:946 +#: src/language/data-io/get.c:953 msgid "BY is required when IN is specified." msgstr "" -#: src/language/data-io/get.c:1051 +#: src/language/data-io/get.c:1058 #, c-format msgid "" "Variable name %s specified on %s subcommand duplicates an existing variable " "name." 
msgstr "" -#: src/language/data-io/get.c:1297 +#: src/language/data-io/get.c:1304 #, c-format msgid "" "Variable %s in file %s (%s) has different type or width from the same " @@ -4396,6 +4396,7 @@ msgid "Jump to Variable" msgstr "" #: src/ui/gui/data-editor.c:653 src/ui/gui/data-editor.glade:438 +#: src/ui/gui/oneway.glade:179 msgid "_Descriptives" msgstr "" @@ -4932,44 +4933,48 @@ msgstr "" msgid "Incorrect range specification" msgstr "" -#: src/ui/gui/oneway-anova-dialog.c:317 +#: src/ui/gui/oneway-anova-dialog.c:337 #, c-format msgid "Contrast %d of %d" msgstr "" -#: src/ui/gui/oneway.glade:34 src/ui/gui/x.glade:126 +#: src/ui/gui/oneway.glade:30 msgid "_Factor:" msgstr "" -#: src/ui/gui/oneway.glade:70 src/ui/gui/x.glade:82 -msgid "_Dependent Variable(s):" +#: src/ui/gui/oneway.glade:66 +msgid "Dependent _Variable(s):" msgstr "" -#: src/ui/gui/oneway.glade:172 -msgid "_Options..." +#: src/ui/gui/oneway.glade:190 +msgid "_Homogeneity" msgstr "" -#: src/ui/gui/oneway.glade:183 +#: src/ui/gui/oneway.glade:207 +msgid "Statistics" +msgstr "" + +#: src/ui/gui/oneway.glade:226 msgid "_Contrasts..." 
msgstr "" -#: src/ui/gui/oneway.glade:254 +#: src/ui/gui/oneway.glade:309 msgid "gtk-go-back" msgstr "" -#: src/ui/gui/oneway.glade:265 +#: src/ui/gui/oneway.glade:320 msgid "gtk-go-forward" msgstr "" -#: src/ui/gui/oneway.glade:288 +#: src/ui/gui/oneway.glade:343 msgid "_Coefficients:" msgstr "" -#: src/ui/gui/oneway.glade:334 +#: src/ui/gui/oneway.glade:389 msgid "Coefficient Total: " msgstr "" -#: src/ui/gui/oneway.glade:367 +#: src/ui/gui/oneway.glade:422 msgid "Contrast 1 of 1" msgstr "" @@ -5055,7 +5060,7 @@ msgstr "" msgid "Current Status: " msgstr "" -#: src/ui/gui/psppire.glade:265 +#: src/ui/gui/psppire.glade:265 src/ui/gui/rank.glade:67 msgid "Variable(s):" msgstr "" @@ -5279,6 +5284,106 @@ msgstr "" msgid "%d" msgstr "" +#: src/ui/gui/rank.glade:111 +msgid "By:" +msgstr "" + +#: src/ui/gui/rank.glade:197 +msgid "_Smallest Value" +msgstr "" + +#: src/ui/gui/rank.glade:209 +msgid "_Largest Value" +msgstr "" + +#: src/ui/gui/rank.glade:228 +msgid "Assign rank 1 to:" +msgstr "" + +#: src/ui/gui/rank.glade:246 +msgid "_Display summary tables" +msgstr "" + +#: src/ui/gui/rank.glade:262 +msgid "Rank T_ypes" +msgstr "" + +#: src/ui/gui/rank.glade:273 +msgid "_Ties..." 
+msgstr "" + +#: src/ui/gui/rank.glade:343 +msgid "Ntiles" +msgstr "" + +#: src/ui/gui/rank.glade:376 +msgid "Rank" +msgstr "" + +#: src/ui/gui/rank.glade:386 +msgid "Savage score" +msgstr "" + +#: src/ui/gui/rank.glade:400 +msgid "Fractional rank" +msgstr "" + +#: src/ui/gui/rank.glade:414 +msgid "Fractional rank as %" +msgstr "" + +#: src/ui/gui/rank.glade:428 +msgid "Sum of case weights" +msgstr "" + +#: src/ui/gui/rank.glade:450 +msgid "Proportion Estimates" +msgstr "" + +#: src/ui/gui/rank.glade:460 +msgid "Normal Scores" +msgstr "" + +#: src/ui/gui/rank.glade:495 +msgid "Blom" +msgstr "" + +#: src/ui/gui/rank.glade:506 +msgid "Tukey" +msgstr "" + +#: src/ui/gui/rank.glade:520 +msgid "Rankit" +msgstr "" + +#: src/ui/gui/rank.glade:534 +msgid "Van der Wärden" +msgstr "" + +#: src/ui/gui/rank.glade:551 +msgid "Proportion Estimation Formula" +msgstr "" + +#: src/ui/gui/rank.glade:615 +msgid "_Mean" +msgstr "" + +#: src/ui/gui/rank.glade:627 +msgid "_Low" +msgstr "" + +#: src/ui/gui/rank.glade:643 +msgid "_High" +msgstr "" + +#: src/ui/gui/rank.glade:661 +msgid "_Sequential ranks to unique values" +msgstr "" + +#: src/ui/gui/rank.glade:681 +msgid "Rank Assigned to Ties" +msgstr "" + #: src/ui/gui/select-cases-dialog.c:85 #, c-format msgid "Approximately %3d%% of all cases." 
diff --git a/po/pspp.pot b/po/pspp.pot index 0f2e7797..7c814653 100644 --- a/po/pspp.pot +++ b/po/pspp.pot @@ -8,12 +8,12 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2007-10-17 20:48+0800\n" +"POT-Creation-Date: 2007-11-03 12:36+0900\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=CHARSET\n" +"Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" @@ -191,23 +191,23 @@ msgstr "" msgid "%s field) " msgstr "" -#: src/data/data-out.c:466 +#: src/data/data-out.c:465 #, c-format msgid "Weekday number %f is not between 1 and 7." msgstr "" -#: src/data/data-out.c:487 +#: src/data/data-out.c:486 #, c-format msgid "Month number %f is not between 1 and 12." msgstr "" -#: src/data/dictionary.c:747 +#: src/data/dictionary.c:758 msgid "" "At least one case in the data file had a weight value that was user-missing, " "system-missing, zero, or negative. These case(s) were ignored." msgstr "" -#: src/data/dictionary.c:1048 +#: src/data/dictionary.c:1059 #, c-format msgid "Truncating document line to %d bytes." msgstr "" @@ -1208,21 +1208,21 @@ msgstr "" msgid "Handle for %s not allowed here." 
msgstr "" -#: src/language/data-io/get.c:98 +#: src/language/data-io/get.c:105 msgid "expecting COMM or TAPE" msgstr "" -#: src/language/data-io/get.c:269 src/language/data-io/get.c:283 -#: src/language/data-io/get.c:308 +#: src/language/data-io/get.c:276 src/language/data-io/get.c:290 +#: src/language/data-io/get.c:315 #, c-format msgid "expecting %s or %s" msgstr "" -#: src/language/data-io/get.c:501 src/language/data-io/print.c:176 +#: src/language/data-io/get.c:508 src/language/data-io/print.c:176 msgid "expecting a valid subcommand" msgstr "" -#: src/language/data-io/get.c:534 +#: src/language/data-io/get.c:541 #, c-format msgid "" "Cannot rename %s as %s because there already exists a variable named %s. To " @@ -1230,11 +1230,11 @@ msgid "" "as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, \"/RENAME (A B C=B C A)\"." msgstr "" -#: src/language/data-io/get.c:560 +#: src/language/data-io/get.c:567 msgid "`=' expected after variable list." msgstr "" -#: src/language/data-io/get.c:567 +#: src/language/data-io/get.c:574 #, c-format msgid "" "Number of variables on left side of `=' (%zu) does not match number of " @@ -1242,55 +1242,55 @@ msgid "" "subcommand." msgstr "" -#: src/language/data-io/get.c:580 +#: src/language/data-io/get.c:587 #, c-format msgid "Requested renaming duplicates variable name %s." msgstr "" -#: src/language/data-io/get.c:610 +#: src/language/data-io/get.c:617 msgid "Cannot DROP all variables from dictionary." msgstr "" -#: src/language/data-io/get.c:783 +#: src/language/data-io/get.c:790 msgid "Cannot specify the active file since no active file has been defined." msgstr "" -#: src/language/data-io/get.c:790 +#: src/language/data-io/get.c:797 msgid "" "MATCH FILES may not be used after TEMPORARY when the active file is an input " "source. Temporary transformations will be made permanent." msgstr "" -#: src/language/data-io/get.c:824 +#: src/language/data-io/get.c:831 msgid "Multiple IN subcommands for a single FILE or TABLE." 
msgstr "" -#: src/language/data-io/get.c:868 +#: src/language/data-io/get.c:875 #, c-format msgid "File %s lacks BY variable %s." msgstr "" -#: src/language/data-io/get.c:871 +#: src/language/data-io/get.c:878 #, c-format msgid "Active file lacks BY variable %s." msgstr "" -#: src/language/data-io/get.c:941 +#: src/language/data-io/get.c:948 msgid "BY is required when TABLE is specified." msgstr "" -#: src/language/data-io/get.c:946 +#: src/language/data-io/get.c:953 msgid "BY is required when IN is specified." msgstr "" -#: src/language/data-io/get.c:1051 +#: src/language/data-io/get.c:1058 #, c-format msgid "" "Variable name %s specified on %s subcommand duplicates an existing variable " "name." msgstr "" -#: src/language/data-io/get.c:1297 +#: src/language/data-io/get.c:1304 #, c-format msgid "" "Variable %s in file %s (%s) has different type or width from the same " @@ -4398,6 +4398,7 @@ msgid "Jump to Variable" msgstr "" #: src/ui/gui/data-editor.c:653 src/ui/gui/data-editor.glade:438 +#: src/ui/gui/oneway.glade:179 msgid "_Descriptives" msgstr "" @@ -4934,44 +4935,48 @@ msgstr "" msgid "Incorrect range specification" msgstr "" -#: src/ui/gui/oneway-anova-dialog.c:317 +#: src/ui/gui/oneway-anova-dialog.c:337 #, c-format msgid "Contrast %d of %d" msgstr "" -#: src/ui/gui/oneway.glade:34 src/ui/gui/x.glade:126 +#: src/ui/gui/oneway.glade:30 msgid "_Factor:" msgstr "" -#: src/ui/gui/oneway.glade:70 src/ui/gui/x.glade:82 -msgid "_Dependent Variable(s):" +#: src/ui/gui/oneway.glade:66 +msgid "Dependent _Variable(s):" msgstr "" -#: src/ui/gui/oneway.glade:172 -msgid "_Options..." +#: src/ui/gui/oneway.glade:190 +msgid "_Homogeneity" msgstr "" -#: src/ui/gui/oneway.glade:183 +#: src/ui/gui/oneway.glade:207 +msgid "Statistics" +msgstr "" + +#: src/ui/gui/oneway.glade:226 msgid "_Contrasts..." 
msgstr "" -#: src/ui/gui/oneway.glade:254 +#: src/ui/gui/oneway.glade:309 msgid "gtk-go-back" msgstr "" -#: src/ui/gui/oneway.glade:265 +#: src/ui/gui/oneway.glade:320 msgid "gtk-go-forward" msgstr "" -#: src/ui/gui/oneway.glade:288 +#: src/ui/gui/oneway.glade:343 msgid "_Coefficients:" msgstr "" -#: src/ui/gui/oneway.glade:334 +#: src/ui/gui/oneway.glade:389 msgid "Coefficient Total: " msgstr "" -#: src/ui/gui/oneway.glade:367 +#: src/ui/gui/oneway.glade:422 msgid "Contrast 1 of 1" msgstr "" @@ -5057,7 +5062,7 @@ msgstr "" msgid "Current Status: " msgstr "" -#: src/ui/gui/psppire.glade:265 +#: src/ui/gui/psppire.glade:265 src/ui/gui/rank.glade:67 msgid "Variable(s):" msgstr "" @@ -5281,6 +5286,106 @@ msgstr "" msgid "%d" msgstr "" +#: src/ui/gui/rank.glade:111 +msgid "By:" +msgstr "" + +#: src/ui/gui/rank.glade:197 +msgid "_Smallest Value" +msgstr "" + +#: src/ui/gui/rank.glade:209 +msgid "_Largest Value" +msgstr "" + +#: src/ui/gui/rank.glade:228 +msgid "Assign rank 1 to:" +msgstr "" + +#: src/ui/gui/rank.glade:246 +msgid "_Display summary tables" +msgstr "" + +#: src/ui/gui/rank.glade:262 +msgid "Rank T_ypes" +msgstr "" + +#: src/ui/gui/rank.glade:273 +msgid "_Ties..." 
+msgstr "" + +#: src/ui/gui/rank.glade:343 +msgid "Ntiles" +msgstr "" + +#: src/ui/gui/rank.glade:376 +msgid "Rank" +msgstr "" + +#: src/ui/gui/rank.glade:386 +msgid "Savage score" +msgstr "" + +#: src/ui/gui/rank.glade:400 +msgid "Fractional rank" +msgstr "" + +#: src/ui/gui/rank.glade:414 +msgid "Fractional rank as %" +msgstr "" + +#: src/ui/gui/rank.glade:428 +msgid "Sum of case weights" +msgstr "" + +#: src/ui/gui/rank.glade:450 +msgid "Proportion Estimates" +msgstr "" + +#: src/ui/gui/rank.glade:460 +msgid "Normal Scores" +msgstr "" + +#: src/ui/gui/rank.glade:495 +msgid "Blom" +msgstr "" + +#: src/ui/gui/rank.glade:506 +msgid "Tukey" +msgstr "" + +#: src/ui/gui/rank.glade:520 +msgid "Rankit" +msgstr "" + +#: src/ui/gui/rank.glade:534 +msgid "Van der Wärden" +msgstr "" + +#: src/ui/gui/rank.glade:551 +msgid "Proportion Estimation Formula" +msgstr "" + +#: src/ui/gui/rank.glade:615 +msgid "_Mean" +msgstr "" + +#: src/ui/gui/rank.glade:627 +msgid "_Low" +msgstr "" + +#: src/ui/gui/rank.glade:643 +msgid "_High" +msgstr "" + +#: src/ui/gui/rank.glade:661 +msgid "_Sequential ranks to unique values" +msgstr "" + +#: src/ui/gui/rank.glade:681 +msgid "Rank Assigned to Ties" +msgstr "" + #: src/ui/gui/select-cases-dialog.c:85 #, c-format msgid "Approximately %3d%% of all cases." diff --git a/src/ChangeLog b/src/ChangeLog index bb818d6d..bba2afaa 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,7 @@ +2007-11-03 John Darrington + + * gnumeric-reader.c gnumeric-reader.h: New files. + Thu May 4 21:47:48 2006 Ben Pfaff Continue reforming procedure execution. 
In this phase, move diff --git a/src/data/automake.mk b/src/data/automake.mk index 3c73494c..112dc3b0 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -1,6 +1,9 @@ noinst_LIBRARIES += src/data/libdata.a +src_data_libdata_a_CPPFLAGS = $(LIBXML2_CFLAGS) $(AM_CPPFLAGS) + + src_data_libdata_a_SOURCES = \ src/data/any-reader.c \ src/data/any-reader.h \ @@ -48,6 +51,8 @@ src_data_libdata_a_SOURCES = \ src/data/format.c \ src/data/format.h \ src/data/format.def \ + src/data/gnumeric-reader.c \ + src/data/gnumeric-reader.h \ src/data/identifier.c \ src/data/identifier.h \ src/data/lazy-casereader.c \ diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c new file mode 100644 index 00000000..c36a6832 --- /dev/null +++ b/src/data/gnumeric-reader.c @@ -0,0 +1,701 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + + + +#include + +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) + + +#if !GNM_SUPPORT + +struct casereader * +gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) +{ + msg (ME, _("Support for Gnumeric files was not compiled into this installation of PSPP")); + + return NULL; +} + +#else + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +#include "gnumeric-reader.h" +#include +#include + + +static void gnm_file_casereader_destroy (struct casereader *, void *); + +static bool gnm_file_casereader_read (struct casereader *, void *, + struct ccase *); + +static struct casereader_class gnm_file_casereader_class = + { + gnm_file_casereader_read, + gnm_file_casereader_destroy, + NULL, + NULL, + }; + +/* Convert a string, which is an integer encoded in base26 + IE, A=0, B=1, ... Z=25 to the integer it represents. + ... except that in this scheme, digits with an exponent + greater than 1 are implicitly incremented by 1, so + AA = 0 + 1*26, AB = 1 + 1*26, + ABC = 2 + 2*26 + 1*26^2 .... +*/ +static int +pseudo_base26 (const char *str) +{ + int i; + int multiplier = 1; + int result = 0; + int len = strlen (str); + + for ( i = len - 1 ; i >= 0; --i) + { + int mantissa = (str[i] - 'A'); + + if ( mantissa < 0 || mantissa > 25 ) + return -1; + + if ( i != len - 1) + mantissa++; + + result += mantissa * multiplier; + + multiplier *= 26; + } + + return result; +} + + + +/* Convert a cell reference in the form "A1:B2", to + integers. A1 means column zero, row zero. + B1 means column 1 row 0. AA1 means column 26, row 0. 
+*/ +static bool +convert_cell_ref (const char *ref, + int *col0, int *row0, + int *coli, int *rowi) +{ + char startcol[5]; + char stopcol [5]; + + int startrow; + int stoprow; + + int n = sscanf (ref, "%4[a-zA-Z]%d:%4[a-zA-Z]%d", + startcol, &startrow, + stopcol, &stoprow); + if ( n != 4) + return false; + + str_uppercase (startcol); + *col0 = pseudo_base26 (startcol); + str_uppercase (stopcol); + *coli = pseudo_base26 (stopcol); + *row0 = startrow - 1; + *rowi = stoprow - 1 ; + + return true; +} + + +enum reader_state + { + STATE_INIT = 0, /* Initial state */ + STATE_SHEET_START, /* Found the start of a sheet */ + STATE_SHEET_NAME, /* Found the sheet name */ + STATE_MAXROW, + STATE_SHEET_FOUND, /* Found the sheet that we actually want */ + STATE_CELLS_START, /* Found the start of the cell array */ + STATE_CELL /* Found a cell */ + }; + + +struct gnumeric_reader +{ + xmlTextReaderPtr xtr; + + enum reader_state state; + int row; + int col; + int node_type; + int sheet_index; + + + const xmlChar *target_sheet; + int target_sheet_index; + + int start_row; + int start_col; + int stop_row; + int stop_col; + + + size_t value_cnt; + struct dictionary *dict; + struct ccase first_case; + bool used_first_case; +}; + +static void process_node (struct gnumeric_reader *r); + +#define _xml(X) (const xmlChar *)(X) + +#define _xmlchar_to_int(X) atoi((const char *)X) + +static void +gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) +{ + struct gnumeric_reader *r = r_; + if ( r == NULL) + return ; + + if ( r->xtr) + xmlFreeTextReader (r->xtr); + + if ( ! 
r->used_first_case ) + case_destroy (&r->first_case); + + free (r); +} + +static void +process_node (struct gnumeric_reader *r) +{ + xmlChar *name = xmlTextReaderName (r->xtr); + if (name == NULL) + name = xmlStrdup (_xml ("--")); + + + r->node_type = xmlTextReaderNodeType (r->xtr); + + switch ( r->state) + { + case STATE_INIT: + if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + r->state = STATE_SHEET_START; + } + break; + case STATE_SHEET_START: + if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + r->state = STATE_SHEET_NAME; + } + else if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_INIT; + } + break; + case STATE_SHEET_NAME: + if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_SHEET_START; + } + else if (XML_READER_TYPE_TEXT == r->node_type) + { + ++r->sheet_index; + if ( r->target_sheet != NULL) + { + xmlChar *value = xmlTextReaderValue (r->xtr); + if ( 0 == xmlStrcmp (value, r->target_sheet)) + r->state = STATE_SHEET_FOUND; + free (value); + } + else if (r->target_sheet_index == r->sheet_index) + { + r->state = STATE_SHEET_FOUND; + } + } + break; + case STATE_SHEET_FOUND: + if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + if (! 
xmlTextReaderIsEmptyElement (r->xtr))
+            r->state = STATE_CELLS_START;
+        }
+      else if (0 == xmlStrcasecmp (name, _xml("gnm:MaxRow")) &&
+               XML_READER_TYPE_ELEMENT == r->node_type)
+        {
+          r->state = STATE_MAXROW;
+        }
+      else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) &&
+               XML_READER_TYPE_END_ELEMENT == r->node_type)
+        {
+          r->state = STATE_INIT;
+        }
+      break;
+    case STATE_MAXROW:
+      /* Stay in this state until the MaxRow element closes, then go
+         back to looking for the cells of the selected sheet. */
+      if (0 == xmlStrcasecmp (name, _xml("gnm:MaxRow")) &&
+          XML_READER_TYPE_END_ELEMENT == r->node_type)
+        {
+          r->state = STATE_SHEET_FOUND;
+        }
+      /* Must not fall through: the cell handling below belongs to
+         STATE_CELLS_START only. */
+      break;
+    case STATE_CELLS_START:
+      if (0 == xmlStrcasecmp (name, _xml ("gnm:Cell")) &&
+          XML_READER_TYPE_ELEMENT == r->node_type)
+        {
+          xmlChar *attr = NULL;
+          r->state = STATE_CELL;
+
+          /* A missing attribute yields a null pointer, which must not
+             be passed to atoi.  Record -1 (no valid position) instead. */
+          attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Col"));
+          r->col = attr ? _xmlchar_to_int (attr) : -1;
+          free (attr);
+
+          attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Row"));
+          r->row = attr ? _xmlchar_to_int (attr) : -1;
+          free (attr);
+        }
+      else if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) &&
+               XML_READER_TYPE_END_ELEMENT == r->node_type)
+        /* NOTE(review): the end of the cell array returns to
+           STATE_SHEET_NAME rather than STATE_INIT -- confirm that
+           this is intended. */
+        r->state = STATE_SHEET_NAME;
+
+      break;
+    case STATE_CELL:
+      /* The cell's own text node is picked up by the callers; here we
+         only wait for the cell to close. */
+      if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) &&
+          XML_READER_TYPE_END_ELEMENT == r->node_type)
+        r->state = STATE_CELLS_START;
+      break;
+    default:
+      break;
+    };
+
+  xmlFree (name);
+}
+
+
+
+/*
+   Replace NAME by a name that is not yet used in DICT.
+
+   The new name is the old contents of NAME (or "var" if NAME was
+   empty) followed by a number.  *X holds the last number tried: it is
+   incremented before every attempt, so repeated calls with the same X
+   carry on counting where the previous call stopped.
+
+   Only uniqueness within DICT is checked here, not the validity of
+   the characters in NAME.
+*/
+static void
+devise_name (const struct dictionary *dict, struct string *name, int *x)
+{
+  struct string basename;
+  if ( ds_is_empty (name))
+    ds_init_cstr (&basename, "var");
+  else
+    ds_init_string (&basename, name);
+  do
+    {
+      ds_clear (name);
+      ds_put_format (name, "%s%d", ds_cstr (&basename), ++(*x));
+    }
+  while (NULL != dict_lookup_var (dict, ds_cstr (name)) );
+
+  ds_destroy (&basename);
+}
+
+/*
+   Mutate NAME into a variable name which is guaranteed to be valid
+   for, and not yet present in, the dictionary DICT.
+*/
+static void
+munge_name (const struct dictionary *dict, struct string *name)
+{
+  int x = 0;
+
+  if (! 
ds_is_empty (name))
+    {
+      /* Change all the invalid characters to valid ones */
+      char *s;
+
+      s = ds_data (name);
+
+      if ( !lex_is_id1 (*s))
+        *s = '@';
+
+      s++;
+
+      while (s < ds_data (name) + ds_length (name))
+        {
+          if ( !lex_is_idn (*s))
+            *s = '_';
+          s++;
+        }
+
+      assert (var_is_valid_name (ds_cstr (name), false));
+    }
+
+  /* An empty or already used name is replaced by a generated one. */
+  while (ds_is_empty (name) || NULL != dict_lookup_var (dict, ds_cstr (name)) )
+    {
+      devise_name (dict, name, &x);
+    }
+}
+
+
+/*
+   Sets the value of VAR in case C to the value corresponding to the
+   xml string XV.
+
+   XV is recoded with CONV_UTF8_TO_PSPP first.  For a string variable
+   at most var_get_width (VAR) bytes are copied and the remainder of
+   the value is left as the caller initialised it.  For a numeric
+   variable the text is parsed with strtod, and anything that cannot
+   be parsed becomes SYSMIS.
+
+   XV may be null (a cell for which no text was seen).  In that case,
+   or if recoding yields no text, a string variable is left untouched
+   and a numeric variable is set to SYSMIS.
+ */
+static void
+convert_xml_string_to_value (struct ccase *c, const struct variable *var,
+                             const xmlChar *xv)
+{
+  char *text = NULL;
+  union value *v = case_data_rw (c, var);
+
+  if ( xv != NULL)
+    text = recode_string (CONV_UTF8_TO_PSPP, (const char *) xv, -1);
+
+  if ( var_is_alpha (var))
+    {
+      if ( text != NULL)
+        memcpy (v->s, text, MIN (var_get_width (var), strlen (text)));
+    }
+  else if ( text == NULL)
+    {
+      /* Nothing to parse: strtod must not be handed a null pointer. */
+      v->f = SYSMIS;
+    }
+  else
+    {
+      char *endptr;
+      errno = 0;
+      v->f = strtod (text, &endptr);
+      if ( errno != 0 || endptr == text)
+        v->f = SYSMIS;
+    }
+
+  free (text);
+}
+
+/* What is known about one column after the first row(s) were read. */
+struct var_spec
+{
+  char *name;             /* Name from the header row, or null. */
+  int width;              /* -1 while unknown, 0 for a numeric column. */
+  xmlChar *first_value;   /* Text of the first data cell, or null. */
+};
+
+/*
+   Opens the Gnumeric file described by GRI.
+
+   On success returns a casereader for the selected sheet and range,
+   and stores a newly created dictionary in *DICT.  On failure a
+   message is issued for the cases handled below and NULL is returned.
+*/
+struct casereader *
+gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
+{
+  int ret;
+  casenumber n_cases = CASENUMBER_MAX;
+  int i;
+  struct var_spec *var_spec = NULL;
+  int n_var_specs = 0;
+
+  struct gnumeric_reader *r = NULL;
+
+  gzFile gz = gzopen (gri->file_name, "r");
+
+  if ( NULL == gz)
+    {
+      msg (ME, _("Error opening \"%s\" for reading as a gnumeric file: %s."),
+           gri->file_name, strerror (errno));
+
+      goto error;
+    }
+
+  r = xzalloc (sizeof *r);
+
+  /* The xml reader pulls its input through gzread and is given
+     gzclose as its close callback. */
+  r->xtr = xmlReaderForIO ((xmlInputReadCallback) gzread, gzclose, gz,
+                           NULL, NULL, 0);
+
+  if ( r->xtr == NULL)
+    goto error;
+
+  if ( gri->cell_range )
+    {
+      if ( ! 
convert_cell_ref (gri->cell_range,
+                              &r->start_col, &r->start_row,
+                              &r->stop_col, &r->stop_row))
+        {
+          msg (SE, _("Invalid cell range \"%s\""),
+               gri->cell_range);
+          goto error;
+        }
+    }
+  else
+    {
+      /* No range given: start at the top left cell; a stop value of
+         -1 means "no limit". */
+      r->start_col = 0;
+      r->start_row = 0;
+      r->stop_col = -1;
+      r->stop_row = -1;
+    }
+
+  r->state = STATE_INIT;
+  r->target_sheet = BAD_CAST gri->sheet_name;
+  r->target_sheet_index = gri->sheet_index;
+  r->row = r->col = -1;
+  r->sheet_index = 0;
+
+  /* Advance to the start of the cells for the target sheet */
+  while ( (r->state != STATE_CELL || r->row < r->start_row )
+          && 1 == (ret = xmlTextReaderRead (r->xtr)))
+    {
+      xmlChar *value ;
+      process_node (r);
+      value = xmlTextReaderValue (r->xtr);
+
+      /* The text inside the MaxRow element gives the index of the
+         last row, hence the number of rows is one more. */
+      if ( r->state == STATE_MAXROW && r->node_type == XML_READER_TYPE_TEXT)
+        {
+          n_cases = 1 + _xmlchar_to_int (value) ;
+        }
+      free (value);
+    }
+
+
+  /* If a range has been given, then use that to calculate the number
+     of cases */
+  if ( gri->cell_range)
+    {
+      n_cases = MIN (n_cases, r->stop_row - r->start_row + 1);
+    }
+
+  /* The header row is not a case. */
+  if ( gri->read_names )
+    {
+      r->start_row++;
+      n_cases --;
+    }
+
+  /* Read in the first row of cells,
+     including the headers if read_names was set.
+     Only a return value of 1 means that a node was read; 0 is the
+     end of the input and -1 an error, and both must end the loop. */
+  while (
+         (( r->state == STATE_CELLS_START && r->row <= r->start_row) || r->state == STATE_CELL )
+         && 1 == (ret = xmlTextReaderRead (r->xtr))
+         )
+    {
+      int idx;
+      process_node (r);
+
+      if ( r->row > r->start_row ) break;
+
+      if ( r->col < r->start_col ||
+           (r->stop_col != -1 && r->col > r->stop_col))
+        continue;
+
+      idx = r->col - r->start_col;
+
+      if ( idx >= n_var_specs )
+        {
+          /* IDX comes straight from the cell's column, so it can be
+             more than one past the end of the array.  Every slot that
+             is added has to be initialised, not just the last one,
+             because all of them are examined and freed later on. */
+          int j;
+
+          var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
+
+          for (j = n_var_specs ; j <= idx ; ++j)
+            {
+              var_spec [j].name = NULL;
+              var_spec [j].width = -1;
+              var_spec [j].first_value = NULL;
+            }
+
+          n_var_specs = idx + 1 ;
+        }
+
+      if ( r->node_type == XML_READER_TYPE_TEXT )
+        {
+          char *text ;
+          xmlChar *value = xmlTextReaderValue (r->xtr);
+
+          text = recode_string (CONV_UTF8_TO_PSPP, (const char *) value, -1);
+
+          if ( r->row < r->start_row)
+            {
+              if ( gri->read_names )
+ 
{ + var_spec [idx].name = strdup (text); + } + } + else + { + var_spec [idx].first_value = xmlStrdup (value); + + if (-1 == var_spec [idx].width ) + var_spec [idx].width = (gri->asw == -1) ? + ROUND_UP (strlen(text), MAX_SHORT_STRING) : gri->asw; + } + + free (value); + free (text); + } + else if ( r->node_type == XML_READER_TYPE_ELEMENT + && r->state == STATE_CELL) + { + if ( r->row == r->start_row ) + { + xmlChar *attr = + xmlTextReaderGetAttribute (r->xtr, _xml ("ValueType")); + + if ( 60 != _xmlchar_to_int (attr)) + var_spec [idx].width = 0; + + free (attr); + } + } + } + + + /* Create the dictionary and populate it */ + *dict = r->dict = dict_create (); + + r->value_cnt = 0; + + for (i = 0 ; i < n_var_specs ; ++i ) + { + struct string name; + + /* Probably no data exists for this variable, so allocate a default width */ + if ( var_spec[i].width == -1 ) + var_spec[i].width = MAX_SHORT_STRING; + + r->value_cnt += value_cnt_from_width (var_spec[i].width); + + if (var_spec[i].name) + ds_init_cstr (&name, var_spec[i].name); + else + ds_init_empty (&name); + + munge_name (r->dict, &name); + + + dict_create_var (r->dict, ds_cstr (&name), var_spec[i].width); + + ds_destroy (&name); + } + + /* Create the first case, and cache it */ + r->used_first_case = false; + + if ( n_var_specs == 0 ) + { + msg (MW, _("Selected sheet or range of spreadsheet \"%s\" is empty."), + gri->file_name); + goto error; + } + + case_create (&r->first_case, r->value_cnt); + memset (case_data_rw_idx (&r->first_case, 0)->s, + ' ', MAX_SHORT_STRING * r->value_cnt); + + for ( i = 0 ; i < n_var_specs ; ++i ) + { + const struct variable *var = dict_get_var (r->dict, i); + + convert_xml_string_to_value (&r->first_case, var, + var_spec[i].first_value); + } + + for ( i = 0 ; i < n_var_specs ; ++i ) + { + free (var_spec[i].first_value); + free (var_spec[i].name); + } + + free (var_spec); + + return casereader_create_sequential + (NULL, + r->value_cnt, + n_cases, + &gnm_file_casereader_class, r); + + + 
error:
+  /* NOTE(review): if *DICT was already created when control arrives
+     here it is neither destroyed nor reset -- confirm whether the
+     caller is expected to cope with that. */
+  for ( i = 0 ; i < n_var_specs ; ++i )
+    {
+      free (var_spec[i].first_value);
+      free (var_spec[i].name);
+    }
+
+  free (var_spec);
+
+  gnm_file_casereader_destroy (NULL, r);
+
+  return NULL;
+}
+
+
+/* Reads one case from READER's file into C.  Returns true only
+   if successful.
+
+   The first call hands out the case that was cached when the reader
+   was opened.  Every later call creates a new case filled with
+   spaces and then consumes xml nodes for as long as they belong to
+   the row that was current on entry, storing the text of each cell
+   that lies within the selected columns. */
+static bool
+gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_,
+                          struct ccase *c)
+{
+  int ret = 0;
+
+  struct gnumeric_reader *r = r_;
+  int current_row = r->row;
+
+  if ( !r->used_first_case )
+    {
+      *c = r->first_case;
+      r->used_first_case = true;
+      return true;
+    }
+
+  case_create (c, r->value_cnt);
+
+  memset (case_data_rw_idx (c, 0)->s, ' ', MAX_SHORT_STRING * r->value_cnt);
+
+  /* Only a return value of 1 means that a node was read.  An error
+     (-1) has to stop the loop just like the end of input (0) does,
+     otherwise a damaged file would keep it spinning. */
+  while ((r->state == STATE_CELL || r->state == STATE_CELLS_START )
+         && r->row == current_row
+         && 1 == (ret = xmlTextReaderRead (r->xtr)))
+    {
+      process_node (r);
+
+      /* Skip cells to the left or right of the selected columns ... */
+      if ( r->col < r->start_col || (r->stop_col != -1 &&
+                                     r->col > r->stop_col))
+        continue;
+
+      /* ... and cells beyond the values that the case can hold. */
+      if ( r->col - r->start_col >= r->value_cnt)
+        continue;
+
+      if ( r->stop_row != -1 && r->row > r->stop_row)
+        break;
+
+      if ( r->node_type == XML_READER_TYPE_TEXT )
+        {
+          xmlChar *value = xmlTextReaderValue (r->xtr);
+
+          const int idx = r->col - r->start_col;
+
+          const struct variable *var = dict_get_var (r->dict, idx);
+
+          convert_xml_string_to_value (c, var, value);
+
+          free (value);
+        }
+
+    }
+
+  return (ret == 1);
+}
+
+
+#endif /* GNM_SUPPORT */
diff --git a/src/data/gnumeric-reader.h b/src/data/gnumeric-reader.h
new file mode 100644
index 00000000..6bb5a6b7
--- /dev/null
+++ b/src/data/gnumeric-reader.h
@@ -0,0 +1,40 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef GNUMERIC_READ_H +#define GNUMERIC_READ_H 1 + +#include + +struct casereader; + + +struct gnumeric_read_info +{ + char *sheet_name ; + char *file_name ; + char *cell_range ; + int sheet_index ; + bool read_names ; + int asw ; +}; + +struct dictionary; + +struct casereader * gnumeric_open_reader (struct gnumeric_read_info *, struct dictionary **); + + +#endif diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index ae8bb79f..7982916a 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,10 @@ +2007-11-03 John Darrington + + * get.c: Add GET DATA command variant. + + * get-data.c get-data.h (new files): Added support for + GET DATA /TYPE='gnm' command. + 2007-09-23 Ben Pfaff Bug #21111. Reviewed by John Darrington. diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index df51c631..dced683b 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -7,6 +7,8 @@ src_language_data_io_built_sources = \ language_data_io_sources = \ src/language/data-io/data-list.c \ src/language/data-io/get.c \ + src/language/data-io/get-data.c \ + src/language/data-io/get-data.h \ src/language/data-io/inpt-pgm.c \ src/language/data-io/inpt-pgm.h \ src/language/data-io/print.c \ diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c new file mode 100644 index 00000000..015815a9 --- /dev/null +++ b/src/language/data-io/get-data.c @@ -0,0 +1,159 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2007 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "get-data.h" + +#include +#include + +#include +#include +#include +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) + +static int parse_get_gnm (struct lexer *lexer, struct dataset *); + +int +parse_get_data_command (struct lexer *lexer, struct dataset *ds) +{ + lex_force_match (lexer, '/'); + + if (!lex_force_match_id (lexer, "TYPE")) + return CMD_FAILURE; + + lex_force_match (lexer, '='); + + if (lex_match_id (lexer, "GNM")) + return parse_get_gnm (lexer, ds); + + msg (SE, _("Unsupported TYPE %s"), lex_tokid (lexer)); + return CMD_FAILURE; +} + + +static int +parse_get_gnm (struct lexer *lexer, struct dataset *ds) +{ + struct gnumeric_read_info gri = {NULL, NULL, NULL, 1, true, -1}; + + lex_force_match (lexer, '/'); + + if (!lex_force_match_id (lexer, "FILE")) + goto error; + + lex_force_match (lexer, '='); + + if (!lex_force_string (lexer)) + goto error; + + gri.file_name = strdup (ds_cstr (lex_tokstr (lexer))); + + lex_get (lexer); + + while (lex_match (lexer, '/') ) + { + if ( lex_match_id (lexer, "ASSUMEDSTRWIDTH")) + { + lex_match (lexer, '='); + gri.asw = lex_integer (lexer); + } + else if (lex_match_id (lexer, "SHEET")) + { + lex_match (lexer, '='); + if (lex_match_id (lexer, "NAME")) + { + if ( ! 
lex_force_string (lexer) )
+                goto error;
+
+              gri.sheet_name = strdup (ds_cstr (lex_tokstr (lexer)));
+              /* Selecting by name switches selection by index off. */
+              gri.sheet_index = -1;
+            }
+          else if (lex_match_id (lexer, "INDEX"))
+            {
+              /* Reject anything but an integer with a syntax error,
+                 in the same way as is done for strings above,
+                 instead of handing it to lex_integer. */
+              if ( ! lex_force_int (lexer) )
+                goto error;
+
+              gri.sheet_index = lex_integer (lexer);
+            }
+          else
+            goto error;
+        }
+      else if (lex_match_id (lexer, "CELLRANGE"))
+        {
+          lex_match (lexer, '=');
+
+          if (lex_match_id (lexer, "FULL"))
+            {
+              gri.cell_range = NULL;
+              /* Give back a token for the lex_get at the end of the
+                 loop to consume. */
+              lex_put_back (lexer, T_ID);
+            }
+          else if (lex_match_id (lexer, "RANGE"))
+            {
+              if ( ! lex_force_string (lexer) )
+                goto error;
+
+              gri.cell_range = strdup (ds_cstr (lex_tokstr (lexer)));
+            }
+          else
+            goto error;
+        }
+      else if (lex_match_id (lexer, "READNAMES"))
+        {
+          lex_match (lexer, '=');
+
+          if ( lex_match_id (lexer, "ON"))
+            {
+              gri.read_names = true;
+            }
+          else if (lex_match_id (lexer, "OFF"))
+            {
+              gri.read_names = false;
+            }
+          else
+            goto error;
+          lex_put_back (lexer, T_ID);
+        }
+      else
+        {
+          /* What follows the '/' here is a subcommand name, not a
+             file type.  Report it through the message system like
+             every other syntax error in this file. */
+          msg (SE, _("Unknown subcommand \"%s\""), lex_tokid (lexer));
+          goto error;
+        }
+      lex_get (lexer);
+    }
+
+  {
+    struct dictionary *dict = NULL;
+    struct casereader *reader = gnumeric_open_reader (&gri, &dict);
+
+    /* NOTE(review): CMD_SUCCESS is returned below even if the reader
+       could not be opened -- confirm that this is intended. */
+    if ( reader )
+      proc_set_active_file (ds, reader, dict);
+  }
+
+  free (gri.file_name);
+  free (gri.sheet_name);
+  free (gri.cell_range);
+  return CMD_SUCCESS;
+
+ error:
+
+  free (gri.file_name);
+  free (gri.sheet_name);
+  free (gri.cell_range);
+  return CMD_FAILURE;
+}
diff --git a/src/language/data-io/get-data.h b/src/language/data-io/get-data.h
new file mode 100644
index 00000000..4d9c44ed
--- /dev/null
+++ b/src/language/data-io/get-data.h
@@ -0,0 +1,26 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef GET_DATA_H +#define GET_DATA_H + +struct lexer; +struct dataset; + +int parse_get_data_command (struct lexer *lexer, struct dataset *ds); + + +#endif diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index 574656b9..b861aca5 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -45,6 +45,8 @@ #include #include +#include "get-data.h" + #include "xalloc.h" #include "gettext.h" @@ -73,6 +75,11 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command struct dictionary *dict = NULL; struct case_map *map = NULL; + if ( type == GET_CMD && lex_match_id (lexer, "DATA") ) + { + return parse_get_data_command (lexer, ds); + } + for (;;) { lex_match (lexer, '/'); diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index bd3bc536..4d9186f5 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,8 @@ +2007-11-03 John Darrington + + * i18n.c i18n.h: Added convertor from UTF8 to system. + This is needed for reading gnumeric files (and possibly others). + 2007-10-11 Ben Pfaff * xalloc.h: Removed. 
Changed all users to include "xalloc.h" from diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index d194634f..ba083fff 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -177,6 +177,9 @@ set_pspp_locale (const char *l) iconv_close (convertor[CONV_SYSTEM_TO_PSPP]); convertor[CONV_SYSTEM_TO_PSPP] = create_iconv (charset, current_charset); + + iconv_close (convertor[CONV_UTF8_TO_PSPP]); + convertor[CONV_UTF8_TO_PSPP] = create_iconv (charset, "UTF-8"); } void @@ -188,8 +191,9 @@ i18n_init (void) setlocale (LC_CTYPE, locale); charset = locale_charset (); - convertor[CONV_PSPP_TO_UTF8] = create_iconv ("UTF-8", charset); + convertor[CONV_PSPP_TO_UTF8] = create_iconv ("UTF-8", charset); convertor[CONV_SYSTEM_TO_PSPP] = create_iconv (charset, charset); + convertor[CONV_UTF8_TO_PSPP] = create_iconv (charset, "UTF-8"); } diff --git a/src/libpspp/i18n.h b/src/libpspp/i18n.h index 51ca28ed..0633ebc8 100644 --- a/src/libpspp/i18n.h +++ b/src/libpspp/i18n.h @@ -17,22 +17,23 @@ #ifndef I18N_H #define I18N_H -const char * get_pspp_locale(void); -void set_pspp_locale(const char *locale); -const char * get_pspp_charset(void); +const char * get_pspp_locale (void); +void set_pspp_locale (const char *locale); +const char * get_pspp_charset (void); -void i18n_done(void); -void i18n_init(void); +void i18n_done (void); +void i18n_init (void); enum conv_id { CONV_PSPP_TO_UTF8, CONV_SYSTEM_TO_PSPP, + CONV_UTF8_TO_PSPP, n_CONV }; -char * recode_string(enum conv_id how, const char *text, int len); +char * recode_string (enum conv_id how, const char *text, int len); diff --git a/src/ui/terminal/automake.mk b/src/ui/terminal/automake.mk index a6ea104a..65cd23a3 100644 --- a/src/ui/terminal/automake.mk +++ b/src/ui/terminal/automake.mk @@ -32,6 +32,7 @@ src_ui_terminal_pspp_LDADD = \ lib/gsl-extras/libgsl-extras.a \ src/data/libdata.a \ src/libpspp/libpspp.a \ + $(LIBXML2_LIBS) \ $(LIBICONV) \ gl/libgl.la \ @LIBINTL@ @LIBREADLINE@ diff --git a/tests/Book1.gnm.unzipped 
b/tests/Book1.gnm.unzipped new file mode 100644 index 00000000..052783e1 --- /dev/null +++ b/tests/Book1.gnm.unzipped @@ -0,0 +1,535 @@ + + + + + + 4 + WorkbookView::show_horizontal_scrollbar + TRUE + + + 4 + WorkbookView::show_vertical_scrollbar + TRUE + + + 4 + WorkbookView::show_notebook_tabs + TRUE + + + 4 + WorkbookView::do_auto_completion + TRUE + + + 4 + WorkbookView::is_protected + FALSE + + + + + application + gnumeric + + + author + John Darrington + + + + This + vars + That + Empty + Blank + + + + + This + 9 + 17 + 1 + + + + + + + + + + + + + + d_then_r + portrait + + + + + + + + + + + + + + + + + + + + + numeral + eng_name + xxx + 1 + One + Eins + 2 + Two + Zwei + 3 + Three + Drei + Vier + XY + xxx + xxxx + xxxx + yyy + V1 + V2 + yyy + 0 + fred + 20 + $$$$ + yyy + 1 + 11 + 21 + $$$$ + yyyy + 2 + twelve + 22 + $$$$ + yyyy + 3 + 13 + 23 + $$$$ + Eleven + yyyy + 4 + 14 + 24 + $$$$ + zzz + zzz + zzz + zzz + zzz + Seventeen + + + + + + vars + 4 + 8 + 1 + + + + + + + + + + + + + + d_then_r + portrait + + + + + + + + + + + + + + + + + + + 1v12 + var&x@ + a(43) + varx + varx + 1 + 2 + 23 + 2 + 4 + 3 + 4 + 23 + 3 + 4 + + + + + + + That + 3 + 4 + 1 + + + + + + + + + + + + + + d_then_r + portrait + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + name + id + height + fred + 0 + 23.4 + bert + 1 + 0.56 + charlie + 2 + n/a + dick + 3 + -34.09 + + + + + + Empty + -1 + -1 + 1 + + + + + + + + + + + + + + d_then_r + portrait + + + + + + + + + + + + + + + + + + + Blank + 3 + 2 + 1 + + + + + + + + + + + + + + d_then_r + portrait + + + + + + + + + + + + + + + + + + + vone + vtwo + vthree + v4 + 1 + 3 + 5 + 2 + 4 + 6 + + + + + + + + diff --git a/tests/ChangeLog b/tests/ChangeLog index c98f53e2..555e24cf 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,8 @@ +2007-11-03 John Darrington + + * Book1.gnm.unzipped command/get-data-gnm.sh: New test and data + for reading gnumeric files. 
+ 2007-10-08 Ben Pfaff Bug #21280. Thanks to John Darrington for review. diff --git a/tests/automake.mk b/tests/automake.mk index 58400ac0..b673bef3 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -20,6 +20,7 @@ dist_TESTS = \ tests/command/file-handle.sh \ tests/command/filter.sh \ tests/command/flip.sh \ + tests/command/get-data-gnm.sh \ tests/command/import-export.sh \ tests/command/input-program.sh \ tests/command/insert.sh \ @@ -238,6 +239,7 @@ tests_formats_inexactify_SOURCES = tests/formats/inexactify.c EXTRA_DIST += \ $(dist_TESTS) \ + tests/Book1.gnm.unzipped \ tests/weighting.data tests/data-list.data tests/list.data \ tests/no_case_size.sav \ tests/coverage.sh tests/test_template \ diff --git a/tests/command/get-data-gnm.sh b/tests/command/get-data-gnm.sh new file mode 100755 index 00000000..2cad373f --- /dev/null +++ b/tests/command/get-data-gnm.sh @@ -0,0 +1,236 @@ +#!/bin/sh + +# This program tests that pspp can read gnumeric files + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +# ensure that top_srcdir and top_builddir are absolute +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +top_srcdir=`cd $top_srcdir; pwd` +top_builddir=`cd $top_builddir; pwd` + +PSPP=$top_builddir/src/ui/terminal/pspp + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +LANG=C +export LANG + + +cleanup() +{ + if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then + echo "NOT cleaning $TEMPDIR" + return ; + fi + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="zip the gnm file and place it in the test directory" +gzip -c $top_srcdir/tests/Book1.gnm.unzipped > $TEMPDIR/Book1.gnumeric +if [ $? 
-ne 0 ] ; then no_result ; fi + +activity="create program 1" +cat > $TESTFILE < $TESTFILE < /dev/null +if [ $? -ne 0 ] ; then fail ; fi + +activity="compare output 2" +diff $TEMPDIR/pspp.list - <