From: Ben Pfaff Date: Fri, 20 Apr 2018 06:00:41 +0000 (-0700) Subject: work on SAVE DATA COLLECTION X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=4063e370381d696fa221a5987e596b78abb4d7c3 work on SAVE DATA COLLECTION --- diff --git a/doc/files.texi b/doc/files.texi index ede6e37d78..ffbb685c89 100644 --- a/doc/files.texi +++ b/doc/files.texi @@ -20,6 +20,7 @@ portable files. * GET DATA:: Read from foreign files. * IMPORT:: Read from a portable file. * SAVE:: Write to a system file. +* SAVE DATA COLLECTION:: Write to a system file and metadata file. * SAVE TRANSLATE:: Write data in foreign file formats. * SYSFILE INFO:: Display system file dictionary. * XEXPORT:: Write to a portable file, as a transformation. @@ -770,6 +771,44 @@ The @subcmd{NAMES} and @subcmd{MAP} subcommands are currently ignored. @cmd{SAVE} causes the data to be read. It is a procedure. +@node SAVE DATA COLLECTION +@section SAVE DATA COLLECTION +@vindex SAVE DATA COLLECTION + +@display +SAVE DATA COLLECTION + /OUTFILE=@{'@var{file_name}',@var{file_handle}@} + /METADATA=@{'@var{file_name}',@var{file_handle}@} + /@{UNCOMPRESSED,COMPRESSED,ZCOMPRESSED@} + /PERMISSIONS=@{WRITEABLE,READONLY@} + /DROP=@var{var_list} + /KEEP=@var{var_list} + /VERSION=@var{version} + /RENAME=(@var{src_names}=@var{target_names})@dots{} + /NAMES + /MAP +@end display + +Like @cmd{SAVE}, @cmd{SAVE DATA COLLECTION} writes the dictionary and +data in the active dataset to a system file. In addition, it writes +metadata to an additional XML metadata file. + +OUTFILE is required. Specify the system file to be written as a +string file name or a file handle (@pxref{File Handles}). + +METADATA is also required. Specify the metadata file to be written as +a string file name or a file handle. Metadata files customarily use a +@file{.mdd} extension. + +The current implementation of this command only outputs an +approximation of the metadata file format. Please report bugs. 
+ +Other subcommands are optional. They have the same meanings as in the +@cmd{SAVE} command. + +@cmd{SAVE DATA COLLECTION} causes the data to be read. It is a +procedure. + @node SAVE TRANSLATE @section SAVE TRANSLATE @vindex SAVE TRANSLATE diff --git a/src/data/automake.mk b/src/data/automake.mk index 1572eea83e..b59423a153 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -92,6 +92,8 @@ src_data_libdata_la_SOURCES = \ src/data/identifier.h \ src/data/lazy-casereader.c \ src/data/lazy-casereader.h \ + src/data/mdd-writer.c \ + src/data/mdd-writer.h \ src/data/missing-values.c \ src/data/missing-values.h \ src/data/make-file.c \ diff --git a/src/data/mdd-writer.c b/src/data/mdd-writer.c new file mode 100644 index 0000000000..97efe22aa2 --- /dev/null +++ b/src/data/mdd-writer.c @@ -0,0 +1,245 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2018 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/
+
+#include <config.h>
+
+#include "data/mdd-writer.h"
+
+#include <errno.h>
+#include <libxml/xmlwriter.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "data/dictionary.h"
+#include "data/file-handle-def.h"
+#include "data/make-file.h"
+#include "data/short-names.h"
+#include "data/value-labels.h"
+#include "data/variable.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+
+#include "gl/ftoastr.h"
+#include "gl/xalloc.h"
+#include "gl/xmemdup0.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) (msgid)
+
+#define _xml(X) (CHAR_CAST (const xmlChar *, (X)))
+
+/* Metadata file writer. */
+struct mdd_writer
+  {
+    struct file_handle *fh;     /* File handle. */
+    struct fh_lock *lock;       /* Mutual exclusion for file. */
+    FILE *file;                 /* File stream. */
+    struct replace_file *rf;    /* Ticket for replacing output file. */
+
+    xmlTextWriter *writer;      /* XML writer layered on top of 'file'. */
+  };
+
+/* Returns true if an I/O error has occurred on WRITER, false otherwise. */
+static bool
+mdd_write_error (const struct mdd_writer *writer)
+{
+  return ferror (writer->file);
+}
+
+/* Finishes with W and frees it: frees the XML writer, flushes and closes the
+   file stream, commits the replacement file if no I/O error was detected (or
+   aborts it otherwise), and releases the lock and the file handle reference.
+
+   Returns false if an I/O error was detected on the stream, if closing it
+   failed, or if committing or aborting the replacement file failed.
+   Otherwise returns true, including when W is null or no file was ever
+   opened. */
+static bool
+mdd_close (struct mdd_writer *w)
+{
+  if (!w)
+    return true;
+
+  /* This must come before the stream is closed, because the writer's output
+     buffer was created on top of w->file. */
+  if (w->writer)
+    xmlFreeTextWriter (w->writer);
+
+  bool ok = true;
+  if (w->file)
+    {
+      fflush (w->file);
+
+      ok = !mdd_write_error (w);
+      if (fclose (w->file) == EOF)
+        ok = false;
+
+      if (!ok)
+        msg (ME, _("An I/O error occurred writing metadata file `%s'."),
+             fh_get_file_name (w->fh));
+
+      if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf))
+        ok = false;
+    }
+
+  fh_unlock (w->lock);
+  fh_unref (w->fh);
+
+  free (w);
+
+  return ok;
+}
+
+/* Writes a metadata file that describes DICT to FH.  SAV_NAME is written as
+   the "dblocation" attribute of the <connection> element, that is, as the
+   name of the system file that the metadata refers to.
+
+   Calls short_names_assign() on DICT, so DICT may be modified.
+
+   Returns true if the file was written and committed successfully, false
+   otherwise. */
+bool
+mdd_write (struct file_handle *fh, struct dictionary *dict,
+           const char *sav_name)
+{
+  struct mdd_writer *w = xzalloc (sizeof *w);
+
+  /* Take our own reference.  mdd_close() names the file through w->fh when
+     it reports an I/O error and then drops the reference with fh_unref(), so
+     w->fh must not be left null. */
+  w->fh = fh_ref (fh);
+
+  /* Open file handle as an exclusive writer. */
+  /* TRANSLATORS: this fragment will be interpolated into
+     messages in fh_lock() that identify types of files. */
+  w->lock = fh_lock (fh, FH_REF_FILE, N_("metadata file"), FH_ACC_WRITE, true);
+  if (w->lock == NULL)
+    goto error;
+
+  /* Create the file on disk. */
+  w->rf = replace_file_start (fh, "wb", 0444, &w->file);
+  if (w->rf == NULL)
+    {
+      msg (ME, _("Error opening `%s' for writing as a metadata file: %s."),
+           fh_get_file_name (fh), strerror (errno));
+      goto error;
+    }
+
+  w->writer = xmlNewTextWriter (xmlOutputBufferCreateFile (w->file, NULL));
+  if (!w->writer)
+    {
+      msg (ME, _("Internal error creating xmlTextWriter."));
+      goto error;
+    }
+
+  xmlTextWriterStartDocument (w->writer, NULL, "UTF-8", NULL);
+
+  /* "xml-stylesheet" processing instruction. */
+  xmlTextWriterStartPI (w->writer, _xml ("xml-stylesheet"));
+  xmlTextWriterWriteString (w->writer,
+                            _xml ("type=\"text/xsl\" href=\"mdd.xslt\""));
+  xmlTextWriterEndPI (w->writer);
+
+  /* Root "xml" element.  It is never closed explicitly;
+     xmlTextWriterEndDocument() below closes it. */
+  xmlTextWriterStartElement (w->writer, _xml ("xml"));
+
+  /* Namespaced element that carries the fixed version and ID attributes. */
+  xmlTextWriterStartElementNS (
+    w->writer, _xml ("mdm"), _xml ("xml"),
+    _xml ("http://www.spss.com/mr/dm/metadatamodel/Arc 3/2000-02-04"));
+  static const struct pair
+    {
+      const char *key, *value;
+    }
+  pairs[] =
+    {
+      { "mdm_createversion", "7.0.0.0.331" },
+      { "mdm_lastversion", "7.0.0.0.331" },
+      { "id", "c4c181c1-0d7c-42e3-abcd-f08296d1dfdc" },
+      { "data_version", "9" },
+      { "data_sub_version", "1" },
+      { "systemvariable", "0" },
+      { "dbfiltervalidation", "-1" },
+    };
+  const int n_pairs = sizeof pairs / sizeof *pairs;
+  for (const struct pair *p = pairs; p < &pairs[n_pairs]; p++)
+    xmlTextWriterWriteAttribute (w->writer, _xml (p->key), _xml (p->value));
+  xmlTextWriterEndElement (w->writer);
+
+  /* Empty "atoms" element. */
+  xmlTextWriterStartElement (w->writer, _xml ("atoms"));
+  xmlTextWriterEndElement (w->writer);
+
+  /* "datasources" element, left open so that "connection" nests in it. */
+  xmlTextWriterStartElement (w->writer, _xml ("datasources"));
+  xmlTextWriterWriteAttribute (w->writer, _xml ("default"), _xml ("mrSavDsc"));
+
+  /* "connection" element, left open so that the "var" elements nest in it. */
+  xmlTextWriterStartElement (w->writer, _xml ("connection"));
+  xmlTextWriterWriteAttribute (w->writer, _xml ("name"), _xml ("mrSavDsc"));
+  xmlTextWriterWriteAttribute (w->writer, _xml ("dblocation"),
+                               _xml (sav_name));
+  xmlTextWriterWriteAttribute (w->writer,
+                               _xml ("cdscname"), _xml ("mrSavDsc"));
+  xmlTextWriterWriteAttribute (w->writer, _xml ("project"), _xml ("126"));
+
+  /* One "var" element per variable, with a "nativevalues" child that lists
+     the variable's value labels, if it has any. */
+  size_t n_vars = dict_get_var_cnt (dict);
+  short_names_assign (dict);
+  for (size_t i = 0; i < n_vars; i++)
+    {
+      const struct variable *var = dict_get_var (dict, i);
+      xmlTextWriterStartElement (w->writer, _xml ("var"));
+
+      /* XXX Should convert short name to all-lowercase below. */
+      xmlTextWriterWriteAttribute (w->writer, _xml ("fullname"),
+                                   _xml (var_get_short_name (var, 0)));
+      xmlTextWriterWriteAttribute (w->writer, _xml ("aliasname"),
+                                   _xml (var_get_name (var)));
+
+      const struct val_labs *val_labs = var_get_value_labels (var);
+      size_t n_vls = val_labs_count (val_labs);
+      if (n_vls)
+        {
+          const struct val_lab **vls = val_labs_sorted (val_labs);
+
+          xmlTextWriterStartElement (w->writer, _xml ("nativevalues"));
+          int width = var_get_width (var);
+          for (size_t j = 0; j < n_vls; j++)
+            {
+              const struct val_lab *vl = vls[j];
+              xmlTextWriterStartElement (w->writer, _xml ("nativevalue"));
+              /* XXX Should convert to lowercase, change non-id characters to
+                 _, prefix with _ if starts with non-letter */
+              xmlTextWriterWriteAttribute (w->writer, _xml ("fullname"),
+                                           _xml (val_lab_get_label (vl)));
+
+              /* XXX below would better use syntax_gen_value(). */
+              const union value *value = val_lab_get_value (vl);
+              if (width)
+                {
+                  /* Nonzero width: copy WIDTH bytes of the value into a
+                     null-terminated string. */
+                  char *s = xmemdup0 (value_str (value, width), width);
+                  xmlTextWriterWriteAttribute (w->writer, _xml ("value"),
+                                               _xml (s));
+                  free (s);
+                }
+              else
+                {
+                  /* Zero width: format the value's 'f' member as text. */
+                  char s[DBL_BUFSIZE_BOUND];
+
+                  c_dtoastr (s, sizeof s, 0, 0, value->f);
+                  xmlTextWriterWriteAttribute (w->writer, _xml ("value"),
+                                               _xml (s));
+                }
+              xmlTextWriterEndElement (w->writer);
+            }
+          xmlTextWriterEndElement (w->writer);
+
+          free (vls);
+        }
+
+      xmlTextWriterEndElement (w->writer);
+    }
+
+  /* Closes the innermost element still open, which is "connection". */
+  xmlTextWriterEndElement (w->writer);
+
+  /* Closes the elements that remain open and ends the document. */
+  xmlTextWriterEndDocument (w->writer);
+
+  /* mdd_close() checks for I/O errors and commits or aborts the replacement
+     file, so its result is the result of the whole operation. */
+  return mdd_close (w);
+
+error:
+  mdd_close (w);
+  return false;
+}
diff --git a/src/data/mdd-writer.h b/src/data/mdd-writer.h
new file mode 100644
index 0000000000..949ce52d61
--- /dev/null
+++ b/src/data/mdd-writer.h
@@ -0,0 +1,29 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef MDD_WRITER_H
+#define MDD_WRITER_H 1
+
+#include <stdbool.h>
+
+/* Writing MDD files. 
*/ + +struct file_handle; +struct dictionary; +bool mdd_write (struct file_handle *, struct dictionary *, + const char *sav_name); + +#endif /* mdd-writer.h */ diff --git a/src/language/command.def b/src/language/command.def index f2d5033185..a97f9b83e7 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -139,6 +139,7 @@ DEF_CMD (S_DATA, 0, "RENAME VARIABLES", cmd_rename_variables) DEF_CMD (S_DATA, 0, "ROC", cmd_roc) DEF_CMD (S_DATA, 0, "SAMPLE", cmd_sample) DEF_CMD (S_DATA, 0, "SAVE", cmd_save) +DEF_CMD (S_DATA, 0, "SAVE DATA COLLECTION", cmd_save_data_collection) DEF_CMD (S_DATA, 0, "SAVE TRANSLATE", cmd_save_translate) DEF_CMD (S_DATA, 0, "SORT CASES", cmd_sort_cases) DEF_CMD (S_DATA, 0, "SORT VARIABLES", cmd_sort_variables) diff --git a/src/language/data-io/save.c b/src/language/data-io/save.c index b97da69b00..cec878766a 100644 --- a/src/language/data-io/save.c +++ b/src/language/data-io/save.c @@ -25,6 +25,7 @@ #include "data/casewriter.h" #include "data/dataset.h" #include "data/dictionary.h" +#include "data/mdd-writer.h" #include "data/por-file-writer.h" #include "data/sys-file-writer.h" #include "data/transformations.h" @@ -68,6 +69,12 @@ cmd_save (struct lexer *lexer, struct dataset *ds) return parse_output_proc (lexer, ds, SYSFILE_WRITER); } +int +cmd_save_data_collection (struct lexer *lexer, struct dataset *ds) +{ + return parse_output_proc (lexer, ds, SYSFILE_WRITER); +} + int cmd_export (struct lexer *lexer, struct dataset *ds) { @@ -154,6 +161,7 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, { /* Common data. */ struct file_handle *handle; /* Output file. */ + struct file_handle *metadata; /* MDD output file. */ struct dictionary *dict; /* Dictionary for output file. */ struct casewriter *writer; /* Writer. */ struct case_map_stage *stage; /* Preparation for 'map'. 
*/ @@ -171,6 +179,7 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, *retain_unselected = true; handle = NULL; + metadata = NULL; dict = dict_clone (dataset_dict (ds)); writer = NULL; stage = NULL; @@ -198,6 +207,20 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, if (handle == NULL) goto error; } + else if (lex_match_id (lexer, "METADATA")) + { + if (metadata != NULL) + { + lex_sbc_only_once ("METADATA"); + goto error; + } + + lex_match (lexer, T_EQUALS); + + metadata = fh_parse (lexer, FH_REF_FILE, NULL); + if (metadata == NULL) + goto error; + } else if (lex_match_id (lexer, "NAMES")) { /* Not yet implemented. */ @@ -306,6 +329,15 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, if (writer == NULL) goto error; + if (metadata) + { + const char *sav_name = (fh_get_referent (handle) == FH_REF_FILE + ? fh_get_file_name (handle) + : fh_get_name (handle)); + if (!mdd_write (metadata, dict, sav_name)) + goto error; + } + map = case_map_stage_get_case_map (stage); case_map_stage_destroy (stage); if (map != NULL) @@ -313,11 +345,13 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, dict_destroy (dict); fh_unref (handle); + fh_unref (metadata); return writer; error: case_map_stage_destroy (stage); fh_unref (handle); + fh_unref (metadata); casewriter_destroy (writer); dict_destroy (dict); case_map_destroy (map);