From: Dave Trollope Date: Sat, 19 Jan 2019 17:18:27 +0000 (-0800) Subject: Improve MDD writer support. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=2dd6b5accbbf92ee440e8a3b41ade942e7e41f29 Improve MDD writer support. Patch #9742. --- diff --git a/NEWS b/NEWS index 53c96b6885..d719cc20bd 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ Changes since 1.2.0: * pspp-convert: New "-a", "-l", "--password-list" options to search for an encrypted file's password. + * Improvements to SAVE DATA COLLECTION support for MDD files. + * Bug fix for CVE-2018-20230. Changes from 1.0.1 to 1.2.0: diff --git a/src/data/mdd-writer.c b/src/data/mdd-writer.c index bc8b54cd8b..ba69c99cc2 100644 --- a/src/data/mdd-writer.c +++ b/src/data/mdd-writer.c @@ -27,12 +27,16 @@ #include "data/dictionary.h" #include "data/file-handle-def.h" +#include "data/format.h" #include "data/make-file.h" +#include "data/missing-values.h" +#include "data/mrset.h" #include "data/short-names.h" #include "data/value-labels.h" #include "data/variable.h" #include "libpspp/message.h" #include "libpspp/misc.h" +#include "libpspp/string-map.h" #include "libpspp/string-set.h" #include "libpspp/version.h" @@ -47,6 +51,38 @@ #define _xml(X) CHAR_CAST (const xmlChar *, X) +enum val_numeric_type + { + VAL_INTEGER_TYPE = 1, + VAL_STRING_TYPE = 2, + VAL_CATEGORICAL_TYPE = 3, + VAL_DATETIME_TYPE = 5, + VAL_DECIMAL_TYPE = 6 + }; + +/* Get the numeric type of the variable. */ +static enum val_numeric_type +var_get_numeric_type_ (const struct variable *var) +{ + const struct fmt_spec *print = var_get_print_format (var); + if (var_get_type (var) == VAL_STRING) + return VAL_STRING_TYPE; + + if (var_has_value_labels (var)) + return VAL_CATEGORICAL_TYPE; + + if (print->d > 0) + return VAL_DECIMAL_TYPE; + + if (print->type == FMT_DATETIME) + return VAL_DATETIME_TYPE; + + if (print->type == FMT_F) + return VAL_INTEGER_TYPE; + + return VAL_CATEGORICAL_TYPE; +} + /* Metadata file writer. */ struct mdd_writer { @@ -58,6 +94,124 @@ struct mdd_writer xmlTextWriter *writer; }; +/* Either a variable or a multi-response set. */ +struct var_or_mrset + { + /* If true, the union contains a multi-response set. Otherwise, it + contains a variable. */ + bool is_mrset; + union + { + const struct mrset *mrset; + const struct variable *variable; + }; + }; + +struct all_dict_variables + { + struct var_or_mrset *vars; + size_t count; + }; + +struct all_dict_variables +all_variables (struct dictionary *dict); + +/* Extract all variables in a dictionary, both normal and multi. + + Excludes variables which are subvariables of an MRSET. */ +struct all_dict_variables +all_variables (struct dictionary *dict) +{ + size_t n_vars = dict_get_var_cnt (dict); + + /* Start with a set of all variable names. */ + struct string_set var_names = STRING_SET_INITIALIZER (var_names); + for (size_t i = 0; i < n_vars; ++i) + { + const struct variable *var = dict_get_var (dict, i); + string_set_insert (&var_names, var_get_name (var)); + } + + /* For each MRSET M, remove all subvariable names of M from S. */ + size_t n_sets = dict_get_n_mrsets (dict); + for (size_t set_idx = 0; set_idx < n_sets; set_idx++) + { + const struct mrset *mrset = dict_get_mrset (dict, set_idx); + for (size_t i = 0; i < mrset->n_vars; ++i) + { + const struct variable *var = mrset->vars[i]; + string_set_delete (&var_names, var_get_name (var)); + } + } + + /* Add the number of remaining variables to the number of MRSets. */ + size_t var_count = n_sets + string_set_count (&var_names); + + /* Allocate an array of var_or_mrset pointers (initially null). */ + struct var_or_mrset *var_or_mrset_array = xcalloc ( + var_count, sizeof *var_or_mrset_array); + + /* Fill the array. */ + struct string_set added_mrsets = STRING_SET_INITIALIZER (added_mrsets); + + size_t var_idx = 0; + for (size_t i = 0; i < n_vars; ++i) + { + const struct variable *var = dict_get_var (dict, i); + bool found_in_mrset = false; + for (size_t set_idx = 0; set_idx < n_sets; set_idx++) + { + const struct mrset *mrset = dict_get_mrset (dict, set_idx); + for (size_t i = 0; i < mrset->n_vars; ++i) + { + const struct variable *subvar = mrset->vars[i]; + if (!strcmp (var_get_name (var), var_get_name (subvar))) + { + /* Then this variable is a member of this MRSet. */ + found_in_mrset = true; + + /* Check if this MRSet has already been added and add + otherwise. */ + if (!string_set_contains (&added_mrsets, mrset->name)) + { + string_set_insert (&added_mrsets, mrset->name); + + assert (var_idx < var_count); + struct var_or_mrset *v_o_m + = &var_or_mrset_array[var_idx++]; + v_o_m->is_mrset = true; + v_o_m->mrset = mrset; + } + } + } + } + + /* If the variable wasn't found to be a member of any MRSets, record it as + a normal variable. */ + if (!found_in_mrset) + { + /* The variable is not part of a multi-response set. */ + assert (var_idx < var_count); + struct var_or_mrset *v_o_m = &var_or_mrset_array[var_idx++]; + v_o_m->is_mrset = false; + v_o_m->variable = var; + } + } + + /* Ensure that we filled up the array. */ + assert (var_idx == var_count); + + /* Cleanup. */ + string_set_destroy (&added_mrsets); + string_set_destroy (&var_names); + + struct all_dict_variables result; + result.vars = var_or_mrset_array; + result.count = var_count; + return result; +} + + /* Returns true if an I/O error has occurred on WRITER, false otherwise. */ static bool mdd_write_error (const struct mdd_writer *writer) @@ -193,11 +347,109 @@ name_to_id (const char *name) return id; } +static void +write_variable_section (xmlTextWriter *writer, const struct variable *var, int id) +{ + xmlTextWriterStartElement (writer, _xml ("variable")); + write_attr (writer, "name", var_get_name (var)); + + bool is_string = var_get_type (var) == VAL_STRING; + + int type = var_get_numeric_type_ (var); + xmlTextWriterWriteFormatAttribute (writer, _xml ("type"), "%d", type); + + int max = is_string ? var_get_width (var) : 1; + xmlTextWriterWriteFormatAttribute (writer, _xml ("max"), "%d", max); + + write_attr (writer, "maxtype", "3"); + + const char *label = var_get_label (var); + if (label) + { + xmlTextWriterStartElement (writer, _xml ("labels")); + write_attr (writer, "context", "LABEL"); + + xmlTextWriterStartElement (writer, _xml ("text")); + write_attr (writer, "context", "ANALYSIS"); + write_xml_lang (writer); + xmlTextWriterWriteString (writer, _xml (label)); + xmlTextWriterEndElement (writer); + + xmlTextWriterEndElement (writer); + } + + const struct val_labs *val_labs = var_get_value_labels (var); + size_t n_vls = val_labs_count (val_labs); + if (n_vls) + { + const struct val_lab **vls = val_labs_sorted (val_labs); + + /* */ + xmlTextWriterStartElement (writer, _xml ("categories")); + write_global_name_space (writer); + int width = var_get_width (var); + for (size_t j = 0; j < n_vls; j++) + { + const struct val_lab *vl = vls[j]; + const union value *value = val_lab_get_value (vl); + + /* */ + xmlTextWriterStartElement (writer, _xml ("category")); + xmlTextWriterWriteFormatAttribute (writer, _xml ("id"), "_%d", id); + + char *name = name_to_id (val_lab_get_label (vl)); + write_attr (writer, "name", name); + free (name); + + /* If the value here is missing, annotate it. + + XXX only checking "user" here because not sure of correct other + cases. */ + if (var_is_value_missing (var, value, MV_USER)) + write_attr (writer, "missing", "user"); + else if (var_is_value_missing (var, value, MV_SYSTEM)) + write_attr (writer, "missing", "system"); + + /* */ + xmlTextWriterStartElement (writer, _xml ("properties")); + xmlTextWriterStartElement (writer, _xml ("property")); + write_attr (writer, "name", "Value"); + write_value_label_value (writer, vl, width); + write_attr (writer, "type", "5"); + write_attr (writer, "context", "Analysis"); + xmlTextWriterEndElement (writer); /* */ + xmlTextWriterEndElement (writer); /* */ + + /* */ + xmlTextWriterStartElement (writer, _xml ("labels")); + write_attr (writer, "context", "LABEL"); + xmlTextWriterStartElement (writer, _xml ("text")); + write_attr (writer, "context", "ANALYSIS"); + write_xml_lang (writer); + xmlTextWriterWriteString (writer, + _xml (val_lab_get_label (vl))); + xmlTextWriterEndElement (writer); /* */ + xmlTextWriterEndElement (writer); /* */ + + + /* */ + xmlTextWriterEndElement (writer); + } + write_empty_element (writer, "deleted"); + xmlTextWriterEndElement (writer); /* */ + + free (vls); + } + /* */ + xmlTextWriterEndElement (writer); +} + bool mdd_write (struct file_handle *fh, struct dictionary *dict, const char *sav_name) { struct mdd_writer *w = xzalloc (sizeof *w); + size_t n_vars = dict_get_var_cnt (dict); /* Open file handle as an exclusive writer. */ /* TRANSLATORS: this fragment will be interpolated into @@ -280,50 +532,94 @@ mdd_write (struct file_handle *fh, struct dictionary *dict, write_attr (w->writer, "cdscname", "mrSavDsc"); write_attr (w->writer, "project", "126"); - size_t n_vars = dict_get_var_cnt (dict); + struct all_dict_variables allvars = all_variables (dict); + struct var_or_mrset *var_or_mrset_array = allvars.vars; + size_t var_count = allvars.count; + short_names_assign (dict); - for (size_t i = 0; i < n_vars; i++) + for (size_t i = 0; i < var_count; i++) { - const struct variable *var = dict_get_var (dict, i); + const struct var_or_mrset var_or_mrset = var_or_mrset_array[i]; xmlTextWriterStartElement (w->writer, _xml ("var")); + if (var_or_mrset.is_mrset) + { + const struct mrset *mrset = var_or_mrset.mrset; + write_attr (w->writer, "fullname", mrset->name + 1); + write_attr (w->writer, "aliasname", mrset->name); - char *short_name = xstrdup (var_get_short_name (var, 0)); - for (char *p = short_name; *p; p++) - *p = c_tolower (*p); - write_attr (w->writer, "fullname", short_name); - free (short_name); - - write_attr (w->writer, "aliasname", var_get_name (var)); + for (size_t subvar_idx = 0; subvar_idx < mrset->n_vars; subvar_idx++) + { + xmlTextWriterStartElement(w->writer, _xml ("subalias")); + xmlTextWriterWriteFormatAttribute (w->writer, _xml ("index"), + "%zu", subvar_idx); + write_attr (w->writer, "name", + var_get_name (mrset->vars[subvar_idx])); - const struct val_labs *val_labs = var_get_value_labels (var); - size_t n_vls = val_labs_count (val_labs); - if (n_vls) + xmlTextWriterEndElement (w->writer); /* subalias */ + } + } + else { - const struct val_lab **vls = val_labs_sorted (val_labs); + const struct variable *var = var_or_mrset.variable; - xmlTextWriterStartElement (w->writer, _xml ("nativevalues")); - int width = var_get_width (var); - for (size_t j = 0; j < n_vls; j++) + char *short_name = xstrdup (var_get_short_name (var, 0)); + for (char *p = short_name; *p; p++) + *p = c_tolower (*p); + write_attr (w->writer, "fullname", short_name); + free (short_name); + + write_attr (w->writer, "aliasname", var_get_name (var)); + + const struct val_labs *val_labs = var_get_value_labels (var); + size_t n_vls = val_labs_count (val_labs); + if (n_vls) { - const struct val_lab *vl = vls[j]; - xmlTextWriterStartElement (w->writer, _xml ("nativevalue")); + const struct val_lab **vls = val_labs_sorted (val_labs); - char *fullname = name_to_id (val_lab_get_label (vl)); - write_attr (w->writer, "fullname", fullname); - free (fullname); + xmlTextWriterStartElement (w->writer, _xml ("nativevalues")); + int width = var_get_width (var); + for (size_t j = 0; j < n_vls; j++) + { + const struct val_lab *vl = vls[j]; + xmlTextWriterStartElement (w->writer, _xml ("nativevalue")); + + char *fullname = name_to_id (val_lab_get_label (vl)); + write_attr (w->writer, "fullname", fullname); + free (fullname); - write_value_label_value (w->writer, vl, width); + write_value_label_value (w->writer, vl, width); + xmlTextWriterEndElement (w->writer); + } xmlTextWriterEndElement (w->writer); + + free (vls); } - xmlTextWriterEndElement (w->writer); - free (vls); } + xmlTextWriterEndElement (w->writer); /* var */ + } + xmlTextWriterEndElement (w->writer); /* connection */ + xmlTextWriterEndElement (w->writer); /* datasources */ + + /* If the dictionary has a label, record it here */ + const char *file_label = dict_get_label (dict); + if (file_label != NULL) + { + xmlTextWriterStartElement (w->writer, _xml ("labels")); + write_attr (w->writer, "context", "LABEL"); + xmlTextWriterStartElement (w->writer, _xml ("text")); + + write_attr (w->writer, "context", "ANALYSIS"); + write_xml_lang (w->writer); + xmlTextWriterWriteString (w->writer, _xml (file_label)); + + /* */ + xmlTextWriterEndElement (w->writer); + + /* */ xmlTextWriterEndElement (w->writer); } - xmlTextWriterEndElement (w->writer); - xmlTextWriterEndElement (w->writer); /* We reserve ids 1...N_VARS for variables and then start other ids after that. */ @@ -331,94 +627,105 @@ mdd_write (struct file_handle *fh, struct dictionary *dict, /* */ xmlTextWriterStartElement (w->writer, _xml ("definition")); - for (size_t i = 0; i < n_vars; i++) + for (size_t i = 0; i < var_count; i++) { - const struct variable *var = dict_get_var (dict, i); - xmlTextWriterStartElement (w->writer, _xml ("variable")); xmlTextWriterWriteFormatAttribute (w->writer, _xml ("id"), "%zu", i + 1); - write_attr (w->writer, "name", var_get_name (var)); - - bool is_string = var_get_type (var) == VAL_STRING; - int type = is_string ? 2 : 3; - xmlTextWriterWriteFormatAttribute (w->writer, _xml ("type"), "%d", type); + const struct var_or_mrset var_or_mrset = var_or_mrset_array[i]; - int max = is_string ? var_get_width (var) : 1; - xmlTextWriterWriteFormatAttribute (w->writer, _xml ("max"), "%d", max); + if (var_or_mrset.is_mrset) + { + const struct mrset *mrset = var_or_mrset.mrset; + xmlTextWriterStartElement (w->writer, _xml ("variable")); + write_attr (w->writer, "name", mrset->name); + + /* Use the type of the first subvariable as the type of the MRSET? */ + write_attr (w->writer, "type", "3"); + + xmlTextWriterStartElement (w->writer, _xml ("properties")); + xmlTextWriterStartElement (w->writer, _xml ("property")); + write_attr (w->writer, "name", "QvLabel"); + write_attr (w->writer, "value", mrset->name); + write_attr (w->writer, "type", "8"); + write_attr (w->writer, "context", "Analysis"); + /* */ + xmlTextWriterEndElement (w->writer); + /* */ + xmlTextWriterEndElement (w->writer); - write_attr (w->writer, "maxtype", "3"); - const char *label = var_get_label (var); - if (label) - { xmlTextWriterStartElement (w->writer, _xml ("labels")); write_attr (w->writer, "context", "LABEL"); - xmlTextWriterStartElement (w->writer, _xml ("text")); + write_attr (w->writer, "context", "ANALYSIS"); write_xml_lang (w->writer); - xmlTextWriterWriteString (w->writer, _xml (label)); - xmlTextWriterEndElement (w->writer); + xmlTextWriterWriteString (w->writer, _xml (mrset->label)); + /* */ xmlTextWriterEndElement (w->writer); - } - const struct val_labs *val_labs = var_get_value_labels (var); - size_t n_vls = val_labs_count (val_labs); - if (n_vls) - { - const struct val_lab **vls = val_labs_sorted (val_labs); + /* */ + xmlTextWriterEndElement (w->writer); - /* */ xmlTextWriterStartElement (w->writer, _xml ("categories")); - write_global_name_space (w->writer); - int width = var_get_width (var); - for (size_t j = 0; j < n_vls; j++) - { - const struct val_lab *vl = vls[j]; + write_attr (w->writer, "global-name-space", "-1"); + write_empty_element (w->writer, "deleted"); - /* */ + /* Individual categories */ + int value = 2; + for (size_t var_idx = 0; var_idx < mrset->n_vars; ++var_idx) + { + const struct variable *subvar = mrset->vars[var_idx]; + value += 2; xmlTextWriterStartElement (w->writer, _xml ("category")); - xmlTextWriterWriteFormatAttribute (w->writer, _xml ("id"), - "_%d", id++); - - char *name = name_to_id (val_lab_get_label (vl)); - write_attr (w->writer, "name", name); - free (name); + write_attr (w->writer, "context", "LABEL"); + char *name_without_spaces = strdup (var_get_name (subvar)); + for (size_t i = 0; name_without_spaces[i]; ++i) + if (name_without_spaces[i] == ' ') name_without_spaces[i] = '_'; + write_attr (w->writer, "name", name_without_spaces); + free (name_without_spaces); - /* */ xmlTextWriterStartElement (w->writer, _xml ("properties")); xmlTextWriterStartElement (w->writer, _xml ("property")); - write_attr (w->writer, "name", "Value"); - write_value_label_value (w->writer, vl, width); - write_attr (w->writer, "type", "5"); + write_attr (w->writer, "name", "QvBasicNum"); + xmlTextWriterWriteFormatAttribute + (w->writer, _xml ("value"), "%d", value); + write_attr (w->writer, "type", "3"); write_attr (w->writer, "context", "Analysis"); + /* */ xmlTextWriterEndElement (w->writer); + /* */ xmlTextWriterEndElement (w->writer); - /* */ xmlTextWriterStartElement (w->writer, _xml ("labels")); write_attr (w->writer, "context", "LABEL"); + xmlTextWriterStartElement (w->writer, _xml ("text")); write_attr (w->writer, "context", "ANALYSIS"); write_xml_lang (w->writer); - xmlTextWriterWriteString (w->writer, - _xml (val_lab_get_label (vl))); - xmlTextWriterEndElement (w->writer); + xmlTextWriterWriteString (w->writer, _xml (var_get_label (subvar))); + /* */ xmlTextWriterEndElement (w->writer); + /* */ + xmlTextWriterEndElement (w->writer); /* */ xmlTextWriterEndElement (w->writer); } - write_empty_element (w->writer, "deleted"); - xmlTextWriterEndElement (w->writer); - free (vls); + /* */ + xmlTextWriterEndElement (w->writer); + /* */ + xmlTextWriterEndElement (w->writer); + } + else + { + const struct variable *var = var_or_mrset.variable; + write_variable_section(w->writer, var, id++); } - - xmlTextWriterEndElement (w->writer); } - xmlTextWriterEndElement (w->writer); + xmlTextWriterEndElement (w->writer); /* */ write_empty_element (w->writer, "system"); write_empty_element (w->writer, "systemrouting"); @@ -567,6 +874,8 @@ mdd_write (struct file_handle *fh, struct dictionary *dict, xmlTextWriterEndDocument (w->writer); + free(var_or_mrset_array); + error: mdd_close (w); return NULL; diff --git a/src/language/data-io/save.c b/src/language/data-io/save.c index be746742f3..d12593ffca 100644 --- a/src/language/data-io/save.c +++ b/src/language/data-io/save.c @@ -36,6 +36,7 @@ #include "language/lexer/lexer.h" #include "libpspp/assertion.h" #include "libpspp/compiler.h" +#include "libpspp/message.h" #include "gl/xalloc.h" @@ -166,6 +167,7 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, struct casewriter *writer; /* Writer. */ struct case_map_stage *stage; /* Preparation for 'map'. */ struct case_map *map; /* Map from input data to data for writer. */ + const char *sav_name = ""; /* Common options. */ struct sfm_write_options sysfile_opts; @@ -303,37 +305,41 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, if (lex_end_of_command (lexer) != CMD_SUCCESS) goto error; - if (handle == NULL) + if (!handle && !metadata) { - lex_sbc_missing ("OUTFILE"); + msg (SE, _("The OUTFILE or METADATA subcommand is required.")); goto error; } dict_delete_scratch_vars (dict); dict_compact_values (dict); - if (fh_get_referent (handle) == FH_REF_FILE) + if (handle) { - switch (writer_type) + if (metadata) + sav_name = (fh_get_referent (handle) == FH_REF_FILE + ? fh_get_file_name (handle) + : fh_get_name (handle)); + if (fh_get_referent (handle) == FH_REF_FILE) { - case SYSFILE_WRITER: - writer = sfm_open_writer (handle, dict, sysfile_opts); - break; - case PORFILE_WRITER: - writer = pfm_open_writer (handle, dict, porfile_opts); - break; + switch (writer_type) + { + case SYSFILE_WRITER: + writer = sfm_open_writer (handle, dict, sysfile_opts); + break; + case PORFILE_WRITER: + writer = pfm_open_writer (handle, dict, porfile_opts); + break; + } } + else + writer = any_writer_open (handle, dict); + if (writer == NULL) + goto error; } - else - writer = any_writer_open (handle, dict); - if (writer == NULL) - goto error; if (metadata) { - const char *sav_name = (fh_get_referent (handle) == FH_REF_FILE - ? fh_get_file_name (handle) - : fh_get_name (handle)); if (!mdd_write (metadata, dict, sav_name)) goto error; }