#include "data/dictionary.h"
#include "data/file-handle-def.h"
+#include "data/format.h"
#include "data/make-file.h"
+#include "data/missing-values.h"
+#include "data/mrset.h"
#include "data/short-names.h"
#include "data/value-labels.h"
#include "data/variable.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
+#include "libpspp/string-map.h"
#include "libpspp/string-set.h"
#include "libpspp/version.h"
#define _xml(X) CHAR_CAST (const xmlChar *, X)
+enum val_numeric_type
+ {
+ VAL_INTEGER_TYPE = 1,
+ VAL_STRING_TYPE = 2,
+ VAL_CATEGORICAL_TYPE = 3,
+ VAL_DATETIME_TYPE = 5,
+ VAL_DECIMAL_TYPE = 6
+ };
+
+/* Get the numeric type of the variable. */
+static enum val_numeric_type
+var_get_numeric_type_ (const struct variable *var)
+{
+ const struct fmt_spec *print = var_get_print_format (var);
+ if (var_get_type (var) == VAL_STRING)
+ return VAL_STRING_TYPE;
+
+ if (var_has_value_labels (var))
+ return VAL_CATEGORICAL_TYPE;
+
+ if (print->d > 0)
+ return VAL_DECIMAL_TYPE;
+
+ if (print->type == FMT_DATETIME)
+ return VAL_DATETIME_TYPE;
+
+ if (print->type == FMT_F)
+ return VAL_INTEGER_TYPE;
+
+ return VAL_CATEGORICAL_TYPE;
+}
+
/* Metadata file writer. */
struct mdd_writer
{
xmlTextWriter *writer;
};
+/* Either a variable or a multi-response set. */
+struct var_or_mrset
+ {
+ /* If true, the union contains a multi-response set. Otherwise, it
+ contains a variable. */
+ bool is_mrset;
+ union
+ {
+ const struct mrset *mrset;
+ const struct variable *variable;
+ };
+ };
+
+struct all_dict_variables
+ {
+ struct var_or_mrset *vars;
+ size_t count;
+ };
+
+struct all_dict_variables
+all_variables (struct dictionary *dict);
+
+/* Extract all variables in a dictionary, both normal and multi.
+
+ Excludes variables which are subvariables of an MRSET. */
+struct all_dict_variables
+all_variables (struct dictionary *dict)
+{
+ size_t n_vars = dict_get_var_cnt (dict);
+
+ /* Start with a set of all variable names. */
+ struct string_set var_names = STRING_SET_INITIALIZER (var_names);
+ for (size_t i = 0; i < n_vars; ++i)
+ {
+ const struct variable *var = dict_get_var (dict, i);
+ string_set_insert (&var_names, var_get_name (var));
+ }
+
+ /* For each MRSET M, remove all subvariable names of M from S. */
+ size_t n_sets = dict_get_n_mrsets (dict);
+ for (size_t set_idx = 0; set_idx < n_sets; set_idx++)
+ {
+ const struct mrset *mrset = dict_get_mrset (dict, set_idx);
+ for (size_t i = 0; i < mrset->n_vars; ++i)
+ {
+ const struct variable *var = mrset->vars[i];
+ string_set_delete (&var_names, var_get_name (var));
+ }
+ }
+
+ /* Add the number of remaining variables to the number of MRSets. */
+ size_t var_count = n_sets + string_set_count (&var_names);
+
+ /* Allocate an array of var_or_mrset pointers (initially null). */
+ struct var_or_mrset *var_or_mrset_array = xcalloc (
+ var_count, sizeof *var_or_mrset_array);
+
+ /* Fill the array. */
+ struct string_set added_mrsets = STRING_SET_INITIALIZER (added_mrsets);
+
+ size_t var_idx = 0;
+ for (size_t i = 0; i < n_vars; ++i)
+ {
+ const struct variable *var = dict_get_var (dict, i);
+ bool found_in_mrset = false;
+ for (size_t set_idx = 0; set_idx < n_sets; set_idx++)
+ {
+ const struct mrset *mrset = dict_get_mrset (dict, set_idx);
+ for (size_t i = 0; i < mrset->n_vars; ++i)
+ {
+ const struct variable *subvar = mrset->vars[i];
+ if (!strcmp (var_get_name (var), var_get_name (subvar)))
+ {
+ /* Then this variable is a member of this MRSet. */
+ found_in_mrset = true;
+
+ /* Check if this MRSet has already been added and add
+ otherwise. */
+ if (!string_set_contains (&added_mrsets, mrset->name))
+ {
+ string_set_insert (&added_mrsets, mrset->name);
+
+ assert (var_idx < var_count);
+ struct var_or_mrset *v_o_m
+ = &var_or_mrset_array[var_idx++];
+ v_o_m->is_mrset = true;
+ v_o_m->mrset = mrset;
+ }
+ }
+ }
+ }
+
+ /* If the variable wasn't found to be a member of any MRSets, record it as
+ a normal variable. */
+ if (!found_in_mrset)
+ {
+ /* The variable is not part of a multi-response set. */
+ assert (var_idx < var_count);
+ struct var_or_mrset *v_o_m = &var_or_mrset_array[var_idx++];
+ v_o_m->is_mrset = false;
+ v_o_m->variable = var;
+ }
+ }
+
+ /* Ensure that we filled up the array. */
+ assert (var_idx == var_count);
+
+ /* Cleanup. */
+ string_set_destroy (&added_mrsets);
+ string_set_destroy (&var_names);
+
+ struct all_dict_variables result;
+ result.vars = var_or_mrset_array;
+ result.count = var_count;
+ return result;
+}
+
+
/* Returns true if an I/O error has occurred on WRITER, false otherwise. */
static bool
mdd_write_error (const struct mdd_writer *writer)
return id;
}
+static void
+write_variable_section (xmlTextWriter *writer, const struct variable *var, int id)
+{
+ xmlTextWriterStartElement (writer, _xml ("variable"));
+ write_attr (writer, "name", var_get_name (var));
+
+ bool is_string = var_get_type (var) == VAL_STRING;
+
+ int type = var_get_numeric_type_ (var);
+ xmlTextWriterWriteFormatAttribute (writer, _xml ("type"), "%d", type);
+
+ int max = is_string ? var_get_width (var) : 1;
+ xmlTextWriterWriteFormatAttribute (writer, _xml ("max"), "%d", max);
+
+ write_attr (writer, "maxtype", "3");
+
+ const char *label = var_get_label (var);
+ if (label)
+ {
+ xmlTextWriterStartElement (writer, _xml ("labels"));
+ write_attr (writer, "context", "LABEL");
+
+ xmlTextWriterStartElement (writer, _xml ("text"));
+ write_attr (writer, "context", "ANALYSIS");
+ write_xml_lang (writer);
+ xmlTextWriterWriteString (writer, _xml (label));
+ xmlTextWriterEndElement (writer);
+
+ xmlTextWriterEndElement (writer);
+ }
+
+ const struct val_labs *val_labs = var_get_value_labels (var);
+ size_t n_vls = val_labs_count (val_labs);
+ if (n_vls)
+ {
+ const struct val_lab **vls = val_labs_sorted (val_labs);
+
+ /* <categories/> */
+ xmlTextWriterStartElement (writer, _xml ("categories"));
+ write_global_name_space (writer);
+ int width = var_get_width (var);
+ for (size_t j = 0; j < n_vls; j++)
+ {
+ const struct val_lab *vl = vls[j];
+ const union value *value = val_lab_get_value (vl);
+
+ /* <category> */
+ xmlTextWriterStartElement (writer, _xml ("category"));
+ xmlTextWriterWriteFormatAttribute (writer, _xml ("id"), "_%d", id);
+
+ char *name = name_to_id (val_lab_get_label (vl));
+ write_attr (writer, "name", name);
+ free (name);
+
+ /* If the value here is missing, annotate it.
+
+ XXX only checking "user" here because not sure of correct other
+ cases. */
+ if (var_is_value_missing (var, value, MV_USER))
+ write_attr (writer, "missing", "user");
+ else if (var_is_value_missing (var, value, MV_SYSTEM))
+ write_attr (writer, "missing", "system");
+
+ /* <properties/> */
+ xmlTextWriterStartElement (writer, _xml ("properties"));
+ xmlTextWriterStartElement (writer, _xml ("property"));
+ write_attr (writer, "name", "Value");
+ write_value_label_value (writer, vl, width);
+ write_attr (writer, "type", "5");
+ write_attr (writer, "context", "Analysis");
+ xmlTextWriterEndElement (writer); /* </property> */
+ xmlTextWriterEndElement (writer); /* </properties> */
+
+ /* <labels/> */
+ xmlTextWriterStartElement (writer, _xml ("labels"));
+ write_attr (writer, "context", "LABEL");
+ xmlTextWriterStartElement (writer, _xml ("text"));
+ write_attr (writer, "context", "ANALYSIS");
+ write_xml_lang (writer);
+ xmlTextWriterWriteString (writer,
+ _xml (val_lab_get_label (vl)));
+ xmlTextWriterEndElement (writer); /* </text> */
+ xmlTextWriterEndElement (writer); /* </labels> */
+
+
+ /* </category> */
+ xmlTextWriterEndElement (writer);
+ }
+ write_empty_element (writer, "deleted");
+ xmlTextWriterEndElement (writer); /* </categories> */
+
+ free (vls);
+ }
+ /* </variable> */
+ xmlTextWriterEndElement (writer);
+}
+
bool
mdd_write (struct file_handle *fh, struct dictionary *dict,
const char *sav_name)
{
struct mdd_writer *w = xzalloc (sizeof *w);
+ size_t n_vars = dict_get_var_cnt (dict);
/* Open file handle as an exclusive writer. */
/* TRANSLATORS: this fragment will be interpolated into
write_attr (w->writer, "cdscname", "mrSavDsc");
write_attr (w->writer, "project", "126");
- size_t n_vars = dict_get_var_cnt (dict);
+ struct all_dict_variables allvars = all_variables (dict);
+ struct var_or_mrset *var_or_mrset_array = allvars.vars;
+ size_t var_count = allvars.count;
+
short_names_assign (dict);
- for (size_t i = 0; i < n_vars; i++)
+ for (size_t i = 0; i < var_count; i++)
{
- const struct variable *var = dict_get_var (dict, i);
+ const struct var_or_mrset var_or_mrset = var_or_mrset_array[i];
xmlTextWriterStartElement (w->writer, _xml ("var"));
+ if (var_or_mrset.is_mrset)
+ {
+ const struct mrset *mrset = var_or_mrset.mrset;
+ write_attr (w->writer, "fullname", mrset->name + 1);
+ write_attr (w->writer, "aliasname", mrset->name);
- char *short_name = xstrdup (var_get_short_name (var, 0));
- for (char *p = short_name; *p; p++)
- *p = c_tolower (*p);
- write_attr (w->writer, "fullname", short_name);
- free (short_name);
-
- write_attr (w->writer, "aliasname", var_get_name (var));
+ for (size_t subvar_idx = 0; subvar_idx < mrset->n_vars; subvar_idx++)
+ {
+ xmlTextWriterStartElement(w->writer, _xml ("subalias"));
+ xmlTextWriterWriteFormatAttribute (w->writer, _xml ("index"),
+ "%zu", subvar_idx);
+ write_attr (w->writer, "name",
+ var_get_name (mrset->vars[subvar_idx]));
- const struct val_labs *val_labs = var_get_value_labels (var);
- size_t n_vls = val_labs_count (val_labs);
- if (n_vls)
+ xmlTextWriterEndElement (w->writer); /* subalias */
+ }
+ }
+ else
{
- const struct val_lab **vls = val_labs_sorted (val_labs);
+ const struct variable *var = var_or_mrset.variable;
- xmlTextWriterStartElement (w->writer, _xml ("nativevalues"));
- int width = var_get_width (var);
- for (size_t j = 0; j < n_vls; j++)
+ char *short_name = xstrdup (var_get_short_name (var, 0));
+ for (char *p = short_name; *p; p++)
+ *p = c_tolower (*p);
+ write_attr (w->writer, "fullname", short_name);
+ free (short_name);
+
+ write_attr (w->writer, "aliasname", var_get_name (var));
+
+ const struct val_labs *val_labs = var_get_value_labels (var);
+ size_t n_vls = val_labs_count (val_labs);
+ if (n_vls)
{
- const struct val_lab *vl = vls[j];
- xmlTextWriterStartElement (w->writer, _xml ("nativevalue"));
+ const struct val_lab **vls = val_labs_sorted (val_labs);
- char *fullname = name_to_id (val_lab_get_label (vl));
- write_attr (w->writer, "fullname", fullname);
- free (fullname);
+ xmlTextWriterStartElement (w->writer, _xml ("nativevalues"));
+ int width = var_get_width (var);
+ for (size_t j = 0; j < n_vls; j++)
+ {
+ const struct val_lab *vl = vls[j];
+ xmlTextWriterStartElement (w->writer, _xml ("nativevalue"));
+
+ char *fullname = name_to_id (val_lab_get_label (vl));
+ write_attr (w->writer, "fullname", fullname);
+ free (fullname);
- write_value_label_value (w->writer, vl, width);
+ write_value_label_value (w->writer, vl, width);
+ xmlTextWriterEndElement (w->writer);
+ }
xmlTextWriterEndElement (w->writer);
+
+ free (vls);
}
- xmlTextWriterEndElement (w->writer);
- free (vls);
}
+ xmlTextWriterEndElement (w->writer); /* var */
+ }
+ xmlTextWriterEndElement (w->writer); /* connection */
+ xmlTextWriterEndElement (w->writer); /* datasources */
+
+ /* If the dictionary has a label, record it here */
+ const char *file_label = dict_get_label (dict);
+ if (file_label != NULL)
+ {
+ xmlTextWriterStartElement (w->writer, _xml ("labels"));
+ write_attr (w->writer, "context", "LABEL");
+ xmlTextWriterStartElement (w->writer, _xml ("text"));
+
+ write_attr (w->writer, "context", "ANALYSIS");
+ write_xml_lang (w->writer);
+ xmlTextWriterWriteString (w->writer, _xml (file_label));
+
+ /* </text> */
+ xmlTextWriterEndElement (w->writer);
+
+ /* </labels> */
xmlTextWriterEndElement (w->writer);
}
- xmlTextWriterEndElement (w->writer);
- xmlTextWriterEndElement (w->writer);
/* We reserve ids 1...N_VARS for variables and then start other ids after
that. */
/* <definition/> */
xmlTextWriterStartElement (w->writer, _xml ("definition"));
- for (size_t i = 0; i < n_vars; i++)
+ for (size_t i = 0; i < var_count; i++)
{
- const struct variable *var = dict_get_var (dict, i);
- xmlTextWriterStartElement (w->writer, _xml ("variable"));
xmlTextWriterWriteFormatAttribute (w->writer, _xml ("id"), "%zu", i + 1);
- write_attr (w->writer, "name", var_get_name (var));
-
- bool is_string = var_get_type (var) == VAL_STRING;
- int type = is_string ? 2 : 3;
- xmlTextWriterWriteFormatAttribute (w->writer, _xml ("type"), "%d", type);
+ const struct var_or_mrset var_or_mrset = var_or_mrset_array[i];
- int max = is_string ? var_get_width (var) : 1;
- xmlTextWriterWriteFormatAttribute (w->writer, _xml ("max"), "%d", max);
+ if (var_or_mrset.is_mrset)
+ {
+ const struct mrset *mrset = var_or_mrset.mrset;
+ xmlTextWriterStartElement (w->writer, _xml ("variable"));
+ write_attr (w->writer, "name", mrset->name);
+
+ /* Use the type of the first subvariable as the type of the MRSET? */
+ write_attr (w->writer, "type", "3");
+
+ xmlTextWriterStartElement (w->writer, _xml ("properties"));
+ xmlTextWriterStartElement (w->writer, _xml ("property"));
+ write_attr (w->writer, "name", "QvLabel");
+ write_attr (w->writer, "value", mrset->name);
+ write_attr (w->writer, "type", "8");
+ write_attr (w->writer, "context", "Analysis");
+ /* </property> */
+ xmlTextWriterEndElement (w->writer);
+ /* </properties> */
+ xmlTextWriterEndElement (w->writer);
- write_attr (w->writer, "maxtype", "3");
- const char *label = var_get_label (var);
- if (label)
- {
xmlTextWriterStartElement (w->writer, _xml ("labels"));
write_attr (w->writer, "context", "LABEL");
-
xmlTextWriterStartElement (w->writer, _xml ("text"));
+
write_attr (w->writer, "context", "ANALYSIS");
write_xml_lang (w->writer);
- xmlTextWriterWriteString (w->writer, _xml (label));
- xmlTextWriterEndElement (w->writer);
+ xmlTextWriterWriteString (w->writer, _xml (mrset->label));
+ /* </text> */
xmlTextWriterEndElement (w->writer);
- }
- const struct val_labs *val_labs = var_get_value_labels (var);
- size_t n_vls = val_labs_count (val_labs);
- if (n_vls)
- {
- const struct val_lab **vls = val_labs_sorted (val_labs);
+ /* </labels> */
+ xmlTextWriterEndElement (w->writer);
- /* <categories/> */
xmlTextWriterStartElement (w->writer, _xml ("categories"));
- write_global_name_space (w->writer);
- int width = var_get_width (var);
- for (size_t j = 0; j < n_vls; j++)
- {
- const struct val_lab *vl = vls[j];
+ write_attr (w->writer, "global-name-space", "-1");
+ write_empty_element (w->writer, "deleted");
- /* <category> */
+ /* Individual categories */
+ int value = 2;
+ for (size_t var_idx = 0; var_idx < mrset->n_vars; ++var_idx)
+ {
+ const struct variable *subvar = mrset->vars[var_idx];
+ value += 2;
xmlTextWriterStartElement (w->writer, _xml ("category"));
- xmlTextWriterWriteFormatAttribute (w->writer, _xml ("id"),
- "_%d", id++);
-
- char *name = name_to_id (val_lab_get_label (vl));
- write_attr (w->writer, "name", name);
- free (name);
+ write_attr (w->writer, "context", "LABEL");
+ char *name_without_spaces = strdup (var_get_name (subvar));
+ for (size_t i = 0; name_without_spaces[i]; ++i)
+ if (name_without_spaces[i] == ' ') name_without_spaces[i] = '_';
+ write_attr (w->writer, "name", name_without_spaces);
+ free (name_without_spaces);
- /* <properties/> */
xmlTextWriterStartElement (w->writer, _xml ("properties"));
xmlTextWriterStartElement (w->writer, _xml ("property"));
- write_attr (w->writer, "name", "Value");
- write_value_label_value (w->writer, vl, width);
- write_attr (w->writer, "type", "5");
+ write_attr (w->writer, "name", "QvBasicNum");
+ xmlTextWriterWriteFormatAttribute
+ (w->writer, _xml ("value"), "%d", value);
+ write_attr (w->writer, "type", "3");
write_attr (w->writer, "context", "Analysis");
+ /* </property> */
xmlTextWriterEndElement (w->writer);
+ /* </properties> */
xmlTextWriterEndElement (w->writer);
- /* <labels/> */
xmlTextWriterStartElement (w->writer, _xml ("labels"));
write_attr (w->writer, "context", "LABEL");
+
xmlTextWriterStartElement (w->writer, _xml ("text"));
write_attr (w->writer, "context", "ANALYSIS");
write_xml_lang (w->writer);
- xmlTextWriterWriteString (w->writer,
- _xml (val_lab_get_label (vl)));
- xmlTextWriterEndElement (w->writer);
+ xmlTextWriterWriteString (w->writer, _xml (var_get_label (subvar)));
+ /* </text> */
xmlTextWriterEndElement (w->writer);
+ /* </labels> */
+ xmlTextWriterEndElement (w->writer);
/* </category> */
xmlTextWriterEndElement (w->writer);
}
- write_empty_element (w->writer, "deleted");
- xmlTextWriterEndElement (w->writer);
- free (vls);
+ /* </categories> */
+ xmlTextWriterEndElement (w->writer);
+ /* </variable> */
+ xmlTextWriterEndElement (w->writer);
+ }
+ else
+ {
+ const struct variable *var = var_or_mrset.variable;
+ write_variable_section(w->writer, var, id++);
}
-
- xmlTextWriterEndElement (w->writer);
}
- xmlTextWriterEndElement (w->writer);
+ xmlTextWriterEndElement (w->writer); /* </definition> */
write_empty_element (w->writer, "system");
write_empty_element (w->writer, "systemrouting");
xmlTextWriterEndDocument (w->writer);
+ free(var_or_mrset_array);
+
error:
mdd_close (w);
return NULL;