X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdictionary%2Fsys-file-info.c;h=b9e0a85c48c27b972529069d40c7861a055d14e4;hb=649c202d57d7d5d8bb87be5b72839cd56ca4ca0b;hp=1f47e1af104ba5281f1634eda67f81b9f49c3412;hpb=d0b91eae59319ab2756d0d43b9cb15eb9cd3c234;p=pspp diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index 1f47e1af10..b9e0a85c48 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,32 +17,40 @@ #include #include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "xalloc.h" +#include "data/attributes.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/file-handle-def.h" +#include "data/format.h" +#include "data/missing-values.h" +#include "data/sys-file-reader.h" +#include "data/value-labels.h" +#include "data/variable.h" +#include "data/vector.h" +#include "language/command.h" +#include "language/data-io/file-handle.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/array.h" +#include "libpspp/hash-functions.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/string-array.h" +#include "output/tab.h" +#include "output/text-item.h" +#include "output/table-item.h" + +#include "gl/localcharset.h" +#include "gl/intprops.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -57,114 +65,185 @@ enum DF_MISSING_VALUES = 1 << 4, DF_AT_ATTRIBUTES = 1 << 5, /* Attributes whose names begin with @. */ DF_ATTRIBUTES = 1 << 6, /* All other attributes. */ - DF_MISC = 1 << 7, - DF_ALL = (1 << 8) - 1 + DF_MEASURE = 1 << 7, + DF_ROLE = 1 << 8, + DF_ALIGNMENT = 1 << 9, + DF_WIDTH = 1 << 10, + DF_ALL = (1 << 11) - 1 }; -static int describe_variable (const struct variable *v, struct tab_table *t, - int r, int pc, int flags); +static unsigned int dict_display_mask (const struct dictionary *); + +static struct table *describe_variable (const struct variable *v, int flags); + +static void report_encodings (const struct file_handle *, + const struct sfm_reader *); /* SYSFILE INFO utility. */ int cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) { + struct sfm_reader *sfm_reader; struct file_handle *h; struct dictionary *d; struct tab_table *t; struct casereader *reader; struct sfm_read_info info; + char *encoding; + struct table *table; int r, i; - lex_match_id (lexer, "FILE"); - lex_match (lexer, '='); + h = NULL; + encoding = NULL; + for (;;) + { + lex_match (lexer, T_SLASH); + + if (lex_match_id (lexer, "FILE") || lex_is_string (lexer)) + { + lex_match (lexer, T_EQUALS); - h = fh_parse (lexer, FH_REF_FILE); - if (!h) - return CMD_FAILURE; + fh_unref (h); + h = fh_parse (lexer, FH_REF_FILE, NULL); + if (h == NULL) + goto error; + } + else if (lex_match_id (lexer, "ENCODING")) + { + lex_match (lexer, T_EQUALS); - reader = sfm_open_reader (h, &d, &info); - if (!reader) + if (!lex_force_string (lexer)) + goto error; + + free (encoding); + encoding = ss_xstrdup (lex_tokss (lexer)); + + lex_get (lexer); + } + else + break; + } + + if (h == NULL) { + lex_sbc_missing ("FILE"); + goto error; + } + + sfm_reader = sfm_open (h); + if (sfm_reader == NULL) + goto error; + + if (encoding && !strcasecmp (encoding, "detect")) + { + report_encodings (h, sfm_reader); fh_unref (h); - return CMD_FAILURE; + return CMD_SUCCESS; } + + reader = sfm_decode (sfm_reader, encoding, &d, &info); + if (!reader) + goto error; + casereader_destroy (reader); - t = tab_create (2, 11); + t = tab_create (2, 11 + (info.product_ext != NULL)); + r = 0; tab_vline (t, TAL_GAP, 1, 0, 8); - tab_text (t, 0, 0, TAB_LEFT, _("File:")); - tab_text (t, 1, 0, TAB_LEFT, fh_get_file_name (h)); - tab_text (t, 0, 1, TAB_LEFT, _("Label:")); + + tab_text (t, 0, r, TAB_LEFT, _("File:")); + tab_text (t, 1, r++, TAB_LEFT, fh_get_file_name (h)); + + tab_text (t, 0, r, TAB_LEFT, _("Label:")); { const char *label = dict_get_label (d); if (label == NULL) label = _("No label."); - tab_text (t, 1, 1, TAB_LEFT, label); + tab_text (t, 1, r++, TAB_LEFT, label); } - tab_text (t, 0, 2, TAB_LEFT, _("Created:")); - tab_text_format (t, 1, 2, TAB_LEFT, "%s %s by %s", + + tab_text (t, 0, r, TAB_LEFT, _("Created:")); + tab_text_format (t, 1, r++, TAB_LEFT, "%s %s by %s", info.creation_date, info.creation_time, info.product); - tab_text (t, 0, 3, TAB_LEFT, _("Integer Format:")); - tab_text (t, 1, 3, TAB_LEFT, + + if (info.product_ext) + { + tab_text (t, 0, r, TAB_LEFT, _("Product:")); + tab_text (t, 1, r++, TAB_LEFT, info.product_ext); + } + + tab_text (t, 0, r, TAB_LEFT, _("Integer Format:")); + tab_text (t, 1, r++, TAB_LEFT, info.integer_format == INTEGER_MSB_FIRST ? _("Big Endian") : info.integer_format == INTEGER_LSB_FIRST ? _("Little Endian") : _("Unknown")); - tab_text (t, 0, 4, TAB_LEFT, _("Real Format:")); - tab_text (t, 1, 4, TAB_LEFT, + + tab_text (t, 0, r, TAB_LEFT, _("Real Format:")); + tab_text (t, 1, r++, TAB_LEFT, info.float_format == FLOAT_IEEE_DOUBLE_LE ? _("IEEE 754 LE.") : info.float_format == FLOAT_IEEE_DOUBLE_BE ? _("IEEE 754 BE.") : info.float_format == FLOAT_VAX_D ? _("VAX D.") : info.float_format == FLOAT_VAX_G ? _("VAX G.") : info.float_format == FLOAT_Z_LONG ? _("IBM 390 Hex Long.") : _("Unknown")); - tab_text (t, 0, 5, TAB_LEFT, _("Variables:")); - tab_text_format (t, 1, 5, TAB_LEFT, "%zu", dict_get_var_cnt (d)); - tab_text (t, 0, 6, TAB_LEFT, _("Cases:")); - tab_text_format (t, 1, 6, TAB_LEFT, - info.case_cnt == -1 ? _("Unknown") : "%ld", - (long int) info.case_cnt); - tab_text (t, 0, 7, TAB_LEFT, _("Type:")); - tab_text (t, 1, 7, TAB_LEFT, _("System File")); - tab_text (t, 0, 8, TAB_LEFT, _("Weight:")); + + tab_text (t, 0, r, TAB_LEFT, _("Variables:")); + tab_text_format (t, 1, r++, TAB_LEFT, "%zu", dict_get_var_cnt (d)); + + tab_text (t, 0, r, TAB_LEFT, _("Cases:")); + if (info.case_cnt == -1) + tab_text (t, 1, r, TAB_LEFT, _("Unknown")); + else + tab_text_format (t, 1, r, TAB_LEFT, "%ld", (long int) info.case_cnt); + r++; + + tab_text (t, 0, r, TAB_LEFT, _("Type:")); + tab_text (t, 1, r++, TAB_LEFT, _("System File")); + + tab_text (t, 0, r, TAB_LEFT, _("Weight:")); { struct variable *weight_var = dict_get_weight (d); - tab_text (t, 1, 8, TAB_LEFT, + tab_text (t, 1, r++, TAB_LEFT, (weight_var != NULL ? var_get_name (weight_var) : _("Not weighted."))); } - tab_text (t, 0, 9, TAB_LEFT, _("Mode:")); - tab_text_format (t, 1, 9, TAB_LEFT, - _("Compression %s."), info.compressed ? _("on") : _("off")); - - tab_text (t, 0, 10, TAB_LEFT, _("Charset:")); - tab_text_format (t, 1, 10, TAB_LEFT, - dict_get_encoding(d) ? dict_get_encoding(d) : _("Unknown")); + tab_text (t, 0, r, TAB_LEFT, _("Compression:")); + tab_text_format (t, 1, r++, TAB_LEFT, + info.compression == SFM_COMP_NONE ? _("None") + : info.compression == SFM_COMP_SIMPLE ? "SAV" + : "ZSAV"); + tab_text (t, 0, r, TAB_LEFT, _("Encoding:")); + tab_text (t, 1, r++, TAB_LEFT, dict_get_encoding (d)); tab_submit (t); - t = tab_create (4, 1 + 2 * dict_get_var_cnt (d)); + t = tab_create (3, 1); tab_headers (t, 0, 0, 1, 0); tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Variable")); - tab_joint_text (t, 1, 0, 2, 0, TAB_LEFT | TAT_TITLE, _("Description")); - tab_text (t, 3, 0, TAB_LEFT | TAT_TITLE, _("Position")); - tab_hline (t, TAL_2, 0, 3, 1); - for (r = 1, i = 0; i < dict_get_var_cnt (d); i++) - r = describe_variable (dict_get_var (d, i), t, r, 3, - DF_ALL & ~DF_AT_ATTRIBUTES); + tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Description")); + tab_text (t, 2, 0, TAB_LEFT | TAT_TITLE, _("Position")); + tab_hline (t, TAL_2, 0, 2, 1); - tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, 3, r); - tab_vline (t, TAL_1, 1, 0, r); - tab_vline (t, TAL_1, 3, 0, r); + table = &t->table; + for (i = 0; i < dict_get_var_cnt (d); i++) + table = table_vpaste (table, + describe_variable (dict_get_var (d, i), + DF_ALL & ~DF_AT_ATTRIBUTES)); - tab_resize (t, -1, r); - tab_submit (t); + table_item_submit (table_item_create (table, NULL /* XXX */)); dict_destroy (d); fh_unref (h); - return lex_end_of_command (lexer); + sfm_read_info_destroy (&info); + return CMD_SUCCESS; + +error: + fh_unref (h); + free (encoding); + return CMD_FAILURE; } /* DISPLAY utility. */ @@ -195,12 +274,10 @@ cmd_display (struct lexer *lexer, struct dataset *ds) return CMD_FAILURE; if (dict_get_label (dataset_dict (ds)) == NULL) tab_output_text (TAB_LEFT, - _("The active file does not have a file label.")); + _("The active dataset does not have a file label.")); else - { - tab_output_text (TAB_LEFT | TAT_TITLE, _("File label:")); - tab_output_text (TAB_LEFT | TAB_FIX, dict_get_label (dataset_dict (ds))); - } + tab_output_text_format (TAB_LEFT, _("File label: %s"), + dict_get_label (dataset_dict (ds))); } else { @@ -211,7 +288,7 @@ cmd_display (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "VECTORS")) { display_vectors (dataset_dict(ds), sorted); - return lex_end_of_command (lexer); + return CMD_SUCCESS; } else if (lex_match_id (lexer, "SCRATCH")) { @@ -234,34 +311,35 @@ cmd_display (struct lexer *lexer, struct dataset *ds) {"LABELS", DF_DICT_INDEX | DF_VARIABLE_LABELS}, {"NAMES", 0}, {"VARIABLES", - DF_DICT_INDEX | DF_FORMATS | DF_MISSING_VALUES | DF_MISC}, + DF_DICT_INDEX | DF_FORMATS | DF_MISSING_VALUES + | DF_MEASURE | DF_ROLE | DF_ALIGNMENT | DF_WIDTH}, {NULL, 0}, }; const struct subcommand *sbc; + struct dictionary *dict = dataset_dict (ds); flags = 0; for (sbc = subcommands; sbc->name != NULL; sbc++) if (lex_match_id (lexer, sbc->name)) { - flags = sbc->flags; + flags = sbc->flags & dict_display_mask (dict); break; } - lex_match (lexer, '/'); + lex_match (lexer, T_SLASH); lex_match_id (lexer, "VARIABLES"); - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); - if (lex_token (lexer) != '.') + if (lex_token (lexer) != T_ENDCMD) { - if (!parse_variables_const (lexer, dataset_dict (ds), &vl, &n, - PV_NONE)) + if (!parse_variables_const (lexer, dict, &vl, &n, PV_NONE)) { free (vl); return CMD_FAILURE; } } else - dict_get_vars (dataset_dict (ds), &vl, &n, 0); + dict_get_vars (dict, &vl, &n, 0); } if (n > 0) @@ -281,7 +359,7 @@ cmd_display (struct lexer *lexer, struct dataset *ds) flags); } - return lex_end_of_command (lexer); + return CMD_SUCCESS; } static void @@ -293,60 +371,69 @@ display_macros (void) static void display_documents (const struct dictionary *dict) { - const char *documents = dict_get_documents (dict); + const struct string_array *documents = dict_get_documents (dict); - if (documents == NULL) - tab_output_text (TAB_LEFT, _("The active file dictionary does not " + if (string_array_is_empty (documents)) + tab_output_text (TAB_LEFT, _("The active dataset dictionary does not " "contain any documents.")); else { - struct string line = DS_EMPTY_INITIALIZER; size_t i; tab_output_text (TAB_LEFT | TAT_TITLE, - _("Documents in the active file:")); + _("Documents in the active dataset:")); for (i = 0; i < dict_get_document_line_cnt (dict); i++) - { - dict_get_document_line (dict, i, &line); - tab_output_text (TAB_LEFT | TAB_FIX, ds_cstr (&line)); - } - ds_destroy (&line); + tab_output_text (TAB_LEFT | TAB_FIX, dict_get_document_line (dict, i)); } } +static int +count_columns (int flags) +{ + int nc = 1; + if (flags & ~DF_DICT_INDEX) + nc++; + if (flags & DF_DICT_INDEX) + nc++; + + return nc; +} + +static int +position_column (int flags) +{ + int pc = 1; + if (flags & ~DF_DICT_INDEX) + pc++; + return pc; +} + static void display_variables (const struct variable **vl, size_t n, int flags) { struct tab_table *t; - int nc; /* Number of columns. */ - int pc; /* `Position column' */ - int r; /* Current row. */ + struct table *table; size_t i; + int nc; - /* One column for the name, - two columns for general description, - one column for dictionary index. */ - nc = 1; - if (flags & ~DF_DICT_INDEX) - nc += 2; - pc = nc; - if (flags & DF_DICT_INDEX) - nc++; - - t = tab_create (nc, n + 5); + nc = count_columns (flags); + t = tab_create (nc, 1); tab_headers (t, 0, 0, 1, 0); tab_hline (t, TAL_2, 0, nc - 1, 1); tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Variable")); if (flags & ~DF_DICT_INDEX) - tab_joint_text (t, 1, 0, 2, 0, TAB_LEFT | TAT_TITLE, - (flags & ~(DF_DICT_INDEX | DF_VARIABLE_LABELS) - ? _("Description") : _("Label"))); + tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, + (flags & ~(DF_DICT_INDEX | DF_VARIABLE_LABELS) + ? _("Description") : _("Label"))); if (flags & DF_DICT_INDEX) - tab_text (t, pc, 0, TAB_LEFT | TAT_TITLE, _("Position")); + tab_text (t, position_column (flags), 0, TAB_LEFT | TAT_TITLE, + _("Position")); - r = 1; + table = &t->table; for (i = 0; i < n; i++) - r = describe_variable (vl[i], t, r, pc, flags); + table = table_vpaste (table, describe_variable (vl[i], flags)); + +#if 0 tab_hline (t, flags & ~DF_DICT_INDEX ? TAL_2 : TAL_1, 0, nc - 1, 1); if (flags) { @@ -355,8 +442,8 @@ display_variables (const struct variable **vl, size_t n, int flags) } if (flags & ~DF_DICT_INDEX) tab_vline (t, TAL_1, nc - 1, 0, r - 1); - tab_resize (t, -1, r); - tab_submit (t); +#endif + table_item_submit (table_item_create (table, NULL /* XXX */)); } static bool @@ -380,94 +467,108 @@ count_attributes (const struct attrset *set, int flags) return n_attrs; } -static void -display_attributes (struct tab_table *t, const struct attrset *set, int flags, - int c, int r) +static struct table * +describe_attributes (const struct attrset *set, int flags) { - struct attrset_iterator i; - struct attribute *attr; + struct attribute **attrs; + struct tab_table *t; + size_t n_attrs; + size_t i; + int r = 1; - for (attr = attrset_first (set, &i); attr != NULL; - attr = attrset_next (set, &i)) + t = tab_create (2, 1 + count_attributes (set, flags)); + tab_headers (t, 0, 0, 1, 0); + tab_box (t, TAL_1, TAL_1, -1, TAL_1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1); + tab_hline (t, TAL_1, 0, 1, 1); + tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Attribute")); + tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Value")); + + n_attrs = attrset_count (set); + attrs = attrset_sorted (set); + for (i = 0; i < n_attrs; i++) { + const struct attribute *attr = attrs[i]; const char *name = attribute_get_name (attr); size_t n_values; - size_t i; + size_t j; if (!(flags & DF_AT_ATTRIBUTES) && is_at_name (name)) continue; n_values = attribute_get_n_values (attr); - for (i = 0; i < n_values; i++) + for (j = 0; j < n_values; j++) { if (n_values > 1) - tab_text_format (t, c, r, TAB_LEFT, "%s[%d]", name, i + 1); + tab_text_format (t, 0, r, TAB_LEFT, "%s[%zu]", name, j + 1); else - tab_text (t, c, r, TAB_LEFT, name); - tab_text (t, c + 1, r, TAB_LEFT, attribute_get_value (attr, i)); + tab_text (t, 0, r, TAB_LEFT, name); + tab_text (t, 1, r, TAB_LEFT, attribute_get_value (attr, j)); r++; } } + free (attrs); + + return &t->table; } static void display_data_file_attributes (struct attrset *set, int flags) { - struct tab_table *t; - size_t n_attrs; + if (count_attributes (set, flags)) + table_item_submit (table_item_create (describe_attributes (set, flags), + _("Custom data file attributes."))); +} - n_attrs = count_attributes (set, flags); - if (!n_attrs) - return; +static struct table * +describe_value_labels (const struct variable *var) +{ + const struct val_labs *val_labs = var_get_value_labels (var); + size_t n_labels = val_labs_count (val_labs); + const struct val_lab **labels; + struct tab_table *t; + size_t i; - t = tab_create (2, n_attrs + 1); - tab_headers (t, 0, 0, 1, 0); + t = tab_create (2, n_labels + 1); tab_box (t, TAL_1, TAL_1, -1, TAL_1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1); - tab_hline (t, TAL_2, 0, 1, 1); - tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Attribute")); - tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Value")); - display_attributes (t, set, flags, 0, 1); - tab_title (t, "Custom data file attributes."); - tab_submit (t); + + tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Value")); + tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Label")); + + tab_hline (t, TAL_1, 0, 1, 1); + tab_vline (t, TAL_1, 1, 0, n_labels); + + labels = val_labs_sorted (val_labs); + for (i = 0; i < n_labels; i++) + { + const struct val_lab *vl = labels[i]; + + tab_value (t, 0, i + 1, TAB_NONE, &vl->value, var, NULL); + tab_text (t, 1, i + 1, TAB_LEFT, val_lab_get_escaped_label (vl)); + } + free (labels); + + return &t->table; } /* Puts a description of variable V into table T starting at row R. The variable will be described in the format given by FLAGS. Returns the next row available for use in the table. */ -static int -describe_variable (const struct variable *v, struct tab_table *t, int r, - int pc, int flags) +static struct table * +describe_variable (const struct variable *v, int flags) { - size_t n_attrs = 0; - int need_rows; - - /* Make sure that enough rows are allocated. */ - need_rows = 1; - if (flags & ~(DF_DICT_INDEX | DF_VARIABLE_LABELS)) - need_rows += 15; - if (flags & DF_VALUE_LABELS) - need_rows += val_labs_count (var_get_value_labels (v)); - if (flags & (DF_ATTRIBUTES | DF_AT_ATTRIBUTES)) - { - n_attrs = count_attributes (var_get_attributes (v), flags); - need_rows += n_attrs; - } - if (r + need_rows > tab_nr (t)) - { - int nr = MAX (r + need_rows, tab_nr (t) * 2); - tab_realloc (t, -1, nr); - } + struct table *table; + struct string s; - /* Put the name, var label, and position into the first row. */ - tab_text (t, 0, r, TAB_LEFT, var_get_name (v)); - if (flags & DF_DICT_INDEX) - tab_text_format (t, pc, r, 0, "%zu", var_get_dict_index (v) + 1); + ds_init_empty (&s); + /* Variable label. */ if (flags & DF_VARIABLE_LABELS && var_has_label (v)) { - tab_joint_text (t, 1, r, 2, r, TAB_LEFT, var_get_label (v)); - r++; + if (flags & ~(DF_DICT_INDEX | DF_VARIABLE_LABELS)) + ds_put_format (&s, _("Label: %s\n"), var_get_label (v)); + else + ds_put_format (&s, "%s\n", var_get_label (v)); } /* Print/write format, or print and write formats. */ @@ -475,153 +576,113 @@ describe_variable (const struct variable *v, struct tab_table *t, int r, { const struct fmt_spec *print = var_get_print_format (v); const struct fmt_spec *write = var_get_write_format (v); + char str[FMT_STRING_LEN_MAX + 1]; if (fmt_equal (print, write)) - { - char str[FMT_STRING_LEN_MAX + 1]; - tab_joint_text_format (t, 1, r, 2, r, TAB_LEFT, - _("Format: %s"), fmt_to_string (print, str)); - r++; - } + ds_put_format (&s, _("Format: %s\n"), fmt_to_string (print, str)); else { - char str[FMT_STRING_LEN_MAX + 1]; - tab_joint_text_format (t, 1, r, 2, r, TAB_LEFT, - _("Print Format: %s"), - fmt_to_string (print, str)); - r++; - tab_joint_text_format (t, 1, r, 2, r, TAB_LEFT, - _("Write Format: %s"), - fmt_to_string (write, str)); - r++; + ds_put_format (&s, _("Print Format: %s\n"), + fmt_to_string (print, str)); + ds_put_format (&s, _("Write Format: %s\n"), + fmt_to_string (write, str)); } } - - /* Measurement level, display width, alignment. */ - if (flags & DF_MISC) - { - enum measure m = var_get_measure (v); - enum alignment a = var_get_alignment (v); - - tab_joint_text_format (t, 1, r, 2, r, TAB_LEFT, - _("Measure: %s"), - m == MEASURE_NOMINAL ? _("Nominal") - : m == MEASURE_ORDINAL ? _("Ordinal") - : _("Scale")); - r++; - tab_joint_text_format (t, 1, r, 2, r, TAB_LEFT, - _("Display Alignment: %s"), - a == ALIGN_LEFT ? _("Left") - : a == ALIGN_CENTRE ? _("Center") - : _("Right")); - r++; - tab_joint_text_format (t, 1, r, 2, r, TAB_LEFT, - _("Display Width: %d"), - var_get_display_width (v)); - r++; - } - + + /* Measurement level, role, display width, alignment. */ + if (flags & DF_MEASURE) + ds_put_format (&s, _("Measure: %s\n"), + measure_to_string (var_get_measure (v))); + + if (flags & DF_ROLE) + ds_put_format (&s, _("Role: %s\n"), var_role_to_string (var_get_role (v))); + + + if (flags & DF_ALIGNMENT) + ds_put_format (&s, _("Display Alignment: %s\n"), + alignment_to_string (var_get_alignment (v))); + + if (flags & DF_WIDTH) + ds_put_format (&s, _("Display Width: %d\n"), var_get_display_width (v)); + /* Missing values if any. */ if (flags & DF_MISSING_VALUES && var_has_missing_values (v)) { const struct missing_values *mv = var_get_missing_values (v); - char buf[128]; - char *cp; int cnt = 0; int i; - cp = stpcpy (buf, _("Missing Values: ")); + ds_put_cstr (&s, _("Missing Values: ")); if (mv_has_range (mv)) { double x, y; mv_get_range (mv, &x, &y); if (x == LOWEST) - cp += sprintf (cp, "LOWEST THRU %g", y); + ds_put_format (&s, "LOWEST THRU %.*g", DBL_DIG + 1, y); else if (y == HIGHEST) - cp += sprintf (cp, "%g THRU HIGHEST", x); + ds_put_format (&s, "%.*g THRU HIGHEST", DBL_DIG + 1, x); else - cp += sprintf (cp, "%g THRU %g", x, y); + ds_put_format (&s, "%.*g THRU %.*g", + DBL_DIG + 1, x, + DBL_DIG + 1, y); cnt++; } for (i = 0; i < mv_n_values (mv); i++) { const union value *value = mv_get_value (mv, i); if (cnt++ > 0) - cp += sprintf (cp, "; "); + ds_put_cstr (&s, "; "); if (var_is_numeric (v)) - cp += sprintf (cp, "%g", value->f); + ds_put_format (&s, "%.*g", DBL_DIG + 1, value->f); else { int width = var_get_width (v); int mv_width = MIN (width, MV_MAX_STRING); - *cp++ = '"'; - memcpy (cp, value_str (value, width), mv_width); - cp += mv_width; - *cp++ = '"'; - *cp = '\0'; + ds_put_byte (&s, '"'); + memcpy (ds_put_uninit (&s, mv_width), + value_str (value, width), mv_width); + ds_put_byte (&s, '"'); } } - - tab_joint_text (t, 1, r, 2, r, TAB_LEFT, buf); - r++; + ds_put_byte (&s, '\n'); } - /* Value labels. */ - if (flags & DF_VALUE_LABELS && var_has_value_labels (v)) - { - const struct val_labs *val_labs = var_get_value_labels (v); - size_t n_labels = val_labs_count (val_labs); - const struct val_lab **labels; - int orig_r = r; - size_t i; - -#if 0 - tab_text (t, 1, r, TAB_LEFT, _("Value")); - tab_text (t, 2, r, TAB_LEFT, _("Label")); - r++; -#endif + ds_chomp_byte (&s, '\n'); - tab_hline (t, TAL_1, 1, 2, r); + table = (ds_is_empty (&s) + ? NULL + : table_from_string (TAB_LEFT, ds_cstr (&s))); + ds_destroy (&s); - labels = val_labs_sorted (val_labs); - for (i = 0; i < n_labels; i++) - { - const struct val_lab *vl = labels[i]; - char buf[MAX_STRING + 1]; + /* Value labels. */ + if (flags & DF_VALUE_LABELS && var_has_value_labels (v)) + table = table_vpaste (table, table_create_nested (describe_value_labels (v))); - if (var_is_alpha (v)) - { - int width = var_get_width (v); - memcpy (buf, value_str (&vl->value, width), width); - buf[width] = 0; - } - else - sprintf (buf, "%g", vl->value.f); - - tab_text (t, 1, r, TAB_NONE, buf); - tab_text (t, 2, r, TAB_LEFT, val_lab_get_label (vl)); - r++; - } - free (labels); + if (flags & (DF_ATTRIBUTES | DF_AT_ATTRIBUTES)) + { + struct attrset *attrs = var_get_attributes (v); - tab_vline (t, TAL_1, 2, orig_r, r - 1); + if (count_attributes (attrs, flags)) + table = table_vpaste ( + table, table_create_nested (describe_attributes (attrs, flags))); } - if (flags & (DF_ATTRIBUTES | DF_AT_ATTRIBUTES) && n_attrs) + if (table == NULL) + table = table_from_string (TAB_LEFT, ""); + + table = table_hpaste (table_from_string (0, var_get_name (v)), + table_stomp (table)); + if (flags & DF_DICT_INDEX) { - tab_joint_text (t, 1, r, 2, r, TAB_LEFT, "Custom attributes:"); - r++; + char s[INT_BUFSIZE_BOUND (size_t)]; - display_attributes (t, var_get_attributes (v), flags, 1, r); - r += n_attrs; + sprintf (s, "%zu", var_get_dict_index (v) + 1); + table = table_hpaste (table, table_from_string (0, s)); } - /* Draw a line below the last row of information on this variable. */ - tab_hline (t, TAL_1, 0, tab_nc (t) - 1, r); - - return r; + return table; } /* Display a list of vectors. If SORTED is nonzero then they are @@ -690,3 +751,375 @@ display_vectors (const struct dictionary *dict, int sorted) free (vl); } + +/* Encoding analysis. */ + +static const char *encoding_names[] = { + /* These encodings are from http://encoding.spec.whatwg.org/, as retrieved + February 2014. Encodings not supported by glibc and encodings relevant + only to HTML have been removed. */ + "utf-8", + "windows-1252", + "iso-8859-2", + "iso-8859-3", + "iso-8859-4", + "iso-8859-5", + "iso-8859-6", + "iso-8859-7", + "iso-8859-8", + "iso-8859-10", + "iso-8859-13", + "iso-8859-14", + "iso-8859-16", + "macintosh", + "windows-874", + "windows-1250", + "windows-1251", + "windows-1253", + "windows-1254", + "windows-1255", + "windows-1256", + "windows-1257", + "windows-1258", + "koi8-r", + "koi8-u", + "ibm866", + "gb18030", + "big5", + "euc-jp", + "iso-2022-jp", + "shift_jis", + "euc-kr", + + /* Added by user request. */ + "ibm850", + "din_66003", +}; +#define N_ENCODING_NAMES (sizeof encoding_names / sizeof *encoding_names) + +struct encoding + { + uint64_t encodings; + char **utf8_strings; + unsigned int hash; + }; + +static char ** +recode_strings (struct pool *pool, + char **strings, bool *ids, size_t n, + const char *encoding) +{ + char **utf8_strings; + size_t i; + + utf8_strings = pool_alloc (pool, n * sizeof *utf8_strings); + for (i = 0; i < n; i++) + { + struct substring utf8; + int error; + + error = recode_pedantically ("UTF-8", encoding, ss_cstr (strings[i]), + pool, &utf8); + if (!error) + { + ss_rtrim (&utf8, ss_cstr (" ")); + utf8.string[utf8.length] = '\0'; + + if (ids[i] && !id_is_plausible (utf8.string, false)) + error = EINVAL; + } + + if (error) + return NULL; + + utf8_strings[i] = utf8.string; + } + + return utf8_strings; +} + +static struct encoding * +find_duplicate_encoding (struct encoding *encodings, size_t n_encodings, + char **utf8_strings, size_t n_strings, + unsigned int hash) +{ + struct encoding *e; + + for (e = encodings; e < &encodings[n_encodings]; e++) + { + int i; + + if (e->hash != hash) + goto next_encoding; + + for (i = 0; i < n_strings; i++) + if (strcmp (utf8_strings[i], e->utf8_strings[i])) + goto next_encoding; + + return e; + next_encoding:; + } + + return NULL; +} + +static bool +all_equal (const struct encoding *encodings, size_t n_encodings, + size_t string_idx) +{ + const char *s0; + size_t i; + + s0 = encodings[0].utf8_strings[string_idx]; + for (i = 1; i < n_encodings; i++) + if (strcmp (s0, encodings[i].utf8_strings[string_idx])) + return false; + + return true; +} + +static int +equal_prefix (const struct encoding *encodings, size_t n_encodings, + size_t string_idx) +{ + const char *s0; + size_t prefix; + size_t i; + + s0 = encodings[0].utf8_strings[string_idx]; + prefix = strlen (s0); + for (i = 1; i < n_encodings; i++) + { + const char *si = encodings[i].utf8_strings[string_idx]; + size_t j; + + for (j = 0; j < prefix; j++) + if (s0[j] != si[j]) + { + prefix = j; + if (!prefix) + return 0; + break; + } + } + + while (prefix > 0 && s0[prefix - 1] != ' ') + prefix--; + return prefix; +} + +static int +equal_suffix (const struct encoding *encodings, size_t n_encodings, + size_t string_idx) +{ + const char *s0; + size_t s0_len; + size_t suffix; + size_t i; + + s0 = encodings[0].utf8_strings[string_idx]; + s0_len = strlen (s0); + suffix = s0_len; + for (i = 1; i < n_encodings; i++) + { + const char *si = encodings[i].utf8_strings[string_idx]; + size_t si_len = strlen (si); + size_t j; + + if (si_len < suffix) + suffix = si_len; + for (j = 0; j < suffix; j++) + if (s0[s0_len - j - 1] != si[si_len - j - 1]) + { + suffix = j; + if (!suffix) + return 0; + break; + } + } + + while (suffix > 0 && s0[s0_len - suffix] != ' ') + suffix--; + return suffix; +} + +static void +report_encodings (const struct file_handle *h, const struct sfm_reader *r) +{ + char **titles; + char **strings; + bool *ids; + struct encoding encodings[N_ENCODING_NAMES]; + size_t n_encodings, n_strings, n_unique_strings; + size_t i, j; + struct tab_table *t; + struct text_item *text; + struct pool *pool; + size_t row; + + pool = pool_create (); + n_strings = sfm_get_strings (r, pool, &titles, &ids, &strings); + + n_encodings = 0; + for (i = 0; i < N_ENCODING_NAMES; i++) + { + char **utf8_strings; + struct encoding *e; + unsigned int hash; + + utf8_strings = recode_strings (pool, strings, ids, n_strings, + encoding_names[i]); + if (!utf8_strings) + continue; + + /* Hash utf8_strings. */ + hash = 0; + for (j = 0; j < n_strings; j++) + hash = hash_string (utf8_strings[j], hash); + + /* If there's a duplicate encoding, just mark it. */ + e = find_duplicate_encoding (encodings, n_encodings, + utf8_strings, n_strings, hash); + if (e) + { + e->encodings |= UINT64_C (1) << i; + continue; + } + + e = &encodings[n_encodings++]; + e->encodings = UINT64_C (1) << i; + e->utf8_strings = utf8_strings; + e->hash = hash; + } + if (!n_encodings) + { + msg (SW, _("No valid encodings found.")); + pool_destroy (pool); + return; + } + + text = text_item_create_format ( + TEXT_ITEM_PARAGRAPH, + _("The following table lists the encodings that can successfully read %s, " + "by specifying the encoding name on the GET command's ENCODING " + "subcommand. Encodings that yield identical text are listed " + "together."), fh_get_name (h)); + text_item_submit (text); + + t = tab_create (2, n_encodings + 1); + tab_title (t, _("Usable encodings for %s."), fh_get_name (h)); + tab_headers (t, 1, 0, 1, 0); + tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, 1, n_encodings); + tab_hline (t, TAL_1, 0, 1, 1); + tab_text (t, 0, 0, TAB_RIGHT, "#"); + tab_text (t, 1, 0, TAB_LEFT, _("Encodings")); + for (i = 0; i < n_encodings; i++) + { + struct string s; + + ds_init_empty (&s); + for (j = 0; j < 64; j++) + if (encodings[i].encodings & (UINT64_C (1) << j)) + ds_put_format (&s, "%s, ", encoding_names[j]); + ds_chomp (&s, ss_cstr (", ")); + + tab_text_format (t, 0, i + 1, TAB_RIGHT, "%zu", i + 1); + tab_text (t, 1, i + 1, TAB_LEFT, ds_cstr (&s)); + ds_destroy (&s); + } + tab_submit (t); + + n_unique_strings = 0; + for (i = 0; i < n_strings; i++) + if (!all_equal (encodings, n_encodings, i)) + n_unique_strings++; + if (!n_unique_strings) + { + pool_destroy (pool); + return; + } + + text = text_item_create_format ( + TEXT_ITEM_PARAGRAPH, + _("The following table lists text strings in the file dictionary that " + "the encodings above interpret differently, along with those " + "interpretations.")); + text_item_submit (text); + + t = tab_create (3, (n_encodings * n_unique_strings) + 1); + tab_title (t, _("%s encoded text strings."), fh_get_name (h)); + tab_headers (t, 1, 0, 1, 0); + tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, 2, n_encodings * n_unique_strings); + tab_hline (t, TAL_1, 0, 2, 1); + + tab_text (t, 0, 0, TAB_LEFT, _("Purpose")); + tab_text (t, 1, 0, TAB_RIGHT, "#"); + tab_text (t, 2, 0, TAB_LEFT, _("Text")); + + row = 1; + for (i = 0; i < n_strings; i++) + if (!all_equal (encodings, n_encodings, i)) + { + int prefix = equal_prefix (encodings, n_encodings, i); + int suffix = equal_suffix (encodings, n_encodings, i); + + tab_joint_text (t, 0, row, 0, row + n_encodings - 1, + TAB_LEFT, titles[i]); + tab_hline (t, TAL_1, 0, 2, row); + for (j = 0; j < n_encodings; j++) + { + const char *s = encodings[j].utf8_strings[i] + prefix; + + tab_text_format (t, 1, row, TAB_RIGHT, "%zu", j + 1); + if (prefix || suffix) + { + size_t len = strlen (s) - suffix; + struct string entry; + + ds_init_empty (&entry); + if (prefix) + ds_put_cstr (&entry, "..."); + ds_put_substring (&entry, ss_buffer (s, len)); + if (suffix) + ds_put_cstr (&entry, "..."); + tab_text (t, 2, row, TAB_LEFT, ds_cstr (&entry)); + } + else + tab_text (t, 2, row, TAB_LEFT, s); + row++; + } + } + tab_submit (t); + + pool_destroy (pool); +} + +static unsigned int +dict_display_mask (const struct dictionary *d) +{ + size_t n_vars = dict_get_var_cnt (d); + unsigned int mask; + size_t i; + + mask = DF_ALL & ~(DF_MEASURE | DF_ROLE | DF_ALIGNMENT | DF_WIDTH); + for (i = 0; i < n_vars; i++) + { + const struct variable *v = dict_get_var (d, i); + enum val_type val = var_get_type (v); + int width = var_get_width (v); + + if (var_get_measure (v) != var_default_measure (val)) + mask |= DF_MEASURE; + + if (var_get_role (v) != ROLE_INPUT) + mask |= DF_ROLE; + + if (var_get_alignment (v) != var_default_alignment (val)) + mask |= DF_ALIGNMENT; + + if (var_get_display_width (v) != var_default_display_width (width)) + mask |= DF_WIDTH; + } + + return mask; +}