discourage its use PSPP and PSPPIRE do not directly read or write
this format.
+ * New commands:
+
+ - SORT VARIABLES.
+
* The following functions for transformation expressions are new:
- REPLACE, for search-and-replace of one string with another.
* NUMERIC:: Create new numeric variables.
* PRINT FORMATS:: Set variable print formats.
* RENAME VARIABLES:: Rename variables.
+* SORT VARIABLES:: Reorder variables.
* VALUE LABELS:: Set value labels for variables.
* STRING:: Create new string variables.
* VARIABLE ATTRIBUTE:: Set custom attributes on variables.
@cmd{RENAME VARIABLES} may not be specified following @cmd{TEMPORARY}
(@pxref{TEMPORARY}).
+@node SORT VARIABLES
+@section SORT VARIABLES
+@vindex SORT VARIABLES
+
+@display
+SORT VARIABLES [BY]
+ (NAME | TYPE | FORMAT | LABEL | VALUES | MISSING | MEASURE
+ | ROLE | COLUMNS | ALIGNMENT | ATTRIBUTE @var{name})
+ [(D)].
+@end display
+
+@cmd{SORT VARIABLES} reorders the variables in the active dataset.
+The main specification is one of the following identifiers, which
+determines how the variables are sorted:
+
+@table @asis
+@item NAME
+Sorts the variables according to their names, in a case-insensitive
+fashion. However, when variable names differ only in a number at the
+end, they are sorted numerically. For example, @code{VAR5} is sorted
+before @code{VAR400} even though @samp{4} precedes @samp{5}.
+
+@item TYPE
+Sorts numeric variables before string variables, and shorter string
+variables before longer ones.
+
+@item FORMAT
+Groups variables by print format; within a format, sorts narrower
+formats before wider ones; with the same format and width, sorts fewer
+decimal places before more decimal places.
+@xref{FORMATS}.
+
+@item LABEL
+Sorts variables without a variable label before those with one.
+Variables that have labels are sorted by label, in a case-insensitive
+fashion.
+@xref{VARIABLE LABELS}.
+
+@item VALUES
+Sorts variables without value labels before those with some.
+@xref{VALUE LABELS}.
+
+@item MISSING
+Sorts variables without missing values before those with some.
+@xref{MISSING VALUES}.
+
+@item MEASURE
+Sorts nominal variables first, followed by ordinal variables, followed
+by scale variables. @xref{VARIABLE LEVEL}.
+
+@item ROLE
+Groups variables according to their role. @xref{VARIABLE ROLE}.
+
+@item COLUMNS
+Sorts variables in ascending display width. @xref{VARIABLE WIDTH}.
+
+@item ALIGNMENT
+Sorts variables according to their alignment, first left-aligned, then
+right-aligned, then centered. @xref{VARIABLE ALIGNMENT}.
+
+@item ATTRIBUTE @var{name}
+Sorts variables according to the first value of their @var{name}
+attribute, in a case-insensitive fashion.  Variables that do not have
+the attribute are sorted first.
+@xref{VARIABLE ATTRIBUTE}.
+@end table
+
+Only one sort criterion can be specified. The sort is ``stable,'' so
+to sort on multiple criteria one may perform multiple sorts. For
+example, the following will sort primarily based on alignment, with
+variables that have the same alignment ordered based on display width:
+
+@example
+SORT VARIABLES BY COLUMNS.
+SORT VARIABLES BY ALIGNMENT.
+@end example
+
+Specify @code{(D)} to reverse the sort order.
+
@node VALUE LABELS
@section VALUE LABELS
@vindex VALUE LABELS
/* PSPP - a program for statistical analysis.
- Copyright (C) 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2011, 2012, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
case-insensitively, or a null pointer if SET does not contain
an attribute with that name. */
struct attribute *
-attrset_lookup (struct attrset *set, const char *name)
+attrset_lookup (const struct attrset *set, const char *name)
{
- struct attribute *attr;
+ const struct attribute *attr;
HMAP_FOR_EACH_WITH_HASH (attr, struct attribute, node,
utf8_hash_case_string (name, 0), &set->map)
if (!utf8_strcasecmp (attribute_get_name (attr), name))
break;
- return attr;
+ return CONST_CAST (struct attribute *, attr);
}
/* Adds ATTR to SET, which must not already contain an attribute
/* PSPP - a program for statistical analysis.
- Copyright (C) 2008, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2011, 2012, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
size_t attrset_count (const struct attrset *);
-struct attribute *attrset_lookup (struct attrset *, const char *);
+struct attribute *attrset_lookup (const struct attrset *, const char *);
void attrset_add (struct attrset *, struct attribute *);
void attrset_delete (struct attrset *, const char *);
void attrset_clear (struct attrset *);
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006, 2009, 2010, 2011, 2013 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2009, 2010, 2011, 2013, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
DEF_CMD (S_DATA, 0, "SAVE", cmd_save)
DEF_CMD (S_DATA, 0, "SAVE TRANSLATE", cmd_save_translate)
DEF_CMD (S_DATA, 0, "SORT CASES", cmd_sort_cases)
+DEF_CMD (S_DATA, 0, "SORT VARIABLES", cmd_sort_variables)
DEF_CMD (S_DATA, 0, "T-TEST", cmd_t_test)
DEF_CMD (S_DATA, 0, "TEMPORARY", cmd_temporary)
DEF_CMD (S_DATA, 0, "USE", cmd_use)
src/language/dictionary/mrsets.c \
src/language/dictionary/numeric.c \
src/language/dictionary/rename-variables.c \
+ src/language/dictionary/sort-variables.c \
src/language/dictionary/split-file.c \
src/language/dictionary/split-file.h \
src/language/dictionary/sys-file-info.c \
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdlib.h>
+
+#include "data/attributes.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "libpspp/array.h"
+#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
+
+#include "gl/xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+enum key /* Variable property that SORT VARIABLES orders by. */
+ {
+ K_NAME, /* Variable name (compared with utf8_strverscasecmp). */
+ K_TYPE, /* Variable width, as reported by var_get_width. */
+ K_FORMAT, /* Print format: type, then width, then decimals. */
+ K_VAR_LABEL, /* Variable label, case-insensitive; no label counts as "". */
+ K_VALUE_LABELS, /* Whether the variable has any value labels. */
+ K_MISSING_VALUES, /* Whether the variable has any missing values. */
+ K_MEASURE, /* Measurement level, ranked by map_measure. */
+ K_ROLE, /* Variable role, ranked by map_role. */
+ K_COLUMNS, /* Display width. */
+ K_ALIGNMENT, /* Display alignment, ranked by map_alignment. */
+ K_ATTRIBUTE, /* First value of a named custom attribute. */
+ };
+
+struct criterion /* One parsed SORT VARIABLES sort specification. */
+ {
+ enum key key; /* Property to sort on. */
+ char *attr_name; /* Attribute name for K_ATTRIBUTE (xstrdup'd by the parser), otherwise NULL. */
+ bool descending; /* If true, compare_vars negates its result. */
+ };
+
+/* Three-way comparison of A and B: returns -1 if A is less than B, 1 if A is
+   greater than B, and 0 if they are equal. */
+static int
+compare_ints (int a, int b)
+{
+  if (a < b)
+    return -1;
+  return a > b ? 1 : 0;
+}
+
+/* Three-way comparison of format specifications A and B.  The formats are
+   ordered first by fmt_to_io() of their types, then by width (w), then by
+   number of decimals (d). */
+static int
+compare_formats (const struct fmt_spec *a, const struct fmt_spec *b)
+{
+  int by_type = compare_ints (fmt_to_io (a->type), fmt_to_io (b->type));
+  if (by_type)
+    return by_type;
+
+  int by_width = compare_ints (a->w, b->w);
+  if (by_width)
+    return by_width;
+
+  return compare_ints (a->d, b->d);
+}
+
+/* Compares the variable labels of A and B case-insensitively.  A variable
+   for which var_get_label() returns a null pointer is treated as if its label
+   were the empty string. */
+static int
+compare_var_labels (const struct variable *a, const struct variable *b)
+{
+  const char *label_a = var_get_label (a);
+  const char *label_b = var_get_label (b);
+
+  if (!label_a)
+    label_a = "";
+  if (!label_b)
+    label_b = "";
+
+  return utf8_strcasecmp (label_a, label_b);
+}
+
+/* Maps measurement level M to its sort rank: nominal is 0, ordinal is 1, and
+   anything else is 2. */
+static int
+map_measure (enum measure m)
+{
+  switch (m)
+    {
+    case MEASURE_NOMINAL:
+      return 0;
+
+    case MEASURE_ORDINAL:
+      return 1;
+
+    default:
+      return 2;
+    }
+}
+
+/* Maps variable role R to its sort rank: input, target, both, none, and
+   partition are ranked 0 through 4 in that order, and any other role is
+   ranked 5. */
+static int
+map_role (enum var_role r)
+{
+  switch (r)
+    {
+    case ROLE_INPUT:
+      return 0;
+
+    case ROLE_TARGET:
+      return 1;
+
+    case ROLE_BOTH:
+      return 2;
+
+    case ROLE_NONE:
+      return 3;
+
+    case ROLE_PARTITION:
+      return 4;
+
+    default:
+      return 5;
+    }
+}
+
+/* Returns the first value (index 0) of V's attribute named NAME.  Returns
+   the empty string, never a null pointer, if V has no such attribute or if
+   the attribute has no first value. */
+static const char *
+get_attribute (const struct variable *v, const char *name)
+{
+  const struct attribute *attr = attrset_lookup (var_get_attributes (v),
+                                                 name);
+  if (!attr)
+    return "";
+
+  const char *first = attribute_get_value (attr, 0);
+  if (!first)
+    return "";
+  return first;
+}
+
+/* Maps alignment A to its sort rank: left is 0, right is 1, and anything
+   else is 2. */
+static int
+map_alignment (enum alignment a)
+{
+  switch (a)
+    {
+    case ALIGN_LEFT:
+      return 0;
+
+    case ALIGN_RIGHT:
+      return 1;
+
+    default:
+      return 2;
+    }
+}
+
+/* Comparison callback for sorting an array of "struct variable *".
+
+   A_ and B_ point to the two array elements to compare and C_ points to the
+   "struct criterion" that selects the sort key and direction.  Returns a
+   negative value if *A_ should sort before *B_ and a positive value if it
+   should sort after.
+
+   Variables that compare equal on the sort key are ordered by their current
+   dictionary index, which is what makes the overall sort stable.  When the
+   criterion is descending the whole result is negated, tie-break included,
+   so that a descending sort yields exactly the reverse of the ascending
+   one. */
+static int
+compare_vars (const void *a_, const void *b_, const void *c_)
+{
+  const struct variable *const *ap = a_;
+  const struct variable *const *bp = b_;
+  const struct variable *a = *ap;
+  const struct variable *b = *bp;
+  const struct criterion *c = c_;
+
+  int retval;
+  switch (c->key)
+    {
+    case K_NAME:
+      retval = utf8_strverscasecmp (var_get_name (a), var_get_name (b));
+      break;
+
+    case K_TYPE:
+      retval = compare_ints (var_get_width (a), var_get_width (b));
+      break;
+
+    case K_FORMAT:
+      retval = compare_formats (var_get_print_format (a),
+                                var_get_print_format (b));
+      break;
+
+    case K_VAR_LABEL:
+      retval = compare_var_labels (a, b);
+      break;
+
+    case K_VALUE_LABELS:
+      retval = compare_ints (var_has_value_labels (a),
+                             var_has_value_labels (b));
+      break;
+
+    case K_MISSING_VALUES:
+      retval = compare_ints (var_has_missing_values (a),
+                             var_has_missing_values (b));
+      break;
+
+    case K_MEASURE:
+      retval = compare_ints (map_measure (var_get_measure (a)),
+                             map_measure (var_get_measure (b)));
+      break;
+
+    case K_ROLE:
+      retval = compare_ints (map_role (var_get_role (a)),
+                             map_role (var_get_role (b)));
+      break;
+
+    case K_COLUMNS:
+      retval = compare_ints (var_get_display_width (a),
+                             var_get_display_width (b));
+      break;
+
+    case K_ALIGNMENT:
+      retval = compare_ints (map_alignment (var_get_alignment (a)),
+                             map_alignment (var_get_alignment (b)));
+      break;
+
+    case K_ATTRIBUTE:
+      retval = utf8_strcasecmp (get_attribute (a, c->attr_name),
+                                get_attribute (b, c->attr_name));
+      break;
+
+    default:
+      NOT_REACHED ();
+    }
+
+  /* Make this a stable sort by breaking ties on the variables' positions in
+     the dictionary.  Comparing the variable pointers themselves would order
+     ties by memory address rather than by dictionary position, and
+     relational comparison of pointers to unrelated objects is undefined
+     behavior in C. */
+  if (!retval)
+    {
+      size_t a_index = var_get_dict_index (a);
+      size_t b_index = var_get_dict_index (b);
+      retval = a_index < b_index ? -1 : a_index > b_index;
+    }
+
+  if (c->descending)
+    retval = -retval;
+
+  return retval;
+}
+
+/* Performs SORT VARIABLES command.
+
+   Parses an optional BY, a mandatory sort key (NAME, TYPE, FORMAT, LABEL,
+   VALUES, MISSING, MEASURE, ROLE, COLUMNS, ALIGNMENT, or ATTRIBUTE followed
+   by an attribute name), and an optional parenthesized direction (A or UP
+   for ascending, the default; D or DOWN for descending).  Then reorders the
+   variables in DS's dictionary accordingly.
+
+   Returns CMD_SUCCESS if the variables were reordered, CMD_FAILURE if the
+   syntax was invalid (in which case the dictionary is left untouched). */
+int
+cmd_sort_variables (struct lexer *lexer, struct dataset *ds)
+{
+  enum cmd_result result = CMD_FAILURE;
+
+  lex_match (lexer, T_BY);
+
+  /* Parse sort key. */
+  struct criterion c = { .attr_name = NULL };
+  if (lex_match_id (lexer, "NAME"))
+    c.key = K_NAME;
+  else if (lex_match_id (lexer, "TYPE"))
+    c.key = K_TYPE;
+  else if (lex_match_id (lexer, "FORMAT"))
+    c.key = K_FORMAT;
+  else if (lex_match_id (lexer, "LABEL"))
+    c.key = K_VAR_LABEL;
+  else if (lex_match_id (lexer, "VALUES"))
+    c.key = K_VALUE_LABELS;
+  else if (lex_match_id (lexer, "MISSING"))
+    c.key = K_MISSING_VALUES;
+  else if (lex_match_id (lexer, "MEASURE"))
+    c.key = K_MEASURE;
+  else if (lex_match_id (lexer, "ROLE"))
+    c.key = K_ROLE;
+  else if (lex_match_id (lexer, "COLUMNS"))
+    c.key = K_COLUMNS;
+  else if (lex_match_id (lexer, "ALIGNMENT"))
+    c.key = K_ALIGNMENT;
+  else if (lex_match_id (lexer, "ATTRIBUTE"))
+    {
+      if (!lex_force_id (lexer))
+        goto exit;
+      c.key = K_ATTRIBUTE;
+      /* Copy the name: the token's string does not outlive lex_get(). */
+      c.attr_name = xstrdup (lex_tokcstr (lexer));
+      lex_get (lexer);
+    }
+  else
+    {
+      /* A sort key is required.  Without this branch a missing or
+         misspelled key would silently leave C.key at its zero value and
+         sort by name. */
+      lex_error (lexer, NULL);
+      goto exit;
+    }
+
+  /* Parse sort direction. */
+  if (lex_match (lexer, T_LPAREN))
+    {
+      if (lex_match_id (lexer, "A") || lex_match_id (lexer, "UP"))
+        c.descending = false;
+      else if (lex_match_id (lexer, "D") || lex_match_id (lexer, "DOWN"))
+        c.descending = true;
+      else
+        {
+          lex_error (lexer, NULL);
+          goto exit;
+        }
+      if (!lex_force_match (lexer, T_RPAREN))
+        goto exit;
+    }
+  else
+    c.descending = false;
+
+  /* Sort variables.  VARS is a separate array that this function owns and
+     frees; the dictionary itself only changes in dict_reorder_vars(). */
+  struct dictionary *d = dataset_dict (ds);
+  struct variable **vars;
+  size_t n_vars;
+  dict_get_vars_mutable (d, &vars, &n_vars, 0);
+  sort (vars, n_vars, sizeof *vars, compare_vars, &c);
+  dict_reorder_vars (d, CONST_CAST (struct variable *const *, vars), n_vars);
+  free (vars);
+
+  result = CMD_SUCCESS;
+
+exit:
+  free (c.attr_name);
+  return result;
+}
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "libpspp/str.h"
#include "libpspp/version.h"
+#include "gl/c-ctype.h"
#include "gl/c-strcase.h"
#include "gl/localcharset.h"
#include "gl/minmax.h"
return result;
}
+/* Returns true if every one of the LEN bytes starting at S is a digit
+   according to c_isdigit(), false otherwise.  Returns true if LEN is zero. */
+static bool
+is_all_digits (const uint8_t *s, size_t len)
+{
+  const uint8_t *end = s + len;
+  while (s < end)
+    if (!c_isdigit (*s++))
+      return false;
+  return true;
+}
+
+/* Compares UTF-8 strings A and B case-insensitively.  If the strings are
+   identical up to a decimal number that ends each of them, then those
+   numbers are compared by value, ignoring leading zeros, so that, e.g.,
+   "VAR5" sorts before "VAR400" and "VAR19" sorts before "VAR1000".
+
+   Returns a negative value if A < B, zero if A == B, positive if A > B.  May
+   also return zero when A and B differ only in the leading zeros of such a
+   trailing number (e.g. "VAR01" and "VAR1").
+
+   If case folding fails for either string, falls back to a plain strcmp()
+   of the original strings. */
+int
+utf8_strverscasecmp (const char *a, const char *b)
+{
+  /* Normalize A.  u8_casefold() uses the stub buffer if the result fits and
+     otherwise returns a newly allocated buffer, which is freed below. */
+  uint8_t a_stub[64];
+  size_t a_len = sizeof a_stub;
+  uint8_t *a_norm = u8_casefold (CHAR_CAST (uint8_t *, a), strlen (a), NULL,
+                                 UNINORM_NFKD, a_stub, &a_len);
+
+  /* Normalize B. */
+  uint8_t b_stub[64];
+  size_t b_len = sizeof b_stub;
+  uint8_t *b_norm = u8_casefold (CHAR_CAST (uint8_t *, b), strlen (b), NULL,
+                                 UNINORM_NFKD, b_stub, &b_len);
+
+  int result;
+  if (!a_norm || !b_norm)
+    {
+      result = strcmp (a, b);
+      goto exit;
+    }
+
+  size_t len = MIN (a_len, b_len);
+  for (size_t i = 0; i < len; i++)
+    if (a_norm[i] != b_norm[i])
+      {
+        /* If both strings end in digits, compare them numerically. */
+        if (is_all_digits (&a_norm[i], a_len - i)
+            && is_all_digits (&b_norm[i], b_len - i))
+          {
+            /* The first difference may fall in the middle of the number,
+               e.g. at the second digit of "VAR1000" versus "VAR19".  Back up
+               to the start of the digit run, which is the same position in
+               both strings because they are identical before I, so that the
+               complete numbers are compared rather than just their tails. */
+            size_t start = i;
+            while (start > 0 && c_isdigit (a_norm[start - 1]))
+              start--;
+
+            /* Strip leading zeros, since those don't matter for numerical
+               comparison. */
+            size_t ap = start;
+            while (ap < a_len && a_norm[ap] == '0')
+              ap++;
+            size_t bp = start;
+            while (bp < b_len && b_norm[bp] == '0')
+              bp++;
+
+            /* The number with more digits, if there is one, is larger.
+               Otherwise the digit strings have equal length and bytewise
+               order is numerical order. */
+            size_t a_digits = a_len - ap;
+            size_t b_digits = b_len - bp;
+            if (a_digits != b_digits)
+              result = a_digits > b_digits ? 1 : -1;
+            else
+              result = memcmp (&a_norm[ap], &b_norm[bp], a_digits);
+          }
+        else
+          result = a_norm[i] > b_norm[i] ? 1 : -1;
+        goto exit;
+      }
+
+  /* One string is a prefix of the other (or they are equal): the shorter
+     one sorts first. */
+  result = a_len < b_len ? -1 : a_len > b_len;
+
+exit:
+  if (a_norm != a_stub)
+    free (a_norm);
+  if (b_norm != b_stub)
+    free (b_norm);
+  return result;
+}
+
static char *
utf8_casemap (const char *s,
uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2010, 2011, 2012, 2014, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
unsigned int utf8_hash_case_string (const char *, unsigned int basis);
int utf8_strcasecmp (const char *, const char *);
int utf8_strncasecmp (const char *, size_t, const char *, size_t);
+int utf8_strverscasecmp (const char *, const char *);
char *utf8_to_upper (const char *);
char *utf8_to_lower (const char *);
\f
tests/language/dictionary/missing-values.at \
tests/language/dictionary/mrsets.at \
tests/language/dictionary/rename-variables.at \
+ tests/language/dictionary/sort-variables.at \
tests/language/dictionary/split-file.at \
tests/language/dictionary/sys-file-info.at \
tests/language/dictionary/value-labels.at \