updates to gnumeric-reader.c.
* dictionary.c (make_hinted_name): New function.
(make_numeric_name): New function.
(dict_make_unique_var_name): New function.
* gnumeric-reader.c (devise_name): Removed.
(munge_name): Removed.
(gnumeric_open_reader): Use new function
dict_make_unique_var_name.
* short-names.c (set_var_short_name_suffix): Use new function
str_format_26adic.
+2008-02-01 Ben Pfaff <blp@gnu.org>
+
+ Patch #6386. Thanks to John Darrington for review and for the
+ updates to gnumeric-reader.c.
+
+ * dictionary.c (make_hinted_name): New function.
+ (make_numeric_name): New function.
+ (dict_make_unique_var_name): New function.
+
+ * gnumeric-reader.c (devise_name): Removed.
+ (munge_name): Removed.
+ (gnumeric_open_reader): Use new function
+ dict_make_unique_var_name.
+
+ * short-names.c (set_var_short_name_suffix): Use new function
+ str_format_26adic.
+
2008-01-19 John Darrington <john@darrington.wattle.id.au>
* settings.c settings.h: Moved static variables into a
#include "case.h"
#include "category.h"
+#include "identifier.h"
#include "settings.h"
#include "value-labels.h"
#include "vardict.h"
#include "variable.h"
#include "vector.h"
#include <libpspp/array.h>
+#include <libpspp/assertion.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
#include <libpspp/message.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>
+#include "intprops.h"
#include "minmax.h"
#include "xalloc.h"
return true;
}
+static bool
+make_hinted_name (const struct dictionary *dict, const char *hint,
+ char name[VAR_NAME_LEN + 1])
+{
+ bool dropped = false;
+ char *cp;
+
+ for (cp = name; *hint && cp < name + VAR_NAME_LEN; hint++)
+ {
+ if (cp == name
+ ? lex_is_id1 (*hint) && *hint != '$'
+ : lex_is_idn (*hint))
+ {
+ if (dropped)
+ {
+ *cp++ = '_';
+ dropped = false;
+ }
+ if (cp < name + VAR_NAME_LEN)
+ *cp++ = *hint;
+ }
+ else if (cp > name)
+ dropped = true;
+ }
+ *cp = '\0';
+
+ if (name[0] != '\0')
+ {
+ size_t len = strlen (name);
+ unsigned long int i;
+
+ if (dict_lookup_var (dict, name) == NULL)
+ return true;
+
+ for (i = 0; i < ULONG_MAX; i++)
+ {
+ char suffix[INT_BUFSIZE_BOUND (i) + 1];
+ int ofs;
+
+ suffix[0] = '_';
+ if (!str_format_26adic (i + 1, &suffix[1], sizeof suffix - 1))
+ NOT_REACHED ();
+
+ ofs = MIN (VAR_NAME_LEN - strlen (suffix), len);
+ strcpy (&name[ofs], suffix);
+
+ if (dict_lookup_var (dict, name) == NULL)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool
+make_numeric_name (const struct dictionary *dict, unsigned long int *num_start,
+ char name[VAR_NAME_LEN + 1])
+{
+ unsigned long int number;
+
+ for (number = num_start != NULL ? MAX (*num_start, 1) : 1;
+ number < ULONG_MAX;
+ number++)
+ {
+ sprintf (name, "VAR%03lu", number);
+ if (dict_lookup_var (dict, name) == NULL)
+ {
+ if (num_start != NULL)
+ *num_start = number + 1;
+ return true;
+ }
+ }
+
+ if (num_start != NULL)
+ *num_start = ULONG_MAX;
+ return false;
+}
+
+
+/* Attempts to devise a variable name unique within DICT.
+ Returns true if successful, in which case the new variable
+ name is stored into NAME. Returns false if all names that can
+ be generated have already been taken. (Returning false is
+ quite unlikely: at least ULONG_MAX unique names can be
+ generated.)
+
+ HINT, if it is non-null, is used as a suggestion that will be
+ modified for suitability as a variable name and for
+ uniqueness.
+
+ If HINT is null or entirely unsuitable, a name in the form
+ "VAR%03d" will be generated, where the smallest unused integer
+ value is used. If NUM_START is non-null, then its value is
+ used as the minimum numeric value to check, and it is updated
+ to the next value to be checked.
+ */
+bool
+dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
+ unsigned long int *num_start,
+ char name[VAR_NAME_LEN + 1])
+{
+ return ((hint != NULL && make_hinted_name (dict, hint, name))
+ || make_numeric_name (dict, num_start, name));
+}
+
/* Returns the weighting variable in dictionary D, or a null
pointer if the dictionary is unweighted. */
struct variable *
void dict_reorder_vars (struct dictionary *,
struct variable *const *, size_t count);
-/* Changing variable names. */
+/* Variable names. */
void dict_rename_var (struct dictionary *, struct variable *, const char *);
bool dict_rename_vars (struct dictionary *,
struct variable **, char **new_names,
size_t count, char **err_name);
+bool dict_make_unique_var_name (const struct dictionary *, const char *hint,
+ unsigned long int *num_start,
+ char name[]);
/* Weight variable. */
double dict_get_case_weight (const struct dictionary *,
}
-
-/*
- Change SUGGESTION until it's a valid name that can be added to DICT.
-*/
-static void
-devise_name (const struct dictionary *dict, struct string *name, int *x)
-{
- struct string basename;
- if ( ds_is_empty (name))
- ds_init_cstr (&basename, "var");
- else
- ds_init_string (&basename, name);
- do
- {
- ds_clear (name);
- ds_put_format (name, "%s%d", ds_cstr (&basename), ++(*x));
- }
- while (NULL != dict_lookup_var (dict, ds_cstr (name)) );
-
- ds_destroy (&basename);
-}
-
-/*
- Mutate NAME of a variable, which is gauranteed to be valid for the
- dictionary DICT.
-*/
-static void
-munge_name (const struct dictionary *dict, struct string *name)
-{
- int x = 0;
-
- if (! ds_is_empty (name))
- {
- /* Change all the invalid characters to valid ones */
- char *s;
-
- s = ds_data (name);
-
- if ( !lex_is_id1 (*s))
- *s = '@';
-
- s++;
-
- while (s < ds_data (name) + ds_length (name))
- {
- if ( !lex_is_idn (*s))
- *s = '_';
- s++;
- }
-
- assert (var_is_valid_name (ds_cstr (name), false));
- }
-
- while (ds_is_empty (name) || NULL != dict_lookup_var (dict, ds_cstr (name)) )
- {
- devise_name (dict, name, &x);
- }
-}
-
-
/*
Sets the VAR of case C, to the value corresponding to the xml string XV
*/
struct casereader *
gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
{
+ unsigned long int vstart = 0;
int ret;
casenumber n_cases = CASENUMBER_MAX;
int i;
for (i = 0 ; i < n_var_specs ; ++i )
{
- struct string name;
+ char name[VAR_NAME_LEN + 1];
- /* Probably no data exists for this variable, so allocate a default width */
+ /* Probably no data exists for this variable, so allocate a
+ default width */
if ( var_spec[i].width == -1 )
var_spec[i].width = MAX_SHORT_STRING;
r->value_cnt += value_cnt_from_width (var_spec[i].width);
- if (var_spec[i].name)
- ds_init_cstr (&name, var_spec[i].name);
- else
- ds_init_empty (&name);
-
- munge_name (r->dict, &name);
-
-
- dict_create_var (r->dict, ds_cstr (&name), var_spec[i].width);
+ if ( ! dict_make_unique_var_name (r->dict, var_spec[i].name,
+ &vstart, name))
+ {
+ msg (ME, _("Cannot create variable name from %s"), var_spec[i].name);
+ goto error;
+ }
- ds_destroy (&name);
+ dict_create_var (r->dict, name, var_spec[i].width);
}
/* Create the first case, and cache it */
{
char suffix[SHORT_NAME_LEN + 1];
char short_name[SHORT_NAME_LEN + 1];
- char *start, *end;
int len, ofs;
assert (suffix_number >= 0);
var_set_short_name (v, i, base);
/* Compose suffix. */
- start = end = suffix + sizeof suffix - 1;
- *end = '\0';
- do
- {
- *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix_number % 26];
- if (start <= suffix + 1)
- msg (SE, _("Variable suffix too large."));
- suffix_number /= 26;
- }
- while (suffix_number > 0);
- *--start = '_';
+ suffix[0] = '_';
+ if (!str_format_26adic (suffix_number, &suffix[1], sizeof suffix - 1))
+ msg (SE, _("Variable suffix too large."));
+ len = strlen (suffix);
/* Append suffix to V's short name. */
str_copy_trunc (short_name, sizeof short_name, base);
- len = end - start;
- if (len + strlen (short_name) > SHORT_NAME_LEN)
+ if (strlen (short_name) + len > SHORT_NAME_LEN)
ofs = SHORT_NAME_LEN - len;
else
ofs = strlen (short_name);
- strcpy (short_name + ofs, start);
+ strcpy (short_name + ofs, suffix);
/* Set name. */
var_set_short_name (v, i, short_name);
if (trial == 0)
var_set_short_name (v, i, var_get_name (v));
else
- set_var_short_name_suffix (v, i, var_get_name (v), trial - 1);
+ set_var_short_name_suffix (v, i, var_get_name (v), trial);
if (hsh_insert (short_names, (char *) var_get_short_name (v, i)) == NULL)
break;
+2008-02-01 Ben Pfaff <blp@gnu.org>
+
+ Patch #6386. Thanks to John Darrington for review.
+
+ * str.c (str_format_26adic): New function.
+
2007-12-24 John Darrington <john@darrington.wattle.id.au>
* taint.c (taint_destroy): Return true if pointer is null.
*s = tolower ((unsigned char) *s);
}
+/* Converts NUMBER into a string in 26-adic notation in BUFFER,
+ which has room for SIZE bytes. Returns true if successful,
+ false if NUMBER, plus a trailing null, is too large to fit in
+ the available space.
+
+ 26-adic notation is "spreadsheet column numbering": 1 = A, 2 =
+ B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ...
+
+ 26-adic notation is the special case of a k-adic numeration
+ system (aka bijective base-k numeration) with k=26. In k-adic
+ numeration, the digits are {1, 2, 3, ..., k} (there is no
+ digit 0), and integer 0 is represented by the empty string.
+ For more information, see
+ http://en.wikipedia.org/wiki/Bijective_numeration. */
+bool
+str_format_26adic (unsigned long int number, char buffer[], size_t size)
+{
+ size_t length = 0;
+
+ while (number-- > 0)
+ {
+ if (length >= size)
+ return false;
+ buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26];
+ number /= 26;
+ }
+
+ if (length >= size)
+ return false;
+ buffer[length] = '\0';
+
+ buf_reverse (buffer, length);
+ return true;
+}
+
/* Formats FORMAT into DST, as with sprintf(), and returns the
address of the terminating null written to DST. */
char *
void str_uppercase (char *);
void str_lowercase (char *);
+bool str_format_26adic (unsigned long int number, char buffer[], size_t);
+
char *spprintf (char *dst, const char *format, ...);
void *mempset (void *, int, size_t);
+2008-02-01 Ben Pfaff <blp@gnu.org>
+
+ * automake.mk: Add new test.
+
+ * libpspp/str-test.c: New test.
+
+ * command/get-dat-gnm.sh: Update variable names to match new
+ naming scheme.
+
2007-12-04 Ben Pfaff <blp@gnu.org>
* automake.mk: Add new tests.
tests/libpspp/range-map-test \
tests/libpspp/range-set-test \
tests/libpspp/sparse-array-test \
+ tests/libpspp/str-test \
tests/libpspp/tower-test
TESTS = $(dist_TESTS) $(nodist_TESTS)
tests_libpspp_range_set_test_LDADD = gl/libgl.la @LIBINTL@
tests_libpspp_range_set_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
+tests_libpspp_str_test_SOURCES = \
+ tests/libpspp/str-test.c
+tests_libpspp_str_test_LDADD = src/libpspp/libpspp.a gl/libgl.la @LIBINTL@
+
tests_libpspp_tower_test_SOURCES = \
src/libpspp/abt.c \
src/libpspp/abt.h \
+--------+-------------------------------------------+--------+
|Variable|Description |Position|
#========#===========================================#========#
-|var1 |Format: F8.2 | 1|
+|VAR001 |Format: F8.2 | 1|
| |Measure: Scale | |
| |Display Alignment: Right | |
| |Display Width: 8 | |
+--------+-------------------------------------------+--------+
-|var2 |Format: A8 | 2|
+|VAR002 |Format: A8 | 2|
| |Measure: Nominal | |
| |Display Alignment: Left | |
| |Display Width: 8 | |
+--------+-------------------------------------------+--------+
-|var3 |Format: F8.2 | 3|
+|VAR003 |Format: F8.2 | 3|
| |Measure: Scale | |
| |Display Alignment: Right | |
| |Display Width: 8 | |
+--------+-------------------------------------------+--------+
- var1 var2 var3
+ VAR001 VAR002 VAR003
-------- -------- --------
.00 fred 20.00
1.00 11 21.00
| |Display Alignment: Left | |
| |Display Width: 8 | |
+--------+-------------------------------------------+--------+
-|var1 |Format: F8.2 | 3|
+|VAR001 |Format: F8.2 | 3|
| |Measure: Scale | |
| |Display Alignment: Right | |
| |Display Width: 8 | |
+--------+-------------------------------------------+--------+
- V1 V2 var1
+ V1 V2 VAR001
-------- -------- --------
.00 fred 20.00
1.00 11 21.00
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <libpspp/str.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+\f
+/* Currently running test. */
+static const char *test_name;
+
+/* Exit with a failure code.
+ (Place a breakpoint on this function while debugging.) */
+static void
+check_die (void)
+{
+ exit (EXIT_FAILURE);
+}
+\f
+static void
+check_26adic (unsigned long int number, const char *expected_string)
+{
+ char string[8];
+ str_format_26adic (number, string, sizeof string);
+ if (strcmp (string, expected_string))
+ {
+ printf ("base-26 of %lu: expected \"%s\", got \"%s\"\n",
+ number, expected_string, string);
+ check_die ();
+ }
+}
+
+static void
+test_str_format_26adic (void)
+{
+ check_26adic (0, "");
+ check_26adic (1, "A");
+ check_26adic (2, "B");
+ check_26adic (26, "Z");
+ check_26adic (27, "AA");
+ check_26adic (28, "AB");
+ check_26adic (29, "AC");
+ check_26adic (18278, "ZZZ");
+ check_26adic (18279, "AAAA");
+ check_26adic (19010, "ABCD");
+}
+\f
+/* Main program. */
+
+/* Runs TEST_FUNCTION and prints a message about NAME. */
+static void
+run_test (void (*test_function) (void), const char *name)
+{
+ test_name = name;
+ putchar ('.');
+ fflush (stdout);
+ test_function ();
+}
+
+int
+main (void)
+{
+ run_test (test_str_format_26adic, "format 26-adic strings");
+ putchar ('\n');
+
+ return 0;
+}