From 814952415c6322e9ab08376426fb99d851765e20 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 23 May 2019 10:05:06 +0200 Subject: [PATCH] Matrix reader: Remove reliance on hash functions. This code made inappropriate use of hash functions to identify uniqueness in sets of values. This code removes that code, and compares them explicitly. Partial fix for bug #56363 --- src/language/data-io/matrix-reader.c | 40 +++++++++++++++------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/language/data-io/matrix-reader.c b/src/language/data-io/matrix-reader.c index c0cee04743..3a7e6ef056 100644 --- a/src/language/data-io/matrix-reader.c +++ b/src/language/data-io/matrix-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2017 Free Software Foundation, Inc. + Copyright (C) 2017, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,12 +20,14 @@ #include -#include #include +#include #include #include #include #include +#include +#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -77,6 +79,7 @@ s_0 ROWTYPE_ VARNAME_ v_0 v_1 v_2 struct matrix_reader { + const struct dictionary *dict; const struct variable *varname; const struct variable *rowtype; struct casegrouper *grouper; @@ -93,6 +96,7 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas struct matrix_reader *mr = xzalloc (sizeof *mr); mr->varname = dict_lookup_var (dict, "varname_"); + mr->dict = dict; if (mr->varname == NULL) { msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_"); @@ -209,18 +213,10 @@ next_matrix_from_reader (struct matrix_material *mm, mm->mean_matrix = mr->mean_vectors; mm->var_matrix = mr->var_vectors; - // FIXME: Make this into a hash table. - unsigned long *table = xmalloc (sizeof (*table) * n_vars); - int i; - for (i = 0; i < n_vars; ++i) + struct substring *var_names = xcalloc (n_vars, sizeof *var_names); + for (int i = 0; i < n_vars; ++i) { - const int w = var_get_width (mr->varname); - char s[w]; - memset (s, 0, w); - const char *name = var_get_name (vars[i]); - strncpy (s, name, w); - unsigned long h = hash_bytes (s, w, 0); - table[i] = h; + ss_alloc_substring (var_names + i, ss_cstr (var_get_name (vars[i]))); } struct ccase *c; @@ -243,20 +239,26 @@ next_matrix_from_reader (struct matrix_material *mm, gsl_matrix_set (mr->var_vectors, row, col, x * x); } + const char *enc = dict_get_encoding (mr->dict); + const union value *uvv = case_data (c, mr->varname); - const uint8_t *vs = value_str (uvv, var_get_width (mr->varname)); int w = var_get_width (mr->varname); - unsigned long h = hash_bytes (vs, w, 0); + + struct fmt_spec fmt = {FMT_A, 0, 0}; + fmt.w = w; + char *vname = data_out (uvv, enc, &fmt); + struct substring the_name = ss_cstr (vname); int mrow = -1; - for (i = 0; i < n_vars; ++i) + for (int i = 0; i < n_vars; ++i) { - if (table[i] == h) + if (ss_equals (var_names[i], the_name)) { mrow = i; break; } } + free (vname); if (mrow == -1) continue; @@ -273,7 +275,9 @@ next_matrix_from_reader (struct matrix_material *mm, casereader_destroy (group); - free (table); + for (int i = 0; i < n_vars; ++i) + ss_dealloc (var_names + i); + free (var_names); return true; } -- 2.30.2