From: Ben Pfaff Date: Sun, 21 Jul 2019 02:18:53 +0000 (-0700) Subject: value: Drop distinction between long and short string values. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dc331e08118a1c299a41069f6f51183176b27621;p=pspp value: Drop distinction between long and short string values. The time has passed when this is a super valuable distinction. There is basically no distinction in the language itself anymore, the efficiency gains sound pretty marginal, and the differences lead to bugs, both correctness- and memory leak-wise, that are hard to test for (because the developer must make sure to test both short and long strings everywhere). This commit changes the code so that both short and long strings are represented the way that long strings were previously. --- diff --git a/src/data/case.c b/src/data/case.c index 7676922398..60704185a6 100644 --- a/src/data/case.c +++ b/src/data/case.c @@ -110,7 +110,7 @@ case_get_cost (const struct caseproto *proto) { /* FIXME: improve approximation? */ return (1 + caseproto_get_n_widths (proto) - + 3 * caseproto_get_n_long_strings (proto)) * sizeof (union value); + + 3 * caseproto_get_n_strings (proto)) * sizeof (union value); } /* Changes the prototype for case C, which must not be shared. @@ -206,7 +206,7 @@ case_copy (struct ccase *dst, size_t dst_idx, if (dst != src) { - if (!dst->proto->n_long_strings || !src->proto->n_long_strings) + if (!dst->proto->n_strings || !src->proto->n_strings) memcpy (&dst->values[dst_idx], &src->values[src_idx], sizeof dst->values[0] * n_values); else @@ -214,7 +214,7 @@ case_copy (struct ccase *dst, size_t dst_idx, } else if (dst_idx != src_idx) { - if (!dst->proto->n_long_strings) + if (!dst->proto->n_strings) memmove (&dst->values[dst_idx], &src->values[src_idx], sizeof dst->values[0] * n_values); else if (dst_idx < src_idx) diff --git a/src/data/caseproto.c b/src/data/caseproto.c index d62af283de..f990c127aa 100644 --- a/src/data/caseproto.c +++ b/src/data/caseproto.c @@ -27,13 +27,13 @@ #include "gl/minmax.h" static struct caseproto *caseproto_unshare (struct caseproto *); -static bool try_init_long_strings (const struct caseproto *, +static bool try_init_strings (const struct caseproto *, size_t first, size_t last, union value[]); -static void init_long_strings (const struct caseproto *, +static void init_strings (const struct caseproto *, size_t first, size_t last, union value[]); -static void destroy_long_strings (const struct caseproto *, +static void destroy_strings (const struct caseproto *, size_t first, size_t last, union value[]); -static size_t count_long_strings (const struct caseproto *, +static size_t count_strings (const struct caseproto *, size_t idx, size_t count); /* Returns the number of bytes to allocate for a struct caseproto @@ -53,8 +53,8 @@ caseproto_create (void) enum { N_ALLOCATE = 4 }; struct caseproto *proto = xmalloc (caseproto_size (N_ALLOCATE)); proto->ref_cnt = 1; - proto->long_strings = NULL; - proto->n_long_strings = 0; + proto->strings = NULL; + proto->n_strings = 0; proto->n_widths = 0; proto->allocated_widths = N_ALLOCATE; return proto; @@ -100,7 +100,7 @@ caseproto_add_width (struct caseproto *proto, int width) proto = caseproto_reserve (proto, proto->n_widths + 1); proto->widths[proto->n_widths++] = width; - proto->n_long_strings += count_long_strings (proto, proto->n_widths - 1, 1); + proto->n_strings += count_strings (proto, proto->n_widths - 1, 1); return proto; } @@ -117,9 +117,9 @@ caseproto_set_width (struct caseproto *proto, size_t idx, int width) proto = caseproto_reserve (proto, idx + 1); while (idx >= proto->n_widths) proto->widths[proto->n_widths++] = -1; - proto->n_long_strings -= count_long_strings (proto, idx, 1); + proto->n_strings -= count_strings (proto, idx, 1); proto->widths[idx] = width; - proto->n_long_strings += count_long_strings (proto, idx, 1); + proto->n_strings += count_strings (proto, idx, 1); return proto; } @@ -133,7 +133,7 @@ caseproto_insert_width (struct caseproto *proto, size_t before, int width) assert (before <= proto->n_widths); proto = caseproto_reserve (proto, proto->n_widths + 1); - proto->n_long_strings += value_needs_init (width); + proto->n_strings += value_needs_init (width); insert_element (proto->widths, proto->n_widths, sizeof *proto->widths, before); proto->widths[before] = width; @@ -150,7 +150,7 @@ caseproto_remove_widths (struct caseproto *proto, size_t idx, size_t cnt) assert (caseproto_range_is_valid (proto, idx, cnt)); proto = caseproto_unshare (proto); - proto->n_long_strings -= count_long_strings (proto, idx, cnt); + proto->n_strings -= count_strings (proto, idx, cnt); remove_range (proto->widths, proto->n_widths, sizeof *proto->widths, idx, cnt); proto->n_widths -= cnt; @@ -231,7 +231,7 @@ caseproto_equal (const struct caseproto *a, size_t a_start, bool caseproto_needs_init_values (const struct caseproto *proto) { - return proto->n_long_strings > 0; + return proto->n_strings > 0; } /* Initializes the values in VALUES as required by PROTO, by @@ -246,7 +246,7 @@ caseproto_needs_init_values (const struct caseproto *proto) void caseproto_init_values (const struct caseproto *proto, union value values[]) { - init_long_strings (proto, 0, proto->n_long_strings, values); + init_strings (proto, 0, proto->n_strings, values); } /* Like caseproto_init_values, but returns false instead of @@ -254,7 +254,7 @@ caseproto_init_values (const struct caseproto *proto, union value values[]) bool caseproto_try_init_values (const struct caseproto *proto, union value values[]) { - return try_init_long_strings (proto, 0, proto->n_long_strings, values); + return try_init_strings (proto, 0, proto->n_strings, values); } /* Initializes the data in VALUES that are in NEW but not in OLD, @@ -273,15 +273,15 @@ void caseproto_reinit_values (const struct caseproto *old, const struct caseproto *new, union value values[]) { - size_t old_n_long = old->n_long_strings; - size_t new_n_long = new->n_long_strings; + size_t old_n_long = old->n_strings; + size_t new_n_long = new->n_strings; expensive_assert (caseproto_is_conformable (old, new)); if (new_n_long > old_n_long) - init_long_strings (new, old_n_long, new_n_long, values); + init_strings (new, old_n_long, new_n_long, values); else if (new_n_long < old_n_long) - destroy_long_strings (old, new_n_long, old_n_long, values); + destroy_strings (old, new_n_long, old_n_long, values); } /* Frees the values in VALUES as required by PROTO, by calling @@ -293,7 +293,7 @@ caseproto_reinit_values (const struct caseproto *old, void caseproto_destroy_values (const struct caseproto *proto, union value values[]) { - destroy_long_strings (proto, 0, proto->n_long_strings, values); + destroy_strings (proto, 0, proto->n_strings, values); } /* Copies COUNT values, whose widths are given by widths in PROTO @@ -314,26 +314,25 @@ caseproto_copy (const struct caseproto *proto, size_t idx, size_t count, void caseproto_free__ (struct caseproto *proto) { - free (proto->long_strings); + free (proto->strings); free (proto); } void -caseproto_refresh_long_string_cache__ (const struct caseproto *proto_) +caseproto_refresh_string_cache__ (const struct caseproto *proto_) { struct caseproto *proto = CONST_CAST (struct caseproto *, proto_); size_t n, i; - assert (proto->long_strings == NULL); - assert (proto->n_long_strings > 0); + assert (proto->strings == NULL); + assert (proto->n_strings > 0); - proto->long_strings = xmalloc (proto->n_long_strings - * sizeof *proto->long_strings); + proto->strings = xmalloc (proto->n_strings * sizeof *proto->strings); n = 0; for (i = 0; i < proto->n_widths; i++) - if (proto->widths[i] > MAX_SHORT_STRING) - proto->long_strings[n++] = i; - assert (n == proto->n_long_strings); + if (proto->widths[i] > 0) + proto->strings[n++] = i; + assert (n == proto->n_strings); } static struct caseproto * @@ -349,27 +348,27 @@ caseproto_unshare (struct caseproto *old) else { new = old; - free (new->long_strings); + free (new->strings); } - new->long_strings = NULL; + new->strings = NULL; return new; } static bool -try_init_long_strings (const struct caseproto *proto, +try_init_strings (const struct caseproto *proto, size_t first, size_t last, union value values[]) { size_t i; - if (last > 0 && proto->long_strings == NULL) - caseproto_refresh_long_string_cache__ (proto); + if (last > 0 && proto->strings == NULL) + caseproto_refresh_string_cache__ (proto); for (i = first; i < last; i++) { - size_t idx = proto->long_strings[i]; + size_t idx = proto->strings[i]; if (!value_try_init (&values[idx], proto->widths[idx])) { - destroy_long_strings (proto, first, i, values); + destroy_strings (proto, first, i, values); return false; } } @@ -377,36 +376,36 @@ try_init_long_strings (const struct caseproto *proto, } static void -init_long_strings (const struct caseproto *proto, +init_strings (const struct caseproto *proto, size_t first, size_t last, union value values[]) { - if (!try_init_long_strings (proto, first, last, values)) + if (!try_init_strings (proto, first, last, values)) xalloc_die (); } static void -destroy_long_strings (const struct caseproto *proto, size_t first, size_t last, +destroy_strings (const struct caseproto *proto, size_t first, size_t last, union value values[]) { size_t i; - if (last > 0 && proto->long_strings == NULL) - caseproto_refresh_long_string_cache__ (proto); + if (last > 0 && proto->strings == NULL) + caseproto_refresh_string_cache__ (proto); for (i = first; i < last; i++) { - size_t idx = proto->long_strings[i]; + size_t idx = proto->strings[i]; value_destroy (&values[idx], proto->widths[idx]); } } static size_t -count_long_strings (const struct caseproto *proto, size_t idx, size_t count) +count_strings (const struct caseproto *proto, size_t idx, size_t count) { size_t n, i; n = 0; for (i = 0; i < count; i++) - n += proto->widths[idx + i] > MAX_SHORT_STRING; + n += proto->widths[idx + i] > 0; return n; } diff --git a/src/data/caseproto.h b/src/data/caseproto.h index a8468ddeed..207ecacedb 100644 --- a/src/data/caseproto.h +++ b/src/data/caseproto.h @@ -64,11 +64,10 @@ struct caseproto { size_t ref_cnt; /* Reference count. */ - /* Tracking of long string widths. Lazily maintained: when - 'long_strings' is null and 'n_long_strings' is nonzero, - the former must be regenerated. */ - size_t *long_strings; /* Array of indexes of long string widths. */ - size_t n_long_strings; /* Number of long string widths. */ + /* Tracking of string widths. Lazily maintained: when 'strings' is null + and 'n_strings' is nonzero, the former must be regenerated. */ + size_t *strings; /* Array of indexes of string widths. */ + size_t n_strings; /* Number of string widths. */ /* Widths. */ size_t n_widths; /* Number of widths. */ @@ -118,14 +117,13 @@ void caseproto_destroy_values (const struct caseproto *, union value[]); void caseproto_copy (const struct caseproto *, size_t idx, size_t count, union value *dst, const union value *src); -/* Inspecting the cache of long string widths. +/* Inspecting the cache of string widths. - (These functions are useful for allocating cases, which - requires allocating a block memory for each long string value - in the case.) */ -static inline size_t caseproto_get_n_long_strings (const struct caseproto *); -static inline size_t caseproto_get_long_string_idx (const struct caseproto *, - size_t idx1); + (These functions are useful for allocating cases, which requires allocating + a block of memory for each string value in the case.) */ +static inline size_t caseproto_get_n_strings (const struct caseproto *); +static inline size_t caseproto_get_string_idx (const struct caseproto *, + size_t idx1); /* For use in assertions. */ bool caseproto_range_is_valid (const struct caseproto *, @@ -182,30 +180,27 @@ caseproto_get_n_widths (const struct caseproto *proto) /* Inspecting the cache of long string widths. */ -void caseproto_refresh_long_string_cache__ (const struct caseproto *); +void caseproto_refresh_string_cache__ (const struct caseproto *); -/* Returns the number of long string widths in PROTO; that is, - the number of widths in PROTO that are greater than to - MAX_SHORT_STRING. */ +/* Returns the number of strings in PROTO. */ static inline size_t -caseproto_get_n_long_strings (const struct caseproto *proto) +caseproto_get_n_strings (const struct caseproto *proto) { - return proto->n_long_strings; + return proto->n_strings; } -/* Given long string width IDX1, returns a value IDX2 for which - caseproto_get_width(PROTO, IDX2) will return a value greater - than MAX_SHORT_STRING. IDX1 must be less than - caseproto_get_n_long_strings(PROTO), and IDX2 will be less - than caseproto_get_n_widths(PROTO). */ +/* Given string width IDX1, returns a value IDX2 for which + caseproto_get_width(PROTO, IDX2) will return a value greater than 0. IDX1 + must be less than caseproto_get_n_strings(PROTO), and IDX2 will be less than + caseproto_get_n_widths(PROTO). */ static inline size_t -caseproto_get_long_string_idx (const struct caseproto *proto, size_t idx1) +caseproto_get_string_idx (const struct caseproto *proto, size_t idx1) { - if (proto->long_strings == NULL) - caseproto_refresh_long_string_cache__ (proto); + if (proto->strings == NULL) + caseproto_refresh_string_cache__ (proto); - assert (idx1 < proto->n_long_strings); - return proto->long_strings[idx1]; + assert (idx1 < proto->n_strings); + return proto->strings[idx1]; } #endif /* data/caseproto.h */ diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index bbe58aecd6..b655f44073 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -507,6 +507,7 @@ write_variable (struct sfm_writer *w, const struct variable *v) Missing values for long string variables are written in a separate record. */ + enum { MAX_SHORT_STRING = 8 }; if (width <= MAX_SHORT_STRING) { const struct missing_values *mv = var_get_missing_values (v); diff --git a/src/data/value.c b/src/data/value.c index ce80076235..1d27083552 100644 --- a/src/data/value.c +++ b/src/data/value.c @@ -201,16 +201,7 @@ value_needs_resize (int old_width, int new_width) { assert (val_type_from_width (old_width) == val_type_from_width (new_width)); - /* We need to call value_resize if either the new width is - longer than the old width (in which case the new characters - must be set to spaces) or if either width is a long string. - (We could omit resizing if both the old and new widths were - long and the new width was shorter, but we choose to do so - anyway in hopes of saving memory.) */ - return (old_width != new_width - && (new_width > old_width - || old_width > MAX_SHORT_STRING - || new_width > MAX_SHORT_STRING)); + return old_width != new_width; } /* Same as value_init, except that memory for VALUE (if @@ -223,8 +214,8 @@ value_needs_resize (int old_width, int new_width) void value_init_pool (struct pool *pool, union value *value, int width) { - if (width > MAX_SHORT_STRING) - value->long_string = pool_alloc_unaligned (pool, width); + if (width > 0) + value->s = pool_alloc_unaligned (pool, width); } /* Same as value_clone(), except that memory for VALUE (if necessary) is @@ -237,10 +228,10 @@ void value_clone_pool (struct pool *pool, union value *value, const union value *src, int width) { - if (width > MAX_SHORT_STRING) - value->long_string = pool_clone_unaligned (pool, src->long_string, width); + if (width > 0) + value->s = pool_clone_unaligned (pool, src->s, width); else - *value = *src; + value->f = src->f; } /* Same as value_resize, except that VALUE must have been @@ -256,12 +247,10 @@ value_resize_pool (struct pool *pool, union value *value, assert (value_is_resizable (value, old_width, new_width)); if (new_width > old_width) { - if (new_width > MAX_SHORT_STRING) - { - uint8_t *new_long_string = pool_alloc_unaligned (pool, new_width); - memcpy (new_long_string, value_str (value, old_width), old_width); - value->long_string = new_long_string; - } + uint8_t *new_string = pool_alloc_unaligned (pool, new_width); + memcpy (new_string, value_str (value, old_width), old_width); + value->s = new_string; + memset (value_str_rw (value, new_width) + old_width, ' ', new_width - old_width); } diff --git a/src/data/value.h b/src/data/value.h index 299187285a..4d51774608 100644 --- a/src/data/value.h +++ b/src/data/value.h @@ -25,30 +25,12 @@ #include #include "xalloc.h" -/* Maximum length of a "short" string, that is represented in - "union value" without a separate pointer. - - This is an implementation detail of the "union value" code. - There is little reason for client code to use it. */ -#define MAX_SHORT_STRING 8 - -/* A numeric or string value. - - The client is responsible for keeping track of the value's - width. - - This structure is semi-opaque: - - - If the value is a number, clients may access the 'f' - member directly. - - - Clients should not access other members directly. -*/ +/* A numeric or string value. The client is responsible for keeping track of + the value's width. */ union value { double f; - uint8_t short_string[MAX_SHORT_STRING]; - uint8_t *long_string; + uint8_t *s; }; static inline void value_init (union value *, int width); @@ -100,8 +82,8 @@ void value_resize_pool (struct pool *, union value *, static inline void value_init (union value *v, int width) { - if (width > MAX_SHORT_STRING) - v->long_string = xmalloc (width); + if (width > 0) + v->s = xmalloc (width); } /* Initializes V as a value of the given WIDTH, as with value_init(), and @@ -109,10 +91,10 @@ value_init (union value *v, int width) static inline void value_clone (union value *v, const union value *src, int width) { - if (width <= MAX_SHORT_STRING) - *v = *src; + if (width <= 0) + v->f = src->f; else - v->long_string = xmemdup (src->long_string, width); + v->s = xmemdup (src->s, width); } /* Returns true if a value of the given WIDTH actually needs to @@ -124,7 +106,7 @@ value_clone (union value *v, const union value *src, int width) static inline bool value_needs_init (int width) { - return width > MAX_SHORT_STRING; + return width > 0; } /* Same as value_init, except that failure to allocate memory @@ -133,10 +115,10 @@ value_needs_init (int width) static inline bool value_try_init (union value *v, int width) { - if (width > MAX_SHORT_STRING) + if (width > 0) { - v->long_string = malloc (width); - return v->long_string != NULL; + v->s = malloc (width); + return v->s != NULL; } else return true; @@ -147,8 +129,8 @@ value_try_init (union value *v, int width) static inline void value_destroy (union value *v, int width) { - if (width > MAX_SHORT_STRING) - free (v->long_string); + if (width > 0) + free (v->s); } /* Returns the numeric value in V, which must have width 0. */ @@ -169,7 +151,7 @@ static inline const uint8_t * value_str (const union value *v, int width) { assert (width > 0); - return (width > MAX_SHORT_STRING ? v->long_string : v->short_string); + return v->s; } /* Returns the string value in V, which must have width WIDTH. @@ -183,7 +165,7 @@ static inline uint8_t * value_str_rw (union value *v, int width) { assert (width > 0); - return (width > MAX_SHORT_STRING ? v->long_string : v->short_string); + return v->s; } /* Copies SRC to DST, given that they both contain data of the @@ -191,10 +173,10 @@ value_str_rw (union value *v, int width) static inline void value_copy (union value *dst, const union value *src, int width) { - if (width <= MAX_SHORT_STRING) - *dst = *src; - else if (dst != src) - memcpy (dst->long_string, src->long_string, width); + if (width <= 0) + dst->f = src->f; + else + memcpy (dst->s, src->s, width); } /* Exchanges the contents of A and B. */ diff --git a/src/ui/gui/value-variant.c b/src/ui/gui/value-variant.c index ce523ae14e..f332ccff11 100644 --- a/src/ui/gui/value-variant.c +++ b/src/ui/gui/value-variant.c @@ -40,17 +40,10 @@ value_variant_new (const union value *in, int width) if (width == 0) vv[IDX_DATA] = g_variant_new_double (in->f); - else if (width <= MAX_SHORT_STRING) - { - char xx[MAX_SHORT_STRING + 1]; - memset (xx, '\0', MAX_SHORT_STRING + 1); - memcpy (xx, in->short_string, width); - vv[IDX_DATA] = g_variant_new_bytestring (xx); - } else { gchar *q = xmalloc (width + 1); - memcpy (q, in->long_string, width); + memcpy (q, in->s, width); q[width] = '\0'; vv[IDX_DATA] = g_variant_new_from_data (G_VARIANT_TYPE_BYTESTRING, q, width + 1, FALSE, NULL, NULL); @@ -88,10 +81,7 @@ value_variant_get (union value *val, GVariant *v) { const gchar *data = g_variant_get_bytestring (vdata); size_t len = strlen (data); - if (width <= MAX_SHORT_STRING) - memcpy (val->short_string, data, MIN (MAX_SHORT_STRING, len)); - else - val->long_string = xmemdup (data, MIN (width, len)); + val->s = xmemdup (data, MIN (width, len)); } g_variant_unref (vdata);