From 8c76dd167f2b63b3c97606c53667c391c17fae10 Mon Sep 17 00:00:00 2001
From: Friedrich Beckmann <friedrich.beckmann@gmx.de>
Date: Fri, 14 Aug 2020 02:53:11 +0200
Subject: [PATCH] sys-file-reader: text_get_token did not set delimiter - bug
 #58943

parse_mrsets used loop-local variables which were not initialized.
The regression test passed at low compiler optimization levels because
the value of "delimiter" was kept across the loop iterations.
text_get_token did not set the delimiter value when only delimiters
are left in the string. In that case I now set the delimiter to the
last character in the string. I also initialized the variables so that
the bug shows up at -O0 as well.

Closes: https://savannah.gnu.org/bugs/?58943
---
 src/data/sys-file-reader.c | 12 ++++++++----
 src/libpspp/str.c          |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c
index 1a17eb89a1..0e8a9bfc68 100644
--- a/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@ -1687,9 +1687,9 @@ parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
   text = open_text_record (r, record, false);
   for (;;)
     {
-      struct sfm_mrset *mrset;
-      size_t allocated_vars;
-      char delimiter;
+      struct sfm_mrset *mrset = NULL;
+      size_t allocated_vars = 0;
+      char delimiter = '4';
 
       /* Skip extra line feeds if present. */
       while (text_match (text, '\n'))
@@ -3119,7 +3119,11 @@ text_get_token (struct text_record *text, struct substring delimiters,
   char *end;
 
   if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
-    return NULL;
+    {
+      if (delimiter != NULL)
+	*delimiter = ss_data (text->buffer)[text->pos-1];
+      return NULL;
+    }
 
   end = &ss_data (token)[ss_length (token)];
   if (delimiter != NULL)
diff --git a/src/libpspp/str.c b/src/libpspp/str.c
index c227f7f698..a58473192e 100644
--- a/src/libpspp/str.c
+++ b/src/libpspp/str.c
@@ -511,7 +511,7 @@ ss_tokenize (struct substring ss, struct substring delimiters,
   ss_get_bytes (&ss, ss_cspan (ss, delimiters), token);
 
   found_token = ss_length (*token) > 0;
-  *save_idx += ss_length (*token) + found_token;
+  *save_idx += ss_length (*token) + (found_token?1:0);
   return found_token;
 }
 
@@ -738,7 +738,7 @@ ss_cspan (struct substring ss, struct substring stop_set)
 size_t
 ss_find_byte (struct substring ss, char c)
 {
-  const char *p = memchr (ss.string, c, ss.length);
+  const char *p = memchr (ss.string, (int) c, ss.length);
   return p != NULL ? p - ss.string : SIZE_MAX;
 }
 
-- 
2.30.2