Allow undo/redo of pasted text as a single item

[pspp] / src / data / data-in.c
diff --git a/src/data/data-in.c b/src/data/data-in.c

index 0ddd322bf417a1bb37b19642d76459ca4aa75a8b..c105454ae6654ffda20a4e5bbe4ae5a3eadfa28a 100644 (file)
--- a/src/data/data-in.c
+++ b/src/data/data-in.c
@@ -1,5 +1,5 @@
  /* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -27,20 +27,25 @@
  #include <stdio.h>
  #include <stdlib.h>
  #include <stdbool.h>
+#include <limits.h>
  
  #include "calendar.h"
  #include "identifier.h"
  #include "settings.h"
  #include "value.h"
+#include "format.h"
+#include "dictionary.h"
  
  #include <libpspp/assertion.h>
+#include <libpspp/legacy-encoding.h>
+#include <libpspp/i18n.h>
  #include <libpspp/compiler.h>
  #include <libpspp/integer-format.h>
  #include <libpspp/message.h>
  #include <libpspp/misc.h>
  #include <libpspp/str.h>
-
  #include "c-ctype.h"
+#include "c-strtod.h"
  #include "minmax.h"
  #include "xalloc.h"
  
@@ -50,6 +55,7 @@
  /* Information about parsing one data field. */
  struct data_in
    {
+    const char *src_enc;        /* Encoding of source. */
      struct substring input;     /* Source. */
      enum fmt_type format;       /* Input format. */
      int implied_decimals;       /* Number of implied decimal places. */
@@ -61,11 +67,7 @@ struct data_in
      int last_column;           /* Last column. */
    };
  
-/* Integer format used for IB and PIB input. */
-static enum integer_format input_integer_format = INTEGER_NATIVE;
  
-/* Floating-point format used for RB and RBHEX input. */
-static enum float_format input_float_format = FLOAT_NATIVE_DOUBLE;
  
  typedef bool data_in_parser_func (struct data_in *);
  #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \
@@ -83,21 +85,31 @@ static bool trim_spaces_and_check_missing (struct data_in *);
  
  static int hexit_value (int c);
  \f
-/* Parses the characters in INPUT according to FORMAT.  Stores
-   the parsed representation in OUTPUT, which has the given WIDTH
-   (0 for a numeric field, otherwise the string width).
+/* Parses the characters in INPUT, which are encoded in the given
+   ENCODING, according to FORMAT.  Stores the parsed
+   representation in OUTPUT, which the caller must have
+   initialized with the given WIDTH (0 for a numeric field,
+   otherwise the string width).
+   If FORMAT is a string format, then DICT must be a pointer
+   to the dictionary associated with OUTPUT.  Otherwise, DICT
+   may be null.
  
     If no decimal point is included in a numeric format, then
     IMPLIED_DECIMALS decimal places are implied.  Specify 0 if no
     decimal places should be implied.
  
-   If FIRST_COLUMN is nonzero, then it should be the 1-based
-   column number of the first character in INPUT, used in error
-   messages. */
+   If FIRST_COLUMN and LAST_COLUMN are nonzero, then they should
+   be the 1-based column numbers of the first character in INPUT
+   and of the column just past its last character, for use in error
+   messages.  (LAST_COLUMN cannot always be calculated from
+   FIRST_COLUMN plus the length of the input because of the
+   possibility of escaped quotes in strings, etc.) */
  bool
-data_in (struct substring input,
+data_in (struct substring input, const char *encoding,
           enum fmt_type format, int implied_decimals,
-         int first_column, union value *output, int width)
+         int first_column, int last_column,
+        const struct dictionary *dict,
+        union value *output, int width)
  {
    static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
      {
@@ -106,11 +118,12 @@ data_in (struct substring input,
      };
  
    struct data_in i;
+
+  char *s = NULL;
    bool ok;
  
    assert ((width != 0) == fmt_is_string (format));
  
-  i.input = input;
    i.format = format;
    i.implied_decimals = implied_decimals;
  
@@ -118,53 +131,43 @@ data_in (struct substring input,
    i.width = width;
  
    i.first_column = first_column;
-  i.last_column = first_column + ss_length (input) - 1;
+  i.last_column = last_column;
+  i.src_enc = encoding;
  
-  if (!ss_is_empty (i.input))
+  if (ss_is_empty (input))
      {
-      ok = handlers[i.format] (&i);
-      if (!ok)
-        default_result (&i);
+      default_result (&i);
+      return true;
      }
-  else
+
+  if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY))
      {
-      default_result (&i);
-      ok = true;
+      i.input = input;
      }
+  else
+    {
+      const char *dest_encoding;
  
-  return ok;
-}
+      if ( dict == NULL)
+       {
+         assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)));
+         dest_encoding = LEGACY_NATIVE;
+       }
+      else
+       dest_encoding = dict_get_encoding (dict);
  
-/* Returns the integer format used for IB and PIB input. */
-enum integer_format
-data_in_get_integer_format (void)
-{
-  return input_integer_format;
-}
+      s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input));
+      i.input = ss_cstr (s);
+    }
  
-/* Sets the integer format used for IB and PIB input to
-   FORMAT. */
-void
-data_in_set_integer_format (enum integer_format format)
-{
-  input_integer_format = format;
-}
+  ok = handlers[i.format] (&i);
+  if (!ok)
+    default_result (&i);
  
-/* Returns the floating-point format used for RB and RBHEX
-   input. */
-enum float_format
-data_in_get_float_format (void)
-{
-  return input_float_format;
+  free (s);
+  return ok;
  }
  
-/* Sets the floating-point format used for RB and RBHEX input to
-   FORMAT. */
-void
-data_in_set_float_format (enum float_format format)
-{
-  input_float_format = format;
-}
  \f
  /* Format parsers. */
  
@@ -172,7 +175,8 @@ data_in_set_float_format (enum float_format format)
  static bool
  parse_number (struct data_in *i)
  {
-  const struct fmt_number_style *style = fmt_get_style (i->format);
+  const struct fmt_number_style *style =
+    settings_get_style (i->format);
  
    struct string tmp;
  
@@ -180,7 +184,10 @@ parse_number (struct data_in *i)
    int save_errno;
    char *tail;
  
-  assert (fmt_get_category (i->format) != FMT_CAT_CUSTOM);
+  if  (fmt_get_category (i->format) == FMT_CAT_CUSTOM)
+    {
+      style = settings_get_style (FMT_F);
+    }
  
    /* Trim spaces and check for missing value representation. */
    if (trim_spaces_and_check_missing (i))
@@ -271,10 +278,10 @@ parse_number (struct data_in *i)
        return false;
      }
  
-  /* Let strtod() do the conversion. */
+  /* Let c_strtod() do the conversion. */
    save_errno = errno;
    errno = 0;
-  i->output->f = strtod (ds_cstr (&tmp), &tail);
+  i->output->f = c_strtod (ds_cstr (&tmp), &tail);
    if (*tail != '\0')
      {
        data_warning (i, _("Invalid numeric syntax."));
@@ -461,10 +468,10 @@ parse_Z (struct data_in *i)
        return false;
      }
  
-  /* Let strtod() do the conversion. */
+  /* Let c_strtod() do the conversion. */
    save_errno = errno;
    errno = 0;
-  i->output->f = strtod (ds_cstr (&tmp), NULL);
+  i->output->f = c_strtod (ds_cstr (&tmp), NULL);
    if (errno == ERANGE)
      {
        if (fabs (i->output->f) > 1)
@@ -498,7 +505,7 @@ parse_IB (struct data_in *i)
    uint64_t sign_bit;
  
    bytes = MIN (8, ss_length (i->input));
-  value = integer_get (input_integer_format, ss_data (i->input), bytes);
+  value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes);
  
    sign_bit = UINT64_C(1) << (8 * bytes - 1);
    if (!(value & sign_bit))
@@ -519,7 +526,7 @@ parse_IB (struct data_in *i)
  static bool
  parse_PIB (struct data_in *i)
  {
-  i->output->f = integer_get (input_integer_format, ss_data (i->input),
+  i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input),
                                MIN (8, ss_length (i->input)));
  
    apply_implied_decimals (i);
@@ -595,9 +602,10 @@ parse_PK (struct data_in *i)
  static bool
  parse_RB (struct data_in *i)
  {
-  size_t size = float_get_size (input_float_format);
+  enum float_format ff = settings_get_input_float_format ();
+  size_t size = float_get_size (ff);
    if (ss_length (i->input) >= size)
-    float_convert (input_float_format, ss_data (i->input),
+    float_convert (ff, ss_data (i->input),
                     FLOAT_NATIVE_DOUBLE, &i->output->f);
    else
      i->output->f = SYSMIS;
@@ -609,8 +617,18 @@ parse_RB (struct data_in *i)
  static bool
  parse_A (struct data_in *i)
  {
-  buf_copy_rpad (i->output->s, i->width,
-                 ss_data (i->input), ss_length (i->input));
+  /* This is equivalent to buf_copy_rpad, except that we possibly
+     do a character set recoding in the middle. */
+  uint8_t *dst = value_str_rw (i->output, i->width);
+  size_t dst_size = i->width;
+  const char *src = ss_data (i->input);
+  size_t src_size = ss_length (i->input);
+
+  memcpy (dst, src, MIN (src_size, dst_size));
+
+  if (dst_size > src_size)
+    memset (&dst[src_size], ' ', dst_size - src_size);
+
    return true;
  }
  
@@ -618,6 +636,7 @@ parse_A (struct data_in *i)
  static bool
  parse_AHEX (struct data_in *i)
  {
+  uint8_t *s = value_str_rw (i->output, i->width);
    size_t j;
  
    for (j = 0; ; j++)
@@ -632,6 +651,11 @@ parse_AHEX (struct data_in *i)
            return false;
          }
  
+      if (0 != strcmp (i->src_enc, LEGACY_NATIVE))
+        {
+          hi = legacy_to_native (i->src_enc, hi);
+          lo = legacy_to_native (i->src_enc, lo);
+        }
        if (!c_isxdigit (hi) || !c_isxdigit (lo))
         {
           data_warning (i, _("Field must contain only hex digits."));
@@ -639,10 +663,10 @@ parse_AHEX (struct data_in *i)
         }
  
        if (j < i->width)
-        i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo);
+        s[j] = hexit_value (hi) * 16 + hexit_value (lo);
      }
  
-  memset (i->output->s + j, ' ', i->width - j);
+  memset (&s[j], ' ', i->width - j);
  
    return true;
  }
@@ -761,7 +785,7 @@ parse_name_token (struct data_in *i)
     exact matches (except for case) are allowed.
     Returns true if successful, false otherwise. */
  static bool
-match_name (struct substring token, const char **names, long *output)
+match_name (struct substring token, const char *const *names, long *output)
  {
    int i;
  
@@ -790,14 +814,14 @@ parse_month (struct data_in *i, long *month)
      }
    else
      {
-      static const char *english_names[] =
+      static const char *const english_names[] =
          {
            "jan", "feb", "mar", "apr", "may", "jun",
            "jul", "aug", "sep", "oct", "nov", "dec",
            NULL,
          };
  
-      static const char *roman_names[] =
+      static const char *const roman_names[] =
          {
            "i", "ii", "iii", "iv", "v", "vi",
            "vii", "viii", "ix", "x", "xi", "xii",
@@ -826,7 +850,7 @@ parse_year (struct data_in *i, long *year, size_t max_digits)
  
    if (*year >= 0 && *year <= 99)
      {
-      int epoch = get_epoch ();
+      int epoch = settings_get_epoch ();
        int epoch_century = ROUND_DOWN (epoch, 100);
        int epoch_offset = epoch - epoch_century;
        if (*year >= epoch_offset)
@@ -961,7 +985,7 @@ parse_minute_second (struct data_in *i, double *time)
    cp = buf;
    while (c_isdigit (ss_first (i->input)))
      *cp++ = ss_get_char (&i->input);
-  if (ss_match_char (&i->input, fmt_decimal_char (FMT_F)))
+  if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F)))
      *cp++ = '.';
    while (c_isdigit (ss_first (i->input)))
      *cp++ = ss_get_char (&i->input);
@@ -978,7 +1002,7 @@ parse_minute_second (struct data_in *i, double *time)
  static bool
  parse_weekday (struct data_in *i, long *weekday)
  {
-  static const char *weekday_names[] =
+  static const char *const weekday_names[] =
      {
        "su", "mo", "tu", "we", "th", "fr", "sa",
        NULL,
@@ -1165,19 +1189,21 @@ vdata_warning (const struct data_in *i, const char *format, va_list args)
    ds_put_char (&text, '(');
    if (i->first_column != 0)
      {
-      if (i->first_column == i->last_column)
+      if (i->first_column == i->last_column - 1)
          ds_put_format (&text, _("column %d"), i->first_column);
        else
          ds_put_format (&text, _("columns %d-%d"),
-                       i->first_column, i->last_column);
+                       i->first_column, i->last_column - 1);
        ds_put_cstr (&text, ", ");
      }
    ds_put_format (&text, _("%s field) "), fmt_name (i->format));
    ds_put_vformat (&text, format, args);
  
-  m.category = MSG_DATA;
-  m.severity = MSG_WARNING;
+  m.category = MSG_C_DATA;
+  m.severity = MSG_S_WARNING;
    m.text = ds_cstr (&text);
+  m.where.file_name = NULL;
+  m.where.line_number = -1;
  
    msg_emit (&m);
  }
@@ -1210,9 +1236,9 @@ static void
  default_result (struct data_in *i)
  {
    if (fmt_is_string (i->format))
-    memset (i->output->s, ' ', i->width);
+    memset (value_str_rw (i->output, i->width), ' ', i->width);
    else
-    i->output->f = get_blanks ();
+    i->output->f = settings_get_blanks ();
  }
  
  /* Trims leading and trailing spaces from I.