/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "settings.h"
#include "value.h"
#include "format.h"
+#include "dictionary.h"
#include <libpspp/assertion.h>
#include <libpspp/legacy-encoding.h>
+#include <libpspp/i18n.h>
#include <libpspp/compiler.h>
#include <libpspp/integer-format.h>
#include <libpspp/message.h>
/* Information about parsing one data field. */
struct data_in
{
- enum legacy_encoding encoding;/* Encoding of source. */
+ const char *src_enc; /* Encoding of source. */
struct substring input; /* Source. */
enum fmt_type format; /* Input format. */
int implied_decimals; /* Number of implied decimal places. */
\f
/* Parses the characters in INPUT, which are encoded in the given
ENCODING, according to FORMAT. Stores the parsed
- representation in OUTPUT, which has the given WIDTH (0 for
- a numeric field, otherwise the string width).
+ representation in OUTPUT, which the caller must have
+ initialized with the given WIDTH (0 for a numeric field,
+ otherwise the string width).
+ Iff FORMAT is a string format, then DICT must be a pointer
+ to the dictionary associated with OUTPUT. Otherwise, DICT
+ may be null.
If no decimal point is included in a numeric format, then
IMPLIED_DECIMALS decimal places are implied. Specify 0 if no
FIRST_COLUMN plus the length of the input because of the
possibility of escaped quotes in strings, etc.) */
bool
-data_in (struct substring input, enum legacy_encoding encoding,
+data_in (struct substring input, const char *encoding,
enum fmt_type format, int implied_decimals,
- int first_column, int last_column, union value *output, int width)
+ int first_column, int last_column,
+ const struct dictionary *dict,
+ union value *output, int width)
{
static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
{
};
struct data_in i;
- void *copy = NULL;
+
+ char *s = NULL;
bool ok;
assert ((width != 0) == fmt_is_string (format));
- if (encoding == LEGACY_NATIVE
- || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))
- {
- i.input = input;
- i.encoding = encoding;
- }
- else
- {
- ss_alloc_uninit (&i.input, ss_length (input));
- legacy_recode (encoding, ss_data (input), LEGACY_NATIVE,
- ss_data (i.input), ss_length (input));
- i.encoding = LEGACY_NATIVE;
- copy = ss_data (i.input);
- }
i.format = format;
i.implied_decimals = implied_decimals;
i.first_column = first_column;
i.last_column = last_column;
+ i.src_enc = encoding;
- if (!ss_is_empty (i.input))
+ if (ss_is_empty (input))
{
- ok = handlers[i.format] (&i);
- if (!ok)
- default_result (&i);
+ default_result (&i);
+ return true;
+ }
+
+ if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY))
+ {
+ i.input = input;
}
else
{
- default_result (&i);
- ok = true;
+ const char *dest_encoding;
+
+ if ( dict == NULL)
+ {
+ assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)));
+ dest_encoding = LEGACY_NATIVE;
+ }
+ else
+ dest_encoding = dict_get_encoding (dict);
+
+ s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input));
+ i.input = ss_cstr (s);
}
- if (copy)
- free (copy);
+ ok = handlers[i.format] (&i);
+ if (!ok)
+ default_result (&i);
+ free (s);
return ok;
}
{
/* This is equivalent to buf_copy_rpad, except that we possibly
do a character set recoding in the middle. */
- char *dst = i->output->s;
+ uint8_t *dst = value_str_rw (i->output, i->width);
size_t dst_size = i->width;
const char *src = ss_data (i->input);
size_t src_size = ss_length (i->input);
- legacy_recode (i->encoding, src, LEGACY_NATIVE, dst, MIN (src_size, dst_size));
+ memcpy (dst, src, MIN (src_size, dst_size));
+
if (dst_size > src_size)
memset (&dst[src_size], ' ', dst_size - src_size);
static bool
parse_AHEX (struct data_in *i)
{
+ uint8_t *s = value_str_rw (i->output, i->width);
size_t j;
for (j = 0; ; j++)
return false;
}
- if (i->encoding != LEGACY_NATIVE)
+ if (0 != strcmp (i->src_enc, LEGACY_NATIVE))
{
- hi = legacy_to_native (i->encoding, hi);
- lo = legacy_to_native (i->encoding, lo);
+ hi = legacy_to_native (i->src_enc, hi);
+ lo = legacy_to_native (i->src_enc, lo);
}
if (!c_isxdigit (hi) || !c_isxdigit (lo))
{
}
if (j < i->width)
- i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo);
+ s[j] = hexit_value (hi) * 16 + hexit_value (lo);
}
- memset (i->output->s + j, ' ', i->width - j);
+ memset (&s[j], ' ', i->width - j);
return true;
}
ds_put_format (&text, _("%s field) "), fmt_name (i->format));
ds_put_vformat (&text, format, args);
- m.category = MSG_DATA;
- m.severity = MSG_WARNING;
+ m.category = MSG_C_DATA;
+ m.severity = MSG_S_WARNING;
m.text = ds_cstr (&text);
+ m.where.file_name = NULL;
+ m.where.line_number = -1;
msg_emit (&m);
}
default_result (struct data_in *i)
{
if (fmt_is_string (i->format))
- memset (i->output->s, ' ', i->width);
+ memset (value_str_rw (i->output, i->width), ' ', i->width);
else
i->output->f = settings_get_blanks ();
}