X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=33e369f971da8751f78e29e76f23d230e6e0227d;hb=a5097a183f00ab2d2dc538ba7094a4696e2fea04;hp=28d6da2a42b7b2629779838b28e80000e28f90f0;hpb=c1e28f93006b3151c30c6f5e77d88f177960e6d4;p=pspp-builds.git diff --git a/src/data/data-in.c b/src/data/data-in.c index 28d6da2a..33e369f9 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,15 +27,18 @@ #include #include #include +#include #include "calendar.h" #include "identifier.h" #include "settings.h" #include "value.h" #include "format.h" +#include "dictionary.h" #include #include +#include #include #include #include @@ -52,7 +55,7 @@ /* Information about parsing one data field. */ struct data_in { - enum legacy_encoding encoding;/* Encoding of source. */ + const char *src_enc; /* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ int implied_decimals; /* Number of implied decimal places. */ @@ -84,8 +87,12 @@ static int hexit_value (int c); /* Parses the characters in INPUT, which are encoded in the given ENCODING, according to FORMAT. Stores the parsed - representation in OUTPUT, which has the given WIDTH (0 for - a numeric field, otherwise the string width). + representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, + otherwise the string width). + Iff FORMAT is a string format, then DICT must be a pointer + to the dictionary associated with OUTPUT. Otherwise, DICT + may be null. If no decimal point is included in a numeric format, then IMPLIED_DECIMALS decimal places are implied. Specify 0 if no @@ -98,9 +105,11 @@ static int hexit_value (int c); FIRST_COLUMN plus the length of the input because of the possibility of escaped quotes in strings, etc.) */ bool -data_in (struct substring input, enum legacy_encoding encoding, +data_in (struct substring input, const char *encoding, enum fmt_type format, int implied_decimals, - int first_column, int last_column, union value *output, int width) + int first_column, int last_column, + const struct dictionary *dict, + union value *output, int width) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -109,25 +118,11 @@ data_in (struct substring input, enum legacy_encoding encoding, }; struct data_in i; - void *copy = NULL; + bool ok; assert ((width != 0) == fmt_is_string (format)); - if (encoding == LEGACY_NATIVE - || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)) - { - i.input = input; - i.encoding = encoding; - } - else - { - ss_alloc_uninit (&i.input, ss_length (input)); - legacy_recode (encoding, ss_data (input), LEGACY_NATIVE, - ss_data (i.input), ss_length (input)); - i.encoding = LEGACY_NATIVE; - copy = ss_data (i.input); - } i.format = format; i.implied_decimals = implied_decimals; @@ -136,21 +131,39 @@ data_in (struct substring input, enum legacy_encoding encoding, i.first_column = first_column; i.last_column = last_column; + i.src_enc = encoding; - if (!ss_is_empty (i.input)) + if (ss_is_empty (input)) { - ok = handlers[i.format] (&i); - if (!ok) - default_result (&i); + default_result (&i); + return true; + } + + if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY)) + { + i.input = input; } else { - default_result (&i); - ok = true; + const char *dest_encoding; + char *s = NULL; + if ( dict == NULL) + { + assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))); + dest_encoding = LEGACY_NATIVE; + } + else + dest_encoding = dict_get_encoding (dict); + + s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input)); + ss_alloc_uninit (&i.input, strlen (s)); + memcpy (ss_data (i.input), s, ss_length (input)); + free (s); } - if (copy) - free (copy); + ok = handlers[i.format] (&i); + if (!ok) + default_result (&i); return ok; } @@ -606,12 +619,13 @@ parse_A (struct data_in *i) { /* This is equivalent to buf_copy_rpad, except that we posibly do a character set recoding in the middle. */ - char *dst = i->output->s; + uint8_t *dst = value_str_rw (i->output, i->width); size_t dst_size = i->width; const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); - legacy_recode (i->encoding, src, LEGACY_NATIVE, dst, MIN (src_size, dst_size)); + memcpy (dst, src, MIN (src_size, dst_size)); + if (dst_size > src_size) memset (&dst[src_size], ' ', dst_size - src_size); @@ -622,6 +636,7 @@ parse_A (struct data_in *i) static bool parse_AHEX (struct data_in *i) { + uint8_t *s = value_str_rw (i->output, i->width); size_t j; for (j = 0; ; j++) @@ -636,10 +651,10 @@ parse_AHEX (struct data_in *i) return false; } - if (i->encoding != LEGACY_NATIVE) + if (0 != strcmp (i->src_enc, LEGACY_NATIVE)) { - hi = legacy_to_native (i->encoding, hi); - lo = legacy_to_native (i->encoding, lo); + hi = legacy_to_native (i->src_enc, hi); + lo = legacy_to_native (i->src_enc, lo); } if (!c_isxdigit (hi) || !c_isxdigit (lo)) { @@ -648,10 +663,10 @@ parse_AHEX (struct data_in *i) } if (j < i->width) - i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo); + s[j] = hexit_value (hi) * 16 + hexit_value (lo); } - memset (i->output->s + j, ' ', i->width - j); + memset (&s[j], ' ', i->width - j); return true; } @@ -1219,7 +1234,7 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (i->output->s, ' ', i->width); + memset (value_str_rw (i->output, i->width), ' ', i->width); else i->output->f = settings_get_blanks (); }