X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fformat-guesser.c;h=a9953a532cce8442362ba0193566773014f5b8ec;hb=142f8f8814423f76523825f8df060e2fa9d2a2b6;hp=78af8dfa269ed3ee70ff35782799cdddcc973d93;hpb=3e3d825afe59ad43699664a74ca04e2e1b836786;p=pspp

diff --git a/src/data/format-guesser.c b/src/data/format-guesser.c
index 78af8dfa26..a9953a532c 100644
--- a/src/data/format-guesser.c
+++ b/src/data/format-guesser.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2008 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2010, 2011 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -16,19 +16,19 @@
 
 #include <config.h>
 
-#include "format-guesser.h"
+#include "data/format-guesser.h"
 
 #include <stdlib.h>
 #include <string.h>
 
-#include "c-ctype.h"
-#include "minmax.h"
-#include "xalloc.h"
+#include "data/format.h"
+#include "data/settings.h"
+#include "libpspp/assertion.h"
+#include "libpspp/str.h"
 
-#include <data/format.h>
-#include <data/settings.h>
-#include <libpspp/assertion.h>
-#include <libpspp/str.h>
+#include "gl/c-ctype.h"
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
 
 /* A token in which potential date or time fields are broken.
 
@@ -70,7 +70,7 @@ struct date_syntax
   {
     enum fmt_type format;       /* Format type. */
 #define MAX_TOKENS 11
-    size_t token_cnt;           /* Number of tokens. */
+    size_t n_tokens;           /* Number of tokens. */
     enum date_token tokens[MAX_TOKENS]; /* Tokens. */
   };
 
@@ -120,6 +120,15 @@ static struct date_syntax syntax[] =
       11, {DT_DAY, DT_DELIM, DT_MONTH, DT_DELIM, DT_YEAR, DT_SPACE, DT_HOUR,
            DT_COLON, DT_MINUTE, DT_COLON, DT_SECOND} },
 
+    /* yyyy-mmm-dd HH:MM */
+    { FMT_YMDHMS,
+      9, {DT_YEAR, DT_DELIM, DT_MONTH, DT_DELIM, DT_DAY, DT_SPACE, DT_HOUR,
+          DT_COLON, DT_MINUTE} },
+    /* yyyy-mmm-dd HH:MM:SS */
+    { FMT_YMDHMS,
+      11, {DT_YEAR, DT_DELIM, DT_MONTH, DT_DELIM, DT_DAY, DT_SPACE, DT_HOUR,
+           DT_COLON, DT_MINUTE, DT_COLON, DT_SECOND} },
+
     /* HH:MM */
     { FMT_TIME, 3, {DT_HOUR, DT_COLON, DT_MINUTE} },
     /* HH:MM:SS */
@@ -135,8 +144,13 @@ static struct date_syntax syntax[] =
     /* www */
     { FMT_WKDAY, 1, {DT_WEEKDAY} },
 
-    /* mmm */
-    { FMT_MONTH, 1, {DT_MONTH} },
+    /* mmm
+
+       We require a spelled-out English month so that
+       single-character Roman numerals like "i" and "x" don't get
+       detected as months.  The latter is particularly common in
+       the password field of /etc/passwd-like files. */
+    { FMT_MONTH, 1, {DT_ENGLISH_MONTH} },
   };
 
 /* Number of recognized date syntax formats. */
@@ -299,12 +313,12 @@ add_numeric (struct fmt_guesser *g, struct substring s)
   int c;
 
   /* Skip leading "$" and optional following white space. */
-  has_dollar = ss_match_char (&s, '$');
+  has_dollar = ss_match_byte (&s, '$');
   if (has_dollar)
     ss_ltrim (&s, ss_cstr (CC_SPACES));
 
   /* Skip optional sign. */
-  ss_match_char_in (&s, ss_cstr ("+-"));
+  ss_match_byte_in (&s, ss_cstr ("+-"));
 
   /* Skip digits punctuated by commas and dots.  We don't know
      whether the decimal point is a comma or a dot, so for now we
@@ -320,7 +334,7 @@ add_numeric (struct fmt_guesser *g, struct substring s)
           if (dots || commas)
             delim_digits++;
         }
-      else if (c == '.' )
+      else if (c == '.')
         {
           dots++;
           prev_delim = c;
@@ -343,10 +357,10 @@ add_numeric (struct fmt_guesser *g, struct substring s)
     }
 
   /* Skip the optional exponent. */
-  has_exp = ss_match_char_in (&s, ss_cstr ("eEdD")) != EOF;
-  has_exp_sign = ss_match_char_in (&s, ss_cstr ("-+")) != EOF;
+  has_exp = ss_match_byte_in (&s, ss_cstr ("eEdD")) != EOF;
+  has_exp_sign = ss_match_byte_in (&s, ss_cstr ("-+")) != EOF;
   if (has_exp_sign)
-    ss_match_char (&s, ' ');
+    ss_match_byte (&s, ' ');
   exp_digits = ss_ltrim (&s, ss_cstr (CC_DIGITS));
   if ((has_exp || has_exp_sign) && !exp_digits)
     {
@@ -356,7 +370,7 @@ add_numeric (struct fmt_guesser *g, struct substring s)
     }
 
   /* Skip optional '%'. */
-  has_percent = ss_match_char (&s, '%');
+  has_percent = ss_match_byte (&s, '%');
   if (has_dollar && has_percent)
     {
       /* A valid number cannot have both '$' and '%'. */
@@ -390,7 +404,7 @@ add_numeric (struct fmt_guesser *g, struct substring s)
          can't tell whether the ',' or '.' is a grouping or
          decimal character.  Assume that the decimal character
          from the settings is in use. */
-      if (prev_delim == settings_get_decimal_char (FMT_F))
+      if (prev_delim == settings_get_fmt_settings ()->decimal)
         {
           decimal = prev_delim;
           precision = delim_digits;
@@ -434,7 +448,7 @@ add_numeric (struct fmt_guesser *g, struct substring s)
 static void
 guess_numeric (struct fmt_guesser *g, struct fmt_spec *f)
 {
-  int decimal_char = settings_get_decimal_char (FMT_COMMA);
+  int decimal_char = settings_get_fmt_settings ()->decimal;
 
   f->d = g->decimals / g->count;
   if (g->pct)
@@ -452,11 +466,15 @@ guess_numeric (struct fmt_guesser *g, struct fmt_spec *f)
 }
 
 /* Tries to parse S as a date (DATE, ADATE, EDATE, SDATE, QYR,
-   MOYR, WKYR, or DATETIME), time (TIME or DTIME), or date
-   component (WKDAY or MONTH) format.  If successful, increments
-   G's any_date counter and the counter or counters for the
-   specific format(s) that S matches.  On failure, does not
-   modify G.
+   MOYR, WKYR, DATETIME, or YMDHMS), time (TIME or DTIME), or
+   date component (WKDAY or MONTH) format.  If successful,
+   increments G's any_date counter and the counter or counters
+   for the specific format(s) that S matches.  On failure, does
+   not modify G.
+
+   XXX How can we distinguish MTIME from TIME?  One way might be
+   that TIME can have three parts (HH:MM:SS) but MTIME only ever
+   has two (MM:SS).
 
    Does not attempt to recognize JDATE format: it looks just like
    F format and will thus be caught by the numeric parser.
@@ -473,27 +491,27 @@ add_date_time (struct fmt_guesser *g, struct substring s)
   enum date_token token;
   enum date_token tokens[MAX_TOKENS];
   enum date_token tokens_seen;
-  size_t token_cnt;
+  size_t n_tokens;
   int decimals;
   bool is_date;
   int i;
 
   /* Break S into tokens. */
-  token_cnt = 0;
+  n_tokens = 0;
   tokens_seen = 0;
   decimals = 0;
   while (!ss_is_empty (s))
     {
-      if (token_cnt >= MAX_TOKENS)
+      if (n_tokens >= MAX_TOKENS)
         return;
 
       token = parse_date_token (&s, tokens_seen, &decimals);
       if (token == 0)
         return;
-      tokens[token_cnt++] = token;
+      tokens[n_tokens++] = token;
       tokens_seen |= token;
     }
-  if (token_cnt == 0)
+  if (n_tokens == 0)
     return;
 
   /* Find matching date formats, if any, and increment the
@@ -502,7 +520,7 @@ add_date_time (struct fmt_guesser *g, struct substring s)
   for (i = 0; i < DATE_SYNTAX_CNT; i++)
     {
       struct date_syntax *s = &syntax[i];
-      if (match_date_syntax (tokens, token_cnt, s->tokens, s->token_cnt))
+      if (match_date_syntax (tokens, n_tokens, s->tokens, s->n_tokens))
         {
           is_date = true;
           g->date[i]++;
@@ -566,12 +584,12 @@ guess_date_time (struct fmt_guesser *g, struct fmt_spec *f)
      (We use the minimum input width, but an output width would
      be equally appropriate, since all the time formats have the
      same minimum widths for input and output.)  */
-  if (f->type == FMT_DATETIME || f->type == FMT_TIME
-      || f->type == FMT_DTIME)
+  if (f->type == FMT_DATETIME || f->type == FMT_YMDHMS
+      || f->type == FMT_MTIME || f->type == FMT_TIME || f->type == FMT_DTIME)
     {
       for (i = 0; i < DATE_SYNTAX_CNT; i++)
         if (g->date[i]
-            && syntax[i].tokens[syntax[i].token_cnt - 1] == DT_SECOND)
+            && syntax[i].tokens[syntax[i].n_tokens - 1] == DT_SECOND)
           {
             f->d = g->decimals / g->count;
             f->w = MAX (f->w, fmt_min_input_width (f->type) + 3);
@@ -630,7 +648,7 @@ parse_date_token (struct substring *s, enum date_token tokens_seen,
         ss_advance (s, 1);
         token = recognize_identifier_token (s);
         if (token)
-          ss_match_char_in (s, ss_cstr (CC_SPACES));
+          ss_match_byte_in (s, ss_cstr (CC_SPACES));
         else
           token = DT_DELIM | DT_SPACE;
         return token;
@@ -658,10 +676,10 @@ parse_date_number (struct substring *s, enum date_token tokens_seen,
                    int *decimals)
 {
   long int value;
-  size_t digit_cnt = ss_get_long (s, &value);
+  size_t n_digits = ss_get_long (s, &value);
   enum date_token token = 0;
 
-  if (ss_match_char (s, settings_get_decimal_char (FMT_F))
+  if (ss_match_byte (s, settings_get_fmt_settings ()->decimal)
       && tokens_seen & DT_COLON
       && value <= 59)
     {
@@ -685,13 +703,13 @@ parse_date_number (struct substring *s, enum date_token tokens_seen,
       else
         token = DT_DAY_COUNT;
 
-      if (digit_cnt == 2)
+      if (n_digits == 2)
         {
           token |= DT_YEAR;
           if (value <= 59)
             token |= DT_MINUTE | DT_SECOND;
         }
-      else if (digit_cnt == 4)
+      else if (n_digits == 4)
         token |= DT_YEAR;
     }