/* PSPP - a program for statistical analysis.
- Copyright (C) 2008 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include "format-guesser.h"
+#include "data/format-guesser.h"
#include <stdlib.h>
#include <string.h>
-#include "c-ctype.h"
-#include "minmax.h"
-#include "xalloc.h"
+#include "data/format.h"
+#include "data/settings.h"
+#include "libpspp/assertion.h"
+#include "libpspp/str.h"
-#include <data/format.h>
-#include <data/settings.h>
-#include <libpspp/assertion.h>
-#include <libpspp/str.h>
+#include "gl/c-ctype.h"
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
/* A token in which potential date or time fields are broken.
11, {DT_DAY, DT_DELIM, DT_MONTH, DT_DELIM, DT_YEAR, DT_SPACE, DT_HOUR,
DT_COLON, DT_MINUTE, DT_COLON, DT_SECOND} },
+ /* yyyy-mmm-dd HH:MM */
+ { FMT_YMDHMS,
+ 9, {DT_YEAR, DT_DELIM, DT_MONTH, DT_DELIM, DT_DAY, DT_SPACE, DT_HOUR,
+ DT_COLON, DT_MINUTE} },
+ /* yyyy-mmm-dd HH:MM:SS */
+ { FMT_YMDHMS,
+ 11, {DT_YEAR, DT_DELIM, DT_MONTH, DT_DELIM, DT_DAY, DT_SPACE, DT_HOUR,
+ DT_COLON, DT_MINUTE, DT_COLON, DT_SECOND} },
+
/* HH:MM */
{ FMT_TIME, 3, {DT_HOUR, DT_COLON, DT_MINUTE} },
/* HH:MM:SS */
/* www */
{ FMT_WKDAY, 1, {DT_WEEKDAY} },
- /* mmm */
- { FMT_MONTH, 1, {DT_MONTH} },
+ /* mmm
+
+ We require a spelled-out English month so that
+ single-character Roman numerals like "i" and "x" don't get
+ detected as months. The latter is particularly common in
+ the password field of /etc/passwd-like files. */
+ { FMT_MONTH, 1, {DT_ENGLISH_MONTH} },
};
/* Number of recognized date syntax formats. */
int c;
/* Skip leading "$" and optional following white space. */
- has_dollar = ss_match_char (&s, '$');
+ has_dollar = ss_match_byte (&s, '$');
if (has_dollar)
ss_ltrim (&s, ss_cstr (CC_SPACES));
/* Skip optional sign. */
- ss_match_char_in (&s, ss_cstr ("+-"));
+ ss_match_byte_in (&s, ss_cstr ("+-"));
/* Skip digits punctuated by commas and dots. We don't know
whether the decimal point is a comma or a dot, so for now we
if (dots || commas)
delim_digits++;
}
- else if (c == '.' )
+ else if (c == '.')
{
dots++;
prev_delim = c;
}
/* Skip the optional exponent. */
- has_exp = ss_match_char_in (&s, ss_cstr ("eEdD")) != EOF;
- has_exp_sign = ss_match_char_in (&s, ss_cstr ("-+")) != EOF;
+ has_exp = ss_match_byte_in (&s, ss_cstr ("eEdD")) != EOF;
+ has_exp_sign = ss_match_byte_in (&s, ss_cstr ("-+")) != EOF;
if (has_exp_sign)
- ss_match_char (&s, ' ');
+ ss_match_byte (&s, ' ');
exp_digits = ss_ltrim (&s, ss_cstr (CC_DIGITS));
if ((has_exp || has_exp_sign) && !exp_digits)
{
}
/* Skip optional '%'. */
- has_percent = ss_match_char (&s, '%');
+ has_percent = ss_match_byte (&s, '%');
if (has_dollar && has_percent)
{
/* A valid number cannot have both '$' and '%'. */
}
\f
/* Tries to parse S as a date (DATE, ADATE, EDATE, SDATE, QYR,
- MOYR, WKYR, or DATETIME), time (TIME or DTIME), or date
- component (WKDAY or MONTH) format. If successful, increments
- G's any_date counter and the counter or counters for the
- specific format(s) that S matches. On failure, does not
- modify G.
+ MOYR, WKYR, DATETIME, or YMDHMS), time (TIME or DTIME), or
+ date component (WKDAY or MONTH) format. If successful,
+ increments G's any_date counter and the counter or counters
+ for the specific format(s) that S matches. On failure, does
+ not modify G.
+
+ XXX How can we distinguish MTIME from TIME? One way might be
+ that TIME can have three parts (HH:MM:SS) but MTIME only ever
+ has two (MM:SS).
Does not attempt to recognize JDATE format: it looks just like
F format and will thus be caught by the numeric parser.
(We use the minimum input width, but an output width would
be equally appropriate, since all the time formats have the
same minimum widths for input and output.) */
- if (f->type == FMT_DATETIME || f->type == FMT_TIME
- || f->type == FMT_DTIME)
+ if (f->type == FMT_DATETIME || f->type == FMT_YMDHMS
+ || f->type == FMT_MTIME || f->type == FMT_TIME || f->type == FMT_DTIME)
{
for (i = 0; i < DATE_SYNTAX_CNT; i++)
if (g->date[i]
ss_advance (s, 1);
token = recognize_identifier_token (s);
if (token)
- ss_match_char_in (s, ss_cstr (CC_SPACES));
+ ss_match_byte_in (s, ss_cstr (CC_SPACES));
else
token = DT_DELIM | DT_SPACE;
return token;
size_t digit_cnt = ss_get_long (s, &value);
enum date_token token = 0;
- if (ss_match_char (s, settings_get_decimal_char (FMT_F))
+ if (ss_match_byte (s, settings_get_decimal_char (FMT_F))
&& tokens_seen & DT_COLON
&& value <= 59)
{