/* PSPP - a program for statistical analysis.
- Copyright (C) 2008 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include "format-guesser.h"
+#include "data/format-guesser.h"
#include <stdlib.h>
#include <string.h>
-#include "c-ctype.h"
-#include "minmax.h"
-#include "xalloc.h"
+#include "data/format.h"
+#include "data/settings.h"
+#include "libpspp/assertion.h"
+#include "libpspp/str.h"
-#include <data/format.h>
-#include <data/settings.h>
-#include <libpspp/assertion.h>
-#include <libpspp/str.h>
+#include "gl/c-ctype.h"
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
/* A token in which potential date or time fields are broken.
{
enum fmt_type format; /* Format type. */
#define MAX_TOKENS 11
- size_t token_cnt; /* Number of tokens. */
+ size_t n_tokens; /* Number of tokens. */
enum date_token tokens[MAX_TOKENS]; /* Tokens. */
};
11, {DT_DAY, DT_DELIM, DT_MONTH, DT_DELIM, DT_YEAR, DT_SPACE, DT_HOUR,
DT_COLON, DT_MINUTE, DT_COLON, DT_SECOND} },
+ /* yyyy-mmm-dd HH:MM */
+ { FMT_YMDHMS,
+ 9, {DT_YEAR, DT_DELIM, DT_MONTH, DT_DELIM, DT_DAY, DT_SPACE, DT_HOUR,
+ DT_COLON, DT_MINUTE} },
+ /* yyyy-mmm-dd HH:MM:SS */
+ { FMT_YMDHMS,
+ 11, {DT_YEAR, DT_DELIM, DT_MONTH, DT_DELIM, DT_DAY, DT_SPACE, DT_HOUR,
+ DT_COLON, DT_MINUTE, DT_COLON, DT_SECOND} },
+
/* HH:MM */
{ FMT_TIME, 3, {DT_HOUR, DT_COLON, DT_MINUTE} },
/* HH:MM:SS */
/* www */
{ FMT_WKDAY, 1, {DT_WEEKDAY} },
- /* mmm */
- { FMT_MONTH, 1, {DT_MONTH} },
+ /* mmm
+
+ We require a spelled-out English month so that
+ single-character Roman numerals like "i" and "x" don't get
+ detected as months. The latter is particularly common in
+ the password field of /etc/passwd-like files. */
+ { FMT_MONTH, 1, {DT_ENGLISH_MONTH} },
};
/* Number of recognized date syntax formats. */
int c;
/* Skip leading "$" and optional following white space. */
- has_dollar = ss_match_char (&s, '$');
+ has_dollar = ss_match_byte (&s, '$');
if (has_dollar)
ss_ltrim (&s, ss_cstr (CC_SPACES));
/* Skip optional sign. */
- ss_match_char_in (&s, ss_cstr ("+-"));
+ ss_match_byte_in (&s, ss_cstr ("+-"));
/* Skip digits punctuated by commas and dots. We don't know
whether the decimal point is a comma or a dot, so for now we
if (dots || commas)
delim_digits++;
}
- else if (c == '.' )
+ else if (c == '.')
{
dots++;
prev_delim = c;
}
/* Skip the optional exponent. */
- has_exp = ss_match_char_in (&s, ss_cstr ("eEdD")) != EOF;
- has_exp_sign = ss_match_char_in (&s, ss_cstr ("-+")) != EOF;
+ has_exp = ss_match_byte_in (&s, ss_cstr ("eEdD")) != EOF;
+ has_exp_sign = ss_match_byte_in (&s, ss_cstr ("-+")) != EOF;
if (has_exp_sign)
- ss_match_char (&s, ' ');
+ ss_match_byte (&s, ' ');
exp_digits = ss_ltrim (&s, ss_cstr (CC_DIGITS));
if ((has_exp || has_exp_sign) && !exp_digits)
{
}
/* Skip optional '%'. */
- has_percent = ss_match_char (&s, '%');
+ has_percent = ss_match_byte (&s, '%');
if (has_dollar && has_percent)
{
/* A valid number cannot have both '$' and '%'. */
can't tell whether the ',' or '.' is a grouping or
decimal character. Assume that the decimal character
from the settings is in use. */
- if (prev_delim == settings_get_decimal_char (FMT_F))
+ if (prev_delim == settings_get_fmt_settings ()->decimal)
{
decimal = prev_delim;
precision = delim_digits;
static void
guess_numeric (struct fmt_guesser *g, struct fmt_spec *f)
{
- int decimal_char = settings_get_decimal_char (FMT_COMMA);
+ int decimal_char = settings_get_fmt_settings ()->decimal;
f->d = g->decimals / g->count;
if (g->pct)
}
\f
/* Tries to parse S as a date (DATE, ADATE, EDATE, SDATE, QYR,
- MOYR, WKYR, or DATETIME), time (TIME or DTIME), or date
- component (WKDAY or MONTH) format. If successful, increments
- G's any_date counter and the counter or counters for the
- specific format(s) that S matches. On failure, does not
- modify G.
+ MOYR, WKYR, DATETIME, or YMDHMS), time (TIME or DTIME), or
+ date component (WKDAY or MONTH) format. If successful,
+ increments G's any_date counter and the counter or counters
+ for the specific format(s) that S matches. On failure, does
+ not modify G.
+
+ XXX How can we distinguish MTIME from TIME? One way might be
+ that TIME can have three parts (HH:MM:SS) but MTIME only ever
+ has two (MM:SS).
Does not attempt to recognize JDATE format: it looks just like
F format and will thus be caught by the numeric parser.
enum date_token token;
enum date_token tokens[MAX_TOKENS];
enum date_token tokens_seen;
- size_t token_cnt;
+ size_t n_tokens;
int decimals;
bool is_date;
int i;
/* Break S into tokens. */
- token_cnt = 0;
+ n_tokens = 0;
tokens_seen = 0;
decimals = 0;
while (!ss_is_empty (s))
{
- if (token_cnt >= MAX_TOKENS)
+ if (n_tokens >= MAX_TOKENS)
return;
token = parse_date_token (&s, tokens_seen, &decimals);
if (token == 0)
return;
- tokens[token_cnt++] = token;
+ tokens[n_tokens++] = token;
tokens_seen |= token;
}
- if (token_cnt == 0)
+ if (n_tokens == 0)
return;
/* Find matching date formats, if any, and increment the
for (i = 0; i < DATE_SYNTAX_CNT; i++)
{
struct date_syntax *s = &syntax[i];
- if (match_date_syntax (tokens, token_cnt, s->tokens, s->token_cnt))
+ if (match_date_syntax (tokens, n_tokens, s->tokens, s->n_tokens))
{
is_date = true;
g->date[i]++;
(We use the minimum input width, but an output width would
be equally appropriate, since all the time formats have the
same minimum widths for input and output.) */
- if (f->type == FMT_DATETIME || f->type == FMT_TIME
- || f->type == FMT_DTIME)
+ if (f->type == FMT_DATETIME || f->type == FMT_YMDHMS
+ || f->type == FMT_MTIME || f->type == FMT_TIME || f->type == FMT_DTIME)
{
for (i = 0; i < DATE_SYNTAX_CNT; i++)
if (g->date[i]
- && syntax[i].tokens[syntax[i].token_cnt - 1] == DT_SECOND)
+ && syntax[i].tokens[syntax[i].n_tokens - 1] == DT_SECOND)
{
f->d = g->decimals / g->count;
f->w = MAX (f->w, fmt_min_input_width (f->type) + 3);
ss_advance (s, 1);
token = recognize_identifier_token (s);
if (token)
- ss_match_char_in (s, ss_cstr (CC_SPACES));
+ ss_match_byte_in (s, ss_cstr (CC_SPACES));
else
token = DT_DELIM | DT_SPACE;
return token;
int *decimals)
{
long int value;
- size_t digit_cnt = ss_get_long (s, &value);
+ size_t n_digits = ss_get_long (s, &value);
enum date_token token = 0;
- if (ss_match_char (s, settings_get_decimal_char (FMT_F))
+ if (ss_match_byte (s, settings_get_fmt_settings ()->decimal)
&& tokens_seen & DT_COLON
&& value <= 59)
{
else
token = DT_DAY_COUNT;
- if (digit_cnt == 2)
+ if (n_digits == 2)
{
token |= DT_YEAR;
if (value <= 59)
token |= DT_MINUTE | DT_SECOND;
}
- else if (digit_cnt == 4)
+ else if (n_digits == 4)
token |= DT_YEAR;
}