X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Finpt-pgm.c;h=b9e2c326feb86374b6e17f7afc70568b38d61e63;hb=60c545e6e958d868db3399a8989d37d8f9e0c131;hp=59d7bfa72c2d6abcf2fa6a3ff5ac6d4e90e9ba56;hpb=43b1296aafe7582e7dbe6c2b6a8b478d7d9b0fcf;p=pspp diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c index 59d7bfa72c..b9e2c326fe 100644 --- a/src/language/data-io/inpt-pgm.c +++ b/src/language/data-io/inpt-pgm.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,71 +16,61 @@ #include -#include - #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "data/case.h" +#include "data/caseinit.h" +#include "data/casereader-provider.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/session.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/data-reader.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/inpt-pgm.h" +#include "language/expressions/public.h" +#include "language/lexer/lexer.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) -/* Private result codes for use within INPUT PROGRAM. */ -enum cmd_result_extensions - { - CMD_END_INPUT_PROGRAM = CMD_PRIVATE_FIRST, - CMD_END_CASE - }; - /* Indicates how a `union value' should be initialized. */ -enum value_init_type - { - INP_NUMERIC = 01, /* Numeric. */ - INP_STRING = 0, /* String. */ - - INP_INIT_ONCE = 02, /* Initialize only once. */ - INP_REINIT = 0, /* Reinitialize for each iteration. */ - }; - struct input_program_pgm { - struct trns_chain *trns_chain; - enum trns_result restart; + struct session *session; + struct dataset *ds; + + struct trns_chain xforms; + size_t idx; + bool eof; casenumber case_nr; /* Incremented by END CASE transformation. */ struct caseinit *init; - size_t value_cnt; + struct caseproto *proto; }; static void destroy_input_program (struct input_program_pgm *); -static trns_proc_func end_case_trns_proc; -static trns_proc_func reread_trns_proc; -static trns_proc_func end_file_trns_proc; -static trns_free_func reread_trns_free; +static const struct trns_class end_case_trns_class; +static const struct trns_class reread_trns_class; +static const struct trns_class end_file_trns_class; static const struct casereader_class input_program_casereader_class; static bool inside_input_program; +static bool saw_END_CASE; +static bool saw_END_FILE; +static bool saw_DATA_LIST; /* Returns true if we're parsing the inside of a INPUT PROGRAM...END INPUT PROGRAM construct, false otherwise. */ @@ -90,124 +80,130 @@ in_input_program (void) return inside_input_program; } +void +data_list_seen (void) +{ + saw_DATA_LIST = true; +} + /* Emits an END CASE transformation for INP. */ static void -emit_END_CASE (struct dataset *ds, struct input_program_pgm *inp) +emit_END_CASE (struct dataset *ds) { - add_transformation (ds, end_case_trns_proc, NULL, inp); + add_transformation (ds, &end_case_trns_class, xzalloc (sizeof (bool))); } int cmd_input_program (struct lexer *lexer, struct dataset *ds) { - struct input_program_pgm *inp; - bool saw_END_CASE = false; - - proc_discard_active_file (ds); - if (lex_token (lexer) != '.') + if (!lex_match (lexer, T_ENDCMD)) return lex_end_of_command (lexer); - inp = xmalloc (sizeof *inp); - inp->trns_chain = NULL; - inp->init = NULL; + struct session *session = session_create (dataset_session (ds)); + struct dataset *inp_ds = dataset_create (session, "INPUT PROGRAM"); + + struct input_program_pgm *inp = xmalloc (sizeof *inp); + *inp = (struct input_program_pgm) { .session = session, .ds = inp_ds }; + proc_push_transformations (inp->ds); inside_input_program = true; - for (;;) + saw_END_CASE = saw_END_FILE = saw_DATA_LIST = false; + while (!lex_match_phrase (lexer, "END INPUT PROGRAM")) { - enum cmd_result result = cmd_parse_in_state (lexer, ds, CMD_STATE_INPUT_PROGRAM); - if (result == CMD_END_INPUT_PROGRAM) - break; - else if (result == CMD_END_CASE) - { - emit_END_CASE (ds, inp); - saw_END_CASE = true; - } - else if (cmd_result_is_failure (result) && result != CMD_FAILURE) + enum cmd_result result; + + result = cmd_parse_in_state (lexer, inp->ds, CMD_STATE_INPUT_PROGRAM); + if (result == CMD_EOF + || result == CMD_FINISH + || result == CMD_CASCADING_FAILURE) { + proc_pop_transformations (inp->ds, &inp->xforms); + if (result == CMD_EOF) - msg (SE, _("Unexpected end-of-file within INPUT PROGRAM.")); + msg (SE, _("Unexpected end-of-file within %s."), "INPUT PROGRAM"); inside_input_program = false; - proc_discard_active_file (ds); destroy_input_program (inp); return result; } } if (!saw_END_CASE) - emit_END_CASE (ds, inp); + emit_END_CASE (inp->ds); inside_input_program = false; + proc_pop_transformations (inp->ds, &inp->xforms); - if (dict_get_next_value_idx (dataset_dict (ds)) == 0) + if (!saw_DATA_LIST && !saw_END_FILE) + { + msg (SE, _("Input program must contain %s or %s."), "DATA LIST", "END FILE"); + destroy_input_program (inp); + return CMD_FAILURE; + } + if (dict_get_next_value_idx (dataset_dict (inp->ds)) == 0) { msg (SE, _("Input program did not create any variables.")); - proc_discard_active_file (ds); destroy_input_program (inp); return CMD_FAILURE; } - inp->trns_chain = proc_capture_transformations (ds); - trns_chain_finalize (inp->trns_chain); - - inp->restart = TRNS_CONTINUE; - /* Figure out how to initialize each input case. */ inp->init = caseinit_create (); - caseinit_mark_for_init (inp->init, dataset_dict (ds)); - inp->value_cnt = dict_get_next_value_idx (dataset_dict (ds)); + caseinit_mark_for_init (inp->init, dataset_dict (inp->ds)); + inp->proto = caseproto_ref (dict_get_proto (dataset_dict (inp->ds))); - proc_set_active_file_data ( - ds, casereader_create_sequential (NULL, inp->value_cnt, CASENUMBER_MAX, + dataset_set_dict (ds, dict_clone (dataset_dict (inp->ds))); + dataset_set_source ( + ds, casereader_create_sequential (NULL, inp->proto, CASENUMBER_MAX, &input_program_casereader_class, inp)); return CMD_SUCCESS; } -int -cmd_end_input_program (struct lexer *lexer UNUSED, struct dataset *ds UNUSED) -{ - assert (in_input_program ()); - return CMD_END_INPUT_PROGRAM; -} - -/* Returns true if STATE is valid given the transformations that - are allowed within INPUT PROGRAM. */ -static bool -is_valid_state (enum trns_result state) -{ - return (state == TRNS_CONTINUE - || state == TRNS_ERROR - || state == TRNS_END_FILE - || state >= 0); -} - -/* Reads one case into C. - Returns true if successful, false at end of file or if an +/* Reads and returns one case. + Returns the case if successful, null at end of file or if an I/O error occurred. */ -static bool -input_program_casereader_read (struct casereader *reader UNUSED, void *inp_, - struct ccase *c) +static struct ccase * +input_program_casereader_read (struct casereader *reader UNUSED, void *inp_) { struct input_program_pgm *inp = inp_; - case_create (c, inp->value_cnt); + if (inp->eof || !inp->xforms.n) + return NULL; + + struct ccase *c = case_create (inp->proto); + caseinit_init_vars (inp->init, c); - do + for (size_t i = inp->idx < inp->xforms.n ? inp->idx : 0; ; i++) { - assert (is_valid_state (inp->restart)); - if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE) + if (i >= inp->xforms.n) { - case_destroy (c); - return false; + i = 0; + c = case_unshare (c); + caseinit_update_left_vars (inp->init, c); + caseinit_init_vars (inp->init, c); } - caseinit_init_vars (inp->init, c); - inp->restart = trns_chain_execute (inp->trns_chain, inp->restart, - c, inp->case_nr); - assert (is_valid_state (inp->restart)); - caseinit_update_left_vars (inp->init, c); + const struct transformation *trns = &inp->xforms.xforms[i]; + switch (trns->class->execute (trns->aux, &c, inp->case_nr)) + { + case TRNS_END_CASE: + inp->case_nr++; + inp->idx = i; + return c; + + case TRNS_ERROR: + casereader_force_error (reader); + /* Fall through. */ + case TRNS_END_FILE: + inp->eof = true; + case_unref (c); + return NULL; + + case TRNS_CONTINUE: + break; + + default: + NOT_REACHED (); + } } - while (inp->restart < 0); - - return true; } static void @@ -215,8 +211,10 @@ destroy_input_program (struct input_program_pgm *pgm) { if (pgm != NULL) { - trns_chain_destroy (pgm->trns_chain); + session_destroy (pgm->session); + trns_chain_uninit (&pgm->xforms); caseinit_destroy (pgm->init); + caseproto_unref (pgm->proto); free (pgm); } } @@ -226,8 +224,6 @@ static void input_program_casereader_destroy (struct casereader *reader UNUSED, void *inp_) { struct input_program_pgm *inp = inp_; - if (inp->restart == TRNS_ERROR) - casereader_force_error (reader); destroy_input_program (inp); } @@ -240,24 +236,38 @@ static const struct casereader_class input_program_casereader_class = }; int -cmd_end_case (struct lexer *lexer, struct dataset *ds UNUSED) +cmd_end_case (struct lexer *lexer UNUSED, struct dataset *ds) { assert (in_input_program ()); - if (lex_token (lexer) == '.') - return CMD_END_CASE; - return lex_end_of_command (lexer); + emit_END_CASE (ds); + saw_END_CASE = true; + return CMD_SUCCESS; } /* Outputs the current case */ -int -end_case_trns_proc (void *inp_, struct ccase *c UNUSED, +static enum trns_result +end_case_trns_proc (void *resume_, struct ccase **c UNUSED, casenumber case_nr UNUSED) { - struct input_program_pgm *inp = inp_; - inp->case_nr++; - return TRNS_END_CASE; + bool *resume = resume_; + enum trns_result retval = *resume ? TRNS_CONTINUE : TRNS_END_CASE; + *resume = !*resume; + return retval; } +static bool +end_case_trns_free (void *resume) +{ + free (resume); + return true; +} + +static const struct trns_class end_case_trns_class = { + .name = "END CASE", + .execute = end_case_trns_proc, + .destroy = end_case_trns_free, +}; + /* REREAD transformation. */ struct reread_trns { @@ -272,55 +282,70 @@ cmd_reread (struct lexer *lexer, struct dataset *ds) struct file_handle *fh; /* File to be re-read. */ struct expression *e; /* Expression for column to set. */ struct reread_trns *t; /* Created transformation. */ + char *encoding = NULL; fh = fh_get_default_handle (); e = NULL; - while (lex_token (lexer) != '.') + while (lex_token (lexer) != T_ENDCMD) { if (lex_match_id (lexer, "COLUMN")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (e) { - msg (SE, _("COLUMN subcommand multiply specified.")); - expr_free (e); - return CMD_CASCADING_FAILURE; + lex_sbc_only_once ("COLUMN"); + goto error; } - e = expr_parse (lexer, ds, EXPR_NUMBER); + e = expr_parse (lexer, ds, VAL_NUMERIC); if (!e) - return CMD_CASCADING_FAILURE; + goto error; } else if (lex_match_id (lexer, "FILE")) { - lex_match (lexer, '='); - fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE); + lex_match (lexer, T_EQUALS); + fh_unref (fh); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL); if (fh == NULL) - { - expr_free (e); - return CMD_CASCADING_FAILURE; - } + goto error; + } + else if (lex_match_id (lexer, "ENCODING")) + { + lex_match (lexer, T_EQUALS); + if (!lex_force_string (lexer)) + goto error; + + free (encoding); + encoding = ss_xstrdup (lex_tokss (lexer)); + + lex_get (lexer); } else { lex_error (lexer, NULL); - expr_free (e); - return CMD_CASCADING_FAILURE; + goto error; } } t = xmalloc (sizeof *t); - t->reader = dfm_open_reader (fh, lexer); + t->reader = dfm_open_reader (fh, lexer, encoding); t->column = e; - add_transformation (ds, reread_trns_proc, reread_trns_free, t); + add_transformation (ds, &reread_trns_class, t); + fh_unref (fh); + free (encoding); return CMD_SUCCESS; + +error: + expr_free (e); + free (encoding); + return CMD_CASCADING_FAILURE; } /* Executes a REREAD transformation. */ -static int -reread_trns_proc (void *t_, struct ccase *c, casenumber case_num) +static enum trns_result +reread_trns_proc (void *t_, struct ccase **c, casenumber case_num) { struct reread_trns *t = t_; @@ -328,8 +353,8 @@ reread_trns_proc (void *t_, struct ccase *c, casenumber case_num) dfm_reread_record (t->reader, 1); else { - double column = expr_evaluate_num (t->column, c, case_num); - if (!finite (column) || column < 1) + double column = expr_evaluate_num (t->column, *c, case_num); + if (!isfinite (column) || column < 1) { msg (SE, _("REREAD: Column numbers must be positive finite " "numbers. Column set to 1.")); @@ -352,21 +377,33 @@ reread_trns_free (void *t_) return true; } +static const struct trns_class reread_trns_class = { + .name = "REREAD", + .execute = reread_trns_proc, + .destroy = reread_trns_free, +}; + /* Parses END FILE command. */ int -cmd_end_file (struct lexer *lexer, struct dataset *ds) +cmd_end_file (struct lexer *lexer UNUSED, struct dataset *ds) { assert (in_input_program ()); - add_transformation (ds, end_file_trns_proc, NULL, NULL); + add_transformation (ds, &end_file_trns_class, NULL); + saw_END_FILE = true; - return lex_end_of_command (lexer); + return CMD_SUCCESS; } /* Executes an END FILE transformation. */ -static int -end_file_trns_proc (void *trns_ UNUSED, struct ccase *c UNUSED, +static enum trns_result +end_file_trns_proc (void *trns_ UNUSED, struct ccase **c UNUSED, casenumber case_num UNUSED) { return TRNS_END_FILE; } + +static const struct trns_class end_file_trns_class = { + .name = "END FILE", + .execute = end_file_trns_proc, +};