X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Finpt-pgm.c;h=de8b810052671d9997d6cbdbea76988f00574c94;hb=2be9bee9da6a2ce27715e58128569594319abfa2;hp=1fcf0f0963a66780df88498ff89d84a6f66fe41a;hpb=77e551d23575da6b89f866612ab39c2b0497c9be;p=pspp-builds.git diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c index 1fcf0f09..de8b8100 100644 --- a/src/language/data-io/inpt-pgm.c +++ b/src/language/data-io/inpt-pgm.c @@ -1,309 +1,255 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include + #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + +#include "data/case.h" +#include "data/caseinit.h" +#include "data/casereader-provider.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/data-reader.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/inpt-pgm.h" +#include "language/expressions/public.h" +#include "language/lexer/lexer.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) -/* Indicates how a `union value' should be initialized. */ -enum value_init_type +/* Private result codes for use within INPUT PROGRAM. */ +enum cmd_result_extensions { - INP_NUMERIC = 01, /* Numeric. */ - INP_STRING = 0, /* String. */ - - INP_INIT_ONCE = 02, /* Initialize only once. */ - INP_REINIT = 0, /* Reinitialize for each iteration. */ + CMD_END_CASE = CMD_PRIVATE_FIRST }; -struct input_program_pgm +/* Indicates how a `union value' should be initialized. */ +struct input_program_pgm { - enum value_init_type *init; /* How to initialize each `union value'. */ - size_t init_cnt; /* Number of elements in inp_init. */ - size_t case_size; /* Size of case in bytes. */ + struct trns_chain *trns_chain; + enum trns_result restart; + + casenumber case_nr; /* Incremented by END CASE transformation. */ + + struct caseinit *init; + struct caseproto *proto; }; -static trns_proc_func end_case_trns_proc, reread_trns_proc, end_file_trns_proc; +static void destroy_input_program (struct input_program_pgm *); +static trns_proc_func end_case_trns_proc; +static trns_proc_func reread_trns_proc; +static trns_proc_func end_file_trns_proc; static trns_free_func reread_trns_free; -int -cmd_input_program (void) -{ - discard_variables (); +static const struct casereader_class input_program_casereader_class; - /* FIXME: we shouldn't do this here, but I'm afraid that other - code will check the class of vfm_source. */ - vfm_source = create_case_source (&input_program_source_class, NULL); +static bool inside_input_program; - return lex_end_of_command (); +/* Returns true if we're parsing the inside of a INPUT + PROGRAM...END INPUT PROGRAM construct, false otherwise. */ +bool +in_input_program (void) +{ + return inside_input_program; +} + +/* Emits an END CASE transformation for INP. */ +static void +emit_END_CASE (struct dataset *ds, struct input_program_pgm *inp) +{ + add_transformation (ds, end_case_trns_proc, NULL, inp); } int -cmd_end_input_program (void) +cmd_input_program (struct lexer *lexer, struct dataset *ds) { struct input_program_pgm *inp; - size_t i; + bool saw_END_CASE = false; - if (!case_source_is_class (vfm_source, &input_program_source_class)) + proc_discard_active_file (ds); + if (!lex_match (lexer, T_ENDCMD)) + return lex_end_of_command (lexer); + + inp = xmalloc (sizeof *inp); + inp->trns_chain = NULL; + inp->init = NULL; + inp->proto = NULL; + + inside_input_program = true; + while (!lex_match_phrase (lexer, "END INPUT PROGRAM")) { - msg (SE, _("No matching INPUT PROGRAM command.")); - return CMD_CASCADING_FAILURE; - } - - if (dict_get_next_value_idx (default_dict) == 0) - msg (SW, _("No data-input or transformation commands specified " - "between INPUT PROGRAM and END INPUT PROGRAM.")); + enum cmd_result result; - /* Mark the boundary between INPUT PROGRAM transformations and - ordinary transformations. */ - f_trns = n_trns; + result = cmd_parse_in_state (lexer, ds, CMD_STATE_INPUT_PROGRAM); + if (result == CMD_END_CASE) + { + emit_END_CASE (ds, inp); + saw_END_CASE = true; + } + else if (cmd_result_is_failure (result) && result != CMD_FAILURE) + { + if (result == CMD_EOF) + msg (SE, _("Unexpected end-of-file within INPUT PROGRAM.")); + inside_input_program = false; + proc_discard_active_file (ds); + destroy_input_program (inp); + return result; + } + } + if (!saw_END_CASE) + emit_END_CASE (ds, inp); + inside_input_program = false; - /* Figure out how to initialize each input case. */ - inp = xmalloc (sizeof *inp); - inp->init_cnt = dict_get_next_value_idx (default_dict); - inp->init = xnmalloc (inp->init_cnt, sizeof *inp->init); - for (i = 0; i < inp->init_cnt; i++) - inp->init[i] = -1; - for (i = 0; i < dict_get_var_cnt (default_dict); i++) + if (dict_get_next_value_idx (dataset_dict (ds)) == 0) { - struct variable *var = dict_get_var (default_dict, i); - enum value_init_type value_init; - size_t j; - - value_init = var->type == NUMERIC ? INP_NUMERIC : INP_STRING; - value_init |= var->reinit ? INP_REINIT : INP_INIT_ONCE; - - for (j = 0; j < var->nv; j++) - inp->init[j + var->fv] = value_init; + msg (SE, _("Input program did not create any variables.")); + proc_discard_active_file (ds); + destroy_input_program (inp); + return CMD_FAILURE; } - for (i = 0; i < inp->init_cnt; i++) - assert (inp->init[i] != -1); - inp->case_size = dict_get_case_size (default_dict); - /* Put inp into vfm_source for later use. */ - vfm_source->aux = inp; + inp->trns_chain = proc_capture_transformations (ds); + trns_chain_finalize (inp->trns_chain); - return lex_end_of_command (); -} + inp->restart = TRNS_CONTINUE; -/* Initializes case C. Called before the first case is read. */ -static void -init_case (const struct input_program_pgm *inp, struct ccase *c) -{ - size_t i; - - for (i = 0; i < inp->init_cnt; i++) - switch (inp->init[i]) - { - case INP_NUMERIC | INP_INIT_ONCE: - case_data_rw (c, i)->f = 0.0; - break; - case INP_NUMERIC | INP_REINIT: - case_data_rw (c, i)->f = SYSMIS; - break; - case INP_STRING | INP_INIT_ONCE: - case INP_STRING | INP_REINIT: - memset (case_data_rw (c, i)->s, ' ', sizeof case_data_rw (c, i)->s); - break; - default: - assert (0); - } + /* Figure out how to initialize each input case. */ + inp->init = caseinit_create (); + caseinit_mark_for_init (inp->init, dataset_dict (ds)); + inp->proto = caseproto_ref (dict_get_proto (dataset_dict (ds))); + + proc_set_active_file_data ( + ds, casereader_create_sequential (NULL, inp->proto, CASENUMBER_MAX, + &input_program_casereader_class, inp)); + + return CMD_SUCCESS; } -/* Clears case C. Called between reading successive records. */ -static void -clear_case (const struct input_program_pgm *inp, struct ccase *c) +int +cmd_end_input_program (struct lexer *lexer UNUSED, struct dataset *ds UNUSED) { - size_t i; - - for (i = 0; i < inp->init_cnt; i++) - switch (inp->init[i]) - { - case INP_NUMERIC | INP_INIT_ONCE: - break; - case INP_NUMERIC | INP_REINIT: - case_data_rw (c, i)->f = SYSMIS; - break; - case INP_STRING | INP_INIT_ONCE: - break; - case INP_STRING | INP_REINIT: - memset (case_data_rw (c, i)->s, ' ', sizeof case_data_rw (c, i)->s); - break; - default: - assert (0); - } + /* Inside INPUT PROGRAM, this should get caught at the top of the loop in + cmd_input_program(). + + Outside of INPUT PROGRAM, the command parser should reject this + command. */ + NOT_REACHED (); } -/* Executes each transformation in turn on a `blank' case. - Returns true if successful, false if an I/O error occurred. */ +/* Returns true if STATE is valid given the transformations that + are allowed within INPUT PROGRAM. */ static bool -input_program_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) +is_valid_state (enum trns_result state) { - struct input_program_pgm *inp = source->aux; - size_t i; - - /* Nonzero if there were any END CASE commands in the set of - transformations. If so, we don't automatically write out - cases. */ - int end_case = 0; - - /* FIXME? This is the number of cases sent out of the input - program, not the number of cases written to the procedure. - The difference should only show up in $CASENUM in COMPUTE. - We should check behavior against SPSS. */ - int cases_written = 0; - - assert (inp != NULL); - - /* Figure end_case. */ - for (i = 0; i < f_trns; i++) - if (t_trns[i].proc == end_case_trns_proc) - end_case = 1; - - /* FIXME: This is an ugly kluge. */ - for (i = 0; i < f_trns; i++) - if (t_trns[i].proc == repeating_data_trns_proc) - repeating_data_set_write_case (t_trns[i].private, write_case, wc_data); - - init_case (inp, c); - for (;;) - { - /* Perform transformations on `blank' case. */ - for (i = 0; i < f_trns; ) - { - int code; - - if (t_trns[i].proc == end_case_trns_proc) - { - cases_written++; - if (!write_case (wc_data)) - return false; - clear_case (inp, c); - i++; - continue; - } - - code = t_trns[i].proc (t_trns[i].private, c, cases_written + 1); - switch (code) - { - case TRNS_CONTINUE: - i++; - break; - - case TRNS_DROP_CASE: - abort (); - - case TRNS_ERROR: - return false; - - case TRNS_NEXT_CASE: - goto next_case; - - case TRNS_END_FILE: - return true; + return (state == TRNS_CONTINUE + || state == TRNS_ERROR + || state == TRNS_END_FILE + || state >= 0); +} - default: - i = code; - break; - } - } +/* Reads and returns one case. + Returns the case if successful, null at end of file or if an + I/O error occurred. */ +static struct ccase * +input_program_casereader_read (struct casereader *reader UNUSED, void *inp_) +{ + struct input_program_pgm *inp = inp_; + struct ccase *c = case_create (inp->proto); - /* Write the case if appropriate. */ - if (!end_case) + do + { + assert (is_valid_state (inp->restart)); + if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE) { - cases_written++; - if (!write_case (wc_data)) - return false; + case_unref (c); + return NULL; } - /* Blank out the case for the next iteration. */ - next_case: - clear_case (inp, c); + c = case_unshare (c); + caseinit_init_vars (inp->init, c); + inp->restart = trns_chain_execute (inp->trns_chain, inp->restart, + &c, inp->case_nr); + assert (is_valid_state (inp->restart)); + caseinit_update_left_vars (inp->init, c); } + while (inp->restart < 0); + + return c; } -/* Destroys an INPUT PROGRAM source. */ static void -input_program_source_destroy (struct case_source *source) +destroy_input_program (struct input_program_pgm *pgm) { - struct input_program_pgm *inp = source->aux; - - cancel_transformations (); - - if (inp != NULL) + if (pgm != NULL) { - free (inp->init); - free (inp); + trns_chain_destroy (pgm->trns_chain); + caseinit_destroy (pgm->init); + caseproto_unref (pgm->proto); + free (pgm); } } -const struct case_source_class input_program_source_class = +/* Destroys the casereader. */ +static void +input_program_casereader_destroy (struct casereader *reader UNUSED, void *inp_) +{ + struct input_program_pgm *inp = inp_; + if (inp->restart == TRNS_ERROR) + casereader_force_error (reader); + destroy_input_program (inp); +} + +static const struct casereader_class input_program_casereader_class = { - "INPUT PROGRAM", + input_program_casereader_read, + input_program_casereader_destroy, + NULL, NULL, - input_program_source_read, - input_program_source_destroy, }; int -cmd_end_case (void) +cmd_end_case (struct lexer *lexer, struct dataset *ds UNUSED) { - if (!case_source_is_class (vfm_source, &input_program_source_class)) - { - msg (SE, _("This command may only be executed between INPUT PROGRAM " - "and END INPUT PROGRAM.")); - return CMD_CASCADING_FAILURE; - } - - add_transformation (end_case_trns_proc, NULL, NULL); - - return lex_end_of_command (); + assert (in_input_program ()); + if (lex_token (lexer) == T_ENDCMD) + return CMD_END_CASE; + return CMD_SUCCESS; } -/* Should never be called, because this is handled in - input_program_source_read(). */ +/* Outputs the current case */ int -end_case_trns_proc (void *trns_ UNUSED, struct ccase *c UNUSED, - int case_num UNUSED) +end_case_trns_proc (void *inp_, struct ccase **c UNUSED, + casenumber case_nr UNUSED) { - abort (); + struct input_program_pgm *inp = inp_; + inp->case_nr++; + return TRNS_END_CASE; } /* REREAD transformation. */ @@ -315,7 +261,7 @@ struct reread_trns /* Parses REREAD command. */ int -cmd_reread (void) +cmd_reread (struct lexer *lexer, struct dataset *ds) { struct file_handle *fh; /* File to be re-read. */ struct expression *e; /* Expression for column to set. */ @@ -323,52 +269,54 @@ cmd_reread (void) fh = fh_get_default_handle (); e = NULL; - while (token != '.') + while (lex_token (lexer) != T_ENDCMD) { - if (lex_match_id ("COLUMN")) + if (lex_match_id (lexer, "COLUMN")) { - lex_match ('='); - + lex_match (lexer, T_EQUALS); + if (e) { - msg (SE, _("COLUMN subcommand multiply specified.")); + msg (SE, _("%s subcommand may be given at most once."), "COLUMN"); expr_free (e); return CMD_CASCADING_FAILURE; } - - e = expr_parse (default_dict, EXPR_NUMBER); + + e = expr_parse (lexer, ds, EXPR_NUMBER); if (!e) return CMD_CASCADING_FAILURE; } - else if (lex_match_id ("FILE")) + else if (lex_match_id (lexer, "FILE")) { - lex_match ('='); - fh = fh_parse (FH_REF_FILE | FH_REF_INLINE); + lex_match (lexer, T_EQUALS); + fh_unref (fh); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE); if (fh == NULL) { expr_free (e); return CMD_CASCADING_FAILURE; } - lex_get (); } else { - lex_error (NULL); + lex_error (lexer, NULL); expr_free (e); + return CMD_CASCADING_FAILURE; } } t = xmalloc (sizeof *t); - t->reader = dfm_open_reader (fh); + t->reader = dfm_open_reader (fh, lexer); t->column = e; - add_transformation (reread_trns_proc, reread_trns_free, t); + add_transformation (ds, reread_trns_proc, reread_trns_free, t); + fh_unref (fh); return CMD_SUCCESS; } /* Executes a REREAD transformation. */ static int -reread_trns_proc (void *t_, struct ccase *c, int case_num) +reread_trns_proc (void *t_, struct ccase **c, casenumber case_num) { struct reread_trns *t = t_; @@ -376,8 +324,8 @@ reread_trns_proc (void *t_, struct ccase *c, int case_num) dfm_reread_record (t->reader, 1); else { - double column = expr_evaluate_num (t->column, c, case_num); - if (!finite (column) || column < 1) + double column = expr_evaluate_num (t->column, *c, case_num); + if (!isfinite (column) || column < 1) { msg (SE, _("REREAD: Column numbers must be positive finite " "numbers. Column set to 1.")); @@ -402,24 +350,19 @@ reread_trns_free (void *t_) /* Parses END FILE command. */ int -cmd_end_file (void) +cmd_end_file (struct lexer *lexer UNUSED, struct dataset *ds) { - if (!case_source_is_class (vfm_source, &input_program_source_class)) - { - msg (SE, _("This command may only be executed between INPUT PROGRAM " - "and END INPUT PROGRAM.")); - return CMD_CASCADING_FAILURE; - } + assert (in_input_program ()); - add_transformation (end_file_trns_proc, NULL, NULL); + add_transformation (ds, end_file_trns_proc, NULL, NULL); - return lex_end_of_command (); + return CMD_SUCCESS; } /* Executes an END FILE transformation. */ static int -end_file_trns_proc (void *trns_ UNUSED, struct ccase *c UNUSED, - int case_num UNUSED) +end_file_trns_proc (void *trns_ UNUSED, struct ccase **c UNUSED, + casenumber case_num UNUSED) { return TRNS_END_FILE; }