X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Finpt-pgm.c;h=b9e2c326feb86374b6e17f7afc70568b38d61e63;hb=60c545e6e958d868db3399a8989d37d8f9e0c131;hp=0581249a841e6edd4ba8a15257a2ab2a29979ed3;hpb=c646c399bf8c942a5e33abaa6c12336429c09f24;p=pspp diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c index 0581249a84..b9e2c326fe 100644 --- a/src/language/data-io/inpt-pgm.c +++ b/src/language/data-io/inpt-pgm.c @@ -1,338 +1,273 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include - #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "data/case.h" +#include "data/caseinit.h" +#include "data/casereader-provider.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/session.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/data-reader.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/inpt-pgm.h" +#include "language/expressions/public.h" +#include "language/lexer/lexer.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) -/* Private result codes for use within INPUT PROGRAM. */ -enum cmd_result_extensions - { - CMD_END_INPUT_PROGRAM = CMD_PRIVATE_FIRST, - CMD_END_CASE - }; - /* Indicates how a `union value' should be initialized. */ -enum value_init_type +struct input_program_pgm { - INP_NUMERIC = 01, /* Numeric. */ - INP_STRING = 0, /* String. */ - - INP_INIT_ONCE = 02, /* Initialize only once. */ - INP_REINIT = 0, /* Reinitialize for each iteration. */ - }; + struct session *session; + struct dataset *ds; -struct input_program_pgm - { - size_t case_nr; /* Incremented by END CASE transformation. */ - write_case_func *write_case;/* Called by END CASE. */ - write_case_data wc_data; /* Aux data used by END CASE. */ + struct trns_chain xforms; + size_t idx; + bool eof; + + casenumber case_nr; /* Incremented by END CASE transformation. */ - enum value_init_type *init; /* How to initialize each `union value'. */ - size_t init_cnt; /* Number of elements in inp_init. */ - size_t case_size; /* Size of case in bytes. */ + struct caseinit *init; + struct caseproto *proto; }; static void destroy_input_program (struct input_program_pgm *); -static trns_proc_func end_case_trns_proc; -static trns_proc_func reread_trns_proc; -static trns_proc_func end_file_trns_proc; -static trns_free_func reread_trns_free; +static const struct trns_class end_case_trns_class; +static const struct trns_class reread_trns_class; +static const struct trns_class end_file_trns_class; -static const struct case_source_class input_program_source_class; +static const struct casereader_class input_program_casereader_class; static bool inside_input_program; +static bool saw_END_CASE; +static bool saw_END_FILE; +static bool saw_DATA_LIST; /* Returns true if we're parsing the inside of a INPUT PROGRAM...END INPUT PROGRAM construct, false otherwise. */ bool -in_input_program (void) +in_input_program (void) { return inside_input_program; } +void +data_list_seen (void) +{ + saw_DATA_LIST = true; +} + /* Emits an END CASE transformation for INP. */ static void -emit_END_CASE (struct input_program_pgm *inp) +emit_END_CASE (struct dataset *ds) { - add_transformation (end_case_trns_proc, NULL, inp); + add_transformation (ds, &end_case_trns_class, xzalloc (sizeof (bool))); } int -cmd_input_program (void) +cmd_input_program (struct lexer *lexer, struct dataset *ds) { - struct input_program_pgm *inp; - size_t i; - bool saw_END_CASE = false; + if (!lex_match (lexer, T_ENDCMD)) + return lex_end_of_command (lexer); - discard_variables (); - if (token != '.') - return lex_end_of_command (); + struct session *session = session_create (dataset_session (ds)); + struct dataset *inp_ds = dataset_create (session, "INPUT PROGRAM"); - inp = xmalloc (sizeof *inp); - inp->init = NULL; - + struct input_program_pgm *inp = xmalloc (sizeof *inp); + *inp = (struct input_program_pgm) { .session = session, .ds = inp_ds }; + + proc_push_transformations (inp->ds); inside_input_program = true; - for (;;) + saw_END_CASE = saw_END_FILE = saw_DATA_LIST = false; + while (!lex_match_phrase (lexer, "END INPUT PROGRAM")) { enum cmd_result result; - lex_get (); - result = cmd_parse (CMD_STATE_INPUT_PROGRAM); - if (result == CMD_END_INPUT_PROGRAM) - break; - else if (result == CMD_END_CASE) - { - emit_END_CASE (inp); - saw_END_CASE = true; - } - else if (cmd_result_is_failure (result) && result != CMD_FAILURE) + + result = cmd_parse_in_state (lexer, inp->ds, CMD_STATE_INPUT_PROGRAM); + if (result == CMD_EOF + || result == CMD_FINISH + || result == CMD_CASCADING_FAILURE) { + proc_pop_transformations (inp->ds, &inp->xforms); + if (result == CMD_EOF) - msg (SE, _("Unexpected end-of-file within INPUT PROGRAM.")); + msg (SE, _("Unexpected end-of-file within %s."), "INPUT PROGRAM"); inside_input_program = false; - discard_variables (); destroy_input_program (inp); return result; } } if (!saw_END_CASE) - emit_END_CASE (inp); + emit_END_CASE (inp->ds); inside_input_program = false; + proc_pop_transformations (inp->ds, &inp->xforms); - if (dict_get_next_value_idx (default_dict) == 0) + if (!saw_DATA_LIST && !saw_END_FILE) + { + msg (SE, _("Input program must contain %s or %s."), "DATA LIST", "END FILE"); + destroy_input_program (inp); + return CMD_FAILURE; + } + if (dict_get_next_value_idx (dataset_dict (inp->ds)) == 0) { msg (SE, _("Input program did not create any variables.")); - discard_variables (); destroy_input_program (inp); return CMD_FAILURE; } - - /* Mark the boundary between INPUT PROGRAM transformations and - ordinary transformations. */ - f_trns = n_trns; /* Figure out how to initialize each input case. */ - inp->init_cnt = dict_get_next_value_idx (default_dict); - inp->init = xnmalloc (inp->init_cnt, sizeof *inp->init); - for (i = 0; i < inp->init_cnt; i++) - inp->init[i] = -1; - for (i = 0; i < dict_get_var_cnt (default_dict); i++) - { - struct variable *var = dict_get_var (default_dict, i); - enum value_init_type value_init; - size_t j; - - value_init = var->type == NUMERIC ? INP_NUMERIC : INP_STRING; - value_init |= var->leave ? INP_INIT_ONCE : INP_REINIT; - - for (j = 0; j < var->nv; j++) - inp->init[j + var->fv] = value_init; - } - for (i = 0; i < inp->init_cnt; i++) - assert (inp->init[i] != -1); - inp->case_size = dict_get_case_size (default_dict); + inp->init = caseinit_create (); + caseinit_mark_for_init (inp->init, dataset_dict (inp->ds)); + inp->proto = caseproto_ref (dict_get_proto (dataset_dict (inp->ds))); - /* Create vfm_source. */ - vfm_source = create_case_source (&input_program_source_class, inp); + dataset_set_dict (ds, dict_clone (dataset_dict (inp->ds))); + dataset_set_source ( + ds, casereader_create_sequential (NULL, inp->proto, CASENUMBER_MAX, + &input_program_casereader_class, inp)); return CMD_SUCCESS; } -int -cmd_end_input_program (void) +/* Reads and returns one case. + Returns the case if successful, null at end of file or if an + I/O error occurred. */ +static struct ccase * +input_program_casereader_read (struct casereader *reader UNUSED, void *inp_) { - assert (in_input_program ()); - return CMD_END_INPUT_PROGRAM; -} - -/* Initializes case C. Called before the first case is read. */ -static void -init_case (const struct input_program_pgm *inp, struct ccase *c) -{ - size_t i; - - for (i = 0; i < inp->init_cnt; i++) - switch (inp->init[i]) - { - case INP_NUMERIC | INP_INIT_ONCE: - case_data_rw (c, i)->f = 0.0; - break; - case INP_NUMERIC | INP_REINIT: - case_data_rw (c, i)->f = SYSMIS; - break; - case INP_STRING | INP_INIT_ONCE: - case INP_STRING | INP_REINIT: - memset (case_data_rw (c, i)->s, ' ', sizeof case_data_rw (c, i)->s); - break; - default: - assert (0); - } -} + struct input_program_pgm *inp = inp_; -/* Clears case C. Called between reading successive records. */ -static void -clear_case (const struct input_program_pgm *inp, struct ccase *c) -{ - size_t i; - - for (i = 0; i < inp->init_cnt; i++) - switch (inp->init[i]) - { - case INP_NUMERIC | INP_INIT_ONCE: - break; - case INP_NUMERIC | INP_REINIT: - case_data_rw (c, i)->f = SYSMIS; - break; - case INP_STRING | INP_INIT_ONCE: - break; - case INP_STRING | INP_REINIT: - memset (case_data_rw (c, i)->s, ' ', sizeof case_data_rw (c, i)->s); - break; - default: - assert (0); - } -} + if (inp->eof || !inp->xforms.n) + return NULL; -/* Executes each transformation in turn on a `blank' case. - Returns true if successful, false if an I/O error occurred. */ -static bool -input_program_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) -{ - struct input_program_pgm *inp = source->aux; + struct ccase *c = case_create (inp->proto); + caseinit_init_vars (inp->init, c); - inp->case_nr = 1; - inp->write_case = write_case; - inp->wc_data = wc_data; - for (init_case (inp, c); ; clear_case (inp, c)) + for (size_t i = inp->idx < inp->xforms.n ? inp->idx : 0; ; i++) { - int i; - - /* Perform transformations on `blank' case. */ - for (i = 0; i < f_trns; ) - { - int code; - - code = t_trns[i].proc (t_trns[i].private, c, inp->case_nr); - switch (code) - { - case TRNS_CONTINUE: - i++; - break; - - case TRNS_DROP_CASE: - break; - - case TRNS_ERROR: - return false; - - case TRNS_NEXT_CASE: - goto next_case; - - case TRNS_END_FILE: - return true; + if (i >= inp->xforms.n) + { + i = 0; + c = case_unshare (c); + caseinit_update_left_vars (inp->init, c); + caseinit_init_vars (inp->init, c); + } - default: - i = code; - break; - } - } - next_case: ; + const struct transformation *trns = &inp->xforms.xforms[i]; + switch (trns->class->execute (trns->aux, &c, inp->case_nr)) + { + case TRNS_END_CASE: + inp->case_nr++; + inp->idx = i; + return c; + + case TRNS_ERROR: + casereader_force_error (reader); + /* Fall through. */ + case TRNS_END_FILE: + inp->eof = true; + case_unref (c); + return NULL; + + case TRNS_CONTINUE: + break; + + default: + NOT_REACHED (); + } } } static void -destroy_input_program (struct input_program_pgm *pgm) +destroy_input_program (struct input_program_pgm *pgm) { - if (pgm != NULL) + if (pgm != NULL) { - free (pgm->init); + session_destroy (pgm->session); + trns_chain_uninit (&pgm->xforms); + caseinit_destroy (pgm->init); + caseproto_unref (pgm->proto); free (pgm); } } -/* Destroys an INPUT PROGRAM source. */ +/* Destroys the casereader. */ static void -input_program_source_destroy (struct case_source *source) +input_program_casereader_destroy (struct casereader *reader UNUSED, void *inp_) { - struct input_program_pgm *inp = source->aux; - + struct input_program_pgm *inp = inp_; destroy_input_program (inp); } -static const struct case_source_class input_program_source_class = +static const struct casereader_class input_program_casereader_class = { - "INPUT PROGRAM", + input_program_casereader_read, + input_program_casereader_destroy, + NULL, NULL, - input_program_source_read, - input_program_source_destroy, }; int -cmd_end_case (void) +cmd_end_case (struct lexer *lexer UNUSED, struct dataset *ds) { assert (in_input_program ()); - if (token == '.') - return CMD_END_CASE; - return lex_end_of_command (); + emit_END_CASE (ds); + saw_END_CASE = true; + return CMD_SUCCESS; } -/* Sends the current case as the source's output. */ -int -end_case_trns_proc (void *inp_, struct ccase *c, int case_nr UNUSED) +/* Outputs the current case */ +static enum trns_result +end_case_trns_proc (void *resume_, struct ccase **c UNUSED, + casenumber case_nr UNUSED) { - struct input_program_pgm *inp = inp_; - - if (!inp->write_case (inp->wc_data)) - return TRNS_ERROR; + bool *resume = resume_; + enum trns_result retval = *resume ? TRNS_CONTINUE : TRNS_END_CASE; + *resume = !*resume; + return retval; +} - inp->case_nr++; - clear_case (inp, c); - return TRNS_CONTINUE; +static bool +end_case_trns_free (void *resume) +{ + free (resume); + return true; } +static const struct trns_class end_case_trns_class = { + .name = "END CASE", + .execute = end_case_trns_proc, + .destroy = end_case_trns_free, +}; + /* REREAD transformation. */ struct reread_trns { @@ -342,60 +277,75 @@ struct reread_trns /* Parses REREAD command. */ int -cmd_reread (void) +cmd_reread (struct lexer *lexer, struct dataset *ds) { struct file_handle *fh; /* File to be re-read. */ struct expression *e; /* Expression for column to set. */ struct reread_trns *t; /* Created transformation. */ + char *encoding = NULL; fh = fh_get_default_handle (); e = NULL; - while (token != '.') + while (lex_token (lexer) != T_ENDCMD) { - if (lex_match_id ("COLUMN")) + if (lex_match_id (lexer, "COLUMN")) { - lex_match ('='); - + lex_match (lexer, T_EQUALS); + if (e) { - msg (SE, _("COLUMN subcommand multiply specified.")); - expr_free (e); - return CMD_CASCADING_FAILURE; + lex_sbc_only_once ("COLUMN"); + goto error; } - - e = expr_parse (default_dict, EXPR_NUMBER); + + e = expr_parse (lexer, ds, VAL_NUMERIC); if (!e) - return CMD_CASCADING_FAILURE; + goto error; } - else if (lex_match_id ("FILE")) + else if (lex_match_id (lexer, "FILE")) { - lex_match ('='); - fh = fh_parse (FH_REF_FILE | FH_REF_INLINE); + lex_match (lexer, T_EQUALS); + fh_unref (fh); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL); if (fh == NULL) - { - expr_free (e); - return CMD_CASCADING_FAILURE; - } - lex_get (); + goto error; + } + else if (lex_match_id (lexer, "ENCODING")) + { + lex_match (lexer, T_EQUALS); + if (!lex_force_string (lexer)) + goto error; + + free (encoding); + encoding = ss_xstrdup (lex_tokss (lexer)); + + lex_get (lexer); } else { - lex_error (NULL); - expr_free (e); + lex_error (lexer, NULL); + goto error; } } t = xmalloc (sizeof *t); - t->reader = dfm_open_reader (fh); + t->reader = dfm_open_reader (fh, lexer, encoding); t->column = e; - add_transformation (reread_trns_proc, reread_trns_free, t); + add_transformation (ds, &reread_trns_class, t); + fh_unref (fh); + free (encoding); return CMD_SUCCESS; + +error: + expr_free (e); + free (encoding); + return CMD_CASCADING_FAILURE; } /* Executes a REREAD transformation. */ -static int -reread_trns_proc (void *t_, struct ccase *c, int case_num) +static enum trns_result +reread_trns_proc (void *t_, struct ccase **c, casenumber case_num) { struct reread_trns *t = t_; @@ -403,8 +353,8 @@ reread_trns_proc (void *t_, struct ccase *c, int case_num) dfm_reread_record (t->reader, 1); else { - double column = expr_evaluate_num (t->column, c, case_num); - if (!finite (column) || column < 1) + double column = expr_evaluate_num (t->column, *c, case_num); + if (!isfinite (column) || column < 1) { msg (SE, _("REREAD: Column numbers must be positive finite " "numbers. Column set to 1.")); @@ -427,21 +377,33 @@ reread_trns_free (void *t_) return true; } +static const struct trns_class reread_trns_class = { + .name = "REREAD", + .execute = reread_trns_proc, + .destroy = reread_trns_free, +}; + /* Parses END FILE command. */ int -cmd_end_file (void) +cmd_end_file (struct lexer *lexer UNUSED, struct dataset *ds) { assert (in_input_program ()); - add_transformation (end_file_trns_proc, NULL, NULL); + add_transformation (ds, &end_file_trns_class, NULL); + saw_END_FILE = true; - return lex_end_of_command (); + return CMD_SUCCESS; } /* Executes an END FILE transformation. */ -static int -end_file_trns_proc (void *trns_ UNUSED, struct ccase *c UNUSED, - int case_num UNUSED) +static enum trns_result +end_file_trns_proc (void *trns_ UNUSED, struct ccase **c UNUSED, + casenumber case_num UNUSED) { return TRNS_END_FILE; } + +static const struct trns_class end_file_trns_class = { + .name = "END FILE", + .execute = end_file_trns_proc, +};