From: Ben Pfaff Date: Sun, 6 Jan 2013 20:57:23 +0000 (-0800) Subject: Merge 'master' into 'psppsheet'. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fbuilds%2F20130127032105%2Fpspp;hp=6f3865480503c571963d8a2d1af858a4d72d4e88;p=pspp Merge 'master' into 'psppsheet'. --- diff --git a/Smake b/Smake index 72ec791a5e..1e54533169 100644 --- a/Smake +++ b/Smake @@ -81,8 +81,7 @@ GNULIB_MODULES = \ unicase/u8-tolower \ unicase/u8-toupper \ unictype/ctype-print \ - unictype/property-id-continue \ - unictype/property-id-start \ + unictype/category-of \ unigbrk/uc-is-grapheme-break \ unilbrk/u8-possible-linebreaks \ uninorm/nfkd \ diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs index a66f442729..0895f641a8 100644 --- a/perl-module/PSPP.xs +++ b/perl-module/PSPP.xs @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -60,7 +60,7 @@ typedef struct fmt_spec output_format ; /* A thin wrapper around sfm_writer */ -struct sysfile_info +struct syswriter_info { bool opened; @@ -68,7 +68,7 @@ struct sysfile_info struct casewriter *writer; /* A pointer to the dictionary. Owned externally */ - const struct dictionary *dict; + const struct pspp_dict *dict; /* The scalar containing the dictionary */ SV *dict_sv; @@ -84,9 +84,21 @@ struct sysreader_info struct casereader *reader; /* A pointer to the dictionary. */ - struct dictionary *dict; + struct pspp_dict *dict; +}; + + +struct input_format { + struct hmap_node hmap_node; /* In struct pspp_dict's input_formats map. 
*/ + const struct variable *var; + struct fmt_spec input_format; }; +/* A thin wrapper around struct dictionary.*/ +struct pspp_dict { + struct dictionary *dict; + struct hmap input_formats; /* Contains struct input_format. */ +}; /* A message handler which writes messages to PSPP::errstr */ @@ -98,15 +110,15 @@ message_handler (const struct msg *m, void *aux) } static int -sysfile_close (struct sysfile_info *sfi) +sysfile_close (struct syswriter_info *swi) { int retval ; - if ( ! sfi->opened ) + if ( ! swi->opened ) return 0; - retval = casewriter_destroy (sfi->writer); + retval = casewriter_destroy (swi->writer); if (retval > 0 ) - sfi->opened = false; + swi->opened = false; return retval; } @@ -150,14 +162,6 @@ value_to_scalar (const union value *val, const struct variable *var) } -static void -var_set_input_format (struct variable *v, input_format ip_fmt) -{ - struct fmt_spec *if_copy = malloc (sizeof (*if_copy)); - memcpy (if_copy, &ip_fmt, sizeof (ip_fmt)); - var_attach_aux (v, if_copy, var_dtor_free); -} - static void make_value_from_scalar (union value *uv, SV *val, const struct variable *var) { @@ -165,6 +169,28 @@ make_value_from_scalar (union value *uv, SV *val, const struct variable *var) scalar_to_value (uv, val, var); } +static struct pspp_dict * +create_pspp_dict (struct dictionary *dict) +{ + struct pspp_dict *pspp_dict = xmalloc (sizeof *pspp_dict); + pspp_dict->dict = dict; + hmap_init (&pspp_dict->input_formats); + return pspp_dict; +} + +static const struct fmt_spec * +find_input_format (const struct pspp_dict *dict, const struct variable *var) +{ + struct input_format *input_format; + + HMAP_FOR_EACH_IN_BUCKET (input_format, struct input_format, hmap_node, + hash_pointer (var, 0), &dict->input_formats) + if (input_format->var == var) + return &input_format->input_format; + + return NULL; +} + MODULE = PSPP @@ -223,94 +249,106 @@ RETVAL MODULE = PSPP PACKAGE = PSPP::Dict -struct dictionary * +struct pspp_dict * pxs_dict_new() CODE: - RETVAL = 
dict_create ("UTF-8"); + RETVAL = create_pspp_dict (dict_create ("UTF-8")); OUTPUT: RETVAL void DESTROY (dict) - struct dictionary *dict + struct pspp_dict *dict CODE: - dict_destroy (dict); + if (dict != NULL) + { + struct input_format *input_format, *next_input_format; + HMAP_FOR_EACH_SAFE (input_format, next_input_format, + struct input_format, hmap_node, &dict->input_formats) + { + hmap_delete (&dict->input_formats, &input_format->hmap_node); + free (input_format); + } + hmap_destroy (&dict->input_formats); + dict_destroy (dict->dict); + free (dict); + } int get_var_cnt (dict) - struct dictionary *dict + struct pspp_dict *dict CODE: - RETVAL = dict_get_var_cnt (dict); + RETVAL = dict_get_var_cnt (dict->dict); OUTPUT: RETVAL void set_label (dict, label) - struct dictionary *dict + struct pspp_dict *dict char *label CODE: - dict_set_label (dict, label); + dict_set_label (dict->dict, label); void set_documents (dict, docs) - struct dictionary *dict + struct pspp_dict *dict char *docs CODE: - dict_set_documents_string (dict, docs); + dict_set_documents_string (dict->dict, docs); void add_document (dict, doc) - struct dictionary *dict + struct pspp_dict *dict char *doc CODE: - dict_add_document_line (dict, doc, false); + dict_add_document_line (dict->dict, doc, false); void clear_documents (dict) - struct dictionary *dict + struct pspp_dict *dict CODE: - dict_clear_documents (dict); + dict_clear_documents (dict->dict); void set_weight (dict, var) - struct dictionary *dict + struct pspp_dict *dict struct variable *var CODE: - dict_set_weight (dict, var); + dict_set_weight (dict->dict, var); struct variable * pxs_get_variable (dict, idx) - struct dictionary *dict + struct pspp_dict *dict SV *idx INIT: SV *errstr = get_sv("PSPP::errstr", TRUE); sv_setpv (errstr, ""); - if ( SvIV (idx) >= dict_get_var_cnt (dict)) + if ( SvIV (idx) >= dict_get_var_cnt (dict->dict)) { sv_setpv (errstr, "The dictionary doesn't have that many variables."); XSRETURN_UNDEF; } CODE: - RETVAL = 
dict_get_var (dict, SvIV (idx)); + RETVAL = dict_get_var (dict->dict, SvIV (idx)); OUTPUT: RETVAL struct variable * pxs_get_var_by_name (dict, name) - struct dictionary *dict + struct pspp_dict *dict const char *name INIT: SV *errstr = get_sv("PSPP::errstr", TRUE); sv_setpv (errstr, ""); CODE: - struct variable *var = dict_lookup_var (dict, name); + struct variable *var = dict_lookup_var (dict->dict, name); if ( ! var ) sv_setpv (errstr, "No such variable."); RETVAL = var; @@ -323,7 +361,7 @@ MODULE = PSPP PACKAGE = PSPP::Var struct variable * pxs_dict_create_var (dict, name, ip_fmt) - struct dictionary * dict + struct pspp_dict * dict char *name input_format ip_fmt INIT: @@ -336,10 +374,11 @@ INIT: } CODE: struct fmt_spec op_fmt; + struct input_format *input_format; struct variable *v; op_fmt = fmt_for_output_from_input (&ip_fmt); - v = dict_create_var (dict, name, + v = dict_create_var (dict->dict, name, fmt_is_string (op_fmt.type) ? op_fmt.w : 0); if ( NULL == v ) { @@ -347,7 +386,13 @@ CODE: XSRETURN_UNDEF; } var_set_both_formats (v, &op_fmt); - var_set_input_format (v, ip_fmt); + + input_format = xmalloc (sizeof *input_format); + input_format->var = v; + input_format->input_format = ip_fmt; + hmap_insert (&dict->input_formats, &input_format->hmap_node, + hash_pointer (v, 0)); + RETVAL = v; OUTPUT: RETVAL @@ -567,10 +612,10 @@ RETVAL MODULE = PSPP PACKAGE = PSPP::Sysfile -struct sysfile_info * +struct syswriter_info * pxs_create_sysfile (name, dict, opts_hr) char *name - struct dictionary *dict; + struct pspp_dict *dict; SV *opts_hr INIT: SV *dict_sv = ST(1); @@ -593,36 +638,36 @@ INIT: CODE: struct file_handle *fh = fh_create_file (NULL, name, fh_default_properties () ); - struct sysfile_info *sfi = xmalloc (sizeof (*sfi)); - sfi->writer = sfm_open_writer (fh, dict, opts); - sfi->dict = dict; - sfi->opened = true; - sfi->dict_sv = dict_sv; - SvREFCNT_inc (sfi->dict_sv); + struct syswriter_info *swi = xmalloc (sizeof (*swi)); + swi->writer = sfm_open_writer 
(fh, dict->dict, opts); + swi->dict = dict; + swi->opened = true; + swi->dict_sv = dict_sv; + SvREFCNT_inc (swi->dict_sv); - RETVAL = sfi; + RETVAL = swi; OUTPUT: RETVAL int -close (sfi) - struct sysfile_info *sfi +close (swi) + struct syswriter_info *swi CODE: - RETVAL = sysfile_close (sfi); + RETVAL = sysfile_close (swi); OUTPUT: RETVAL void -DESTROY (sfi) - struct sysfile_info *sfi +DESTROY (swi) + struct syswriter_info *swi CODE: - sysfile_close (sfi); - SvREFCNT_dec (sfi->dict_sv); - free (sfi); + sysfile_close (swi); + SvREFCNT_dec (swi->dict_sv); + free (swi); int -append_case (sfi, ccase) - struct sysfile_info *sfi +append_case (swi, ccase) + struct syswriter_info *swi SV *ccase INIT: SV *errstr = get_sv("PSPP::errstr", TRUE); @@ -640,17 +685,18 @@ CODE: struct ccase *c; SV *sv; - if ( av_len (av_case) >= dict_get_var_cnt (sfi->dict)) + if ( av_len (av_case) >= dict_get_var_cnt (swi->dict->dict)) XSRETURN_UNDEF; - c = case_create (dict_get_proto (sfi->dict)); + c = case_create (dict_get_proto (swi->dict->dict)); - dict_get_vars (sfi->dict, &vv, &nv, 1u << DC_ORDINARY | 1u << DC_SYSTEM); + dict_get_vars (swi->dict->dict, &vv, &nv, + 1u << DC_ORDINARY | 1u << DC_SYSTEM); for (sv = av_shift (av_case); SvOK (sv); sv = av_shift (av_case)) { const struct variable *v = vv[i++]; - const struct fmt_spec *ifmt = var_get_aux (v); + const struct fmt_spec *ifmt = find_input_format (swi->dict, v); /* If an input format has been set, then use it. Otherwise just convert the raw value. @@ -663,7 +709,7 @@ CODE: error = data_in (ss, SvUTF8(sv) ? 
UTF8: "iso-8859-1", ifmt->type, case_data_rw (c, v), var_get_width (v), - dict_get_encoding (sfi->dict)); + dict_get_encoding (swi->dict->dict)); ok = error == NULL; free (error); @@ -680,13 +726,13 @@ CODE: } /* The remaining variables must be sysmis or blank string */ - while (i < dict_get_var_cnt (sfi->dict)) + while (i < dict_get_var_cnt (swi->dict->dict)) { const struct variable *v = vv[i++]; union value *val = case_data_rw (c, v); value_set_missing (val, var_get_width (v)); } - casewriter_write (sfi->writer, c); + casewriter_write (swi->writer, c); RETVAL = 1; finish: free (vv); @@ -706,22 +752,25 @@ CODE: struct sysreader_info *sri = NULL; struct file_handle *fh = fh_create_file (NULL, name, fh_default_properties () ); + struct dictionary *dict; sri = xmalloc (sizeof (*sri)); - sri->reader = sfm_open_reader (fh, NULL, &sri->dict, &sri->opts); + sri->reader = sfm_open_reader (fh, NULL, &dict, &sri->opts); - if ( NULL == sri->reader) - { - free (sri); - sri = NULL; - } + if ( sri->reader != NULL) + sri->dict = create_pspp_dict (dict); + else + { + free (sri); + sri = NULL; + } RETVAL = sri; OUTPUT: RETVAL -struct dictionary * +struct pspp_dict * pxs_get_dict (reader) struct sysreader_info *reader; CODE: @@ -755,10 +804,10 @@ PPCODE: { int v; - EXTEND (SP, dict_get_var_cnt (sfr->dict)); - for (v = 0; v < dict_get_var_cnt (sfr->dict); ++v ) + EXTEND (SP, dict_get_var_cnt (sfr->dict->dict)); + for (v = 0; v < dict_get_var_cnt (sfr->dict->dict); ++v ) { - const struct variable *var = dict_get_var (sfr->dict, v); + const struct variable *var = dict_get_var (sfr->dict->dict, v); const union value *val = case_data (c, var); PUSHs (sv_2mortal (value_to_scalar (val, var))); diff --git a/perl-module/typemap b/perl-module/typemap index cd45c33958..23d478ba3a 100644 --- a/perl-module/typemap +++ b/perl-module/typemap @@ -1,7 +1,7 @@ TYPEMAP - struct dictionary * T_PTRREF + struct pspp_dict * T_PTRREF struct variable * T_PTRREF - struct sysfile_info * T_PTRREF + struct 
syswriter_info * T_PTRREF struct sysreader_info * T_PTRREF input_format INPUT_FMT_SPEC output_format OUTPUT_FMT_SPEC diff --git a/src/data/case-map.c b/src/data/case-map.c index c54e6753fe..3cbe24792a 100644 --- a/src/data/case-map.c +++ b/src/data/case-map.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,6 +27,8 @@ #include "data/variable.h" #include "data/case.h" #include "libpspp/assertion.h" +#include "libpspp/hash-functions.h" +#include "libpspp/hmap.h" #include "gl/xalloc.h" @@ -209,57 +211,115 @@ case_map_to_compact_dict (const struct dictionary *d, return map; } + +struct stage_var + { + struct hmap_node hmap_node; /* In struct case_map_stage's 'stage_vars'. */ + const struct variable *var; + int case_index; + }; -/* Prepares dictionary D for producing a case map. Afterward, - the caller may delete, reorder, or rename variables within D - at will before using case_map_from_dict() to produce the case +struct case_map_stage + { + const struct dictionary *dict; + struct hmap stage_vars; + }; + +/* Prepares and returns a "struct case_map_stage" for producing a case map for + DICT. Afterward, the caller may delete, reorder, or rename variables within + DICT at will before using case_map_stage_get_case_map() to produce the case map. - Uses D's aux members, which must otherwise not be in use. */ -void -case_map_prepare_dict (const struct dictionary *d) + The caller must *not* add new variables to DICT. 
*/ +struct case_map_stage * +case_map_stage_create (const struct dictionary *dict) { - size_t var_cnt = dict_get_var_cnt (d); + size_t n_vars = dict_get_var_cnt (dict); + struct case_map_stage *stage; size_t i; - for (i = 0; i < var_cnt; i++) + stage = xmalloc (sizeof *stage); + stage->dict = dict; + hmap_init (&stage->stage_vars); + + for (i = 0; i < n_vars; i++) { - struct variable *v = dict_get_var (d, i); - int *src_fv = xmalloc (sizeof *src_fv); - *src_fv = var_get_case_index (v); - var_attach_aux (v, src_fv, var_dtor_free); + const struct variable *var = dict_get_var (dict, i); + struct stage_var *stage_var; + + stage_var = xmalloc (sizeof *stage_var); + stage_var->var = var; + stage_var->case_index = var_get_case_index (var); + hmap_insert (&stage->stage_vars, &stage_var->hmap_node, + hash_pointer (var, 0)); + } + + return stage; +} + +/* Destroys STAGE, which was created by case_map_stage_create(). */ +void +case_map_stage_destroy (struct case_map_stage *stage) +{ + if (stage != NULL) + { + struct stage_var *stage_var, *next_stage_var; + + HMAP_FOR_EACH_SAFE (stage_var, next_stage_var, + struct stage_var, hmap_node, &stage->stage_vars) + { + hmap_delete (&stage->stage_vars, &stage_var->hmap_node); + free (stage_var); + } + hmap_destroy (&stage->stage_vars); + free (stage); } } -/* Produces a case map from dictionary D, which must have been - previously prepared with case_map_prepare_dict(). +static const struct stage_var * +case_map_stage_find_var (const struct case_map_stage *stage, + const struct variable *var) +{ + const struct stage_var *stage_var; + + HMAP_FOR_EACH_IN_BUCKET (stage_var, struct stage_var, hmap_node, + hash_pointer (var, 0), &stage->stage_vars) + if (stage_var->var == var) + return stage_var; + + /* If the following assertion is reached, it indicates a bug in the + case_map_stage client: the client allowed a new variable to be added to + the dictionary. 
This is not allowed, because of the risk that the new + variable might have the same address as an old variable that has been + deleted. */ + NOT_REACHED (); +} - Does not retain any reference to D, and clears the aux members - set up by case_map_prepare_dict(). +/* Produces a case map from STAGE, which must have been previously created with + case_map_stage_create(). The case map maps from the original case index of + the variables in STAGE's dictionary to their current case indexes. - Returns the new case map, or a null pointer if no mapping is - required (that is, no data has changed position). */ + Returns the new case map, or a null pointer if no mapping is required (that + is, no data has changed position). */ struct case_map * -case_map_from_dict (const struct dictionary *d) +case_map_stage_get_case_map (const struct case_map_stage *stage) { struct case_map *map; - size_t var_cnt = dict_get_var_cnt (d); + size_t n_vars = dict_get_var_cnt (stage->dict); size_t n_values; size_t i; bool identity_map = true; - map = create_case_map (dict_get_proto (d)); - for (i = 0; i < var_cnt; i++) + map = create_case_map (dict_get_proto (stage->dict)); + for (i = 0; i < n_vars; i++) { - struct variable *v = dict_get_var (d, i); - int *src_fv = var_detach_aux (v); + const struct variable *var = dict_get_var (stage->dict, i); + const struct stage_var *stage_var = case_map_stage_find_var (stage, var); - if (var_get_case_index (v) != *src_fv) + if (var_get_case_index (var) != stage_var->case_index) identity_map = false; - insert_mapping (map, *src_fv, var_get_case_index (v)); - - free (src_fv); + insert_mapping (map, stage_var->case_index, var_get_case_index (var)); } if (identity_map) diff --git a/src/data/case-map.h b/src/data/case-map.h index cefbe7667a..0e9970a873 100644 --- a/src/data/case-map.h +++ b/src/data/case-map.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009 Free Software Foundation, Inc. 
+ Copyright (C) 2007, 2009, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,8 +45,9 @@ struct casewriter *case_map_create_output_translator (struct case_map *, /* For mapping cases for one version of a dictionary to those in a modified version of the same dictionary. */ -void case_map_prepare_dict (const struct dictionary *); -struct case_map *case_map_from_dict (const struct dictionary *); +struct case_map_stage *case_map_stage_create (const struct dictionary *); +void case_map_stage_destroy (struct case_map_stage *); +struct case_map *case_map_stage_get_case_map (const struct case_map_stage *); /* For eliminating "holes" in a case. */ struct case_map *case_map_to_compact_dict (const struct dictionary *d, diff --git a/src/data/dictionary.c b/src/data/dictionary.c index d36f8c519f..024f97b3d5 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -279,17 +279,6 @@ dict_clear (struct dictionary *d) attrset_clear (&d->attributes); } -/* Destroys the aux data for every variable in D, by calling - var_clear_aux() for each variable. */ -void -dict_clear_aux (struct dictionary *d) -{ - int i; - - for (i = 0; i < d->var_cnt; i++) - var_clear_aux (d->var[i].var); -} - /* Clears a dictionary and destroys it. */ void dict_destroy (struct dictionary *d) @@ -600,9 +589,6 @@ dict_delete_var (struct dictionary *d, struct variable *v) assert (dict_contains_var (d, v)); - /* Delete aux data. */ - var_clear_aux (v); - dict_unset_split_var (d, v); dict_unset_mrset_var (d, v); diff --git a/src/data/dictionary.h b/src/data/dictionary.h index d50921201b..4050756799 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2004, 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,7 +32,6 @@ struct dictionary *dict_clone (const struct dictionary *); /* Clearing and destroying dictionaries. */ void dict_clear (struct dictionary *); -void dict_clear_aux (struct dictionary *); void dict_destroy (struct dictionary *); /* Common ways to access variables. */ diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index a1a7415ca1..4ddd80be00 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -145,6 +145,7 @@ process_node (struct gnumeric_reader *r) if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && XML_READER_TYPE_ELEMENT == r->node_type) { + ++r->sheet_index; r->state = STATE_SHEET_START; } break; @@ -154,21 +155,15 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_SHEET_NAME; } - else if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) - { - r->state = STATE_INIT; - } break; case STATE_SHEET_NAME: if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { - r->state = STATE_SHEET_START; + r->state = STATE_INIT; } else if (XML_READER_TYPE_TEXT == r->node_type) { - ++r->sheet_index; if ( r->target_sheet != NULL) { xmlChar *value = xmlTextReaderValue (r->xtr); @@ -229,7 +224,9 @@ process_node (struct gnumeric_reader *r) case STATE_CELL: if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && XML_READER_TYPE_END_ELEMENT == r->node_type) - r->state = STATE_CELLS_START; + { + r->state = STATE_CELLS_START; + } break; default: break; @@ -299,7 +296,7 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic (xmlInputCloseCallback) gzclose, gz, NULL, NULL, 0); - if ( r->xtr == NULL) + if ( r->xtr == NULL ) goto error; if ( gri->cell_range ) @@ -422,10 +419,13 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic } } - - /* 
Create the dictionary and populate it */ - *dict = r->dict = dict_create ( - CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr))); + { + const xmlChar *enc = xmlTextReaderConstEncoding (r->xtr); + if ( enc == NULL) + goto error; + /* Create the dictionary and populate it */ + *dict = r->dict = dict_create (CHAR_CAST (const char *, enc)); + } for (i = 0 ; i < n_var_specs ; ++i ) { diff --git a/src/data/identifier.c b/src/data/identifier.c index a757b31e3a..6191f0db90 100644 --- a/src/data/identifier.c +++ b/src/data/identifier.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2005, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -188,7 +188,13 @@ lex_is_idn (char c) bool lex_uc_is_id1 (ucs4_t uc) { - return is_ascii_id1 (uc) || (uc >= 0x80 && uc_is_property_id_start (uc)); + return (uc < 0x80 + ? is_ascii_id1 (uc) + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC may be a character in an identifier @@ -198,7 +204,12 @@ lex_uc_is_idn (ucs4_t uc) { return (uc < 0x80 ? is_ascii_id1 (uc) || isdigit (uc) || uc == '.' || uc == '_' - : uc >= 0x80 && uc_is_property_id_continue (uc)); + : (uc_is_general_category_withtable (uc, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_S | + UC_CATEGORY_MASK_N) + && uc != 0xfffc && uc != 0xfffd)); } /* Returns true if Unicode code point UC is a space that separates tokens. 
*/ diff --git a/src/data/variable.c b/src/data/variable.c index fe4645ee41..285d9d88f7 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -72,10 +72,6 @@ struct variable char **short_names; size_t short_name_cnt; - /* Each command may use these fields as needed. */ - void *aux; - void (*aux_dtor) (struct variable *); - /* Custom attributes. */ struct attrset attributes; }; @@ -149,7 +145,6 @@ var_destroy (struct variable *v) assert (!var_has_vardict (v)); mv_destroy (&v->miss); var_clear_short_names (v); - var_clear_aux (v); val_labs_destroy (v->val_labs); var_clear_label (v); attrset_destroy (var_get_attributes (v)); @@ -968,63 +963,6 @@ var_get_case_index (const struct variable *v) return vardict_get_case_index (v->vardict); } -/* Returns V's auxiliary data, or a null pointer if none has been - attached. */ -void * -var_get_aux (const struct variable *v) -{ - return v->aux; -} - -/* Assign auxiliary data AUX to variable V, which must not - already have auxiliary data. Before V's auxiliary data is - cleared, AUX_DTOR(V) will be called. (var_dtor_free, below, - may be appropriate for use as AUX_DTOR.) */ -void * -var_attach_aux (const struct variable *v_, - void *aux, void (*aux_dtor) (struct variable *)) -{ - struct variable *v = CONST_CAST (struct variable *, v_); - assert (v->aux == NULL); - assert (aux != NULL); - v->aux = aux; - v->aux_dtor = aux_dtor; - return aux; -} - -/* Remove auxiliary data, if any, from V, and return it, without - calling any associated destructor. */ -void * -var_detach_aux (struct variable *v) -{ - void *aux = v->aux; - assert (aux != NULL); - v->aux = NULL; - return aux; -} - -/* Clears auxiliary data, if any, from V, and calls any - associated destructor. 
*/ -void -var_clear_aux (struct variable *v) -{ - if (v->aux != NULL) - { - if (v->aux_dtor != NULL) - v->aux_dtor (v); - v->aux = NULL; - } -} - -/* This function is appropriate for use an auxiliary data - destructor (passed as AUX_DTOR to var_attach_aux()) for the - case where the auxiliary data should be passed to free(). */ -void -var_dtor_free (struct variable *v) -{ - free (v->aux); -} - /* Returns variable V's attribute set. The caller may examine or modify the attribute set, but must not destroy it. Destroying V, or calling var_set_attributes() on V, will also destroy its diff --git a/src/data/variable.h b/src/data/variable.h index aeed5a5e09..aef0d87c5b 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -157,14 +157,6 @@ void var_clear_short_names (struct variable *); size_t var_get_dict_index (const struct variable *); size_t var_get_case_index (const struct variable *); -/* Variable auxiliary data. */ -void *var_get_aux (const struct variable *); -void *var_attach_aux (const struct variable *, - void *aux, void (*aux_dtor) (struct variable *)); -void var_clear_aux (struct variable *); -void *var_detach_aux (struct variable *); -void var_dtor_free (struct variable *); - /* Custom attributes. 
*/ struct attrset *var_get_attributes (const struct variable *); void var_set_attributes (struct variable *, const struct attrset *); diff --git a/src/language/command.c b/src/language/command.c index 0502ff9d00..5b376629d4 100644 --- a/src/language/command.c +++ b/src/language/command.c @@ -138,7 +138,6 @@ cmd_parse_in_state (struct lexer *lexer, struct dataset *ds, ds = session_active_dataset (session); assert (!proc_is_open (ds)); unset_cmd_algorithm (); - dict_clear_aux (dataset_dict (ds)); if (!dataset_end_of_command (ds)) result = CMD_CASCADING_FAILURE; diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index 789300015e..ac2944caee 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -227,10 +227,19 @@ parse_spreadsheet (struct lexer *lexer) else if (lex_match_id (lexer, "INDEX")) { sri->sheet_index = lex_integer (lexer); + if (sri->sheet_index <= 0) + { + msg (SE, _("The sheet index must be greater than or equal to 1")); + goto error; + } lex_get (lexer); } else - goto error; + { + msg (SE, _("%s must be followed by either \"%s\" or \"%s\"."), + "/SHEET", "NAME", "INDEX"); + goto error; + } } else if (lex_match_id (lexer, "CELLRANGE")) { @@ -249,7 +258,11 @@ parse_spreadsheet (struct lexer *lexer) lex_get (lexer); } else - goto error; + { + msg (SE, _("%s must be followed by either \"%s\" or \"%s\"."), + "/CELLRANGE", "FULL", "RANGE"); + goto error; + } } else if (lex_match_id (lexer, "READNAMES")) { @@ -264,7 +277,11 @@ parse_spreadsheet (struct lexer *lexer) sri->read_names = false; } else - goto error; + { + msg (SE, _("%s must be 
followed by either \"%s\" or \"%s\"."), + "/READNAMES", "ON", "OFF"); + goto error; + } } else { diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index 35b894a750..1218a27b18 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -73,6 +73,7 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, struct file_handle *fh = NULL; struct dictionary *dict = NULL; struct case_map *map = NULL; + struct case_map_stage *stage = NULL; char *encoding = NULL; for (;;) @@ -125,7 +126,7 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, if (reader == NULL) goto error; - case_map_prepare_dict (dict); + stage = case_map_stage_create (dict); while (lex_token (lexer) != T_ENDCMD) { @@ -135,7 +136,8 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, } dict_compact_values (dict); - map = case_map_from_dict (dict); + map = case_map_stage_get_case_map (stage); + case_map_stage_destroy (stage); if (map != NULL) reader = case_map_create_input_translator (map, reader); @@ -147,6 +149,7 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, return CMD_SUCCESS; error: + case_map_stage_destroy (stage); fh_unref (fh); casereader_destroy (reader); if (dict != NULL) diff --git a/src/language/data-io/save-translate.c b/src/language/data-io/save-translate.c index bc68e553e1..6601f30b18 100644 --- a/src/language/data-io/save-translate.c +++ b/src/language/data-io/save-translate.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2010, 2011 Free Software Foundation, Inc. 
+ Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,6 +45,7 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds) enum { CSV_FILE = 1, TAB_FILE } type; struct dictionary *dict; + struct case_map_stage *stage; struct case_map *map; struct casewriter *writer; struct file_handle *handle; @@ -67,6 +68,7 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds) type = 0; dict = dict_clone (dataset_dict (ds)); + stage = NULL; map = NULL; handle = NULL; @@ -81,7 +83,7 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds) delimiter = 0; qualifier = '"'; - case_map_prepare_dict (dict); + stage = case_map_stage_create (dict); dict_delete_scratch_vars (dict); while (lex_match (lexer, T_SLASH)) @@ -271,7 +273,8 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds) goto error; fh_unref (handle); - map = case_map_from_dict (dict); + map = case_map_stage_get_case_map (stage); + case_map_stage_destroy (stage); if (map != NULL) writer = case_map_create_output_translator (map, writer); dict_destroy (dict); @@ -283,6 +286,7 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds) return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: + case_map_stage_destroy (stage); fh_unref (handle); dict_destroy (dict); case_map_destroy (map); diff --git a/src/language/data-io/save.c b/src/language/data-io/save.c index b8031f71ea..e01a8c941e 100644 --- a/src/language/data-io/save.c +++ b/src/language/data-io/save.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -156,6 +156,7 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, struct file_handle *handle; /* Output file. */ struct dictionary *dict; /* Dictionary for output file. */ struct casewriter *writer; /* Writer. */ + struct case_map_stage *stage; /* Preparation for 'map'. */ struct case_map *map; /* Map from input data to data for writer. */ /* Common options. */ @@ -172,11 +173,12 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, handle = NULL; dict = dict_clone (dataset_dict (ds)); writer = NULL; + stage = NULL; map = NULL; sysfile_opts = sfm_writer_default_options (); porfile_opts = pfm_writer_default_options (); - case_map_prepare_dict (dict); + stage = case_map_stage_create (dict); dict_delete_scratch_vars (dict); lex_match (lexer, T_SLASH); @@ -301,7 +303,8 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, if (writer == NULL) goto error; - map = case_map_from_dict (dict); + map = case_map_stage_get_case_map (stage); + case_map_stage_destroy (stage); if (map != NULL) writer = case_map_create_output_translator (map, writer); dict_destroy (dict); @@ -310,6 +313,7 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, return writer; error: + case_map_stage_destroy (stage); fh_unref (handle); casewriter_destroy (writer); dict_destroy (dict); diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index 67f5855939..05a6b6d3f3 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -123,6 +123,7 @@ enum /* A crosstabulation of 2 or more variables. */ struct pivot_table { + struct crosstabs_proc *proc; struct fmt_spec weight_format; /* Format for weight variable. */ double missing; /* Weight of missing cases. */ @@ -162,17 +163,13 @@ struct pivot_table /* Integer mode variable info. 
*/ struct var_range { + struct hmap_node hmap_node; /* In struct crosstabs_proc var_ranges map. */ + const struct variable *var; /* The variable. */ int min; /* Minimum value. */ int max; /* Maximum value + 1. */ int count; /* max - min. */ }; -static inline struct var_range * -get_var_range (const struct variable *v) -{ - return var_get_aux (v); -} - struct crosstabs_proc { const struct dictionary *dict; @@ -185,6 +182,7 @@ struct crosstabs_proc /* Variables specified on VARIABLES. */ const struct variable **variables; size_t n_variables; + struct hmap var_ranges; /* TABLES. */ struct pivot_table *pivots; @@ -201,6 +199,9 @@ struct crosstabs_proc bool descending; /* True if descending sort order is requested. */ }; +const struct var_range *get_var_range (const struct crosstabs_proc *, + const struct variable *); + static bool should_tabulate_case (const struct pivot_table *, const struct ccase *, enum mv_class exclude); static void tabulate_general_case (struct pivot_table *, const struct ccase *, @@ -215,6 +216,7 @@ int cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { const struct variable *wv = dict_get_weight (dataset_dict (ds)); + struct var_range *range, *next_range; struct crosstabs_proc proc; struct casegrouper *grouper; struct casereader *input, *group; @@ -228,6 +230,7 @@ cmd_crosstabs (struct lexer *lexer, struct dataset *ds) proc.bad_warn = true; proc.variables = NULL; proc.n_variables = 0; + hmap_init (&proc.var_ranges); proc.pivots = NULL; proc.n_pivots = 0; proc.descending = false; @@ -346,6 +349,12 @@ cmd_crosstabs (struct lexer *lexer, struct dataset *ds) exit: free (proc.variables); + HMAP_FOR_EACH_SAFE (range, next_range, struct var_range, hmap_node, + &proc.var_ranges) + { + hmap_delete (&proc.var_ranges, &range->hmap_node); + free (range); + } for (pt = &proc.pivots[0]; pt < &proc.pivots[proc.n_pivots]; pt++) { free (pt->vars); @@ -427,6 +436,7 @@ crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct pivot_table *pt = 
&proc->pivots[proc->n_pivots++]; int j; + pt->proc = proc; pt->weight_format = proc->weight_format; pt->missing = 0.; pt->n_vars = n_by; @@ -513,11 +523,15 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, for (i = orig_nv; i < proc->n_variables; i++) { + const struct variable *var = proc->variables[i]; struct var_range *vr = xmalloc (sizeof *vr); + + vr->var = var; vr->min = min; vr->max = max + 1.; vr->count = max - min + 1; - var_attach_aux (proc->variables[i], vr, var_dtor_free); + hmap_insert (&proc->var_ranges, &vr->hmap_node, + hash_pointer (var, 0)); } if (lex_token (lexer) == T_SLASH) @@ -535,6 +549,22 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, /* Data file processing. */ +const struct var_range * +get_var_range (const struct crosstabs_proc *proc, const struct variable *var) +{ + if (!hmap_is_empty (&proc->var_ranges)) + { + const struct var_range *range; + + HMAP_FOR_EACH_IN_BUCKET (range, struct var_range, hmap_node, + hash_pointer (var, 0), &proc->var_ranges) + if (range->var == var) + return range; + } + + return NULL; +} + static bool should_tabulate_case (const struct pivot_table *pt, const struct ccase *c, enum mv_class exclude) @@ -543,7 +573,7 @@ should_tabulate_case (const struct pivot_table *pt, const struct ccase *c, for (j = 0; j < pt->n_vars; j++) { const struct variable *var = pt->vars[j]; - struct var_range *range = get_var_range (var); + const struct var_range *range = get_var_range (pt->proc, var); if (var_is_value_missing (var, case_data (c, var), exclude)) return false; @@ -1423,7 +1453,7 @@ enum_var_values (const struct pivot_table *pt, int var_idx, union value **valuesp, int *n_values, bool descending) { const struct variable *var = pt->vars[var_idx]; - struct var_range *range = get_var_range (var); + const struct var_range *range = get_var_range (pt->proc, var); union value *values; size_t i; diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 5d308f11ab..a20396397d 
100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -1574,7 +1574,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { int i; casenumber imin = 0; - double imax = es[v].cc; + casenumber imax; struct casereader *reader; struct ccase *c; @@ -1592,6 +1592,8 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer); es[v].sorted_writer = NULL; + imax = casereader_get_case_cnt (es[v].sorted_reader); + es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima)); es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima)); for (i = 0; i < examine->calc_extremes; ++i) @@ -1604,7 +1606,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) (c = casereader_read (reader)) != NULL; case_unref (c)) { const double val = case_data_idx (c, EX_VAL)->f; - const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */ + const double wt = case_data_idx (c, EX_WT)->f; moments_pass_two (es[v].mom, val, wt); @@ -1620,15 +1622,15 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) min->val = val; value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width); } - imin += wt; + imin ++; } - imax -= wt; + imax --; if (imax < examine->calc_extremes) { int x; - for (x = imax; x < imax + wt; ++x) + for (x = imax; x < imax + 1; ++x) { struct extremity *max; @@ -1646,7 +1648,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) if (examine->calc_extremes > 0) { assert (es[v].minima[0].val == es[v].minimum); - assert (es[v].maxima[0].val == es[v].maximum); + assert (es[v].maxima[0].val == es[v].maximum); } { diff --git a/src/libpspp/hmap.h b/src/libpspp/hmap.h index c3cf62fa64..3f2858a8a8 100644 --- a/src/libpspp/hmap.h +++ b/src/libpspp/hmap.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. 
- Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -217,6 +217,30 @@ void hmap_moved (struct hmap *, : 0); \ (DATA) = (NEXT)) +/* These macros are like the *_WITH_HASH macros above, except that they don't + skip data elements that are in the same hash bucket but have different hash + values. This is a small optimization in code where comparing keys is just + as fast as comparing hashes (e.g. the key is an "int") or comparing keys + would duplicate comparing the hashes (e.g. the hash is the first word of a + multi-word random key). + + These macros evaluate HASH only once. They evaluate their + other arguments many times. */ +#define HMAP_FIRST_IN_BUCKET(STRUCT, MEMBER, HMAP, HASH) \ + HMAP_NULLABLE_DATA (hmap_first_in_bucket (HMAP, HASH), STRUCT, MEMBER) +#define HMAP_NEXT_IN_BUCKET(DATA, STRUCT, MEMBER) \ + HMAP_NULLABLE_DATA (hmap_next_in_bucket (&(DATA)->MEMBER), STRUCT, MEMBER) +#define HMAP_FOR_EACH_IN_BUCKET(DATA, STRUCT, MEMBER, HASH, HMAP) \ + for ((DATA) = HMAP_FIRST_IN_BUCKET (STRUCT, MEMBER, HMAP, HASH); \ + (DATA) != NULL; \ + (DATA) = HMAP_NEXT_IN_BUCKET (DATA, STRUCT, MEMBER)) +#define HMAP_FOR_EACH_IN_BUCKET_SAFE(DATA, NEXT, STRUCT, MEMBER, HASH, HMAP) \ + for ((DATA) = HMAP_FIRST_IN_BUCKET (STRUCT, MEMBER, HMAP, HASH); \ + ((DATA) != NULL \ + ? ((NEXT) = HMAP_NEXT_IN_BUCKET (DATA, STRUCT, MEMBER), 1) \ + : 0); \ + (DATA) = (NEXT)) + /* Convenience macros for iteration. These macros automatically use HMAP_DATA to obtain the data @@ -357,6 +381,48 @@ hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash) map->count++; } +/* Returns the first node in MAP in the bucket for HASH, or a null pointer if + that bucket in MAP is empty. + + This function runs in constant time.
+ + Nodes are returned in arbitrary order that may change whenever the hash + table's current capacity changes, as reported by hmap_capacity(). Calls to + hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of + a hash map. Inserting a node with hmap_insert_fast() or deleting one with + hmap_delete() will not change the relative ordering of nodes. + + The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide + convenient ways to iterate over all the nodes with a given hash. The + HMAP_FIRST_IN_BUCKET macro is an interface to this particular function that + is often more convenient. */ +static inline struct hmap_node * +hmap_first_in_bucket (const struct hmap *map, size_t hash) +{ + return map->buckets[hash & map->mask]; +} + +/* Returns the next node following NODE within the same bucket, or a null + pointer if NODE is the last node in its bucket. + + This function runs in constant time. + + Nodes are returned in arbitrary order that may change whenever the hash + table's current capacity changes, as reported by hmap_capacity(). Calls to + hmap_insert(), hmap_reserve(), and hmap_shrink() can change the capacity of + a hash map. Inserting a node with hmap_insert_fast() or deleting one with + hmap_delete() will not change the relative ordering of nodes. + + The HMAP_FOR_EACH_IN_BUCKET and HMAP_FOR_EACH_IN_BUCKET_SAFE macros provide + convenient ways to iterate over all the nodes with a given hash. The + HMAP_NEXT_IN_BUCKET macro is an interface to this particular function that + is often more convenient. */ +static inline struct hmap_node * +hmap_next_in_bucket (const struct hmap_node *node) +{ + return node->next; +} + /* Removes NODE from MAP. The client is responsible for freeing any data associated with NODE, if necessary. 
diff --git a/src/math/moments.c b/src/math/moments.c index 83cbbe4ca1..40180f7b7e 100644 --- a/src/math/moments.c +++ b/src/math/moments.c @@ -225,12 +225,8 @@ moments_calculate (const struct moments *m, } else { - /* After the second pass we can calculate any stat. We - don't support "online" computation during the second - pass, so As a simple self-check, the total weight for - the passes must agree. */ + /* After the second pass we can calculate any stat. */ assert (m->pass == 2); - assert (m->w1 == m->w2); if (m->w2 > 0.) { diff --git a/src/math/percentiles.c b/src/math/percentiles.c index 2063dd2c65..9055279b5a 100644 --- a/src/math/percentiles.c +++ b/src/math/percentiles.c @@ -48,8 +48,6 @@ percentile_calculate (const struct percentile *ptl, enum pc_alg alg) struct percentile *mutable = CONST_CAST (struct percentile *, ptl); const struct order_stats *os = &ptl->parent; - assert (os->cc == ptl->w); - if ( ptl->g1 == SYSMIS) mutable->g1 = (os->k[0].tc - os->k[0].cc) / os->k[0].c_p1; diff --git a/src/math/trimmed-mean.c b/src/math/trimmed-mean.c index b985125295..2d44d0dc08 100644 --- a/src/math/trimmed-mean.c +++ b/src/math/trimmed-mean.c @@ -81,8 +81,6 @@ trimmed_mean_calculate (const struct trimmed_mean *tm) { const struct order_stats *os = (const struct order_stats *) tm; - assert (os->cc == tm->w); - return ( (os->k[0].cc_p1 - os->k[0].tc) * os->k[0].y_p1 diff --git a/tests/language/data-io/get-data-spreadsheet.at b/tests/language/data-io/get-data-spreadsheet.at index 19b8964dca..c9060aef5f 100644 --- a/tests/language/data-io/get-data-spreadsheet.at +++ b/tests/language/data-io/get-data-spreadsheet.at @@ -175,6 +175,153 @@ AT_BANNER([GET DATA Spreadsheet /TYPE=GNM]) CHECK_SPREADSHEET_READER([GNM]) +dnl Check for a bug where gnumeric files were interpreted incorrectly +AT_SETUP([GET DATA /TYPE=GNM sheet index bug]) +AT_DATA([minimal3.gnumeric],[dnl + + + + + Sheet1 + Sheet2 + Sheet3 + + + + Sheet1 + 2 + 3 + + + Print_Area + #REF! 
+ A1 + + + Sheet_Title + "Sheet1" + A1 + + + + + + + + + + + Name + x + y + Sheet One + 1 + 2 + foo + 3 + 4 + bar + 5 + 6 + + + + Sheet2 + 2 + 2 + + + Print_Area + #REF! + A1 + + + Sheet_Title + "Sheet2" + A1 + + + + + + + + + + + + Comment + DOB + wealth + Sheet Two + 24/5/1966 + 0.02 + wee + 37145 + 3000 + + + + Sheet3 + 2 + 2 + + + Print_Area + #REF! + A1 + + + Sheet_Title + "Sheet3" + A1 + + + + + + + + + + + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + + + + +]) + +AT_DATA([gnum.sps], [dnl +GET DATA + /TYPE=GNM + /FILE='minimal3.gnumeric' + /SHEET=index 3 + /READNAMES=off + . + +LIST. +]) + +AT_CHECK([pspp -O format=csv gnum.sps], [0], [dnl +Table: Data List +VAR001,VAR002,VAR003 +3 ,4.00,5.00 +6 ,7.00,8.00 +9 ,10.00,11.00 +]) + + +AT_CLEANUP + AT_BANNER([GET DATA Spreadsheet /TYPE=ODS]) CHECK_SPREADSHEET_READER([ODS]) diff --git a/tests/language/expressions/evaluate.at b/tests/language/expressions/evaluate.at index 4512458afc..cd514fef93 100644 --- a/tests/language/expressions/evaluate.at +++ b/tests/language/expressions/evaluate.at @@ -3,12 +3,13 @@ m4_define([CHECK_EXPR_EVAL], AT_DATA([evaluate.sps], [set mxwarn 1000. set mxerr 1000. +set epoch 1940. m4_foreach([check], [m4_shift($@)], [DEBUG EVALUATE NOOPT m4_argn(4, check)/[]m4_car(check). DEBUG EVALUATE m4_argn(4, check)/[]m4_car(check). ])]) AT_CAPTURE_FILE([evaluate.sps]) - m4_pushdef([i], [2]) + m4_pushdef([i], [3]) AT_CHECK([pspp --testing-mode --error-file=- --no-output evaluate.sps], [m4_if(m4_bregexp([m4_foreach([check], [m4_shift($@)], [m4_argn(3, check)])], [error:]), [-1], [0], [1])], [stdout]) diff --git a/tests/language/stats/examine.at b/tests/language/stats/examine.at index 3663a1bb79..bad259d58e 100644 --- a/tests/language/stats/examine.at +++ b/tests/language/stats/examine.at @@ -36,10 +36,11 @@ EXAMINE /STATISTICS descriptives extreme(3) . ]) -AT_CHECK([pspp -o pspp.csv examine.sps]) + + dnl In the following data, only the extreme values have been checked. 
dnl The descriptives have been blindly pasted. -AT_CHECK([cat pspp.csv], [0], [dnl +AT_CHECK([pspp -O format=csv examine.sps], [0], [dnl Table: Reading free-form data from INLINE. Variable,Format QUALITY,F8.0 @@ -77,8 +78,8 @@ Breaking Strain,Highest,1,12,7.00 ,,2,16,6.00 ,,3,14,5.00 ,Lowest,1,3,1.00 -,,2,3,1.00 -,,3,4,1.00 +,,2,4,1.00 +,,3,2,2.00 Table: Descriptives ,,,Statistic,Std. Error @@ -110,11 +111,11 @@ Breaking Strain,Aspeger,Highest,1,6,4.00 ,,,2,5,4.00 ,,,3,1,3.00 ,,Lowest,1,3,1.00 -,,,2,3,1.00 -,,,3,4,1.00 +,,,2,4,1.00 +,,,3,2,2.00 ,Bloggs,Highest,1,7,5.00 ,,,2,9,4.00 -,,,3,9,4.00 +,,,3,11,3.00 ,,Lowest,1,8,2.00 ,,,2,10,2.00 ,,,3,11,3.00 @@ -123,7 +124,7 @@ Breaking Strain,Aspeger,Highest,1,6,4.00 ,,,3,14,5.00 ,,Lowest,1,15,3.00 ,,,2,13,4.00 -,,,3,13,4.00 +,,,3,14,5.00 Table: Descriptives ,Manufacturer,,,Statistic,Std. Error @@ -167,6 +168,7 @@ Breaking Strain,Aspeger,Mean,,2.25,.45 ,,Skewness,,.30,.75 ,,Kurtosis,,.15,1.48 ]) + AT_CLEANUP AT_SETUP([EXAMINE -- extremes]) @@ -202,8 +204,8 @@ examine v1 /statistics=extreme(6) . ]) -AT_CHECK([pspp -o pspp.csv examine.sps]) -AT_CHECK([cat pspp.csv], [0], [dnl + +AT_CHECK([pspp -O format=csv examine.sps], [0],[dnl Table: Case Processing Summary ,Cases,,,,, ,Valid,,Missing,,Total, @@ -215,16 +217,92 @@ Table: Extreme Values V1,Highest,1,21,20.00 ,,2,20,19.00 ,,3,19,18.00 -,,4,19,18.00 -,,5,18,17.00 -,,6,17,16.00 +,,4,18,17.00 +,,5,17,16.00 +,,6,16,15.00 ,Lowest,1,1,1.00 ,,2,2,2.00 ,,3,3,3.00 -,,4,3,3.00 -,,5,4,3.00 -,,6,5,4.00 +,,4,4,3.00 +,,5,5,4.00 +,,6,6,5.00 ]) + +AT_CLEANUP + + + +AT_SETUP([EXAMINE -- extremes with fractional weights]) +AT_DATA([extreme.sps], [dnl +set format=F20.3. +data list notable list /w * x *. +begin data. + 0.88 300000 + 0.86 320000 + 0.98 480000 + 0.93 960000 + 1.35 960000 + 1.31 960000 + 0.88 960000 + 0.88 1080000 + 0.88 1080000 + 0.95 1200000 + 1.47 1200000 + 0.93 1200000 + 0.98 1320000 + 1.31 1380000 + 0.93 1440000 + 0.88 1560000 + 1.56 1560000 + 1.47 1560000 +end data. 
+ +weight by w. + + +EXAMINE + x + /STATISTICS = DESCRIPTIVES EXTREME (5) + . +]) + +AT_CHECK([pspp -O format=csv extreme.sps], [0], [dnl +Table: Case Processing Summary +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x,19.430,100%,.000,0%,19.430,100% + +Table: Extreme Values +,,,Case Number,Value +x,Highest,1,18,1560000.000 +,,2,17,1560000.000 +,,3,16,1560000.000 +,,4,15,1440000.000 +,,5,14,1380000.000 +,Lowest,1,1,300000.000 +,,2,2,320000.000 +,,3,3,480000.000 +,,4,4,960000.000 +,,5,5,960000.000 + +Table: Descriptives +,,,Statistic,Std. Error +x,Mean,,1120010.293,86222.178 +,95% Confidence Interval for Mean,Lower Bound,939166.693, +,,Upper Bound,1300853.894, +,5% Trimmed Mean,,1141017.899, +,Median,,1200000.000, +,Variance,,144447748124.869, +,Std. Deviation,,380062.821, +,Minimum,,300000.000, +,Maximum,,1560000.000, +,Range,,1260000.000, +,Interquartile Range,,467258.065, +,Skewness,,-.887,.519 +,Kurtosis,,.340,1.005 +]) + AT_CLEANUP dnl Test the PERCENTILES subcommand of the EXAMINE command.