From d30e5930595659a5fe23de0cf7316cc4eb87b162 Mon Sep 17 00:00:00 2001
From: John Darrington
Date: Sat, 2 Jul 2011 17:32:16 +0200
Subject: [PATCH] Quick Cluster: Avoid crash when presented with missing values

Each group of cases is now passed through a missing-value filter over
the clustering variables before the k-means computation when the
missing-value treatment is MISS_LISTWISE (the default).

The class of missing values handed to that filter, qc.exclude, is set
to MV_ANY next to qc.missing_type so that it is never read
uninitialised.
---
Review note: this revision adds the qc.exclude initialisation.  Hunk
offsets and the diffstat were adjusted for the extra line; the blob ids
on the "index" line still refer to the earlier revision.  Line breaks
and indentation were reconstructed from a whitespace-collapsed copy, so
apply with --ignore-whitespace if the context does not match exactly.

 src/language/stats/quick-cluster.c    | 20 ++++++++++++++
 tests/language/stats/quick-cluster.at | 38 +++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/src/language/stats/quick-cluster.c b/src/language/stats/quick-cluster.c
index 9adcc642..c61be3b7 100644
--- a/src/language/stats/quick-cluster.c
+++ b/src/language/stats/quick-cluster.c
@@ -46,6 +46,13 @@
 #define _(msgid) gettext (msgid)
 #define N_(msgid) msgid
 
+enum missing_type
+  {
+    MISS_LISTWISE,   /* Groups are filtered for missing values first. */
+    MISS_PAIRWISE,   /* No special handling yet. */
+  };
+
+
 struct qc
 {
   const struct variable **vars;
@@ -55,6 +62,9 @@ struct qc
   int maxiter; /* Maximum iterations (Given by the user) */
 
   const struct variable *wv; /* Weighting variable. */
+
+  enum missing_type missing_type; /* Missing-value treatment. */
+  enum mv_class exclude; /* Class given to the missing-value filter. */
 };
 
 /* Holds all of the information for the functions. int n, holds the number of
@@ -279,6 +289,7 @@ kmeans_calculate_indexes_and_check_convergence (struct Kmeans *kmeans, const str
       struct ccase *index_case_new = case_create (kmeans->proto);
       int bestindex = kmeans_get_nearest_group (kmeans, c, qc);
       double weight = qc->wv ? case_data (c, qc->wv)->f : 1.0;
+      assert (bestindex < kmeans->num_elements_groups->size);
       kmeans->num_elements_groups->data[bestindex] += weight;
       if (kmeans->index_rdr)
         {
@@ -487,6 +498,8 @@ cmd_quick_cluster (struct lexer *lexer, struct dataset *ds)
   const struct dictionary *dict = dataset_dict (ds);
   qc.ngroups = 2;
   qc.maxiter = 2;
+  qc.missing_type = MISS_LISTWISE;
+  qc.exclude = MV_ANY; /* Read by the missing-value filter below. */
 
   if (!parse_variables_const (lexer, dict, &qc.vars, &qc.n_vars,
                               PV_NO_DUPLICATE | PV_NUMERIC))
@@ -536,6 +549,13 @@ cmd_quick_cluster (struct lexer *lexer, struct dataset *ds)
 
   while (casegrouper_get_next_group (grouper, &group))
     {
+      if ( qc.missing_type == MISS_LISTWISE )
+        {
+          group = casereader_create_filter_missing (group, qc.vars, qc.n_vars,
+                                                    qc.exclude,
+                                                    NULL, NULL);
+        }
+
       kmeans = kmeans_create (&qc);
       kmeans_cluster (kmeans, group, &qc);
       quick_cluster_show_results (kmeans, &qc);
diff --git a/tests/language/stats/quick-cluster.at b/tests/language/stats/quick-cluster.at
index b2195a47..05688e8e 100644
--- a/tests/language/stats/quick-cluster.at
+++ b/tests/language/stats/quick-cluster.at
@@ -115,3 +115,41 @@ AT_CHECK([diff pspp-w.csv pspp-unw.csv], [0])
 AT_CHECK([diff pspp-w.csv pspp-unw.csv], [0])
 
 AT_CLEANUP
+AT_SETUP([QUICK CLUSTER with listwise missing])
+AT_DATA([quick-miss.sps], [dnl
+data list notable list /x *.
+begin data.
+1
+1
+2
+3
+4
+.
+2
+end data.
+
+QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100).
+])
+
+AT_CHECK([pspp -o pspp-m.csv quick-miss.sps])
+
+AT_DATA([quick-nmiss.sps], [dnl
+data list notable list /x *.
+begin data.
+1
+1
+2
+3
+4
+2
+end data.
+
+QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100).
+])
+
+AT_CHECK([pspp -o pspp-nm.csv quick-nmiss.sps])
+
+AT_CHECK([diff pspp-m.csv pspp-nm.csv], [0])
+
+
+AT_CLEANUP
-- 
2.30.2