X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fquick-cluster.c;h=b171951557f95c1a464aacc9e1dfcfabc8576dd6;hb=ccaf88efae9f6f4d93d7812834c412e8fd511bb2;hp=0a2b75de252b7e9e7f5405c923a2700fbb15d05b;hpb=654d2360a1dce57033dbf0030d17522d3c797987;p=pspp diff --git a/src/language/stats/quick-cluster.c b/src/language/stats/quick-cluster.c index 0a2b75de25..b171951557 100644 --- a/src/language/stats/quick-cluster.c +++ b/src/language/stats/quick-cluster.c @@ -160,7 +160,7 @@ diff_matrix (const gsl_matrix *m1, const gsl_matrix *m2) -static double +static double matrix_mindist (const gsl_matrix *m, int *mn, int *mm) { int i, j; @@ -190,20 +190,6 @@ matrix_mindist (const gsl_matrix *m, int *mn, int *mm) } -static void -dump_matrix (const gsl_matrix *m) -{ - size_t i, j; - - for (i = 0 ; i < m->size1; ++i) - { - for (j = 0 ; j < m->size2; ++j) - printf ("%02f ", gsl_matrix_get (m, i, j)); - printf ("\n"); - } -} - - /* Return the distance of C from the group whose index is WHICH */ static double dist_from_case (const struct Kmeans *kmeans, const struct ccase *c, const struct qc *qc, int which) @@ -215,10 +201,10 @@ dist_from_case (const struct Kmeans *kmeans, const struct ccase *c, const struct const union value *val = case_data (c, qc->vars[j]); if ( var_is_value_missing (qc->vars[j], val, qc->exclude)) NOT_REACHED (); - + dist += pow2 (gsl_matrix_get (kmeans->centers, which, j) - val->f); } - + return dist; } @@ -239,7 +225,7 @@ min_dist_from (const struct Kmeans *kmeans, const struct qc *qc, int which) { dist += pow2 (gsl_matrix_get (kmeans->centers, i, j) - gsl_matrix_get (kmeans->centers, which, j)); } - + if (dist < mindist) { mindist = dist; @@ -251,7 +237,7 @@ min_dist_from (const struct Kmeans *kmeans, const struct qc *qc, int which) -/* Calculate the intial cluster centers. */ +/* Calculate the initial cluster centers. */ static void kmeans_initial_centers (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *qc) { @@ -304,7 +290,7 @@ kmeans_initial_centers (struct Kmeans *kmeans, const struct casereader *reader, } } else if (dist_from_case (kmeans, c, qc, mp) > min_dist_from (kmeans, qc, mq)) - /* If the distance between C and the second nearest group (MP) is greater than the + /* If the distance between C and the second nearest group (MP) is greater than the smallest distance between the nearest group (MQ) and any other group, then replace MQ with C */ { @@ -422,7 +408,7 @@ kmeans_cluster (struct Kmeans *kmeans, struct casereader *reader, const struct q if ( var_is_value_missing (qc->vars[j], val, qc->exclude)) missing = true; } - + if (missing) continue; @@ -450,7 +436,7 @@ kmeans_cluster (struct Kmeans *kmeans, struct casereader *reader, const struct q double *x = gsl_matrix_ptr (kmeans->updated_centers, group, j); *x += val->f * (qc->wv ? case_data (c, qc->wv)->f : 1.0); } - } + } casereader_destroy (r); } @@ -467,21 +453,20 @@ kmeans_cluster (struct Kmeans *kmeans, struct casereader *reader, const struct q *x /= n + 1; // Plus 1 for the initial centers } } - + gsl_matrix_memcpy (kmeans->centers, kmeans->updated_centers); { kmeans->n = 0; - int i; /* Step 3 */ gsl_vector_long_set_all (kmeans->num_elements_groups, 0.0); gsl_matrix_set_all (kmeans->updated_centers, 0.0); struct ccase *c; struct casereader *cs = casereader_clone (reader); - for (; (c = casereader_read (cs)) != NULL; i++, case_unref (c)) + for (; (c = casereader_read (cs)) != NULL; case_unref (c)) { - int group = -1; + int group = -1; kmeans_get_nearest_group (kmeans, c, qc, &group, NULL, NULL, NULL); for (j = 0; j < qc->n_vars; ++j) @@ -497,8 +482,6 @@ kmeans_cluster (struct Kmeans *kmeans, struct casereader *reader, const struct q long *n = gsl_vector_long_ptr (kmeans->num_elements_groups, group); *n += qc->wv ? case_data (c, qc->wv)->f : 1.0; kmeans->n++; - - } casereader_destroy (cs); @@ -612,7 +595,7 @@ quick_cluster_show_membership (struct Kmeans *kmeans, const struct casereader *r for (i = 0; (c = casereader_read (cs)) != NULL; i++, case_unref (c)) { - int clust = -1; + int clust = -1; assert (i < kmeans->n); kmeans_get_nearest_group (kmeans, c, qc, &clust, NULL, NULL, NULL); clust = ip->data[clust]; @@ -662,7 +645,7 @@ static void quick_cluster_show_results (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *qc) { kmeans_order_groups (kmeans, qc); /* what does this do? */ - + if( qc->print_initial_clusters ) quick_cluster_show_centers (kmeans, true, qc); quick_cluster_show_centers (kmeans, false, qc); @@ -725,7 +708,7 @@ cmd_quick_cluster (struct lexer *lexer, struct dataset *ds) lex_error (lexer, NULL); goto error; } - } + } } else if (lex_match_id (lexer, "PRINT")) { @@ -752,9 +735,9 @@ cmd_quick_cluster (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "CLUSTERS")) { - if (lex_force_match (lexer, T_LPAREN)) + if (lex_force_match (lexer, T_LPAREN) && + lex_force_int (lexer)) { - lex_force_int (lexer); qc.ngroups = lex_integer (lexer); if (qc.ngroups <= 0) { @@ -762,14 +745,15 @@ cmd_quick_cluster (struct lexer *lexer, struct dataset *ds) goto error; } lex_get (lexer); - lex_force_match (lexer, T_RPAREN); + if (!lex_force_match (lexer, T_RPAREN)) + goto error; } } else if (lex_match_id (lexer, "CONVERGE")) { - if (lex_force_match (lexer, T_LPAREN)) + if (lex_force_match (lexer, T_LPAREN) && + lex_force_num (lexer)) { - lex_force_num (lexer); qc.epsilon = lex_number (lexer); if (qc.epsilon <= 0) { @@ -777,14 +761,15 @@ cmd_quick_cluster (struct lexer *lexer, struct dataset *ds) goto error; } lex_get (lexer); - lex_force_match (lexer, T_RPAREN); + if (!lex_force_match (lexer, T_RPAREN)) + goto error; } } else if (lex_match_id (lexer, "MXITER")) { - if (lex_force_match (lexer, T_LPAREN)) + if (lex_force_match (lexer, T_LPAREN) && + lex_force_int (lexer)) { - lex_force_int (lexer); qc.maxiter = lex_integer (lexer); if (qc.maxiter <= 0) { @@ -792,7 +777,8 @@ cmd_quick_cluster (struct lexer *lexer, struct dataset *ds) goto error; } lex_get (lexer); - lex_force_match (lexer, T_RPAREN); + if (!lex_force_match (lexer, T_RPAREN)) + goto error; } } else if (lex_match_id (lexer, "NOINITIAL"))