frequencies: fixed bug #48128 in percentiles
[pspp] / src / language / stats / frequencies.c
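diff --git a/src/language/stats/frequencies.c b/src/language/stats/frequencies.c
index cd131f514b29afc3c2cdfdcc16bda2f5a882b2f7..675a02319aaebf1ca6f8d3b91cb323106eb1e929 100644
--- a/src/language/stats/frequencies.c
+++ b/src/language/stats/frequencies.c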
@@ -214,7 +214,7 @@ struct frq_proc
     int n_percentiles, n_show_percentiles;
 
     /* Frequency table display. */
-    int max_categories;         /* Maximum categories to show. */
+    long int max_categories;         /* Maximum categories to show. */
     int sort;                   /* FRQ_AVALUE or FRQ_DVALUE
                                    or FRQ_AFREQ or FRQ_DFREQ. */
 
@@ -580,7 +580,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
 {
   int i;
   struct frq_proc frq;
-  const struct variable **vars;
+  const struct variable **vars = NULL;
 
   bool sbc_barchart = false;
   bool sbc_piechart = false;
@@ -614,7 +614,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
 
   frq.n_stats = 4;
 
-  frq.max_categories = INT_MAX;
+  frq.max_categories = LONG_MAX;
 
   frq.percentiles = NULL;
   frq.n_percentiles = 0;
@@ -803,6 +803,18 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
                {
                  frq.max_categories = 0;
                }
+              else if (lex_match_id (lexer, "LIMIT"))
+                {
+                  if (!lex_force_match (lexer, T_LPAREN)
+                      || !lex_force_int (lexer))
+                    goto error;
+
+                  frq.max_categories = lex_integer (lexer);
+                  lex_get (lexer);
+
+                  if (!lex_force_match (lexer, T_RPAREN))
+                    goto error;
+                }
              else if (lex_match_id (lexer, "AVALUE"))
                {
                  frq.sort = FRQ_AVALUE;
@@ -902,7 +914,8 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
                              lex_error (lexer, _("Histogram frequency must be greater than zero."));
                            }
                          lex_get (lexer);
-                         lex_force_match (lexer, T_RPAREN);
+                         if (! lex_force_match (lexer, T_RPAREN))
+                           goto error;
                        }
                     }
                }
@@ -919,29 +932,34 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
                              lex_error (lexer, _("Histogram percentage must be greater than zero."));
                            }
                          lex_get (lexer);
-                         lex_force_match (lexer, T_RPAREN);
+                         if (! lex_force_match (lexer, T_RPAREN))
+                           goto error;
                        }
                     }
                }
              else if (lex_match_id (lexer, "MINIMUM"))
                {
-                 lex_force_match (lexer, T_LPAREN);
+                 if (! lex_force_match (lexer, T_LPAREN))
+                   goto error;
                  if (lex_force_num (lexer))
                    {
                      hi_min = lex_number (lexer);
                      lex_get (lexer);
                    }
-                 lex_force_match (lexer, T_RPAREN);
+                 if (! lex_force_match (lexer, T_RPAREN))
+                   goto error;
                }
              else if (lex_match_id (lexer, "MAXIMUM"))
                {
-                 lex_force_match (lexer, T_LPAREN);
+                 if (! lex_force_match (lexer, T_LPAREN))
+                   goto error;
                  if (lex_force_num (lexer))
                    {
                      hi_max = lex_number (lexer);
                      lex_get (lexer);
                    }
-                 lex_force_match (lexer, T_RPAREN);
+                 if (! lex_force_match (lexer, T_RPAREN))
+                   goto error;
                }
              else
                {
@@ -958,23 +976,27 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
            {
              if (lex_match_id (lexer, "MINIMUM"))
                {
-                 lex_force_match (lexer, T_LPAREN);
+                 if (! lex_force_match (lexer, T_LPAREN))
+                   goto error;
                  if (lex_force_num (lexer))
                    {
                      pie_min = lex_number (lexer);
                      lex_get (lexer);
                    }
-                 lex_force_match (lexer, T_RPAREN);
+                 if (! lex_force_match (lexer, T_RPAREN))
+                   goto error;
                }
              else if (lex_match_id (lexer, "MAXIMUM"))
                {
-                 lex_force_match (lexer, T_LPAREN);
+                 if (! lex_force_match (lexer, T_LPAREN))
+                   goto error;
                  if (lex_force_num (lexer))
                    {
                      pie_max = lex_number (lexer);
                      lex_get (lexer);
                    }
-                 lex_force_match (lexer, T_RPAREN);
+                 if (! lex_force_match (lexer, T_RPAREN))
+                   goto error;
                }
              else if (lex_match_id (lexer, "MISSING"))
                {
@@ -1000,23 +1022,27 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
            {
              if (lex_match_id (lexer, "MINIMUM"))
                {
-                 lex_force_match (lexer, T_LPAREN);
+                 if (! lex_force_match (lexer, T_LPAREN))
+                   goto error;
                  if (lex_force_num (lexer))
                    {
                      bar_min = lex_number (lexer);
                      lex_get (lexer);
                    }
-                 lex_force_match (lexer, T_RPAREN);
+                 if (! lex_force_match (lexer, T_RPAREN))
+                   goto error;
                }
              else if (lex_match_id (lexer, "MAXIMUM"))
                {
-                 lex_force_match (lexer, T_LPAREN);
+                 if (! lex_force_match (lexer, T_LPAREN))
+                   goto error;
                  if (lex_force_num (lexer))
                    {
                      bar_max = lex_number (lexer);
                      lex_get (lexer);
                    }
-                 lex_force_match (lexer, T_RPAREN);
+                 if (! lex_force_match (lexer, T_RPAREN))
+                   goto error;
                }
              else if (lex_match_id (lexer, "FREQ"))
                {
@@ -1027,7 +1053,8 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
                          lex_number (lexer);
                          lex_get (lexer);
                        }
-                     lex_force_match (lexer, T_RPAREN);
+                     if (! lex_force_match (lexer, T_RPAREN))
+                       goto error;
                    }
                  bar_freq = true;
                }
@@ -1040,7 +1067,8 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
                          lex_number (lexer);
                          lex_get (lexer);
                        }
-                     lex_force_match (lexer, T_RPAREN);
+                     if (! lex_force_match (lexer, T_RPAREN))
+                       goto error;
                    }
                  bar_freq = false;
                }
@@ -1096,6 +1124,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
        frq.percentiles[frq.n_percentiles].show = true;
 
        frq.n_percentiles++;
+        frq.n_show_percentiles++;
     }
 
 
@@ -1184,19 +1213,20 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
     frq.n_show_percentiles = 0;
     for (i = o = 0; i < frq.n_percentiles; ++i)
       {
-       frq.percentiles[o].p = frq.percentiles[i].p;
-
-       if (frq.percentiles[i].show)
-         frq.percentiles[o].show = true;
-
-       if (frq.percentiles[i].p != previous_p)
-         {
-           if (frq.percentiles[i].show)
-             frq.n_show_percentiles++;
-
-           o++;
-         }
-
+        if (frq.percentiles[i].p != previous_p)
+          {
+            frq.percentiles[o].p = frq.percentiles[i].p;
+            frq.percentiles[o].show = frq.percentiles[i].show;
+            if (frq.percentiles[i].show)
+              frq.n_show_percentiles++;
+            o++;
+          }
+        else if (frq.percentiles[i].show &&
+                 !frq.percentiles[o].show)
+          {
+            frq.percentiles[o].show = true;
+            frq.n_show_percentiles++;
+          }
        previous_p = frq.percentiles[i].p;
       }
 
@@ -1213,19 +1243,37 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
       {
        struct ccase *c;
        precalc (&frq, group, ds);
+
        for (; (c = casereader_read (group)) != NULL; case_unref (c))
          calc (&frq, c, ds);
        postcalc (&frq, ds);
+       casereader_destroy (group);
       }
     ok = casegrouper_destroy (grouper);
     ok = proc_commit (ds) && ok;
   }
 
 
+  free (vars);
+  free (frq.vars);
+  free (frq.bar);
+  free (frq.pie);
+  free (frq.hist);
+  free (frq.percentiles);
+  pool_destroy (frq.pool);
+
   return CMD_SUCCESS;
 
  error:
 
+  free (vars);
+  free (frq.vars);
+  free (frq.bar);
+  free (frq.pie);
+  free (frq.hist);
+  free (frq.percentiles);
+  pool_destroy (frq.pool);
+
   return CMD_FAILURE;
 }
 
@@ -1294,6 +1342,8 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
         }
     }
 
+  if (valid_freq <= 0)
+    return NULL;
 
   iqr = calculate_iqr (frq);
 
@@ -1447,7 +1497,8 @@ do_barchart(const struct frq_chart *bar, const struct variable **var,
   struct freq **slices = pick_cat_counts_ptr (bar, frq_tab, &n_slices);
 
   chart_item_submit (barchart_create (var, 1,
-                                     (bar->y_scale == FRQ_FREQ) ? _("Count") : _("Percent"),
+                                     (bar->y_scale == FRQ_FREQ) ? _("Count") : _("Percent"), 
+                                     (bar->y_scale == FRQ_PERCENT),
                                      slices, n_slices));
   free (slices);
 }