projects
/
pspp
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
392c853
)
FREQUENCIES: Refactor frequency counting with sorting instead of hashing.
author
Ben Pfaff
<blp@cs.stanford.edu>
Tue, 23 Mar 2010 04:41:25 +0000
(21:41 -0700)
committer
Ben Pfaff
<blp@cs.stanford.edu>
Mon, 3 May 2010 04:29:22 +0000
(21:29 -0700)
src/language/stats/frequencies.q
patch
|
blob
|
history
diff --git
a/src/language/stats/frequencies.q
b/src/language/stats/frequencies.q
index 723d26e8341c3f7c732f3ed394067dea0b4204c6..c360713b56689800caaf9c2dadc5aea1b3f3d206 100644
(file)
--- a/
src/language/stats/frequencies.q
+++ b/
src/language/stats/frequencies.q
@@
-23,10
+23,12
@@
#include "data/case.h"
#include "data/casegrouper.h"
#include "data/casereader.h"
#include "data/case.h"
#include "data/casegrouper.h"
#include "data/casereader.h"
+#include "data/casewriter.h"
#include "data/dictionary.h"
#include "data/format.h"
#include "data/procedure.h"
#include "data/settings.h"
#include "data/dictionary.h"
#include "data/format.h"
#include "data/procedure.h"
#include "data/settings.h"
+#include "data/subcase.h"
#include "data/value-labels.h"
#include "data/variable.h"
#include "language/command.h"
#include "data/value-labels.h"
#include "data/variable.h"
#include "language/command.h"
@@
-43,6
+45,7
@@
#include "libpspp/str.h"
#include "math/histogram.h"
#include "math/moments.h"
#include "libpspp/str.h"
#include "math/histogram.h"
#include "math/moments.h"
+#include "math/sort.h"
#include "output/chart-item.h"
#include "output/charts/piechart.h"
#include "output/charts/plot-hist.h"
#include "output/chart-item.h"
#include "output/charts/piechart.h"
#include "output/charts/plot-hist.h"
@@
-158,9
+161,10
@@
struct frq_chart
/* Entire frequency table. */
struct freq_tab
{
/* Entire frequency table. */
struct freq_tab
{
- struct hmap data; /* Hash table for accumulating counts. */
+ struct casewriter *sorter;
+
struct freq *valid; /* Valid freqs. */
struct freq *valid; /* Valid freqs. */
- int n_valid; /* Number of
total
freqs. */
+ int n_valid; /* Number of
valid
freqs. */
const struct dictionary *dict; /* Source of entries in the table. */
struct freq *missing; /* Missing freqs. */
const struct dictionary *dict; /* Source of entries in the table. */
struct freq *missing; /* Missing freqs. */
@@
-435,18
+439,27
@@
calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds)
for (i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
for (i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
- const union value *value = case_data (c, vf->var);
- size_t hash = value_hash (value, vf->width, 0);
- struct freq *f;
+ struct casewriter *writer = vf->tab.sorter;
+ struct ccase *f;
- f =
freq_hmap_search (&vf->tab.data, value, vf->width, hash
);
- if (f == NULL)
-
f = freq_hmap_insert (&vf->tab.data, value, vf->width, hash)
;
+ f =
case_create (casewriter_get_proto (writer)
);
+ value_copy (case_data_rw_idx (f, 0), case_data (c, vf->var), vf->width);
+
case_data_rw_idx (f, 1)->f = weight
;
-
f->count += weight
;
+
casewriter_write (writer, f)
;
}
}
}
}
+static struct ccase *
+combine_freq_cases (struct ccase *a, struct ccase *b, void *aux UNUSED)
+{
+ a = case_unshare (a);
+ case_data_rw_idx (a, 1)->f += case_data_idx (b, 1)->f;
+ case_unref (b);
+
+ return a;
+}
+
/* Prepares each variable that is the target of FREQUENCIES by setting
up its hash table. */
static void
/* Prepares each variable that is the target of FREQUENCIES by setting
up its hash table. */
static void
@@
-463,7
+476,22
@@
precalc (struct frq_proc *frq, struct casereader *input, struct dataset *ds)
}
for (i = 0; i < frq->n_vars; i++)
}
for (i = 0; i < frq->n_vars; i++)
- hmap_init (&frq->vars[i].tab.data);
+ {
+ int width = var_get_width (frq->vars[i].var);
+ struct caseproto *proto;
+ struct subcase ordering;
+
+ proto = caseproto_create ();
+ proto = caseproto_add_width (proto, width);
+ proto = caseproto_add_width (proto, 0);
+
+ subcase_init (&ordering, 0, width, SC_ASCEND);
+ frq->vars[i].tab.sorter = sort_distinct_create_writer (
+ &ordering, proto, combine_freq_cases, NULL, NULL);
+
+ caseproto_unref (proto);
+ subcase_destroy (&ordering);
+ }
}
/* Finishes up with the variables after frequencies have been
}
/* Finishes up with the variables after frequencies have been
@@
-560,13
+588,23
@@
postprocess_freq_tab (const struct frq_proc *frq, struct var_freqs *vf)
{
struct freq_tab *ft = &vf->tab;
struct freq_compare_aux aux;
{
struct freq_tab *ft = &vf->tab;
struct freq_compare_aux aux;
+ struct casereader *reader;
+ struct ccase *c;
size_t count;
struct freq *freqs, *f;
size_t i;
/* Extract data from hash table. */
size_t count;
struct freq *freqs, *f;
size_t i;
/* Extract data from hash table. */
- count = hmap_count (&ft->data);
- freqs = freq_hmap_extract (&ft->data);
+ reader = casewriter_make_reader (ft->sorter);
+ freqs = xnmalloc (casereader_count_cases (reader), sizeof *freqs);
+ for (count = 0; (c = casereader_read (reader)) != NULL; count++)
+ {
+ struct freq *f = &freqs[count];
+ value_clone (&f->value, case_data_idx (c, 0), vf->width);
+ f->count = case_num_idx (c, 1);
+ case_unref (c);
+ }
+ casereader_destroy (reader);
/* Put data into ft. */
ft->valid = freqs;
/* Put data into ft. */
ft->valid = freqs;
@@
-604,8
+642,14
@@
postprocess_freq_tab (const struct frq_proc *frq, struct var_freqs *vf)
static void
cleanup_freq_tab (struct var_freqs *vf)
{
static void
cleanup_freq_tab (struct var_freqs *vf)
{
+ if (value_needs_init (vf->width))
+ {
+ int i;
+
+ for (i = 0; i < vf->tab.n_valid + vf->tab.n_missing; i++)
+ value_destroy (&vf->tab.valid[i].value, vf->width);
+ }
free (vf->tab.valid);
free (vf->tab.valid);
- freq_hmap_destroy (&vf->tab.data, vf->width);
}
/* Parses the VARIABLES subcommand. */
}
/* Parses the VARIABLES subcommand. */