1 /* PSPP - computes sample statistics.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <data/sparse-cases.h>
26 #include <data/settings.h>
27 #include <data/case-tmpfile.h>
28 #include <libpspp/assertion.h>
29 #include <libpspp/range-set.h>
30 #include <libpspp/sparse-array.h>
34 /* A sparse array of cases. */
37 size_t column_cnt; /* Number of values per case. */
38 union value *default_columns; /* Defaults for unwritten cases. */
39 casenumber max_memory_cases; /* Max cases before dumping to disk. */
40 struct sparse_array *memory; /* Backing, if stored in memory. */
41 struct case_tmpfile *disk; /* Backing, if stored on disk. */
42 struct range_set *disk_cases; /* Allocated cases, if on disk. */
45 /* Creates and returns a new sparse array of cases with
46 COLUMN_CNT values per case. */
48 sparse_cases_create (size_t column_cnt)
50 struct sparse_cases *sc = xmalloc (sizeof *sc);
51 sc->column_cnt = column_cnt;
52 sc->default_columns = NULL;
53 sc->max_memory_cases = get_workspace_cases (column_cnt);
54 sc->memory = sparse_array_create (sizeof (struct ccase));
56 sc->disk_cases = NULL;
60 /* Creates and returns a new sparse array of cases that contains
61 the same data as OLD. */
63 sparse_cases_clone (const struct sparse_cases *old)
65 struct sparse_cases *new = xmalloc (sizeof *new);
67 new->column_cnt = old->column_cnt;
69 if (old->default_columns != NULL)
71 = xmemdup (old->default_columns,
72 old->column_cnt * sizeof *old->default_columns);
74 new->default_columns = NULL;
76 new->max_memory_cases = old->max_memory_cases;
78 if (old->memory != NULL)
80 unsigned long int idx;
83 new->memory = sparse_array_create (sizeof (struct ccase));
84 for (c = sparse_array_scan (old->memory, NULL, &idx); c != NULL;
85 c = sparse_array_scan (old->memory, &idx, &idx))
86 case_clone (sparse_array_insert (new->memory, idx), c);
91 if (old->disk != NULL)
93 const struct range_set_node *node;
95 new->disk = case_tmpfile_create (old->column_cnt);
96 new->disk_cases = range_set_create ();
97 for (node = range_set_first (old->disk_cases); node != NULL;
98 node = range_set_next (old->disk_cases, node))
100 unsigned long int start = range_set_node_get_start (node);
101 unsigned long int end = range_set_node_get_end (node);
102 unsigned long int idx;
105 for (idx = start; idx < end; idx++)
106 if (!case_tmpfile_get_case (old->disk, idx, &c)
107 || !case_tmpfile_put_case (new->disk, idx, &c))
109 sparse_cases_destroy (new);
117 new->disk_cases = NULL;
123 /* Destroys sparse array of cases SC. */
125 sparse_cases_destroy (struct sparse_cases *sc)
129 if (sc->memory != NULL)
131 unsigned long int idx;
133 for (c = sparse_array_scan (sc->memory, NULL, &idx); c != NULL;
134 c = sparse_array_scan (sc->memory, &idx, &idx))
136 sparse_array_destroy (sc->memory);
138 free (sc->default_columns);
139 case_tmpfile_destroy (sc->disk);
140 range_set_destroy (sc->disk_cases);
145 /* Returns the number of `union value's in each case in SC. */
147 sparse_cases_get_value_cnt (const struct sparse_cases *sc)
149 return sc->column_cnt;
152 /* Dumps the cases in SC, which must currently be stored in
153 memory, to disk. Returns true if successful, false on I/O
156 dump_sparse_cases_to_disk (struct sparse_cases *sc)
158 unsigned long int idx;
161 assert (sc->memory != NULL);
162 assert (sc->disk == NULL);
164 sc->disk = case_tmpfile_create (sc->column_cnt);
165 sc->disk_cases = range_set_create ();
167 for (c = sparse_array_scan (sc->memory, NULL, &idx); c != NULL;
168 c = sparse_array_scan (sc->memory, &idx, &idx))
170 if (!case_tmpfile_put_case (sc->disk, idx, c))
172 case_tmpfile_destroy (sc->disk);
174 range_set_destroy (sc->disk_cases);
175 sc->disk_cases = NULL;
178 range_set_insert (sc->disk_cases, idx, 1);
180 sparse_array_destroy (sc->memory);
185 /* Returns true if any data has ever been written to ROW in SC,
188 sparse_cases_contains_row (const struct sparse_cases *sc, casenumber row)
190 return (sc->memory != NULL
191 ? sparse_array_get (sc->memory, row) != NULL
192 : range_set_contains (sc->disk_cases, row));
195 /* Reads columns COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in
196 the given ROW in SC, into the VALUE_CNT values in VALUES.
197 Returns true if successful, false on I/O error. */
199 sparse_cases_read (struct sparse_cases *sc, casenumber row, size_t column,
200 union value values[], size_t value_cnt)
202 assert (value_cnt <= sc->column_cnt);
203 assert (column + value_cnt <= sc->column_cnt);
205 if (sparse_cases_contains_row (sc, row))
208 if (sc->memory != NULL)
209 case_clone (&c, sparse_array_get (sc->memory, row));
210 else if (!case_tmpfile_get_case (sc->disk, row, &c))
212 case_copy_out (&c, column, values, value_cnt);
217 assert (sc->default_columns != NULL);
218 memcpy (values, sc->default_columns + column,
219 sizeof *values * value_cnt);
225 /* Implements sparse_cases_write for an on-disk sparse_cases. */
227 write_disk_case (struct sparse_cases *sc, casenumber row, size_t column,
228 const union value values[], size_t value_cnt)
233 /* Get current case data. */
234 if (column == 0 && value_cnt == sc->column_cnt)
235 case_create (&c, sc->column_cnt);
236 else if (!case_tmpfile_get_case (sc->disk, row, &c))
239 /* Copy in new data. */
240 case_copy_in (&c, column, values, value_cnt);
242 /* Write new case. */
243 ok = case_tmpfile_put_case (sc->disk, row, &c);
245 range_set_insert (sc->disk_cases, row, 1);
250 /* Writes the VALUE_CNT values in VALUES into columns
251 COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in the given ROW
253 Returns true if successful, false on I/O error. */
255 sparse_cases_write (struct sparse_cases *sc, casenumber row, size_t column,
256 const union value values[], size_t value_cnt)
258 if (sc->memory != NULL)
260 struct ccase *c = sparse_array_get (sc->memory, row);
263 if (sparse_array_count (sc->memory) >= sc->max_memory_cases)
265 if (!dump_sparse_cases_to_disk (sc))
267 return write_disk_case (sc, row, column, values, value_cnt);
270 c = sparse_array_insert (sc->memory, row);
271 case_create (c, sc->column_cnt);
272 if (sc->default_columns != NULL
273 && (column != 0 || value_cnt != sc->column_cnt))
274 case_copy_in (c, 0, sc->default_columns, sc->column_cnt);
276 case_copy_in (c, column, values, value_cnt);
280 return write_disk_case (sc, row, column, values, value_cnt);
283 /* Writes the VALUE_CNT values in VALUES to columns
284 START_COLUMN...(START_COLUMN + VALUE_CNT), exclusive, in every
285 row in SC, even those rows that have not yet been written.
286 Returns true if successful, false on I/O error.
288 The runtime of this function is linear in the number of rows
289 in SC that have already been written. */
291 sparse_cases_write_columns (struct sparse_cases *sc, size_t start_column,
292 const union value values[], size_t value_cnt)
294 assert (value_cnt <= sc->column_cnt);
295 assert (start_column + value_cnt <= sc->column_cnt);
298 if (sc->default_columns == NULL)
299 sc->default_columns = xnmalloc (sc->column_cnt,
300 sizeof *sc->default_columns);
301 memcpy (sc->default_columns + start_column, values,
302 value_cnt * sizeof *sc->default_columns);
304 /* Set individual rows. */
305 if (sc->memory != NULL)
308 unsigned long int idx;
310 for (c = sparse_array_scan (sc->memory, NULL, &idx); c != NULL;
311 c = sparse_array_scan (sc->memory, &idx, &idx))
312 case_copy_in (c, start_column, values, value_cnt);
316 const struct range_set_node *node;
318 for (node = range_set_first (sc->disk_cases); node != NULL;
319 node = range_set_next (sc->disk_cases, node))
321 unsigned long int start = range_set_node_get_start (node);
322 unsigned long int end = range_set_node_get_end (node);
323 unsigned long int row;
325 for (row = start; row < end; row++)
326 case_tmpfile_put_values (sc->disk, row,
327 start_column, values, value_cnt);
330 if (case_tmpfile_error (sc->disk))