X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdatasheet.c;h=1659f6f71eeec267ff7d87086d1a9c41c7afb287;hb=06b59daf76fdd55f29302f02ea66e06b35043e5b;hp=4abc526f0cb2a720cb8e3023eb7d732effdb5da4;hpb=cb586666724d5fcbdb658ce471b85484f0a7babe;p=pspp diff --git a/src/data/datasheet.c b/src/data/datasheet.c index 4abc526f0c..1659f6f71e 100644 --- a/src/data/datasheet.c +++ b/src/data/datasheet.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,28 +16,28 @@ #include -#include +#include "data/datasheet.h" #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "md4.h" -#include "xalloc.h" +#include "data/casereader-provider.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/lazy-casereader.h" +#include "data/settings.h" +#include "libpspp/array.h" +#include "libpspp/assertion.h" +#include "libpspp/misc.h" +#include "libpspp/range-map.h" +#include "libpspp/range-set.h" +#include "libpspp/sparse-xarray.h" +#include "libpspp/taint.h" +#include "libpspp/tower.h" + +#include "gl/minmax.h" +#include "gl/md4.h" +#include "gl/xalloc.h" struct column; @@ -61,13 +61,13 @@ static unsigned long axis_get_size (const struct axis *); static void axis_insert (struct axis *, unsigned long int log_start, unsigned long int phy_start, - unsigned long int cnt); + unsigned long int n); static void axis_remove (struct axis *, - unsigned long int start, unsigned long int cnt); + unsigned long int start, unsigned long int n); static void axis_move (struct axis *, unsigned long int old_start, unsigned long int new_start, - unsigned long int cnt); + unsigned long int n); static struct source *source_create_empty (size_t n_bytes); static struct source *source_create_casereader (struct casereader *); @@ -80,9 +80,10 @@ static int source_allocate_column (struct source *, int width); static void source_release_column (struct source *, int ofs, int width); static bool source_in_use (const struct source *); -static bool source_read (const struct column *, casenumber row, union value *); +static bool source_read (const struct column *, casenumber row, union value *, + size_t n); static bool source_write (const struct column *, casenumber row, - const union value *); + const union value *, size_t n); static bool source_write_column (struct column *, const union value *); static bool source_has_backing (const struct source *); @@ -174,7 +175,7 @@ value_to_data (const union value *value_, int width) if (width == 0) return &value->f; else - return value_str_rw (value, width); + return value->s; } /* Returns the number of bytes needed to store all the values in @@ -360,6 +361,8 @@ datasheet_insert_column (struct datasheet *ds, { struct column *col; + assert (before <= ds->n_columns); + ds->columns = xnrealloc (ds->columns, ds->n_columns + 1, sizeof *ds->columns); insert_element (ds->columns, ds->n_columns, sizeof *ds->columns, before); @@ -382,6 +385,8 @@ datasheet_insert_column (struct datasheet *ds, void datasheet_delete_columns (struct datasheet *ds, size_t start, size_t n) { + assert (start + n <= ds->n_columns); + if (n > 0) { size_t i; @@ -411,6 +416,9 @@ datasheet_move_columns (struct datasheet *ds, size_t old_start, size_t new_start, size_t n) { + assert (old_start + n <= ds->n_columns); + assert (new_start + n <= ds->n_columns); + move_range (ds->columns, ds->n_columns, sizeof *ds->columns, old_start, new_start, n); @@ -424,8 +432,8 @@ struct resize_datasheet_value_aux size_t src_ofs; int src_width; - void (*resize_cb) (const union value *, union value *, void *aux); - void *resize_cb_aux; + void (*resize_cb) (const union value *, union value *, const void *aux); + const void *resize_cb_aux; union value dst_value; size_t dst_ofs; @@ -453,8 +461,8 @@ resize_datasheet_value (const void *src, void *dst, void *aux_) bool datasheet_resize_column (struct datasheet *ds, size_t column, int new_width, void (*resize_cb) (const union value *, - union value *, void *aux), - void *resize_cb_aux) + union value *, const void *aux), + const void *resize_cb_aux) { struct column old_col; struct column *col; @@ -499,18 +507,22 @@ datasheet_resize_column (struct datasheet *ds, size_t column, int new_width, for (lrow = 0; lrow < n_rows; lrow++) { unsigned long int prow = axis_map (ds->rows, lrow); - if (!source_read (&old_col, prow, &src)) + if (!source_read (&old_col, prow, &src, 1)) { /* FIXME: back out col changes. */ - return false; + break; } resize_cb (&src, &dst, resize_cb_aux); - if (!source_write (col, prow, &dst)) + if (!source_write (col, prow, &dst, 1)) { /* FIXME: back out col changes. */ - return false; + break; } } + value_destroy (&src, old_width); + value_destroy (&dst, new_width); + if (lrow < n_rows) + return false; release_source (ds, old_col.source); } @@ -592,13 +604,13 @@ datasheet_get_value (const struct datasheet *ds, casenumber row, successful, false on I/O error. On failure, ROW might be partially modified or corrupted. */ bool -datasheet_put_value (struct datasheet *ds UNUSED, casenumber row UNUSED, - size_t column UNUSED, const union value *value UNUSED) +datasheet_put_value (struct datasheet *ds, casenumber row, + size_t column, const union value *value) { return rw_case (ds, OP_WRITE, row, column, 1, (union value *) value); } -/* Inserts the CNT cases at C into datasheet DS just before row +/* Inserts the N cases at C into datasheet DS just before row BEFORE. Returns true if successful, false on I/O error. On failure, datasheet DS is not modified. @@ -607,73 +619,73 @@ datasheet_put_value (struct datasheet *ds UNUSED, casenumber row UNUSED, bool datasheet_insert_rows (struct datasheet *ds, casenumber before, struct ccase *c[], - casenumber cnt) + casenumber n) { casenumber added = 0; - while (cnt > 0) + while (n > 0) { unsigned long first_phy; - unsigned long phy_cnt; + unsigned long n_phys; unsigned long i; /* Allocate physical rows from the pool of available rows. */ - if (!axis_allocate (ds->rows, cnt, &first_phy, &phy_cnt)) + if (!axis_allocate (ds->rows, n, &first_phy, &n_phys)) { /* No rows were available. Extend the row axis to make some new ones available. */ - phy_cnt = cnt; - first_phy = axis_extend (ds->rows, cnt); + n_phys = n; + first_phy = axis_extend (ds->rows, n); } /* Insert the new rows into the row mapping. */ - axis_insert (ds->rows, before, first_phy, phy_cnt); + axis_insert (ds->rows, before, first_phy, n_phys); /* Initialize the new rows. */ - for (i = 0; i < phy_cnt; i++) + for (i = 0; i < n_phys; i++) if (!datasheet_put_row (ds, before + i, c[i])) { - while (++i < cnt) + while (++i < n) case_unref (c[i]); - datasheet_delete_rows (ds, before - added, phy_cnt + added); + datasheet_delete_rows (ds, before - added, n_phys + added); return false; } /* Advance. */ - c += phy_cnt; - cnt -= phy_cnt; - before += phy_cnt; - added += phy_cnt; + c += n_phys; + n -= n_phys; + before += n_phys; + added += n_phys; } return true; } -/* Deletes the CNT rows in DS starting from row FIRST. */ +/* Deletes the N rows in DS starting from row FIRST. */ void datasheet_delete_rows (struct datasheet *ds, - casenumber first, casenumber cnt) + casenumber first, casenumber n) { size_t lrow; /* Free up rows for reuse. FIXME: optimize. */ - for (lrow = first; lrow < first + cnt; lrow++) + for (lrow = first; lrow < first + n; lrow++) axis_make_available (ds->rows, axis_map (ds->rows, lrow), 1); /* Remove rows from logical-to-physical mapping. */ - axis_remove (ds->rows, first, cnt); + axis_remove (ds->rows, first, n); } -/* Moves the CNT rows in DS starting at position OLD_START so +/* Moves the N rows in DS starting at position OLD_START so that they then start at position NEW_START. Equivalent to deleting the given rows, then inserting them at what becomes position NEW_START after the deletion. */ void datasheet_move_rows (struct datasheet *ds, size_t old_start, size_t new_start, - size_t cnt) + size_t n) { - axis_move (ds->rows, old_start, new_start, cnt); + axis_move (ds->rows, old_start, new_start, n); } static const struct casereader_random_class datasheet_reader_class; @@ -722,10 +734,10 @@ datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_) /* "advance" function for the datasheet random casereader. */ static void datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_, - casenumber case_cnt) + casenumber n_cases) { struct datasheet *ds = ds_; - datasheet_delete_rows (ds, 0, case_cnt); + datasheet_delete_rows (ds, 0, n_cases); } /* Random casereader class for a datasheet. */ @@ -798,6 +810,7 @@ rw_case (struct datasheet *ds, enum rw_op op, casenumber lrow, size_t start_column, size_t n_columns, union value data[]) { + struct column *columns = &ds->columns[start_column]; casenumber prow; size_t i; @@ -806,24 +819,34 @@ rw_case (struct datasheet *ds, enum rw_op op, assert (start_column + n_columns <= datasheet_get_n_columns (ds)); prow = axis_map (ds->rows, lrow); - for (i = 0; i < n_columns; i++) + for (i = 0; i < n_columns;) { - struct column *c = &ds->columns[start_column + i]; - if (c->width >= 0) + struct source *source = columns[i].source; + size_t j; + bool ok; + + if (columns[i].width < 0) { - bool ok; + i++; + continue; + } - if (op == OP_READ) - ok = source_read (c, prow, &data[i]); - else - ok = source_write (c, prow, &data[i]); + for (j = i + 1; j < n_columns; j++) + if (columns[j].width < 0 || columns[j].source != source) + break; - if (!ok) - { - taint_set_taint (ds->taint); - return false; - } + if (op == OP_READ) + ok = source_read (&columns[i], prow, &data[i], j - i); + else + ok = source_write (&columns[i], prow, &data[i], j - i); + + if (!ok) + { + taint_set_taint (ds->taint); + return false; } + + i = j; } return true; } @@ -919,8 +942,7 @@ axis_hash (const struct axis *axis, struct md4_ctx *ctx) md4_process_bytes (&size, sizeof size, ctx); } - for (rsn = range_set_first (axis->available); rsn != NULL; - rsn = range_set_next (axis->available, rsn)) + RANGE_SET_FOR_EACH (rsn, axis->available) { unsigned long int start = range_set_node_get_start (rsn); unsigned long int end = range_set_node_get_end (rsn); @@ -971,7 +993,7 @@ static void axis_make_available (struct axis *axis, unsigned long int start, unsigned long int width) { - range_set_insert (axis->available, start, width); + range_set_set1 (axis->available, start, width); } /* Extends the total physical length of AXIS by WIDTH and returns @@ -1006,30 +1028,30 @@ axis_get_size (const struct axis *axis) return tower_height (&axis->log_to_phy); } -/* Inserts the CNT contiguous physical ordinates starting at +/* Inserts the N contiguous physical ordinates starting at PHY_START into AXIS's logical-to-physical mapping, starting at logical position LOG_START. */ static void axis_insert (struct axis *axis, unsigned long int log_start, unsigned long int phy_start, - unsigned long int cnt) + unsigned long int n) { struct tower_node *before = split_axis (axis, log_start); struct tower_node *new = make_axis_group (phy_start); - tower_insert (&axis->log_to_phy, cnt, new, before); + tower_insert (&axis->log_to_phy, n, new, before); merge_axis_nodes (axis, new, NULL); check_axis_merged (axis); } -/* Removes CNT ordinates from AXIS's logical-to-physical mapping +/* Removes N ordinates from AXIS's logical-to-physical mapping starting at logical position START. */ static void axis_remove (struct axis *axis, - unsigned long int start, unsigned long int cnt) + unsigned long int start, unsigned long int n) { - if (cnt > 0) + if (n > 0) { - struct tower_node *last = split_axis (axis, start + cnt); + struct tower_node *last = split_axis (axis, start + n); struct tower_node *cur, *next; for (cur = split_axis (axis, start); cur != last; cur = next) { @@ -1041,24 +1063,24 @@ axis_remove (struct axis *axis, } } -/* Moves the CNT ordinates in AXIS's logical-to-mapping starting +/* Moves the N ordinates in AXIS's logical-to-mapping starting at logical position OLD_START so that they then start at position NEW_START. */ static void axis_move (struct axis *axis, unsigned long int old_start, unsigned long int new_start, - unsigned long int cnt) + unsigned long int n) { - if (cnt > 0 && old_start != new_start) + if (n > 0 && old_start != new_start) { struct tower_node *old_first, *old_last, *new_first; struct tower_node *merge1, *merge2; struct tower tmp_array; - /* Move ordinates OLD_START...(OLD_START + CNT) into new, + /* Move ordinates OLD_START...(OLD_START + N) into new, separate TMP_ARRAY. */ old_first = split_axis (axis, old_start); - old_last = split_axis (axis, old_start + cnt); + old_last = split_axis (axis, old_start + n); tower_init (&tmp_array); tower_splice (&tmp_array, NULL, &axis->log_to_phy, old_first, old_last); @@ -1221,7 +1243,7 @@ source_create_empty (size_t n_bytes) size_t row_size = n_bytes + 4 * sizeof (void *); size_t max_memory_rows = settings_get_workspace () / row_size; source->avail = range_set_create (); - range_set_insert (source->avail, 0, n_bytes); + range_set_set1 (source->avail, 0, n_bytes); source->data = sparse_xarray_create (n_bytes, MAX (max_memory_rows, 4)); source->backing = NULL; source->backing_rows = 0; @@ -1240,7 +1262,7 @@ source_create_casereader (struct casereader *reader) size_t n_columns; size_t i; - range_set_delete (source->avail, 0, n_bytes); + range_set_set0 (source->avail, 0, n_bytes); source->backing = reader; source->backing_rows = casereader_count_cases (reader); @@ -1294,7 +1316,7 @@ static void source_release_column (struct source *source, int ofs, int width) { assert (width >= 0); - range_set_insert (source->avail, ofs, width_to_n_bytes (width)); + range_set_set1 (source->avail, ofs, width_to_n_bytes (width)); if (source->backing != NULL) source->n_used--; } @@ -1329,30 +1351,39 @@ source_get_backing_n_rows (const struct source *source) return source->backing_rows; } -/* Reads the given COLUMN from SOURCE in the given ROW, into - VALUE. Returns true if successful, false on I/O error. +/* Reads the N COLUMNS in the given ROW, into the N VALUES. Returns true if + successful, false on I/O error. + + All of the COLUMNS must have the same source. - The caller must have initialized VALUE with the proper - width. */ + The caller must have initialized VALUES with the proper width. */ static bool -source_read (const struct column *column, casenumber row, union value *value) +source_read (const struct column columns[], casenumber row, + union value values[], size_t n) { - struct source *source = column->source; + struct source *source = columns[0].source; + size_t i; - assert (column->width >= 0); if (source->backing == NULL || sparse_xarray_contains_row (source->data, row)) - return sparse_xarray_read (source->data, row, column->byte_ofs, - width_to_n_bytes (column->width), - value_to_data (value, column->width)); + { + bool ok = true; + + for (i = 0; i < n && ok; i++) + ok = sparse_xarray_read (source->data, row, columns[i].byte_ofs, + width_to_n_bytes (columns[i].width), + value_to_data (&values[i], columns[i].width)); + return ok; + } else { struct ccase *c = casereader_peek (source->backing, row); bool ok = c != NULL; if (ok) { - value_copy (value, case_data_idx (c, column->value_ofs), - column->width); + for (i = 0; i < n; i++) + value_copy (&values[i], case_data_idx (c, columns[i].value_ofs), + columns[i].width); case_unref (c); } return ok; @@ -1384,18 +1415,20 @@ copy_case_into_source (struct source *source, struct ccase *c, casenumber row) return true; } -/* Writes VALUE to SOURCE in the given ROW and COLUMN. Returns - true if successful, false on I/O error. On error, the row's - data may be completely or partially corrupted, both inside and - outside the region to be written. */ +/* Writes the N VALUES to their source in the given ROW and COLUMNS. Returns + true if successful, false on I/O error. On error, the row's data may be + completely or partially corrupted, both inside and outside the region to be + written. + + All of the COLUMNS must have the same source. */ static bool -source_write (const struct column *column, casenumber row, - const union value *value) +source_write (const struct column columns[], casenumber row, + const union value values[], size_t n) { - struct source *source = column->source; + struct source *source = columns[0].source; struct casereader *backing = source->backing; + size_t i; - assert (column->width >= 0); if (backing != NULL && !sparse_xarray_contains_row (source->data, row) && row < source->backing_rows) @@ -1413,21 +1446,14 @@ source_write (const struct column *column, casenumber row, return false; } - return sparse_xarray_write (source->data, row, column->byte_ofs, - width_to_n_bytes (column->width), - value_to_data (value, column->width)); + for (i = 0; i < n; i++) + if (!sparse_xarray_write (source->data, row, columns[i].byte_ofs, + width_to_n_bytes (columns[i].width), + value_to_data (&values[i], columns[i].width))) + return false; + return true; } -/* Within SOURCE, which must not have a backing casereader, - writes the VALUE_CNT values in VALUES_CNT to the VALUE_CNT - columns starting from START_COLUMN, in every row, even in rows - not yet otherwise initialized. Returns true if successful, - false if an I/O error occurs. - - We don't support backing != NULL because (1) it's harder and - (2) this function is only called by - datasheet_insert_column, which doesn't reuse columns from - sources that are backed by casereaders. */ static bool source_write_column (struct column *column, const union value *value) {