From: Jim Meyering Date: Mon, 7 Feb 2011 15:01:24 +0000 (+0100) Subject: di-set, ino-map: new modules, from coreutils X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6f0680eb29a1737d704a1df26aafc00490cd34d8;p=pspp di-set, ino-map: new modules, from coreutils * lib/di-set.c: New file. * lib/di-set.h: Likewise. * lib/ino-map.c: Likewise. * lib/ino-map.h: Likewise. * modules/di-set: Likewise. * modules/di-set-tests: Likewise. * modules/ino-map: Likewise. * modules/ino-map-tests: Likewise. * tests/test-di-set.c: Likewise. * tests/test-ino-map.c: Likewise. --- diff --git a/ChangeLog b/ChangeLog index fd4c65c6b6..0acf480797 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2011-02-07 Jim Meyering + + di-set, ino-map: new modules, from coreutils + * lib/di-set.c: New file. + * lib/di-set.h: Likewise. + * lib/ino-map.c: Likewise. + * lib/ino-map.h: Likewise. + * modules/di-set: Likewise. + * modules/di-set-tests: Likewise. + * modules/ino-map: Likewise. + * modules/ino-map-tests: Likewise. + * tests/test-di-set.c: Likewise. + * tests/test-ino-map.c: Likewise. + 2011-02-06 Paul Eggert getloadavg: merge minor changes from Emacs diff --git a/lib/di-set.c b/lib/di-set.c new file mode 100644 index 0000000000..5e839a311e --- /dev/null +++ b/lib/di-set.c @@ -0,0 +1,259 @@ +/* Set operations for device-inode pairs stored in a space-efficient manner. + + Copyright 2009-2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* written by Paul Eggert and Jim Meyering */ + +#include +#include "di-set.h" + +#include "hash.h" +#include "ino-map.h" + +#include +#include + +/* The hash package hashes "void *", but this package wants to hash + integers. Use integers that are as large as possible, but no + larger than void *, so that they can be cast to void * and back + without losing information. */ +typedef size_t hashint; +#define HASHINT_MAX ((hashint) -1) + +/* Integers represent inode numbers. Integers in the range + 1..(LARGE_INO_MIN-1) represent inode numbers directly. (The hash + package does not work with null pointers, so inode 0 cannot be used + as a key.) To find the representations of other inode numbers, map + them through INO_MAP. */ +#define LARGE_INO_MIN (HASHINT_MAX / 2) + +/* Set operations for device-inode pairs stored in a space-efficient + manner. Use a two-level hash table. The top level hashes by + device number, as there are typically a small number of devices. + The lower level hashes by mapped inode numbers. In the typical + case where the inode number is positive and small, the inode number + maps to itself, masquerading as a void * value; otherwise, its + value is the result of hashing the inode value through INO_MAP. */ + +/* A pair that maps a device number to a set of inode numbers. */ +struct di_ent +{ + dev_t dev; + struct hash_table *ino_set; +}; + +/* A two-level hash table that manages and indexes these pairs. */ +struct di_set +{ + /* Map device numbers to sets of inode number representatives. */ + struct hash_table *dev_map; + + /* If nonnull, map large inode numbers to their small + representatives. If null, there are no large inode numbers in + this set. */ + struct ino_map *ino_map; + + /* Cache of the most recently allocated and otherwise-unused storage + for probing this table. */ + struct di_ent *probe; +}; + +/* Hash a device-inode-set entry. */ +static size_t +di_ent_hash (void const *x, size_t table_size) +{ + struct di_ent const *p = x; + dev_t dev = p->dev; + + /* When DEV is wider than size_t, exclusive-OR the words of DEV into H. + This avoids loss of info, without applying % to the wider type, + which could be quite slow on some systems. */ + size_t h = dev; + unsigned int i; + unsigned int n_words = sizeof dev / sizeof h + (sizeof dev % sizeof h != 0); + for (i = 1; i < n_words; i++) + h ^= dev >> CHAR_BIT * sizeof h * i; + + return h % table_size; +} + +/* Return true if two device-inode-set entries are the same. */ +static bool +di_ent_compare (void const *x, void const *y) +{ + struct di_ent const *a = x; + struct di_ent const *b = y; + return a->dev == b->dev; +} + +/* Free a device-inode-set entry. */ +static void +di_ent_free (void *v) +{ + struct di_ent *a = v; + hash_free (a->ino_set); + free (a); +} + +/* Create a set of device-inode pairs. Return NULL on allocation failure. */ +struct di_set * +di_set_alloc (void) +{ + struct di_set *dis = malloc (sizeof *dis); + if (dis) + { + enum { INITIAL_DEV_MAP_SIZE = 11 }; + dis->dev_map = hash_initialize (INITIAL_DEV_MAP_SIZE, NULL, + di_ent_hash, di_ent_compare, + di_ent_free); + if (! dis->dev_map) + { + free (dis); + return NULL; + } + dis->ino_map = NULL; + dis->probe = NULL; + } + + return dis; +} + +/* Free a set of device-inode pairs. */ +void +di_set_free (struct di_set *dis) +{ + hash_free (dis->dev_map); + free (dis->ino_map); + free (dis->probe); + free (dis); +} + +/* Hash an encoded inode number I. */ +static size_t +di_ino_hash (void const *i, size_t table_size) +{ + return (hashint) i % table_size; +} + +/* Using the DIS table, map a device to a hash table that represents + a set of inode numbers. Return NULL on error. */ +static struct hash_table * +map_device (struct di_set *dis, dev_t dev) +{ + /* Find space for the probe, reusing the cache if available. */ + struct di_ent *ent; + struct di_ent *probe = dis->probe; + if (probe) + { + /* If repeating a recent query, return the cached result. */ + if (probe->dev == dev) + return probe->ino_set; + } + else + { + dis->probe = probe = malloc (sizeof *probe); + if (! probe) + return NULL; + } + + /* Probe for the device. */ + probe->dev = dev; + ent = hash_insert (dis->dev_map, probe); + if (! ent) + return NULL; + + if (ent != probe) + { + /* Use the existing entry. */ + probe->ino_set = ent->ino_set; + } + else + { + enum { INITIAL_INO_SET_SIZE = 1021 }; + + /* Prepare to allocate a new probe next time; this one is in use. */ + dis->probe = NULL; + + /* DEV is new; allocate an inode set for it. */ + probe->ino_set = hash_initialize (INITIAL_INO_SET_SIZE, NULL, + di_ino_hash, NULL, NULL); + } + + return probe->ino_set; +} + +/* Using the DIS table, map an inode number to a mapped value. + Return INO_MAP_INSERT_FAILURE on error. */ +static hashint +map_inode_number (struct di_set *dis, ino_t ino) +{ + if (0 < ino && ino < LARGE_INO_MIN) + return ino; + + if (! dis->ino_map) + { + dis->ino_map = ino_map_alloc (LARGE_INO_MIN); + if (! dis->ino_map) + return INO_MAP_INSERT_FAILURE; + } + + return ino_map_insert (dis->ino_map, ino); +} + +/* Attempt to insert the DEV,INO pair into the set DIS. + If it matches a pair already in DIS, keep that pair and return 0. + Otherwise, if insertion is successful, return 1. + Upon any failure return -1. */ +int +di_set_insert (struct di_set *dis, dev_t dev, ino_t ino) +{ + hashint i; + + /* Map the device number to a set of inodes. */ + struct hash_table *ino_set = map_device (dis, dev); + if (! ino_set) + return -1; + + /* Map the inode number to a small representative I. */ + i = map_inode_number (dis, ino); + if (i == INO_MAP_INSERT_FAILURE) + return -1; + + /* Put I into the inode set. */ + return hash_insert0 (ino_set, (void *) i, NULL); +} + +/* Look up the DEV,INO pair in the set DIS. + If found, return 1; if not found, return 0. + Upon any failure return -1. */ +int +di_set_lookup (struct di_set *dis, dev_t dev, ino_t ino) +{ + hashint i; + + /* Map the device number to a set of inodes. */ + struct hash_table *ino_set = map_device (dis, dev); + if (! ino_set) + return -1; + + /* Map the inode number to a small representative I. */ + i = map_inode_number (dis, ino); + if (i == INO_MAP_INSERT_FAILURE) + return -1; + + /* Perform the look-up. */ + return !!hash_lookup (ino_set, (void const *) i); +} diff --git a/lib/di-set.h b/lib/di-set.h new file mode 100644 index 0000000000..f05e8760c3 --- /dev/null +++ b/lib/di-set.h @@ -0,0 +1,14 @@ +#include + +#undef _ATTRIBUTE_NONNULL_ +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__ +# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m))) +#else +# define _ATTRIBUTE_NONNULL_(m) +#endif + +struct di_set *di_set_alloc (void); +int di_set_insert (struct di_set *, dev_t, ino_t) _ATTRIBUTE_NONNULL_ (1); +void di_set_free (struct di_set *) _ATTRIBUTE_NONNULL_ (1); +int di_set_lookup (struct di_set *dis, dev_t dev, ino_t ino) + _ATTRIBUTE_NONNULL_ (1);; diff --git a/lib/ino-map.c b/lib/ino-map.c new file mode 100644 index 0000000000..9133a19b21 --- /dev/null +++ b/lib/ino-map.c @@ -0,0 +1,164 @@ +/* Map an ino_t inode number to a small integer. + + Copyright 2009-2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* written by Paul Eggert and Jim Meyering */ + +#include +#include "ino-map.h" + +#include "hash.h" +#include "verify.h" + +#include +#include + +/* A pair that maps an inode number to a mapped inode number; the + latter is a small unique ID for the former. */ +struct ino_map_ent +{ + ino_t ino; + size_t mapped_ino; +}; + +/* A table that manages and indexes these pairs. */ +struct ino_map +{ + /* A table of KEY,VAL pairs, where KEY is the raw ino_t value and + VAL is the small number that it maps to. */ + struct hash_table *map; + + /* The next mapped inode number to hand out. */ + size_t next_mapped_ino; + + /* Cache of the most recently allocated and otherwise-unused storage + for probing the table. */ + struct ino_map_ent *probe; +}; + +/* Hash an inode map entry. */ +static size_t +ino_hash (void const *x, size_t table_size) +{ + struct ino_map_ent const *p = x; + ino_t ino = p->ino; + + /* When INO is wider than size_t, exclusive-OR the words of INO into H. + This avoids loss of info, without applying % to the wider type, + which could be quite slow on some systems. */ + size_t h = ino; + unsigned int i; + unsigned int n_words = sizeof ino / sizeof h + (sizeof ino % sizeof h != 0); + for (i = 1; i < n_words; i++) + h ^= ino >> CHAR_BIT * sizeof h * i; + + return h % table_size; +} + +/* Return true if two inode map entries are the same. */ +static bool +ino_compare (void const *x, void const *y) +{ + struct ino_map_ent const *a = x; + struct ino_map_ent const *b = y; + return a->ino == b->ino; +} + +/* Allocate an inode map that will hand out integers starting with + NEXT_MAPPED_INO. Return NULL if memory is exhausted. */ +struct ino_map * +ino_map_alloc (size_t next_mapped_ino) +{ + struct ino_map *im = malloc (sizeof *im); + + if (im) + { + enum { INITIAL_INO_MAP_TABLE_SIZE = 1021 }; + im->map = hash_initialize (INITIAL_INO_MAP_TABLE_SIZE, NULL, + ino_hash, ino_compare, free); + if (! im->map) + { + free (im); + return NULL; + } + im->next_mapped_ino = next_mapped_ino; + im->probe = NULL; + } + + return im; +} + +/* Free an inode map. */ +void +ino_map_free (struct ino_map *map) +{ + hash_free (map->map); + free (map->probe); + free (map); +} + + +/* Insert into MAP the inode number INO if it's not there already, + and return its nonnegative mapped inode number. + If INO is already in MAP, return the existing mapped inode number. + Return INO_MAP_INSERT_FAILURE on memory or counter exhaustion. */ +size_t +ino_map_insert (struct ino_map *im, ino_t ino) +{ + struct ino_map_ent *ent; + + /* Find space for the probe, reusing the cache if available. */ + struct ino_map_ent *probe = im->probe; + if (probe) + { + /* If repeating a recent query, return the cached result. */ + if (probe->ino == ino) + return probe->mapped_ino; + } + else + { + im->probe = probe = malloc (sizeof *probe); + if (! probe) + return INO_MAP_INSERT_FAILURE; + } + + probe->ino = ino; + ent = hash_insert (im->map, probe); + if (! ent) + return INO_MAP_INSERT_FAILURE; + + if (ent != probe) + { + /* Use the existing entry. */ + probe->mapped_ino = ent->mapped_ino; + } + else + { + /* If adding 1 to map->next_mapped_ino would cause it to + overflow to zero, then it must equal INO_MAP_INSERT_FAILURE, + which is the value that should be returned in that case. + Verify that this works. */ + verify (INO_MAP_INSERT_FAILURE + 1 == 0); + + /* Prepare to allocate a new probe next time; this one is in use. */ + im->probe = NULL; + + /* INO is new; allocate a mapped inode number for it. */ + probe->mapped_ino = im->next_mapped_ino++; + } + + return probe->mapped_ino; +} diff --git a/lib/ino-map.h b/lib/ino-map.h new file mode 100644 index 0000000000..661b78a55f --- /dev/null +++ b/lib/ino-map.h @@ -0,0 +1,14 @@ +#include + +#undef _ATTRIBUTE_NONNULL_ +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__ +# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m))) +#else +# define _ATTRIBUTE_NONNULL_(m) +#endif + +#define INO_MAP_INSERT_FAILURE ((size_t) -1) + +struct ino_map *ino_map_alloc (size_t); +void ino_map_free (struct ino_map *) _ATTRIBUTE_NONNULL_ (1); +size_t ino_map_insert (struct ino_map *, ino_t) _ATTRIBUTE_NONNULL_ (1); diff --git a/modules/di-set b/modules/di-set new file mode 100644 index 0000000000..562db14a4c --- /dev/null +++ b/modules/di-set @@ -0,0 +1,24 @@ +Description: +manipulate sets of device-inode pairs efficiently + +Files: +lib/di-set.c +lib/di-set.h + +Depends-on: +ino-map +hash + +configure.ac: + +Makefile.am: +lib_SOURCES += di-set.c di-set.h + +Include: +"di-set.h" + +License +GPL + +Maintainer: +Jim Meyering diff --git a/modules/di-set-tests b/modules/di-set-tests new file mode 100644 index 0000000000..d60f7fdc61 --- /dev/null +++ b/modules/di-set-tests @@ -0,0 +1,10 @@ +Files: +tests/test-di-set.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-di-set +check_PROGRAMS += test-di-set diff --git a/modules/ino-map b/modules/ino-map new file mode 100644 index 0000000000..06ee51d5ba --- /dev/null +++ b/modules/ino-map @@ -0,0 +1,24 @@ +Description: +maintain a mapping of ino_t numbers to small integers + +Files: +lib/ino-map.c +lib/ino-map.h + +Depends-on: +hash +verify + +configure.ac: + +Makefile.am: +lib_SOURCES += ino-map.c ino-map.h + +Include: +"ino-map.h" + +License +GPL + +Maintainer: +Jim Meyering diff --git a/modules/ino-map-tests b/modules/ino-map-tests new file mode 100644 index 0000000000..fa10b4ffcc --- /dev/null +++ b/modules/ino-map-tests @@ -0,0 +1,10 @@ +Files: +tests/test-ino-map.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-ino-map +check_PROGRAMS += test-ino-map diff --git a/tests/test-di-set.c b/tests/test-di-set.c new file mode 100644 index 0000000000..5de8da2fff --- /dev/null +++ b/tests/test-di-set.c @@ -0,0 +1,65 @@ +/* Test the di-set module. + Copyright (C) 2010-2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering. */ + +#include +#include +#include +#include +#include + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#include "di-set.h" + +int +main (void) +{ + struct di_set *dis = di_set_alloc (); + ASSERT (dis); + + ASSERT (di_set_lookup (dis, 2, 5) == 0); /* initial lookup fails */ + ASSERT (di_set_insert (dis, 2, 5) == 1); /* first insertion succeeds */ + ASSERT (di_set_insert (dis, 2, 5) == 0); /* duplicate fails */ + ASSERT (di_set_insert (dis, 3, 5) == 1); /* diff dev, duplicate inode is ok */ + ASSERT (di_set_insert (dis, 2, 8) == 1); /* same dev, different inode is ok */ + ASSERT (di_set_lookup (dis, 2, 5) == 1); /* now, the lookup succeeds */ + + /* very large (or negative) inode number */ + ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 1); + ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 0); /* dup */ + + unsigned int i; + for (i = 0; i < 3000; i++) + ASSERT (di_set_insert (dis, 9, i) == 1); + for (i = 0; i < 3000; i++) + ASSERT (di_set_insert (dis, 9, i) == 0); /* duplicate fails */ + + di_set_free (dis); + + return 0; +} diff --git a/tests/test-ino-map.c b/tests/test-ino-map.c new file mode 100644 index 0000000000..12ad9f8666 --- /dev/null +++ b/tests/test-ino-map.c @@ -0,0 +1,62 @@ +/* Test the ino-map module. + Copyright (C) 2010-2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering. */ + +#include +#include +#include +#include + +/* FIXME: once/if in gnulib, use #include "macros.h" in place of this */ +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#include "ino-map.h" + +int +main () +{ + enum { INO_MAP_INIT = 123 }; + struct ino_map *ino_map = ino_map_alloc (INO_MAP_INIT); + ASSERT (ino_map != NULL); + + ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT); + ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT); + ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1); + ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1); + ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2); + ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2); + + int i; + for (i = 0; i < 100; i++) + { + ASSERT (ino_map_insert (ino_map, 10000 + i) == INO_MAP_INIT + 3 + i); + } + + ino_map_free (ino_map); + + return 0; +}