+2006-10-12 Jim Meyering <jim@meyering.net>
+
+ Big performance improvement for fts-based tools that use FTS_NOSTAT.
+ Avoid spurious inode-mismatch problems on non-POSIX file systems.
+ Details: http://article.gmane.org/gmane.comp.lib.gnulib.bugs/7416
+ * lib/fts_.h (FTS_DEFER_STAT): Define new flag.
+ (FTS_OPTIONMASK): Extend the mask to reflect this addition.
+ * lib/fts.c (DT_IS_KNOWN, DT_MUST_BE): Define.
+ (FTS_NO_STAT_REQUIRED, FTS_STAT_REQUIRED): Define.
+ (fts_set_stat_required): New function.
+ (fts_open): Defer the calls to fts_stat, if possible or requested.
+ Move the code that maps a command-line fts_info value FTS_DOT to FTS_D
+ into fts_stat itself.
+ (fts_read): Perform any required (deferred) fts_stat call.
+ (fts_build): Likewise, for the directory we're about to open and read.
+ In the readdir loop, carefully decide whether each entry will require
+ an eventual call to fts_stat, using dirent.d_type info if available.
+ (fts_stat): Move the test for whether to honor FTS_COMFOLLOW on
+ a command line argument into this function. Update all callers.
+ Map a return value of FTS_DOT to FTS_D for a command line argument.
+ * modules/fts (Depends-on): Add d-type. Alphabetize.
+ Thanks to Miklos Szeredi for his tenacity and for the initial
+ bug report about "find" failing on a FUSE-based file system.
+
2006-10-12 Paul Eggert <eggert@cs.ucla.edu>
* m4/extensions.m4 (AC_USE_SYSTEM_EXTENSIONS): Renamed from
# define _D_EXACT_NAMLEN(dirent) strlen ((dirent)->d_name)
#endif
+#if HAVE_STRUCT_DIRENT_D_TYPE
+/* True if the type of the directory entry D is known. */
+# define DT_IS_KNOWN(d) ((d)->d_type != DT_UNKNOWN)
+/* True if the type of the directory entry D must be T. */
+# define DT_MUST_BE(d, t) ((d)->d_type == (t))
+#else
+# define DT_IS_KNOWN(d) false
+# define DT_MUST_BE(d, t) false
+#endif
+
+enum
+{
+ FTS_NO_STAT_REQUIRED = 1,
+ FTS_STAT_REQUIRED = 2
+};
+
#ifdef _LIBC
# undef close
# define close __close
} \
while (false)
+/* Overload the fts_statp->st_size member (otherwise unused, when
+ fts_info is FTS_NSOK) to indicate whether fts_read should stat
+ this entry or not. */
+static void
+fts_set_stat_required (FTSENT *p, bool required)
+{
+ if (p->fts_info != FTS_NSOK)
+ abort ();
+ p->fts_statp->st_size = (required
+ ? FTS_STAT_REQUIRED
+ : FTS_NO_STAT_REQUIRED);
+}
+
/* file-descriptor-relative opendir. */
/* FIXME: if others need this function, move it into lib/openat.c */
static inline DIR *
FTSENT *parent = NULL;
FTSENT *tmp = NULL; /* pacify gcc */
size_t len;
+ bool defer_stat;
/* Options check. */
if (options & ~FTS_OPTIONMASK) {
parent->fts_level = FTS_ROOTPARENTLEVEL;
}
+ /* The classic fts implementation would call fts_stat with
+ a new entry for each iteration of the loop below.
+ If the comparison function is not specified or if the
+ FTS_DEFER_STAT option is in effect, don't stat any entry
+ in this loop. This is an attempt to minimize the interval
+ between the initial stat/lstat/fstatat and the point at which
+ a directory argument is first opened. This matters for any
+ directory command line argument that resides on a file system
+ without genuine i-nodes. If you specify FTS_DEFER_STAT along
+ with a comparison function, that function must not access any
+ data via the fts_statp pointer. */
+ defer_stat = (compar == NULL || ISSET(FTS_DEFER_STAT));
+
/* Allocate/initialize root(s). */
for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) {
/* Don't allow zero-length file names. */
p->fts_level = FTS_ROOTLEVEL;
p->fts_parent = parent;
p->fts_accpath = p->fts_name;
- p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW) != 0);
-
- /* Command-line "." and ".." are real directories. */
- if (p->fts_info == FTS_DOT)
- p->fts_info = FTS_D;
+ if (defer_stat) {
+ p->fts_info = FTS_NSOK;
+ fts_set_stat_required(p, true);
+ } else {
+ p->fts_info = fts_stat(sp, p, false);
+ }
/*
* If comparison routine supplied, traverse in sorted
*t++ = '/';
memmove(t, p->fts_name, p->fts_namelen + 1);
check_for_dir:
+ if (p->fts_info == FTS_NSOK)
+ {
+ switch (p->fts_statp->st_size)
+ {
+ case FTS_STAT_REQUIRED:
+ p->fts_info = fts_stat(sp, p, false);
+ break;
+ case FTS_NO_STAT_REQUIRED:
+ break;
+ default:
+ abort ();
+ }
+ }
sp->fts_cur = p;
if (p->fts_info == FTS_D)
{
return (sp->fts_cur = NULL);
}
+ if (p->fts_info == FTS_NSOK)
+ abort ();
+
/* NUL terminate the file name. */
sp->fts_path[p->fts_pathlen] = '\0';
}
return (NULL);
}
+ /* Rather than calling fts_stat for each and every entry encountered
+ in the readdir loop (below), stat each directory only right after
+ opening it. */
+ if (cur->fts_info == FTS_NSOK)
+ cur->fts_info = fts_stat(sp, cur, false);
/*
* Nlinks is the number of possible entries of type directory in the
memmove(cp, p->fts_name, p->fts_namelen + 1);
} else
p->fts_accpath = p->fts_name;
- /* Stat it. */
- p->fts_info = fts_stat(sp, p, false);
+
+ bool is_dir;
+ if (sp->fts_compar == NULL || ISSET(FTS_DEFER_STAT)) {
+ /* Record what fts_read will have to do with this
+ entry. In many cases, it will simply fts_stat it,
+ but we can take advantage of any d_type information
+ to optimize away the unnecessary stat calls. I.e.,
+ if FTS_NOSTAT is in effect and we're not following
+ symlinks (FTS_PHYSICAL) and d_type indicates this
+ is *not* a directory, then we won't have to stat it
+ at all. If it *is* a directory, then (currently)
+ we stat it regardless, in order to get device and
+ inode numbers. Some day we might optimize that
+ away, too, for directories where d_ino is known to
+ be valid. */
+ bool skip_stat = (ISSET(FTS_PHYSICAL)
+ && ISSET(FTS_NOSTAT)
+ && DT_IS_KNOWN(dp)
+ && ! DT_MUST_BE(dp, DT_DIR));
+ p->fts_info = FTS_NSOK;
+ fts_set_stat_required(p, !skip_stat);
+ is_dir = (ISSET(FTS_PHYSICAL) && ISSET(FTS_NOSTAT)
+ && DT_MUST_BE(dp, DT_DIR));
+ } else {
+ p->fts_info = fts_stat(sp, p, false);
+ is_dir = (p->fts_info == FTS_D
+ || p->fts_info == FTS_DC
+ || p->fts_info == FTS_DOT);
+ }
/* Decrement link count if applicable. */
- if (nlinks > 0 && (p->fts_info == FTS_D ||
- p->fts_info == FTS_DC || p->fts_info == FTS_DOT))
+ if (nlinks > 0 && is_dir)
nlinks -= nostat;
/* We walk in directory order so "ls -f" doesn't get upset. */
struct stat *sbp = p->fts_statp;
int saved_errno;
+ if (p->fts_level == FTS_ROOTLEVEL && ISSET(FTS_COMFOLLOW))
+ follow = true;
+
#if defined FTS_WHITEOUT && 0
/* check for whiteout */
if (p->fts_flags & FTS_ISW) {
}
if (S_ISDIR(sbp->st_mode)) {
- if (ISDOT(p->fts_name))
- return (FTS_DOT);
+ if (ISDOT(p->fts_name)) {
+ /* Command-line "." and ".." are real directories. */
+ return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT);
+ }
#if _LGPL_PACKAGE
{
through the file descriptor member, fts_cwd_fd. */
# define FTS_CWDFD 0x0200
-# define FTS_OPTIONMASK 0x03ff /* valid user option mask */
+ /* Historically, for each directory that fts initially encounters, it would
+ open it, read all entries, and stat each entry, storing the results, and
+ then it would process the first entry. But that behavior is bad for
+ locality of reference, and also causes trouble with inode-simulating
+ file systems like FAT, CIFS, FUSE-based ones, etc., when entries from
+ their name/inode cache are flushed too early.
+ Use this flag to make fts_open and fts_read defer the stat/lstat/fststat
+ of each entry until it actually processed. However, note that if you use
+ this option and also specify a comparison function, that function may not
+ examine any data via fts_statp. */
+# define FTS_DEFER_STAT 0x0400
+
+# define FTS_OPTIONMASK 0x07ff /* valid user option mask */
# define FTS_NAMEONLY 0x1000 /* (private) child names only */
# define FTS_STOP 0x2000 /* (private) unrecoverable error */