This should be a worthwhile optimization in many cases, because
__builtin_ctzl compiles to a single machine instruction on x86, whereas
the generic implementation compiles to several.
/* Returns the number of trailing zero bits in X, that is, the bit
   index of its least significant 1-bit.

   With GCC 4 or later this is a single call to __builtin_ctzl; every
   other compiler uses the portable fallback below. */
static inline int
count_trailing_zeros (unsigned long int x)
{
+#if __GNUC__ >= 4
/* GCC documents the result of __builtin_ctzl as undefined when its
   argument is 0.  NOTE(review): confirm that no caller passes 0. */
+ return __builtin_ctzl (x);
+#else /* not GCC 4+ */
/* This algorithm is from _Hacker's Delight_ section 5.4. */
/* N starts at 1 so that the final step can subtract the low bit of X
   instead of testing it. */
int n = 1;
/* NOTE(review): COUNT_STEP is not defined in this excerpt; presumably
   it adds its argument to N and shifts X right by that many bits when
   those low bits are all zero -- confirm against the full source. */
COUNT_STEP (2);
return n - (x & 1);
+#endif /* not GCC 4+ */
}
/* Returns the least index of the in-use element in LEAF greater