return log_2_floor(n) + !IS_POW2(n);
}
-/* Returns the number of trailing 0-bits in 'n', or 32 if 'n' is 0. */
-int
-ctz(uint32_t n)
-{
- if (!n) {
- return 32;
- } else {
+/* Returns the number of trailing 0-bits in 'n'. Undefined if 'n' == 0. */
#if !defined(UINT_MAX) || !defined(UINT32_MAX)
#error "Someone screwed up the #includes."
#elif __GNUC__ >= 4 && UINT_MAX == UINT32_MAX
- return __builtin_ctz(n);
+/* Defined inline in util.h. */
#else
- unsigned int k;
- int count = 31;
+static int
+raw_ctz(uint32_t n)
+{
+ unsigned int k;
+ int count = 31;
#define CTZ_STEP(X) \
- k = n << (X); \
- if (k) { \
- count -= X; \
- n = k; \
- }
- CTZ_STEP(16);
- CTZ_STEP(8);
- CTZ_STEP(4);
- CTZ_STEP(2);
- CTZ_STEP(1);
+ k = n << (X); \
+ if (k) { \
+ count -= X; \
+ n = k; \
+ }
+ CTZ_STEP(16);
+ CTZ_STEP(8);
+ CTZ_STEP(4);
+ CTZ_STEP(2);
+ CTZ_STEP(1);
#undef CTZ_STEP
- return count;
+ return count;
+}
#endif
- }
+
+/* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */
+int
+popcount(uint32_t x)
+{
+ /* In my testing, this implementation is over twice as fast as any other
+ * portable implementation that I tried, including GCC 4.4
+ * __builtin_popcount(), although nonportable asm("popcnt") was over 50%
+ * faster. */
+#define INIT1(X) \
+ ((((X) & (1 << 0)) != 0) + \
+ (((X) & (1 << 1)) != 0) + \
+ (((X) & (1 << 2)) != 0) + \
+ (((X) & (1 << 3)) != 0) + \
+ (((X) & (1 << 4)) != 0) + \
+ (((X) & (1 << 5)) != 0) + \
+ (((X) & (1 << 6)) != 0) + \
+ (((X) & (1 << 7)) != 0))
+#define INIT2(X) INIT1(X), INIT1((X) + 1)
+#define INIT4(X) INIT2(X), INIT2((X) + 2)
+#define INIT8(X) INIT4(X), INIT4((X) + 4)
+#define INIT16(X) INIT8(X), INIT8((X) + 8)
+#define INIT32(X) INIT16(X), INIT16((X) + 16)
+#define INIT64(X) INIT32(X), INIT32((X) + 32)
+
+ static const uint8_t popcount8[256] = {
+ INIT64(0), INIT64(64), INIT64(128), INIT64(192)
+ };
+
+ return (popcount8[x & 0xff] +
+ popcount8[(x >> 8) & 0xff] +
+ popcount8[(x >> 16) & 0xff] +
+ popcount8[x >> 24]);
}
/* Returns true if the 'n' bytes starting at 'p' are zeros. */