| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * pg_bitutils.c |
| 4 | * Miscellaneous functions for bit-wise operations. |
| 5 | * |
| 6 | * Copyright (c) 2019, PostgreSQL Global Development Group |
| 7 | * |
| 8 | * IDENTIFICATION |
| 9 | * src/port/pg_bitutils.c |
| 10 | * |
| 11 | *------------------------------------------------------------------------- |
| 12 | */ |
| 13 | #include "c.h" |
| 14 | |
| 15 | #ifdef HAVE__GET_CPUID |
| 16 | #include <cpuid.h> |
| 17 | #endif |
| 18 | #ifdef HAVE__CPUID |
| 19 | #include <intrin.h> |
| 20 | #endif |
| 21 | |
| 22 | #include "port/pg_bitutils.h" |
| 23 | |
| 24 | |
| 25 | /* |
| 26 | * Array giving the position of the left-most set bit for each possible |
| 27 | * byte value. We count the right-most position as the 0th bit, and the |
| 28 | * left-most the 7th bit. The 0th entry of the array should not be used. |
| 29 | * |
| 30 | * Note: this is not used by the functions in pg_bitutils.h when |
| 31 | * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that |
| 32 | * extensions possibly compiled with a different compiler can use it. |
| 33 | */ |
| 34 | const uint8 pg_leftmost_one_pos[256] = { |
| 35 | 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, |
| 36 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
| 37 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 38 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 39 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 40 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 41 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 42 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 43 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 44 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 45 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 46 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 47 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 48 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 49 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 50 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 |
| 51 | }; |
| 52 | |
| 53 | /* |
| 54 | * Array giving the position of the right-most set bit for each possible |
| 55 | * byte value. We count the right-most position as the 0th bit, and the |
| 56 | * left-most the 7th bit. The 0th entry of the array should not be used. |
| 57 | * |
| 58 | * Note: this is not used by the functions in pg_bitutils.h when |
| 59 | * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that |
| 60 | * extensions possibly compiled with a different compiler can use it. |
| 61 | */ |
| 62 | const uint8 pg_rightmost_one_pos[256] = { |
| 63 | 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 64 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 65 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 66 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 67 | 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 68 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 69 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 70 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 71 | 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 72 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 73 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 74 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 75 | 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 76 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 77 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
| 78 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 |
| 79 | }; |
| 80 | |
| 81 | /* |
| 82 | * Array giving the number of 1-bits in each possible byte value. |
| 83 | * |
| 84 | * Note: we export this for use by functions in which explicit use |
| 85 | * of the popcount functions seems unlikely to be a win. |
| 86 | */ |
| 87 | const uint8 pg_number_of_ones[256] = { |
| 88 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, |
| 89 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
| 90 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
| 91 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 92 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
| 93 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 94 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 95 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
| 96 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
| 97 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 98 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 99 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
| 100 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
| 101 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
| 102 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
| 103 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 |
| 104 | }; |
| 105 | |
| 106 | /* |
| 107 | * On x86_64, we can use the hardware popcount instruction, but only if |
| 108 | * we can verify that the CPU supports it via the cpuid instruction. |
| 109 | * |
| 110 | * Otherwise, we fall back to __builtin_popcount if the compiler has that, |
| 111 | * or a hand-rolled implementation if not. |
| 112 | */ |
| 113 | #ifdef HAVE_X86_64_POPCNTQ |
| 114 | #if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) |
| 115 | #define USE_POPCNT_ASM 1 |
| 116 | #endif |
| 117 | #endif |
| 118 | |
| 119 | static int pg_popcount32_slow(uint32 word); |
| 120 | static int pg_popcount64_slow(uint64 word); |
| 121 | |
| 122 | #ifdef USE_POPCNT_ASM |
| 123 | static bool pg_popcount_available(void); |
| 124 | static int pg_popcount32_choose(uint32 word); |
| 125 | static int pg_popcount64_choose(uint64 word); |
| 126 | static int pg_popcount32_asm(uint32 word); |
| 127 | static int pg_popcount64_asm(uint64 word); |
| 128 | |
| 129 | int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; |
| 130 | int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; |
| 131 | #else |
| 132 | int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; |
| 133 | int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; |
| 134 | #endif /* USE_POPCNT_ASM */ |
| 135 | |
| 136 | #ifdef USE_POPCNT_ASM |
| 137 | |
| 138 | /* |
| 139 | * Return true if CPUID indicates that the POPCNT instruction is available. |
| 140 | */ |
| 141 | static bool |
| 142 | pg_popcount_available(void) |
| 143 | { |
| 144 | unsigned int exx[4] = {0, 0, 0, 0}; |
| 145 | |
| 146 | #if defined(HAVE__GET_CPUID) |
| 147 | __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); |
| 148 | #elif defined(HAVE__CPUID) |
| 149 | __cpuid(exx, 1); |
| 150 | #else |
| 151 | #error cpuid instruction not available |
| 152 | #endif |
| 153 | |
| 154 | return (exx[2] & (1 << 23)) != 0; /* POPCNT */ |
| 155 | } |
| 156 | |
| 157 | /* |
| 158 | * These functions get called on the first call to pg_popcount32 etc. |
| 159 | * They detect whether we can use the asm implementations, and replace |
| 160 | * the function pointers so that subsequent calls are routed directly to |
| 161 | * the chosen implementation. |
| 162 | */ |
| 163 | static int |
| 164 | pg_popcount32_choose(uint32 word) |
| 165 | { |
| 166 | if (pg_popcount_available()) |
| 167 | { |
| 168 | pg_popcount32 = pg_popcount32_asm; |
| 169 | pg_popcount64 = pg_popcount64_asm; |
| 170 | } |
| 171 | else |
| 172 | { |
| 173 | pg_popcount32 = pg_popcount32_slow; |
| 174 | pg_popcount64 = pg_popcount64_slow; |
| 175 | } |
| 176 | |
| 177 | return pg_popcount32(word); |
| 178 | } |
| 179 | |
| 180 | static int |
| 181 | pg_popcount64_choose(uint64 word) |
| 182 | { |
| 183 | if (pg_popcount_available()) |
| 184 | { |
| 185 | pg_popcount32 = pg_popcount32_asm; |
| 186 | pg_popcount64 = pg_popcount64_asm; |
| 187 | } |
| 188 | else |
| 189 | { |
| 190 | pg_popcount32 = pg_popcount32_slow; |
| 191 | pg_popcount64 = pg_popcount64_slow; |
| 192 | } |
| 193 | |
| 194 | return pg_popcount64(word); |
| 195 | } |
| 196 | |
| 197 | /* |
| 198 | * pg_popcount32_asm |
| 199 | * Return the number of 1 bits set in word |
| 200 | */ |
| 201 | static int |
| 202 | pg_popcount32_asm(uint32 word) |
| 203 | { |
| 204 | uint32 res; |
| 205 | |
| 206 | __asm__ __volatile__(" popcntl %1,%0\n" :"=q" (res):"rm" (word):"cc" ); |
| 207 | return (int) res; |
| 208 | } |
| 209 | |
| 210 | /* |
| 211 | * pg_popcount64_asm |
| 212 | * Return the number of 1 bits set in word |
| 213 | */ |
| 214 | static int |
| 215 | pg_popcount64_asm(uint64 word) |
| 216 | { |
| 217 | uint64 res; |
| 218 | |
| 219 | __asm__ __volatile__(" popcntq %1,%0\n" :"=q" (res):"rm" (word):"cc" ); |
| 220 | return (int) res; |
| 221 | } |
| 222 | |
| 223 | #endif /* USE_POPCNT_ASM */ |
| 224 | |
| 225 | |
| 226 | /* |
| 227 | * pg_popcount32_slow |
| 228 | * Return the number of 1 bits set in word |
| 229 | */ |
| 230 | static int |
| 231 | pg_popcount32_slow(uint32 word) |
| 232 | { |
| 233 | #ifdef HAVE__BUILTIN_POPCOUNT |
| 234 | return __builtin_popcount(word); |
| 235 | #else /* !HAVE__BUILTIN_POPCOUNT */ |
| 236 | int result = 0; |
| 237 | |
| 238 | while (word != 0) |
| 239 | { |
| 240 | result += pg_number_of_ones[word & 255]; |
| 241 | word >>= 8; |
| 242 | } |
| 243 | |
| 244 | return result; |
| 245 | #endif /* HAVE__BUILTIN_POPCOUNT */ |
| 246 | } |
| 247 | |
| 248 | /* |
| 249 | * pg_popcount64_slow |
| 250 | * Return the number of 1 bits set in word |
| 251 | */ |
| 252 | static int |
| 253 | pg_popcount64_slow(uint64 word) |
| 254 | { |
| 255 | #ifdef HAVE__BUILTIN_POPCOUNT |
| 256 | #if defined(HAVE_LONG_INT_64) |
| 257 | return __builtin_popcountl(word); |
| 258 | #elif defined(HAVE_LONG_LONG_INT_64) |
| 259 | return __builtin_popcountll(word); |
| 260 | #else |
| 261 | #error must have a working 64-bit integer datatype |
| 262 | #endif |
| 263 | #else /* !HAVE__BUILTIN_POPCOUNT */ |
| 264 | int result = 0; |
| 265 | |
| 266 | while (word != 0) |
| 267 | { |
| 268 | result += pg_number_of_ones[word & 255]; |
| 269 | word >>= 8; |
| 270 | } |
| 271 | |
| 272 | return result; |
| 273 | #endif /* HAVE__BUILTIN_POPCOUNT */ |
| 274 | } |
| 275 | |
| 276 | |
| 277 | /* |
| 278 | * pg_popcount |
| 279 | * Returns the number of 1-bits in buf |
| 280 | */ |
| 281 | uint64 |
| 282 | pg_popcount(const char *buf, int bytes) |
| 283 | { |
| 284 | uint64 popcnt = 0; |
| 285 | |
| 286 | #if SIZEOF_VOID_P >= 8 |
| 287 | /* Process in 64-bit chunks if the buffer is aligned. */ |
| 288 | if (buf == (const char *) TYPEALIGN(8, buf)) |
| 289 | { |
| 290 | const uint64 *words = (const uint64 *) buf; |
| 291 | |
| 292 | while (bytes >= 8) |
| 293 | { |
| 294 | popcnt += pg_popcount64(*words++); |
| 295 | bytes -= 8; |
| 296 | } |
| 297 | |
| 298 | buf = (const char *) words; |
| 299 | } |
| 300 | #else |
| 301 | /* Process in 32-bit chunks if the buffer is aligned. */ |
| 302 | if (buf == (const char *) TYPEALIGN(4, buf)) |
| 303 | { |
| 304 | const uint32 *words = (const uint32 *) buf; |
| 305 | |
| 306 | while (bytes >= 4) |
| 307 | { |
| 308 | popcnt += pg_popcount32(*words++); |
| 309 | bytes -= 4; |
| 310 | } |
| 311 | |
| 312 | buf = (const char *) words; |
| 313 | } |
| 314 | #endif |
| 315 | |
| 316 | /* Process any remaining bytes */ |
| 317 | while (bytes--) |
| 318 | popcnt += pg_number_of_ones[(unsigned char) *buf++]; |
| 319 | |
| 320 | return popcnt; |
| 321 | } |
| 322 | |