| 1 | #include "jemalloc/internal/jemalloc_preamble.h" |
| 2 | |
| 3 | #include "jemalloc/internal/assert.h" |
| 4 | #include "jemalloc/internal/bit_util.h" |
| 5 | #include "jemalloc/internal/bitmap.h" |
| 6 | #include "jemalloc/internal/pages.h" |
| 7 | #include "jemalloc/internal/sc.h" |
| 8 | |
| 9 | /* |
| 10 | * This module computes the size classes used to satisfy allocations. The logic |
| 11 | * here was ported more or less line-by-line from a shell script, and because of |
| 12 | * that is not the most idiomatic C. Eventually we should fix this, but for now |
| 13 | * at least the damage is compartmentalized to this file. |
| 14 | */ |
| 15 | |
/*
 * Process-wide size class data.  NOTE(review): presumably the definition for
 * an extern declaration in sc.h and initialized by callers via sc_boot() /
 * sc_data_init(); nothing in this file initializes it directly — confirm.
 */
sc_data_t sc_data_global;
| 17 | |
/*
 * Region (allocation) size of the size class described by the
 * (lg_base, lg_delta, ndelta) triple: 2^lg_base + ndelta * 2^lg_delta.
 */
static size_t
reg_size_compute(int lg_base, int lg_delta, int ndelta) {
	size_t base = (size_t)1 << lg_base;
	size_t delta_total = (size_t)ndelta << lg_delta;
	return base + delta_total;
}
| 22 | |
| 23 | /* Returns the number of pages in the slab. */ |
| 24 | static int |
| 25 | slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) { |
| 26 | size_t page = (ZU(1) << lg_page); |
| 27 | size_t reg_size = reg_size_compute(lg_base, lg_delta, ndelta); |
| 28 | |
| 29 | size_t try_slab_size = page; |
| 30 | size_t try_nregs = try_slab_size / reg_size; |
| 31 | size_t perfect_slab_size = 0; |
| 32 | bool perfect = false; |
| 33 | /* |
| 34 | * This loop continues until we find the least common multiple of the |
| 35 | * page size and size class size. Size classes are all of the form |
| 36 | * base + ndelta * delta == (ndelta + base/ndelta) * delta, which is |
| 37 | * (ndelta + ngroup) * delta. The way we choose slabbing strategies |
| 38 | * means that delta is at most the page size and ndelta < ngroup. So |
| 39 | * the loop executes for at most 2 * ngroup - 1 iterations, which is |
| 40 | * also the bound on the number of pages in a slab chosen by default. |
| 41 | * With the current default settings, this is at most 7. |
| 42 | */ |
| 43 | while (!perfect) { |
| 44 | perfect_slab_size = try_slab_size; |
| 45 | size_t perfect_nregs = try_nregs; |
| 46 | try_slab_size += page; |
| 47 | try_nregs = try_slab_size / reg_size; |
| 48 | if (perfect_slab_size == perfect_nregs * reg_size) { |
| 49 | perfect = true; |
| 50 | } |
| 51 | } |
| 52 | return (int)(perfect_slab_size / page); |
| 53 | } |
| 54 | |
| 55 | static void |
| 56 | size_class( |
| 57 | /* Output. */ |
| 58 | sc_t *sc, |
| 59 | /* Configuration decisions. */ |
| 60 | int lg_max_lookup, int lg_page, int lg_ngroup, |
| 61 | /* Inputs specific to the size class. */ |
| 62 | int index, int lg_base, int lg_delta, int ndelta) { |
| 63 | sc->index = index; |
| 64 | sc->lg_base = lg_base; |
| 65 | sc->lg_delta = lg_delta; |
| 66 | sc->ndelta = ndelta; |
| 67 | sc->psz = (reg_size_compute(lg_base, lg_delta, ndelta) |
| 68 | % (ZU(1) << lg_page) == 0); |
| 69 | size_t size = (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); |
| 70 | if (index == 0) { |
| 71 | assert(!sc->psz); |
| 72 | } |
| 73 | if (size < (ZU(1) << (lg_page + lg_ngroup))) { |
| 74 | sc->bin = true; |
| 75 | sc->pgs = slab_size(lg_page, lg_base, lg_delta, ndelta); |
| 76 | } else { |
| 77 | sc->bin = false; |
| 78 | sc->pgs = 0; |
| 79 | } |
| 80 | if (size <= (ZU(1) << lg_max_lookup)) { |
| 81 | sc->lg_delta_lookup = lg_delta; |
| 82 | } else { |
| 83 | sc->lg_delta_lookup = 0; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | static void |
| 88 | size_classes( |
| 89 | /* Output. */ |
| 90 | sc_data_t *sc_data, |
| 91 | /* Determined by the system. */ |
| 92 | size_t lg_ptr_size, int lg_quantum, |
| 93 | /* Configuration decisions. */ |
| 94 | int lg_tiny_min, int lg_max_lookup, int lg_page, int lg_ngroup) { |
| 95 | int ptr_bits = (1 << lg_ptr_size) * 8; |
| 96 | int ngroup = (1 << lg_ngroup); |
| 97 | int ntiny = 0; |
| 98 | int nlbins = 0; |
| 99 | int lg_tiny_maxclass = (unsigned)-1; |
| 100 | int nbins = 0; |
| 101 | int npsizes = 0; |
| 102 | |
| 103 | int index = 0; |
| 104 | |
| 105 | int ndelta = 0; |
| 106 | int lg_base = lg_tiny_min; |
| 107 | int lg_delta = lg_base; |
| 108 | |
| 109 | /* Outputs that we update as we go. */ |
| 110 | size_t lookup_maxclass = 0; |
| 111 | size_t small_maxclass = 0; |
| 112 | int lg_large_minclass = 0; |
| 113 | size_t large_maxclass = 0; |
| 114 | |
| 115 | /* Tiny size classes. */ |
| 116 | while (lg_base < lg_quantum) { |
| 117 | sc_t *sc = &sc_data->sc[index]; |
| 118 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
| 119 | lg_base, lg_delta, ndelta); |
| 120 | if (sc->lg_delta_lookup != 0) { |
| 121 | nlbins = index + 1; |
| 122 | } |
| 123 | if (sc->psz) { |
| 124 | npsizes++; |
| 125 | } |
| 126 | if (sc->bin) { |
| 127 | nbins++; |
| 128 | } |
| 129 | ntiny++; |
| 130 | /* Final written value is correct. */ |
| 131 | lg_tiny_maxclass = lg_base; |
| 132 | index++; |
| 133 | lg_delta = lg_base; |
| 134 | lg_base++; |
| 135 | } |
| 136 | |
| 137 | /* First non-tiny (pseudo) group. */ |
| 138 | if (ntiny != 0) { |
| 139 | sc_t *sc = &sc_data->sc[index]; |
| 140 | /* |
| 141 | * See the note in sc.h; the first non-tiny size class has an |
| 142 | * unusual encoding. |
| 143 | */ |
| 144 | lg_base--; |
| 145 | ndelta = 1; |
| 146 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
| 147 | lg_base, lg_delta, ndelta); |
| 148 | index++; |
| 149 | lg_base++; |
| 150 | lg_delta++; |
| 151 | if (sc->psz) { |
| 152 | npsizes++; |
| 153 | } |
| 154 | if (sc->bin) { |
| 155 | nbins++; |
| 156 | } |
| 157 | } |
| 158 | while (ndelta < ngroup) { |
| 159 | sc_t *sc = &sc_data->sc[index]; |
| 160 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
| 161 | lg_base, lg_delta, ndelta); |
| 162 | index++; |
| 163 | ndelta++; |
| 164 | if (sc->psz) { |
| 165 | npsizes++; |
| 166 | } |
| 167 | if (sc->bin) { |
| 168 | nbins++; |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | /* All remaining groups. */ |
| 173 | lg_base = lg_base + lg_ngroup; |
| 174 | while (lg_base < ptr_bits - 1) { |
| 175 | ndelta = 1; |
| 176 | int ndelta_limit; |
| 177 | if (lg_base == ptr_bits - 2) { |
| 178 | ndelta_limit = ngroup - 1; |
| 179 | } else { |
| 180 | ndelta_limit = ngroup; |
| 181 | } |
| 182 | while (ndelta <= ndelta_limit) { |
| 183 | sc_t *sc = &sc_data->sc[index]; |
| 184 | size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, |
| 185 | lg_base, lg_delta, ndelta); |
| 186 | if (sc->lg_delta_lookup != 0) { |
| 187 | nlbins = index + 1; |
| 188 | /* Final written value is correct. */ |
| 189 | lookup_maxclass = (ZU(1) << lg_base) |
| 190 | + (ZU(ndelta) << lg_delta); |
| 191 | } |
| 192 | if (sc->psz) { |
| 193 | npsizes++; |
| 194 | } |
| 195 | if (sc->bin) { |
| 196 | nbins++; |
| 197 | /* Final written value is correct. */ |
| 198 | small_maxclass = (ZU(1) << lg_base) |
| 199 | + (ZU(ndelta) << lg_delta); |
| 200 | if (lg_ngroup > 0) { |
| 201 | lg_large_minclass = lg_base + 1; |
| 202 | } else { |
| 203 | lg_large_minclass = lg_base + 2; |
| 204 | } |
| 205 | } |
| 206 | large_maxclass = (ZU(1) << lg_base) |
| 207 | + (ZU(ndelta) << lg_delta); |
| 208 | index++; |
| 209 | ndelta++; |
| 210 | } |
| 211 | lg_base++; |
| 212 | lg_delta++; |
| 213 | } |
| 214 | /* Additional outputs. */ |
| 215 | int nsizes = index; |
| 216 | unsigned lg_ceil_nsizes = lg_ceil(nsizes); |
| 217 | |
| 218 | /* Fill in the output data. */ |
| 219 | sc_data->ntiny = ntiny; |
| 220 | sc_data->nlbins = nlbins; |
| 221 | sc_data->nbins = nbins; |
| 222 | sc_data->nsizes = nsizes; |
| 223 | sc_data->lg_ceil_nsizes = lg_ceil_nsizes; |
| 224 | sc_data->npsizes = npsizes; |
| 225 | sc_data->lg_tiny_maxclass = lg_tiny_maxclass; |
| 226 | sc_data->lookup_maxclass = lookup_maxclass; |
| 227 | sc_data->small_maxclass = small_maxclass; |
| 228 | sc_data->lg_large_minclass = lg_large_minclass; |
| 229 | sc_data->large_minclass = (ZU(1) << lg_large_minclass); |
| 230 | sc_data->large_maxclass = large_maxclass; |
| 231 | |
| 232 | /* |
| 233 | * We compute these values in two ways: |
| 234 | * - Incrementally, as above. |
| 235 | * - In macros, in sc.h. |
| 236 | * The computation is easier when done incrementally, but putting it in |
| 237 | * a constant makes it available to the fast paths without having to |
| 238 | * touch the extra global cacheline. We assert, however, that the two |
| 239 | * computations are equivalent. |
| 240 | */ |
| 241 | assert(sc_data->npsizes == SC_NPSIZES); |
| 242 | assert(sc_data->lg_tiny_maxclass == SC_LG_TINY_MAXCLASS); |
| 243 | assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS); |
| 244 | assert(sc_data->large_minclass == SC_LARGE_MINCLASS); |
| 245 | assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); |
| 246 | assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS); |
| 247 | |
| 248 | /* |
| 249 | * In the allocation fastpath, we want to assume that we can |
| 250 | * unconditionally subtract the requested allocation size from |
| 251 | * a ssize_t, and detect passing through 0 correctly. This |
| 252 | * results in optimal generated code. For this to work, the |
| 253 | * maximum allocation size must be less than SSIZE_MAX. |
| 254 | */ |
| 255 | assert(SC_LARGE_MAXCLASS < SSIZE_MAX); |
| 256 | } |
| 257 | |
| 258 | void |
| 259 | sc_data_init(sc_data_t *sc_data) { |
| 260 | assert(!sc_data->initialized); |
| 261 | |
| 262 | int lg_max_lookup = 12; |
| 263 | |
| 264 | size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, |
| 265 | lg_max_lookup, LG_PAGE, 2); |
| 266 | |
| 267 | sc_data->initialized = true; |
| 268 | } |
| 269 | |
| 270 | static void |
| 271 | sc_data_update_sc_slab_size(sc_t *sc, size_t reg_size, size_t pgs_guess) { |
| 272 | size_t min_pgs = reg_size / PAGE; |
| 273 | if (reg_size % PAGE != 0) { |
| 274 | min_pgs++; |
| 275 | } |
| 276 | /* |
| 277 | * BITMAP_MAXBITS is actually determined by putting the smallest |
| 278 | * possible size-class on one page, so this can never be 0. |
| 279 | */ |
| 280 | size_t max_pgs = BITMAP_MAXBITS * reg_size / PAGE; |
| 281 | |
| 282 | assert(min_pgs <= max_pgs); |
| 283 | assert(min_pgs > 0); |
| 284 | assert(max_pgs >= 1); |
| 285 | if (pgs_guess < min_pgs) { |
| 286 | sc->pgs = (int)min_pgs; |
| 287 | } else if (pgs_guess > max_pgs) { |
| 288 | sc->pgs = (int)max_pgs; |
| 289 | } else { |
| 290 | sc->pgs = (int)pgs_guess; |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | void |
| 295 | sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) { |
| 296 | assert(data->initialized); |
| 297 | for (int i = 0; i < data->nsizes; i++) { |
| 298 | sc_t *sc = &data->sc[i]; |
| 299 | if (!sc->bin) { |
| 300 | break; |
| 301 | } |
| 302 | size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta, |
| 303 | sc->ndelta); |
| 304 | if (begin <= reg_size && reg_size <= end) { |
| 305 | sc_data_update_sc_slab_size(sc, reg_size, pgs); |
| 306 | } |
| 307 | } |
| 308 | } |
| 309 | |
/* Boot-time entry point: initialize the size class data for *data. */
void
sc_boot(sc_data_t *data) {
	sc_data_init(data);
}
| 314 | |