static inline void
enc_loop_avx2_inner_first (const uint8_t **s, uint8_t **o)
{
	// The first load is done at s - 0, not at the usual s - 4 offset, so
	// that the read cannot underflow the input buffer:
	__m256i src = _mm256_loadu_si256((__m256i *) *s);

	// Shift by 4 bytes, as required by enc_reshuffle:
	src = _mm256_permutevar8x32_epi32(src, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
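	// The permute copies dword i - 1 into dword i (duplicating dword 0),
	// so the 24 payload bytes now sit at register bytes 4..27, the layout
	// enc_reshuffle expects; bytes 0..3 and 28..31 are ignored.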

	// Reshuffle, translate, store:
	src = enc_reshuffle(src);
	src = enc_translate(src);
	_mm256_storeu_si256((__m256i *) *o, src);

	// Subsequent loads will be done at an effective s - 4: this round
	// consumed 24 bytes, but advance the pointer by only 20 so that the
	// next 32-byte load starts 4 bytes early:
	*s += 20;
	*o += 32;
}

static inline void
enc_loop_avx2_inner (const uint8_t **s, uint8_t **o)
{
	// Load input. The pointer already carries the -4 byte offset set up
	// by the first round, so the 24 payload bytes land at register bytes
	// 4..27, as enc_reshuffle requires:
	__m256i src = _mm256_loadu_si256((__m256i *) *s);

	// Reshuffle, translate, store:
	src = enc_reshuffle(src);
	src = enc_translate(src);
	_mm256_storeu_si256((__m256i *) *o, src);

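	// Base64 expands every 3 input bytes into 4 output characters, so a
	// 24-byte round produces 32 bytes of output: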
	*s += 24;
	*o += 32;
}

static inline void
enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// Bail out if the input is too short for even one full 32-byte load:
	if (*slen < 32) {
		return;
	}

	// Process blocks of 24 bytes at a time. Because blocks are loaded 32
	// bytes at a time at an offset of -4, ensure that at least 4 bytes
	// will remain after the last round, so that the final read does not
	// pass beyond the bounds of the input buffer:
	size_t rounds = (*slen - 4) / 24;

	*slen -= rounds * 24;	// 24 bytes consumed per round
	*olen += rounds * 32;	// 32 bytes produced per round
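
	// Worked example (hypothetical size): with *slen == 100, rounds =
	// (100 - 4) / 24 = 4; 96 input bytes are consumed, 128 output bytes
	// are produced, and 4 bytes remain for the caller's scalar tail.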

	// The first loop iteration requires special handling to ensure that
	// the read, which is done at an offset, does not underflow the buffer:
	enc_loop_avx2_inner_first(s, o);
	rounds--;

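	// Dispatch on the remaining round count, unrolling by 8, 4 and 2 to
	// amortize the loop overhead across several encode rounds: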
	while (rounds > 0) {
		if (rounds >= 8) {
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			rounds -= 8;
			continue;
		}
		if (rounds >= 4) {
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			rounds -= 4;
			continue;
		}
		if (rounds >= 2) {
			enc_loop_avx2_inner(s, o);
			enc_loop_avx2_inner(s, o);
			rounds -= 2;
			continue;
		}
		enc_loop_avx2_inner(s, o);
		break;
	}

	// Add the -4 offset back, so that *s again points at the first
	// unprocessed input byte:
	*s += 4;
}
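
// A minimal usage sketch (hypothetical caller, not part of this file; the
// scalar tail encoder is assumed to exist elsewhere):
//
//	const uint8_t *src  = input;
//	uint8_t       *out  = output;
//	size_t         slen = input_length;
//	size_t         olen = 0;
//
//	enc_loop_avx2(&src, &slen, &out, &olen);
//
//	// At most 31 bytes remain at src; encode them with the scalar
//	// routine and add its output length to olen.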