xsimd_rem_pio2.hpp source code [Velox/build/_deps/xsimd-src/include/xsimd/math/xsimd_rem_pio2.hpp]

1	/***************************************************************************
2	* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3	* Martin Renou *
4	* Copyright (c) QuantStack *
5	* Copyright (c) Serge Guelton *
6	* *
7	* Distributed under the terms of the BSD 3-Clause License. *
8	* *
9	* The full license is in the file LICENSE, distributed with this software. *
10	****************************************************************************/
11
12	#include <cmath>
13	#include <cstdint>
14	#include <cstring>
15
16	namespace xsimd
17	{
18	namespace detail
19	{
20
21	/ origin: boost/simd/arch/common/scalar/function/rem_pio2.hpp /
22	/*
23	* ====================================================
24	* copyright 2016 NumScale SAS
25	*
26	* Distributed under the Boost Software License, Version 1.0.
27	* (See copy at http://boost.org/LICENSE_1_0.txt)
28	* ====================================================
29	*/
30	#if defined(_MSC_VER)
31	#define ONCE0 \
32	__pragma(warning(push)) \
33	__pragma(warning(disable : 4127)) while (0) \
34	__pragma(warning(pop)) /**/
35	#else
36	#define ONCE0 while (0)
37	#endif
38
39	/*
40	* ====================================================
41	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
42	*
43	* Developed at SunPro, a Sun Microsystems, Inc. business.
44	* Permission to use, copy, modify, and distribute this
45	* software is freely granted, provided that this notice
46	* is preserved.
47	* ====================================================
48	*/
49
50	#if defined(__GNUC__) && defined(__BYTE_ORDER__)
51	#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
52	#define XSIMD_LITTLE_ENDIAN
53	#endif
54	#elif defined(_WIN32)
55	// We can safely assume that Windows is always little endian
56	#define XSIMD_LITTLE_ENDIAN
57	#elif defined(i386) \|\| defined(i486) \|\| defined(intel) \|\| defined(x86) \|\| defined(i86pc) \|\| defined(__alpha) \|\| defined(__osf__)
58	#define XSIMD_LITTLE_ENDIAN
59	#endif
60
61	#ifdef XSIMD_LITTLE_ENDIAN
62	#define LOW_WORD_IDX 0
63	#define HIGH_WORD_IDX sizeof(std::uint32_t)
64	#else
65	#define LOW_WORD_IDX sizeof(std::uint32_t)
66	#define HIGH_WORD_IDX 0
67	#endif
68
69	#define GET_HIGH_WORD(i, d) \
70	do \
71	{ \
72	double f = (d); \
73	std::memcpy(&(i), reinterpret_cast<char*>(&f) + HIGH_WORD_IDX, \
74	sizeof(std::uint32_t)); \
75	} \
76	ONCE0 \
77	/**/
78
79	#define GET_LOW_WORD(i, d) \
80	do \
81	{ \
82	double f = (d); \
83	std::memcpy(&(i), reinterpret_cast<char*>(&f) + LOW_WORD_IDX, \
84	sizeof(std::uint32_t)); \
85	} \
86	ONCE0 \
87	/**/
88
89	#define SET_HIGH_WORD(d, v) \
90	do \
91	{ \
92	double f = (d); \
93	std::uint32_t value = (v); \
94	std::memcpy(reinterpret_cast<char*>(&f) + HIGH_WORD_IDX, \
95	&value, sizeof(std::uint32_t)); \
96	(d) = f; \
97	} \
98	ONCE0 \
99	/**/
100
101	#define SET_LOW_WORD(d, v) \
102	do \
103	{ \
104	double f = (d); \
105	std::uint32_t value = (v); \
106	std::memcpy(reinterpret_cast<char*>(&f) + LOW_WORD_IDX, \
107	&value, sizeof(std::uint32_t)); \
108	(d) = f; \
109	} \
110	ONCE0 \
111	/**/
112
113	/*
114	* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
115	* double x[],y[]; int e0,nx,prec; int ipio2[];
116	*
117	* __kernel_rem_pio2 return the last three digits of N with
118	* y = x - N*pi/2
119	* so that \|y\| < pi/2.
120	*
121	* The method is to compute the integer (mod 8) and fraction parts of
122	* (2/pi)*x without doing the full multiplication. In general we
123	* skip the part of the product that are known to be a huge integer (
124	* more accurately, = 0 mod 8 ). Thus the number of operations are
125	* independent of the exponent of the input.
126	*
127	* (2/pi) is represented by an array of 24-bit integers in ipio2[].
128	*
129	* Input parameters:
130	* x[] The input value (must be positive) is broken into nx
131	* pieces of 24-bit integers in double precision format.
132	* x[i] will be the i-th 24 bit of x. The scaled exponent
133	* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
134	* match x's up to 24 bits.
135	*
136	* Example of breaking a double positive z into x[0]+x[1]+x[2]:
137	* e0 = ilogb(z)-23
138	* z = scalbn(z,-e0)
139	* for i = 0,1,2
140	* x[i] = floor(z)
141	* z = (z-x[i])2*24
142	*
143	*
144	* y[] ouput result in an array of double precision numbers.
145	* The dimension of y[] is:
146	* 24-bit precision 1
147	* 53-bit precision 2
148	* 64-bit precision 2
149	* 113-bit precision 3
150	* The actual value is the sum of them. Thus for 113-bit
151	* precison, one may have to do something like:
152	*
153	* long double t,w,r_head, r_tail;
154	* t = (long double)y[2] + (long double)y[1];
155	* w = (long double)y[0];
156	* r_head = t+w;
157	* r_tail = w - (r_head - t);
158	*
159	* e0 The exponent of x[0]
160	*
161	* nx dimension of x[]
162	*
163	* prec an integer indicating the precision:
164	* 0 24 bits (single)
165	* 1 53 bits (double)
166	* 2 64 bits (extended)
167	* 3 113 bits (quad)
168	*
169	* ipio2[]
170	* integer array, contains the (24i)-th to (24i+23)-th
171	* bit of 2/pi after binary point. The corresponding
172	* floating value is
173	*
174	* ipio2[i] * 2^(-24(i+1)).
175	*
176	* External function:
177	* double scalbn(), floor();
178	*
179	*
180	* Here is the description of some local variables:
181	*
182	* jk jk+1 is the initial number of terms of ipio2[] needed
183	* in the computation. The recommended value is 2,3,4,
184	* 6 for single, double, extended,and quad.
185	*
186	* jz local integer variable indicating the number of
187	* terms of ipio2[] used.
188	*
189	* jx nx - 1
190	*
191	* jv index for pointing to the suitable ipio2[] for the
192	* computation. In general, we want
193	* ( 2^e0x[0] ipio2[jv-1]*2^(-24jv) )/8
194	* is an integer. Thus
195	* e0-3-24*jv >= 0 or (e0-3)/24 >= jv
196	* Hence jv = max(0,(e0-3)/24).
197	*
198	* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
199	*
200	* q[] double array with integral value, representing the
201	* 24-bits chunk of the product of x and 2/pi.
202	*
203	* q0 the corresponding exponent of q[0]. Note that the
204	* exponent for q[i] would be q0-24*i.
205	*
206	* PIo2[] double precision array, obtained by cutting pi/2
207	* into 24 bits chunks.
208	*
209	* f[] ipio2[] in floating point
210	*
211	* iq[] integer array by breaking up q[] in 24-bits chunk.
212	*
213	* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
214	*
215	* ih integer. If >0 it indicates q[] is >= 0.5, hence
216	* it also indicates the sign of the result.
217	*
218	*/
219
220	inline int32_t __kernel_rem_pio2(double* x, double* y, int32_t e0, int32_t nx, int32_t prec, const int32_t* ipio2) noexcept
221	{
222	static const int32_t init_jk[] = { `2`, `3`, `4`, `6` }; / initial value for jk /
223
224	static const double PIo2[] = {
225	`1.57079625129699707031e+00`, / 0x3FF921FB, 0x40000000 /
226	`7.54978941586159635335e-08`, / 0x3E74442D, 0x00000000 /
227	`5.39030252995776476554e-15`, / 0x3CF84698, 0x80000000 /
228	`3.28200341580791294123e-22`, / 0x3B78CC51, 0x60000000 /
229	`1.27065575308067607349e-29`, / 0x39F01B83, 0x80000000 /
230	`1.22933308981111328932e-36`, / 0x387A2520, 0x40000000 /
231	`2.73370053816464559624e-44`, / 0x36E38222, 0x80000000 /
232	`2.16741683877804819444e-51`, / 0x3569F31D, 0x00000000 /
233	};
234
235	static const double
236	zero
237	= `0.0`,
238	one = `1.0`,
239	two24 = `1.67772160000000000000e+07`, / 0x41700000, 0x00000000 /
240	twon24 = `5.96046447753906250000e-08`; / 0x3E700000, 0x00000000 /
241
242	int32_t jz, jx, jv, jp, jk, carry, n, iq[`20`], i, j, k, m, q0, ih;
243	double z, fw, f[`20`], fq[`20`], q[`20`];
244
245	/ initialize jk/
246	jk = init_jk[prec];
247	jp = jk;
248
249	/ determine jx,jv,q0, note that 3>q0 /
250	jx = nx - `1`;
251	jv = (e0 - `3`) / `24`;
252	if (jv < `0`)
253	jv = `0`;
254	q0 = e0 - `24` * (jv + `1`);
255
256	/ set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] /
257	j = jv - jx;
258	m = jx + jk;
259	for (i = `0`; i <= m; i++, j++)
260	f[i] = (j < `0`) ? zero : (double)ipio2[j];
261
262	/ compute q[0],q[1],...q[jk] /
263	for (i = `0`; i <= jk; i++)
264	{
265	for (j = `0`, fw = `0.0`; j <= jx; j++)
266	fw += x[j] * f[jx + i - j];
267	q[i] = fw;
268	}
269
270	jz = jk;
271
272	recompute:
273	/ distill q[] into iq[] reversingly /
274	for (i = `0`, j = jz, z = q[jz]; j > `0`; i++, j--)
275	{
276	fw = (double)((int32_t)(twon24 * z));
277	iq[i] = (int)(z - two24 * fw);
278	z = q[j - `1`] + fw;
279	}
280
281	/ compute n /
282	z = std::scalbn(x: z, n: q0); / actual value of z /
283	z -= `8.0` * std::floor(x: z * `0.125`); / trim off integer >= 8 /
284	n = (int32_t)z;
285	z -= (double)n;
286	ih = `0`;
287	if (q0 > `0`)
288	{ / need iq[jz-1] to determine n /
289	i = (iq[jz - `1`] >> (`24` - q0));
290	n += i;
291	iq[jz - `1`] -= i << (`24` - q0);
292	ih = iq[jz - `1`] >> (`23` - q0);
293	}
294	else if (q0 == `0`)
295	ih = iq[jz - `1`] >> `23`;
296	else if (z >= `0.5`)
297	ih = `2`;
298
299	if (ih > `0`)
300	{ / q > 0.5 /
301	n += `1`;
302	carry = `0`;
303	for (i = `0`; i < jz; i++)
304	{ / compute 1-q /
305	j = iq[i];
306	if (carry == `0`)
307	{
308	if (j != `0`)
309	{
310	carry = `1`;
311	iq[i] = `0x1000000` - j;
312	}
313	}
314	else
315	iq[i] = `0xffffff` - j;
316	}
317	if (q0 > `0`)
318	{ / rare case: chance is 1 in 12 /
319	switch (q0)
320	{
321	case `1`:
322	iq[jz - `1`] &= `0x7fffff`;
323	break;
324	case `2`:
325	iq[jz - `1`] &= `0x3fffff`;
326	break;
327	}
328	}
329	if (ih == `2`)
330	{
331	z = one - z;
332	if (carry != `0`)
333	z -= std::scalbn(x: one, n: q0);
334	}
335	}
336
337	/ check if recomputation is needed /
338	if (z == zero)
339	{
340	j = `0`;
341	for (i = jz - `1`; i >= jk; i--)
342	j \|= iq[i];
343	if (j == `0`)
344	{ / need recomputation /
345	for (k = `1`; iq[jk - k] == `0`; k++)
346	; / k = no. of terms needed /
347
348	for (i = jz + `1`; i <= jz + k; i++)
349	{ / add q[jz+1] to q[jz+k] /
350	f[jx + i] = (double)ipio2[jv + i];
351	for (j = `0`, fw = `0.0`; j <= jx; j++)
352	fw += x[j] * f[jx + i - j];
353	q[i] = fw;
354	}
355	jz += k;
356	goto recompute;
357	}
358	}
359
360	/ chop off zero terms /
361	if (z == `0.0`)
362	{
363	jz -= `1`;
364	q0 -= `24`;
365	while (iq[jz] == `0`)
366	{
367	jz--;
368	q0 -= `24`;
369	}
370	}
371	else
372	{ / break z into 24-bit if necessary /
373	z = std::scalbn(x: z, n: -q0);
374	if (z >= two24)
375	{
376	fw = (double)((int32_t)(twon24 * z));
377	iq[jz] = (int32_t)(z - two24 * fw);
378	jz += `1`;
379	q0 += `24`;
380	iq[jz] = (int32_t)fw;
381	}
382	else
383	iq[jz] = (int32_t)z;
384	}
385
386	/ convert integer "bit" chunk to floating-point value /
387	fw = scalbn(x: one, n: q0);
388	for (i = jz; i >= `0`; i--)
389	{
390	q[i] = fw * (double)iq[i];
391	fw *= twon24;
392	}
393
394	/ compute PIo2[0,...,jp]q[jz,...,0] /*
395	for (i = jz; i >= `0`; i--)
396	{
397	for (fw = `0.0`, k = `0`; k <= jp && k <= jz - i; k++)
398	fw += PIo2[k] * q[i + k];
399	fq[jz - i] = fw;
400	}
401
402	/ compress fq[] into y[] /
403	switch (prec)
404	{
405	case `0`:
406	fw = `0.0`;
407	for (i = jz; i >= `0`; i--)
408	fw += fq[i];
409	y[`0`] = (ih == `0`) ? fw : -fw;
410	break;
411	case `1`:
412	case `2`:
413	fw = `0.0`;
414	for (i = jz; i >= `0`; i--)
415	fw += fq[i];
416	y[`0`] = (ih == `0`) ? fw : -fw;
417	fw = fq[`0`] - fw;
418	for (i = `1`; i <= jz; i++)
419	fw += fq[i];
420	y[`1`] = (ih == `0`) ? fw : -fw;
421	break;
422	case `3`: / painful /
423	for (i = jz; i > `0`; i--)
424	{
425	fw = fq[i - `1`] + fq[i];
426	fq[i] += fq[i - `1`] - fw;
427	fq[i - `1`] = fw;
428	}
429	for (i = jz; i > `1`; i--)
430	{
431	fw = fq[i - `1`] + fq[i];
432	fq[i] += fq[i - `1`] - fw;
433	fq[i - `1`] = fw;
434	}
435	for (fw = `0.0`, i = jz; i >= `2`; i--)
436	fw += fq[i];
437	if (ih == `0`)
438	{
439	y[`0`] = fq[`0`];
440	y[`1`] = fq[`1`];
441	y[`2`] = fw;
442	}
443	else
444	{
445	y[`0`] = -fq[`0`];
446	y[`1`] = -fq[`1`];
447	y[`2`] = -fw;
448	}
449	}
450	return n & `7`;
451	}
452
453	inline std::int32_t __ieee754_rem_pio2(double x, double* y) noexcept
454	{
455	static const std::int32_t two_over_pi[] = {
456	`0xA2F983`,
457	`0x6E4E44`,
458	`0x1529FC`,
459	`0x2757D1`,
460	`0xF534DD`,
461	`0xC0DB62`,
462	`0x95993C`,
463	`0x439041`,
464	`0xFE5163`,
465	`0xABDEBB`,
466	`0xC561B7`,
467	`0x246E3A`,
468	`0x424DD2`,
469	`0xE00649`,
470	`0x2EEA09`,
471	`0xD1921C`,
472	`0xFE1DEB`,
473	`0x1CB129`,
474	`0xA73EE8`,
475	`0x8235F5`,
476	`0x2EBB44`,
477	`0x84E99C`,
478	`0x7026B4`,
479	`0x5F7E41`,
480	`0x3991D6`,
481	`0x398353`,
482	`0x39F49C`,
483	`0x845F8B`,
484	`0xBDF928`,
485	`0x3B1FF8`,
486	`0x97FFDE`,
487	`0x05980F`,
488	`0xEF2F11`,
489	`0x8B5A0A`,
490	`0x6D1F6D`,
491	`0x367ECF`,
492	`0x27CB09`,
493	`0xB74F46`,
494	`0x3F669E`,
495	`0x5FEA2D`,
496	`0x7527BA`,
497	`0xC7EBE5`,
498	`0xF17B3D`,
499	`0x0739F7`,
500	`0x8A5292`,
501	`0xEA6BFB`,
502	`0x5FB11F`,
503	`0x8D5D08`,
504	`0x560330`,
505	`0x46FC7B`,
506	`0x6BABF0`,
507	`0xCFBC20`,
508	`0x9AF436`,
509	`0x1DA9E3`,
510	`0x91615E`,
511	`0xE61B08`,
512	`0x659985`,
513	`0x5F14A0`,
514	`0x68408D`,
515	`0xFFD880`,
516	`0x4D7327`,
517	`0x310606`,
518	`0x1556CA`,
519	`0x73A8C9`,
520	`0x60E27B`,
521	`0xC08C6B`,
522	};
523
524	static const std::int32_t npio2_hw[] = {
525	`0x3FF921FB`,
526	`0x400921FB`,
527	`0x4012D97C`,
528	`0x401921FB`,
529	`0x401F6A7A`,
530	`0x4022D97C`,
531	`0x4025FDBB`,
532	`0x402921FB`,
533	`0x402C463A`,
534	`0x402F6A7A`,
535	`0x4031475C`,
536	`0x4032D97C`,
537	`0x40346B9C`,
538	`0x4035FDBB`,
539	`0x40378FDB`,
540	`0x403921FB`,
541	`0x403AB41B`,
542	`0x403C463A`,
543	`0x403DD85A`,
544	`0x403F6A7A`,
545	`0x40407E4C`,
546	`0x4041475C`,
547	`0x4042106C`,
548	`0x4042D97C`,
549	`0x4043A28C`,
550	`0x40446B9C`,
551	`0x404534AC`,
552	`0x4045FDBB`,
553	`0x4046C6CB`,
554	`0x40478FDB`,
555	`0x404858EB`,
556	`0x404921FB`,
557	};
558
559	/*
560	* invpio2: 53 bits of 2/pi
561	* pio2_1: first 33 bit of pi/2
562	* pio2_1t: pi/2 - pio2_1
563	* pio2_2: second 33 bit of pi/2
564	* pio2_2t: pi/2 - (pio2_1+pio2_2)
565	* pio2_3: third 33 bit of pi/2
566	* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
567	*/
568
569	static const double
570	zero
571	= `0.00000000000000000000e+00`, / 0x00000000, 0x00000000 /
572	half = `5.00000000000000000000e-01`, / 0x3FE00000, 0x00000000 /
573	two24 = `1.67772160000000000000e+07`, / 0x41700000, 0x00000000 /
574	invpio2 = `6.36619772367581382433e-01`, / 0x3FE45F30, 0x6DC9C883 /
575	pio2_1 = `1.57079632673412561417e+00`, / 0x3FF921FB, 0x54400000 /
576	pio2_1t = `6.07710050650619224932e-11`, / 0x3DD0B461, 0x1A626331 /
577	pio2_2 = `6.07710050630396597660e-11`, / 0x3DD0B461, 0x1A600000 /
578	pio2_2t = `2.02226624879595063154e-21`, / 0x3BA3198A, 0x2E037073 /
579	pio2_3 = `2.02226624871116645580e-21`, / 0x3BA3198A, 0x2E000000 /
580	pio2_3t = `8.47842766036889956997e-32`; / 0x397B839A, 0x252049C1 /
581
582	double z = `0.`, w, t, r, fn;
583	double tx[`3`];
584	std::int32_t e0, i, j, nx, n, ix, hx;
585	std::uint32_t low;
586
587	GET_HIGH_WORD(hx, x); / high word of x /
588	ix = hx & `0x7fffffff`;
589	if (ix <= `0x3fe921fb`) / \|x\| ~<= pi/4 , no need for reduction /
590	{
591	y[`0`] = x;
592	y[`1`] = `0`;
593	return `0`;
594	}
595	if (ix < `0x4002d97c`)
596	{ / \|x\| < 3pi/4, special case with n=+-1 /
597	if (hx > `0`)
598	{
599	z = x - pio2_1;
600	if (ix != `0x3ff921fb`)
601	{ / 33+53 bit pi is good enough /
602	y[`0`] = z - pio2_1t;
603	y[`1`] = (z - y[`0`]) - pio2_1t;
604	}
605	else
606	{ / near pi/2, use 33+33+53 bit pi /
607	z -= pio2_2;
608	y[`0`] = z - pio2_2t;
609	y[`1`] = (z - y[`0`]) - pio2_2t;
610	}
611	return `1`;
612	}
613	else
614	{ / negative x /
615	z = x + pio2_1;
616	if (ix != `0x3ff921fb`)
617	{ / 33+53 bit pi is good enough /
618	y[`0`] = z + pio2_1t;
619	y[`1`] = (z - y[`0`]) + pio2_1t;
620	}
621	else
622	{ / near pi/2, use 33+33+53 bit pi /
623	z += pio2_2;
624	y[`0`] = z + pio2_2t;
625	y[`1`] = (z - y[`0`]) + pio2_2t;
626	}
627
628	return -`1`;
629	}
630	}
631	if (ix <= `0x413921fb`)
632	{ / \|x\| ~<= 2^19(pi/2), medium_ size /*
633	t = std::fabs(x: x);
634	n = (std::int32_t)(t * invpio2 + half);
635	fn = (double)n;
636	r = t - fn * pio2_1;
637	w = fn * pio2_1t; / 1st round good to 85 bit /
638	if ((n < `32`) && (n > `0`) && (ix != npio2_hw[n - `1`]))
639	{
640	y[`0`] = r - w; / quick check no cancellation /
641	}
642	else
643	{
644	std::uint32_t high;
645	j = ix >> `20`;
646	y[`0`] = r - w;
647	GET_HIGH_WORD(high, y[`0`]);
648	i = j - static_cast<int32_t>((high >> `20`) & `0x7ff`);
649	if (i > `16`)
650	{ / 2nd iteration needed, good to 118 /
651	t = r;
652	w = fn * pio2_2;
653	r = t - w;
654	w = fn * pio2_2t - ((t - r) - w);
655	y[`0`] = r - w;
656	GET_HIGH_WORD(high, y[`0`]);
657	i = j - static_cast<int32_t>((high >> `20`) & `0x7ff`);
658	if (i > `49`)
659	{ / 3rd iteration need, 151 bits acc /
660	t = r; / will cover all possible cases /
661	w = fn * pio2_3;
662	r = t - w;
663	w = fn * pio2_3t - ((t - r) - w);
664	y[`0`] = r - w;
665	}
666	}
667	}
668	y[`1`] = (r - y[`0`]) - w;
669	if (hx < `0`)
670	{
671	y[`0`] = -y[`0`];
672	y[`1`] = -y[`1`];
673	return -n;
674	}
675	else
676	return n;
677	}
678	/*
679	* all other (large) arguments
680	*/
681	if (ix >= `0x7ff00000`)
682	{ / x is inf or NaN /
683	y[`0`] = y[`1`] = x - x;
684	return `0`;
685	}
686	/ set z = scalbn(\|x\|,ilogb(x)-23) /
687	GET_LOW_WORD(low, x);
688	SET_LOW_WORD(z, low);
689	e0 = (ix >> `20`) - `1046`; / e0 = ilogb(z)-23; /
690	SET_HIGH_WORD(z, static_cast<uint32_t>(ix - (e0 << `20`)));
691	for (i = `0`; i < `2`; i++)
692	{
693	tx[i] = (double)((std::int32_t)(z));
694	z = (z - tx[i]) * two24;
695	}
696	tx[`2`] = z;
697	nx = `3`;
698	while (tx[nx - `1`] == zero)
699	nx--; / skip zero term /
700	n = __kernel_rem_pio2(x: tx, y, e0, nx, prec: `2`, ipio2: two_over_pi);
701	if (hx < `0`)
702	{
703	y[`0`] = -y[`0`];
704	y[`1`] = -y[`1`];
705	return -n;
706	}
707	return n;
708	}
709	}
710
711	#undef XSIMD_LITTLE_ENDIAN
712	#undef SET_LOW_WORD
713	#undef SET_HIGH_WORD
714	#undef GET_LOW_WORD
715	#undef GET_HIGH_WORD
716	#undef HIGH_WORD_IDX
717	#undef LOW_WORD_IDX
718	#undef ONCE0
719	}
720

Browse the source code of Velox/build/_deps/xsimd-src/include/xsimd/math/xsimd_rem_pio2.hpp