core/stdarch/crates/core_arch/src/x86/avx512ifma.rs

1use crate::core_arch::x86::*;
2use crate::intrinsics::simd::simd_select_bitmask;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    // SAFETY: the intrinsic only requires AVX512IFMA, which is guaranteed
    // to be available here by `#[target_feature]`.
    unsafe { vpmadd52huq_512(a, b, c) }
}
21
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    // SAFETY: only AVX512IFMA is required, enabled via `#[target_feature]`.
    // `simd_select_bitmask` keeps the computed element where a bit of `k`
    // is set and the corresponding element of `a` otherwise.
    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
}
37
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    // SAFETY: only AVX512IFMA is required, enabled via `#[target_feature]`.
    // Elements whose bit in `k` is clear are replaced with zero.
    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
}
53
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    // SAFETY: the intrinsic only requires AVX512IFMA, which is guaranteed
    // to be available here by `#[target_feature]`.
    unsafe { vpmadd52luq_512(a, b, c) }
}
68
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    // SAFETY: only AVX512IFMA is required, enabled via `#[target_feature]`.
    // `simd_select_bitmask` keeps the computed element where a bit of `k`
    // is set and the corresponding element of `a` otherwise.
    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
}
84
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    // SAFETY: only AVX512IFMA is required, enabled via `#[target_feature]`.
    // Elements whose bit in `k` is clear are replaced with zero.
    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
}
100
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52huq)
)]
pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: the intrinsic only requires AVX-IFMA, which is guaranteed
    // to be available here by `#[target_feature]`.
    unsafe { vpmadd52huq_256(a, b, c) }
}
118
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`.
    unsafe { vpmadd52huq_256(a, b, c) }
}
133
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. `simd_select_bitmask` keeps the computed element
    // where a bit of `k` is set and the corresponding element of `a` otherwise.
    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
}
149
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. Elements whose bit in `k` is clear become zero.
    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
}
165
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52luq)
)]
pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: the intrinsic only requires AVX-IFMA, which is guaranteed
    // to be available here by `#[target_feature]`.
    unsafe { vpmadd52luq_256(a, b, c) }
}
183
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`.
    unsafe { vpmadd52luq_256(a, b, c) }
}
198
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. `simd_select_bitmask` keeps the computed element
    // where a bit of `k` is set and the corresponding element of `a` otherwise.
    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
}
214
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. Elements whose bit in `k` is clear become zero.
    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
}
230
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52huq)
)]
pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: the intrinsic only requires AVX-IFMA, which is guaranteed
    // to be available here by `#[target_feature]`.
    unsafe { vpmadd52huq_128(a, b, c) }
}
248
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`.
    unsafe { vpmadd52huq_128(a, b, c) }
}
263
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. `simd_select_bitmask` keeps the computed element
    // where a bit of `k` is set and the corresponding element of `a` otherwise.
    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
}
279
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. Elements whose bit in `k` is clear become zero.
    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
}
295
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52luq)
)]
pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: the intrinsic only requires AVX-IFMA, which is guaranteed
    // to be available here by `#[target_feature]`.
    unsafe { vpmadd52luq_128(a, b, c) }
}
313
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`.
    unsafe { vpmadd52luq_128(a, b, c) }
}
328
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. `simd_select_bitmask` keeps the computed element
    // where a bit of `k` is set and the corresponding element of `a` otherwise.
    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
}
344
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    // SAFETY: only AVX512IFMA+AVX512VL are required, enabled via
    // `#[target_feature]`. Elements whose bit in `k` is clear become zero.
    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
}
360
// LLVM intrinsic declarations backing the IFMA52 functions above.
// The first operand (`z`) is the 64-bit accumulator (`a` in the public
// API); `x` and `y` supply the 52-bit multiplicands.
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
    fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
    fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
    fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
    fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
    fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
    fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
}
376
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Shared writemask for the masked/maskz tests; since every input is a
    // splat, only which lanes get blended differs between lanes.
    const K: __mmask8 = 0b01101101;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }
}