1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub unsafe fn _mm_pause() {
23 pause()
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub unsafe fn _mm_lfence() {
53 lfence()
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub unsafe fn _mm_mfence() {
69 mfence()
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 unsafe {
169 let a = simd_cast::<_, u16x16>(a.as_u8x16());
170 let b = simd_cast::<_, u16x16>(b.as_u8x16());
171 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172 transmute(simd_cast::<_, u8x16>(r))
173 }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184 unsafe {
185 let a = simd_cast::<_, u32x8>(a.as_u16x8());
186 let b = simd_cast::<_, u32x8>(b.as_u16x8());
187 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188 transmute(simd_cast::<_, u16x8>(r))
189 }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
212#[target_feature(enable = "sse2")]
213#[cfg_attr(test, assert_instr(pmaxsw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
216 unsafe {
217 let a = a.as_i16x8();
218 let b = b.as_i16x8();
219 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
220 }
221}
222
223#[inline]
228#[target_feature(enable = "sse2")]
229#[cfg_attr(test, assert_instr(pmaxub))]
230#[stable(feature = "simd_x86", since = "1.27.0")]
231pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
232 unsafe {
233 let a = a.as_u8x16();
234 let b = b.as_u8x16();
235 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
236 }
237}
238
239#[inline]
244#[target_feature(enable = "sse2")]
245#[cfg_attr(test, assert_instr(pminsw))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
248 unsafe {
249 let a = a.as_i16x8();
250 let b = b.as_i16x8();
251 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
252 }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminub))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
264 unsafe {
265 let a = a.as_u8x16();
266 let b = b.as_u8x16();
267 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
268 }
269}
270
271#[inline]
278#[target_feature(enable = "sse2")]
279#[cfg_attr(test, assert_instr(pmulhw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
282 unsafe {
283 let a = simd_cast::<_, i32x8>(a.as_i16x8());
284 let b = simd_cast::<_, i32x8>(b.as_i16x8());
285 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
286 transmute(simd_cast::<i32x8, i16x8>(r))
287 }
288}
289
290#[inline]
297#[target_feature(enable = "sse2")]
298#[cfg_attr(test, assert_instr(pmulhuw))]
299#[stable(feature = "simd_x86", since = "1.27.0")]
300pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
301 unsafe {
302 let a = simd_cast::<_, u32x8>(a.as_u16x8());
303 let b = simd_cast::<_, u32x8>(b.as_u16x8());
304 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
305 transmute(simd_cast::<u32x8, u16x8>(r))
306 }
307}
308
309#[inline]
316#[target_feature(enable = "sse2")]
317#[cfg_attr(test, assert_instr(pmullw))]
318#[stable(feature = "simd_x86", since = "1.27.0")]
319pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
320 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
321}
322
323#[inline]
330#[target_feature(enable = "sse2")]
331#[cfg_attr(test, assert_instr(pmuludq))]
332#[stable(feature = "simd_x86", since = "1.27.0")]
333pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
334 unsafe {
335 let a = a.as_u64x2();
336 let b = b.as_u64x2();
337 let mask = u64x2::splat(u32::MAX.into());
338 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
339 }
340}
341
342#[inline]
351#[target_feature(enable = "sse2")]
352#[cfg_attr(test, assert_instr(psadbw))]
353#[stable(feature = "simd_x86", since = "1.27.0")]
354pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
355 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
356}
357
358#[inline]
362#[target_feature(enable = "sse2")]
363#[cfg_attr(test, assert_instr(psubb))]
364#[stable(feature = "simd_x86", since = "1.27.0")]
365pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
366 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
367}
368
369#[inline]
373#[target_feature(enable = "sse2")]
374#[cfg_attr(test, assert_instr(psubw))]
375#[stable(feature = "simd_x86", since = "1.27.0")]
376pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
377 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
378}
379
380#[inline]
384#[target_feature(enable = "sse2")]
385#[cfg_attr(test, assert_instr(psubd))]
386#[stable(feature = "simd_x86", since = "1.27.0")]
387pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
388 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
389}
390
391#[inline]
395#[target_feature(enable = "sse2")]
396#[cfg_attr(test, assert_instr(psubq))]
397#[stable(feature = "simd_x86", since = "1.27.0")]
398pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
399 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
400}
401
402#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubsb))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
411 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
412}
413
414#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsw))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
423 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubusb))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
435 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
436}
437
438#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusw))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
447 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
448}
449
450#[inline]
454#[target_feature(enable = "sse2")]
455#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
456#[rustc_legacy_const_generics(1)]
457#[stable(feature = "simd_x86", since = "1.27.0")]
458pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
459 static_assert_uimm_bits!(IMM8, 8);
460 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
461}
462
463#[inline]
466#[target_feature(enable = "sse2")]
467unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
468 const fn mask(shift: i32, i: u32) -> u32 {
469 let shift = shift as u32 & 0xff;
470 if shift > 15 { i } else { 16 - shift + i }
471 }
472 transmute::<i8x16, _>(simd_shuffle!(
473 i8x16::ZERO,
474 a.as_i8x16(),
475 [
476 mask(IMM8, 0),
477 mask(IMM8, 1),
478 mask(IMM8, 2),
479 mask(IMM8, 3),
480 mask(IMM8, 4),
481 mask(IMM8, 5),
482 mask(IMM8, 6),
483 mask(IMM8, 7),
484 mask(IMM8, 8),
485 mask(IMM8, 9),
486 mask(IMM8, 10),
487 mask(IMM8, 11),
488 mask(IMM8, 12),
489 mask(IMM8, 13),
490 mask(IMM8, 14),
491 mask(IMM8, 15),
492 ],
493 ))
494}
495
496#[inline]
500#[target_feature(enable = "sse2")]
501#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
502#[rustc_legacy_const_generics(1)]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
505 unsafe {
506 static_assert_uimm_bits!(IMM8, 8);
507 _mm_slli_si128_impl::<IMM8>(a)
508 }
509}
510
511#[inline]
515#[target_feature(enable = "sse2")]
516#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
517#[rustc_legacy_const_generics(1)]
518#[stable(feature = "simd_x86", since = "1.27.0")]
519pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
520 unsafe {
521 static_assert_uimm_bits!(IMM8, 8);
522 _mm_srli_si128_impl::<IMM8>(a)
523 }
524}
525
526#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
532#[rustc_legacy_const_generics(1)]
533#[stable(feature = "simd_x86", since = "1.27.0")]
534pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
535 static_assert_uimm_bits!(IMM8, 8);
536 unsafe {
537 if IMM8 >= 16 {
538 _mm_setzero_si128()
539 } else {
540 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
541 }
542 }
543}
544
545#[inline]
550#[target_feature(enable = "sse2")]
551#[cfg_attr(test, assert_instr(psllw))]
552#[stable(feature = "simd_x86", since = "1.27.0")]
553pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
554 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
555}
556
557#[inline]
561#[target_feature(enable = "sse2")]
562#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
563#[rustc_legacy_const_generics(1)]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
566 static_assert_uimm_bits!(IMM8, 8);
567 unsafe {
568 if IMM8 >= 32 {
569 _mm_setzero_si128()
570 } else {
571 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
572 }
573 }
574}
575
576#[inline]
581#[target_feature(enable = "sse2")]
582#[cfg_attr(test, assert_instr(pslld))]
583#[stable(feature = "simd_x86", since = "1.27.0")]
584pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
585 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
586}
587
588#[inline]
592#[target_feature(enable = "sse2")]
593#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
594#[rustc_legacy_const_generics(1)]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
597 static_assert_uimm_bits!(IMM8, 8);
598 unsafe {
599 if IMM8 >= 64 {
600 _mm_setzero_si128()
601 } else {
602 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
603 }
604 }
605}
606
607#[inline]
612#[target_feature(enable = "sse2")]
613#[cfg_attr(test, assert_instr(psllq))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
616 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
617}
618
619#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
626#[rustc_legacy_const_generics(1)]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
629 static_assert_uimm_bits!(IMM8, 8);
630 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
631}
632
633#[inline]
638#[target_feature(enable = "sse2")]
639#[cfg_attr(test, assert_instr(psraw))]
640#[stable(feature = "simd_x86", since = "1.27.0")]
641pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
642 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
643}
644
645#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
652#[rustc_legacy_const_generics(1)]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
655 static_assert_uimm_bits!(IMM8, 8);
656 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
657}
658
659#[inline]
664#[target_feature(enable = "sse2")]
665#[cfg_attr(test, assert_instr(psrad))]
666#[stable(feature = "simd_x86", since = "1.27.0")]
667pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
668 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
669}
670
671#[inline]
675#[target_feature(enable = "sse2")]
676#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
677#[rustc_legacy_const_generics(1)]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
680 static_assert_uimm_bits!(IMM8, 8);
681 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
682}
683
684#[inline]
687#[target_feature(enable = "sse2")]
688unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
689 const fn mask(shift: i32, i: u32) -> u32 {
690 if (shift as u32) > 15 {
691 i + 16
692 } else {
693 i + (shift as u32)
694 }
695 }
696 let x: i8x16 = simd_shuffle!(
697 a.as_i8x16(),
698 i8x16::ZERO,
699 [
700 mask(IMM8, 0),
701 mask(IMM8, 1),
702 mask(IMM8, 2),
703 mask(IMM8, 3),
704 mask(IMM8, 4),
705 mask(IMM8, 5),
706 mask(IMM8, 6),
707 mask(IMM8, 7),
708 mask(IMM8, 8),
709 mask(IMM8, 9),
710 mask(IMM8, 10),
711 mask(IMM8, 11),
712 mask(IMM8, 12),
713 mask(IMM8, 13),
714 mask(IMM8, 14),
715 mask(IMM8, 15),
716 ],
717 );
718 transmute(x)
719}
720
721#[inline]
726#[target_feature(enable = "sse2")]
727#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
728#[rustc_legacy_const_generics(1)]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
731 static_assert_uimm_bits!(IMM8, 8);
732 unsafe {
733 if IMM8 >= 16 {
734 _mm_setzero_si128()
735 } else {
736 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
737 }
738 }
739}
740
741#[inline]
746#[target_feature(enable = "sse2")]
747#[cfg_attr(test, assert_instr(psrlw))]
748#[stable(feature = "simd_x86", since = "1.27.0")]
749pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
750 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
751}
752
753#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
760#[rustc_legacy_const_generics(1)]
761#[stable(feature = "simd_x86", since = "1.27.0")]
762pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
763 static_assert_uimm_bits!(IMM8, 8);
764 unsafe {
765 if IMM8 >= 32 {
766 _mm_setzero_si128()
767 } else {
768 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
769 }
770 }
771}
772
773#[inline]
778#[target_feature(enable = "sse2")]
779#[cfg_attr(test, assert_instr(psrld))]
780#[stable(feature = "simd_x86", since = "1.27.0")]
781pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
782 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
783}
784
785#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
792#[rustc_legacy_const_generics(1)]
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
795 static_assert_uimm_bits!(IMM8, 8);
796 unsafe {
797 if IMM8 >= 64 {
798 _mm_setzero_si128()
799 } else {
800 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
801 }
802 }
803}
804
805#[inline]
810#[target_feature(enable = "sse2")]
811#[cfg_attr(test, assert_instr(psrlq))]
812#[stable(feature = "simd_x86", since = "1.27.0")]
813pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
814 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
815}
816
817#[inline]
822#[target_feature(enable = "sse2")]
823#[cfg_attr(test, assert_instr(andps))]
824#[stable(feature = "simd_x86", since = "1.27.0")]
825pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
826 unsafe { simd_and(a, b) }
827}
828
829#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andnps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
838 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(orps))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
850 unsafe { simd_or(a, b) }
851}
852
853#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(xorps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
862 unsafe { simd_xor(a, b) }
863}
864
865#[inline]
869#[target_feature(enable = "sse2")]
870#[cfg_attr(test, assert_instr(pcmpeqb))]
871#[stable(feature = "simd_x86", since = "1.27.0")]
872pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
873 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
874}
875
876#[inline]
880#[target_feature(enable = "sse2")]
881#[cfg_attr(test, assert_instr(pcmpeqw))]
882#[stable(feature = "simd_x86", since = "1.27.0")]
883pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
884 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
885}
886
887#[inline]
891#[target_feature(enable = "sse2")]
892#[cfg_attr(test, assert_instr(pcmpeqd))]
893#[stable(feature = "simd_x86", since = "1.27.0")]
894pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
895 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
896}
897
898#[inline]
902#[target_feature(enable = "sse2")]
903#[cfg_attr(test, assert_instr(pcmpgtb))]
904#[stable(feature = "simd_x86", since = "1.27.0")]
905pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
906 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
907}
908
909#[inline]
913#[target_feature(enable = "sse2")]
914#[cfg_attr(test, assert_instr(pcmpgtw))]
915#[stable(feature = "simd_x86", since = "1.27.0")]
916pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
917 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
918}
919
920#[inline]
924#[target_feature(enable = "sse2")]
925#[cfg_attr(test, assert_instr(pcmpgtd))]
926#[stable(feature = "simd_x86", since = "1.27.0")]
927pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
928 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
929}
930
931#[inline]
935#[target_feature(enable = "sse2")]
936#[cfg_attr(test, assert_instr(pcmpgtb))]
937#[stable(feature = "simd_x86", since = "1.27.0")]
938pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
939 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
940}
941
942#[inline]
946#[target_feature(enable = "sse2")]
947#[cfg_attr(test, assert_instr(pcmpgtw))]
948#[stable(feature = "simd_x86", since = "1.27.0")]
949pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
950 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtd))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
961 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
962}
963
964#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(cvtdq2pd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
973 unsafe {
974 let a = a.as_i32x4();
975 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
976 }
977}
978
979#[inline]
984#[target_feature(enable = "sse2")]
985#[cfg_attr(test, assert_instr(cvtsi2sd))]
986#[stable(feature = "simd_x86", since = "1.27.0")]
987pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
988 unsafe { simd_insert!(a, 0, b as f64) }
989}
990
991#[inline]
996#[target_feature(enable = "sse2")]
997#[cfg_attr(test, assert_instr(cvtdq2ps))]
998#[stable(feature = "simd_x86", since = "1.27.0")]
999pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1000 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1001}
1002
1003#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtps2dq))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1012 unsafe { transmute(cvtps2dq(a)) }
1013}
1014
1015#[inline]
1020#[target_feature(enable = "sse2")]
1021#[stable(feature = "simd_x86", since = "1.27.0")]
1022pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1023 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1024}
1025
1026#[inline]
1030#[target_feature(enable = "sse2")]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1033 unsafe { simd_extract!(a.as_i32x4(), 0) }
1034}
1035
1036#[inline]
1041#[target_feature(enable = "sse2")]
1042#[stable(feature = "simd_x86", since = "1.27.0")]
1044pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1045 unsafe { transmute(i64x2::new(e0, e1)) }
1046}
1047
1048#[inline]
1052#[target_feature(enable = "sse2")]
1053#[stable(feature = "simd_x86", since = "1.27.0")]
1055pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1056 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1057}
1058
1059#[inline]
1063#[target_feature(enable = "sse2")]
1064#[stable(feature = "simd_x86", since = "1.27.0")]
1066pub fn _mm_set_epi16(
1067 e7: i16,
1068 e6: i16,
1069 e5: i16,
1070 e4: i16,
1071 e3: i16,
1072 e2: i16,
1073 e1: i16,
1074 e0: i16,
1075) -> __m128i {
1076 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1086pub fn _mm_set_epi8(
1087 e15: i8,
1088 e14: i8,
1089 e13: i8,
1090 e12: i8,
1091 e11: i8,
1092 e10: i8,
1093 e9: i8,
1094 e8: i8,
1095 e7: i8,
1096 e6: i8,
1097 e5: i8,
1098 e4: i8,
1099 e3: i8,
1100 e2: i8,
1101 e1: i8,
1102 e0: i8,
1103) -> __m128i {
1104 unsafe {
1105 #[rustfmt::skip]
1106 transmute(i8x16::new(
1107 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1108 ))
1109 }
1110}
1111
1112#[inline]
1116#[target_feature(enable = "sse2")]
1117#[stable(feature = "simd_x86", since = "1.27.0")]
1119pub fn _mm_set1_epi64x(a: i64) -> __m128i {
1120 _mm_set_epi64x(a, a)
1121}
1122
1123#[inline]
1127#[target_feature(enable = "sse2")]
1128#[stable(feature = "simd_x86", since = "1.27.0")]
1130pub fn _mm_set1_epi32(a: i32) -> __m128i {
1131 _mm_set_epi32(a, a, a, a)
1132}
1133
1134#[inline]
1138#[target_feature(enable = "sse2")]
1139#[stable(feature = "simd_x86", since = "1.27.0")]
1141pub fn _mm_set1_epi16(a: i16) -> __m128i {
1142 _mm_set_epi16(a, a, a, a, a, a, a, a)
1143}
1144
1145#[inline]
1149#[target_feature(enable = "sse2")]
1150#[stable(feature = "simd_x86", since = "1.27.0")]
1152pub fn _mm_set1_epi8(a: i8) -> __m128i {
1153 _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1154}
1155
1156#[inline]
1160#[target_feature(enable = "sse2")]
1161#[stable(feature = "simd_x86", since = "1.27.0")]
1163pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1164 _mm_set_epi32(e0, e1, e2, e3)
1165}
1166
1167#[inline]
1171#[target_feature(enable = "sse2")]
1172#[stable(feature = "simd_x86", since = "1.27.0")]
1174pub fn _mm_setr_epi16(
1175 e7: i16,
1176 e6: i16,
1177 e5: i16,
1178 e4: i16,
1179 e3: i16,
1180 e2: i16,
1181 e1: i16,
1182 e0: i16,
1183) -> __m128i {
1184 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1185}
1186
1187#[inline]
1191#[target_feature(enable = "sse2")]
1192#[stable(feature = "simd_x86", since = "1.27.0")]
1194pub fn _mm_setr_epi8(
1195 e15: i8,
1196 e14: i8,
1197 e13: i8,
1198 e12: i8,
1199 e11: i8,
1200 e10: i8,
1201 e9: i8,
1202 e8: i8,
1203 e7: i8,
1204 e6: i8,
1205 e5: i8,
1206 e4: i8,
1207 e3: i8,
1208 e2: i8,
1209 e1: i8,
1210 e0: i8,
1211) -> __m128i {
1212 #[rustfmt::skip]
1213 _mm_set_epi8(
1214 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1215 )
1216}
1217
1218#[inline]
1222#[target_feature(enable = "sse2")]
1223#[cfg_attr(test, assert_instr(xorps))]
1224#[stable(feature = "simd_x86", since = "1.27.0")]
1225pub fn _mm_setzero_si128() -> __m128i {
1226 const { unsafe { mem::zeroed() } }
1227}
1228
1229#[inline]
1233#[target_feature(enable = "sse2")]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1236 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1237}
1238
1239#[inline]
1245#[target_feature(enable = "sse2")]
1246#[cfg_attr(test, assert_instr(movaps))]
1247#[stable(feature = "simd_x86", since = "1.27.0")]
1248pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1249 *mem_addr
1250}
1251
1252#[inline]
1258#[target_feature(enable = "sse2")]
1259#[cfg_attr(test, assert_instr(movups))]
1260#[stable(feature = "simd_x86", since = "1.27.0")]
1261pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1262 let mut dst: __m128i = _mm_undefined_si128();
1263 ptr::copy_nonoverlapping(
1264 mem_addr as *const u8,
1265 ptr::addr_of_mut!(dst) as *mut u8,
1266 mem::size_of::<__m128i>(),
1267 );
1268 dst
1269}
1270
1271#[inline]
1282#[target_feature(enable = "sse2")]
1283#[cfg_attr(test, assert_instr(maskmovdqu))]
1284#[stable(feature = "simd_x86", since = "1.27.0")]
1285pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1286 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1287}
1288
1289#[inline]
1295#[target_feature(enable = "sse2")]
1296#[cfg_attr(test, assert_instr(movaps))]
1297#[stable(feature = "simd_x86", since = "1.27.0")]
1298pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1299 *mem_addr = a;
1300}
1301
1302#[inline]
1308#[target_feature(enable = "sse2")]
1309#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1311pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1312 mem_addr.write_unaligned(a);
1313}
1314
1315#[inline]
1321#[target_feature(enable = "sse2")]
1322#[stable(feature = "simd_x86", since = "1.27.0")]
1323pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1324 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1325}
1326
1327#[inline]
1342#[target_feature(enable = "sse2")]
1343#[cfg_attr(test, assert_instr(movntdq))]
1344#[stable(feature = "simd_x86", since = "1.27.0")]
1345pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1346 crate::arch::asm!(
1347 vps!("movntdq", ",{a}"),
1348 p = in(reg) mem_addr,
1349 a = in(xmm_reg) a,
1350 options(nostack, preserves_flags),
1351 );
1352}
1353
1354#[inline]
1369#[target_feature(enable = "sse2")]
1370#[cfg_attr(test, assert_instr(movnti))]
1371#[stable(feature = "simd_x86", since = "1.27.0")]
1372pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1373 crate::arch::asm!(
1374 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1376 a = in(reg) a,
1377 options(nostack, preserves_flags),
1378 );
1379}
1380
1381#[inline]
1386#[target_feature(enable = "sse2")]
1387#[cfg_attr(
1389 all(test, not(target_env = "msvc"), target_arch = "x86_64"),
1390 assert_instr(movq)
1391)]
1392#[stable(feature = "simd_x86", since = "1.27.0")]
1393pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1394 unsafe {
1395 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1396 transmute(r)
1397 }
1398}
1399
1400#[inline]
1405#[target_feature(enable = "sse2")]
1406#[cfg_attr(test, assert_instr(packsswb))]
1407#[stable(feature = "simd_x86", since = "1.27.0")]
1408pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1409 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1410}
1411
1412#[inline]
1417#[target_feature(enable = "sse2")]
1418#[cfg_attr(test, assert_instr(packssdw))]
1419#[stable(feature = "simd_x86", since = "1.27.0")]
1420pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1421 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1422}
1423
1424#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(packuswb))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1433 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1434}
1435
1436#[inline]
1440#[target_feature(enable = "sse2")]
1441#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1442#[rustc_legacy_const_generics(1)]
1443#[stable(feature = "simd_x86", since = "1.27.0")]
1444pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1445 static_assert_uimm_bits!(IMM8, 3);
1446 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1447}
1448
1449#[inline]
1453#[target_feature(enable = "sse2")]
1454#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1455#[rustc_legacy_const_generics(2)]
1456#[stable(feature = "simd_x86", since = "1.27.0")]
1457pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1458 static_assert_uimm_bits!(IMM8, 3);
1459 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1460}
1461
1462#[inline]
1466#[target_feature(enable = "sse2")]
1467#[cfg_attr(test, assert_instr(pmovmskb))]
1468#[stable(feature = "simd_x86", since = "1.27.0")]
1469pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1470 unsafe {
1471 let z = i8x16::ZERO;
1472 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1473 simd_bitmask::<_, u16>(m) as u32 as i32
1474 }
1475}
1476
1477#[inline]
1481#[target_feature(enable = "sse2")]
1482#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1483#[rustc_legacy_const_generics(1)]
1484#[stable(feature = "simd_x86", since = "1.27.0")]
1485pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1486 static_assert_uimm_bits!(IMM8, 8);
1487 unsafe {
1488 let a = a.as_i32x4();
1489 let x: i32x4 = simd_shuffle!(
1490 a,
1491 a,
1492 [
1493 IMM8 as u32 & 0b11,
1494 (IMM8 as u32 >> 2) & 0b11,
1495 (IMM8 as u32 >> 4) & 0b11,
1496 (IMM8 as u32 >> 6) & 0b11,
1497 ],
1498 );
1499 transmute(x)
1500 }
1501}
1502
1503#[inline]
1511#[target_feature(enable = "sse2")]
1512#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1513#[rustc_legacy_const_generics(1)]
1514#[stable(feature = "simd_x86", since = "1.27.0")]
1515pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1516 static_assert_uimm_bits!(IMM8, 8);
1517 unsafe {
1518 let a = a.as_i16x8();
1519 let x: i16x8 = simd_shuffle!(
1520 a,
1521 a,
1522 [
1523 0,
1524 1,
1525 2,
1526 3,
1527 (IMM8 as u32 & 0b11) + 4,
1528 ((IMM8 as u32 >> 2) & 0b11) + 4,
1529 ((IMM8 as u32 >> 4) & 0b11) + 4,
1530 ((IMM8 as u32 >> 6) & 0b11) + 4,
1531 ],
1532 );
1533 transmute(x)
1534 }
1535}
1536
1537#[inline]
1545#[target_feature(enable = "sse2")]
1546#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1547#[rustc_legacy_const_generics(1)]
1548#[stable(feature = "simd_x86", since = "1.27.0")]
1549pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1550 static_assert_uimm_bits!(IMM8, 8);
1551 unsafe {
1552 let a = a.as_i16x8();
1553 let x: i16x8 = simd_shuffle!(
1554 a,
1555 a,
1556 [
1557 IMM8 as u32 & 0b11,
1558 (IMM8 as u32 >> 2) & 0b11,
1559 (IMM8 as u32 >> 4) & 0b11,
1560 (IMM8 as u32 >> 6) & 0b11,
1561 4,
1562 5,
1563 6,
1564 7,
1565 ],
1566 );
1567 transmute(x)
1568 }
1569}
1570
1571#[inline]
1575#[target_feature(enable = "sse2")]
1576#[cfg_attr(test, assert_instr(punpckhbw))]
1577#[stable(feature = "simd_x86", since = "1.27.0")]
1578pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1579 unsafe {
1580 transmute::<i8x16, _>(simd_shuffle!(
1581 a.as_i8x16(),
1582 b.as_i8x16(),
1583 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1584 ))
1585 }
1586}
1587
1588#[inline]
1592#[target_feature(enable = "sse2")]
1593#[cfg_attr(test, assert_instr(punpckhwd))]
1594#[stable(feature = "simd_x86", since = "1.27.0")]
1595pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1596 unsafe {
1597 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1598 transmute::<i16x8, _>(x)
1599 }
1600}
1601
1602#[inline]
1606#[target_feature(enable = "sse2")]
1607#[cfg_attr(test, assert_instr(unpckhps))]
1608#[stable(feature = "simd_x86", since = "1.27.0")]
1609pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1610 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1611}
1612
1613#[inline]
1617#[target_feature(enable = "sse2")]
1618#[cfg_attr(test, assert_instr(unpckhpd))]
1619#[stable(feature = "simd_x86", since = "1.27.0")]
1620pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1621 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1622}
1623
1624#[inline]
1628#[target_feature(enable = "sse2")]
1629#[cfg_attr(test, assert_instr(punpcklbw))]
1630#[stable(feature = "simd_x86", since = "1.27.0")]
1631pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1632 unsafe {
1633 transmute::<i8x16, _>(simd_shuffle!(
1634 a.as_i8x16(),
1635 b.as_i8x16(),
1636 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1637 ))
1638 }
1639}
1640
1641#[inline]
1645#[target_feature(enable = "sse2")]
1646#[cfg_attr(test, assert_instr(punpcklwd))]
1647#[stable(feature = "simd_x86", since = "1.27.0")]
1648pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1649 unsafe {
1650 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1651 transmute::<i16x8, _>(x)
1652 }
1653}
1654
1655#[inline]
1659#[target_feature(enable = "sse2")]
1660#[cfg_attr(test, assert_instr(unpcklps))]
1661#[stable(feature = "simd_x86", since = "1.27.0")]
1662pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1663 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1664}
1665
1666#[inline]
1670#[target_feature(enable = "sse2")]
1671#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
1672#[stable(feature = "simd_x86", since = "1.27.0")]
1673pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1674 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1675}
1676
1677#[inline]
1682#[target_feature(enable = "sse2")]
1683#[cfg_attr(test, assert_instr(addsd))]
1684#[stable(feature = "simd_x86", since = "1.27.0")]
1685pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1686 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1687}
1688
1689#[inline]
1694#[target_feature(enable = "sse2")]
1695#[cfg_attr(test, assert_instr(addpd))]
1696#[stable(feature = "simd_x86", since = "1.27.0")]
1697pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1698 unsafe { simd_add(a, b) }
1699}
1700
1701#[inline]
1706#[target_feature(enable = "sse2")]
1707#[cfg_attr(test, assert_instr(divsd))]
1708#[stable(feature = "simd_x86", since = "1.27.0")]
1709pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1710 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1711}
1712
1713#[inline]
1718#[target_feature(enable = "sse2")]
1719#[cfg_attr(test, assert_instr(divpd))]
1720#[stable(feature = "simd_x86", since = "1.27.0")]
1721pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1722 unsafe { simd_div(a, b) }
1723}
1724
1725#[inline]
1730#[target_feature(enable = "sse2")]
1731#[cfg_attr(test, assert_instr(maxsd))]
1732#[stable(feature = "simd_x86", since = "1.27.0")]
1733pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1734 unsafe { maxsd(a, b) }
1735}
1736
1737#[inline]
1742#[target_feature(enable = "sse2")]
1743#[cfg_attr(test, assert_instr(maxpd))]
1744#[stable(feature = "simd_x86", since = "1.27.0")]
1745pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1746 unsafe { maxpd(a, b) }
1747}
1748
1749#[inline]
1754#[target_feature(enable = "sse2")]
1755#[cfg_attr(test, assert_instr(minsd))]
1756#[stable(feature = "simd_x86", since = "1.27.0")]
1757pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1758 unsafe { minsd(a, b) }
1759}
1760
1761#[inline]
1766#[target_feature(enable = "sse2")]
1767#[cfg_attr(test, assert_instr(minpd))]
1768#[stable(feature = "simd_x86", since = "1.27.0")]
1769pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1770 unsafe { minpd(a, b) }
1771}
1772
1773#[inline]
1778#[target_feature(enable = "sse2")]
1779#[cfg_attr(test, assert_instr(mulsd))]
1780#[stable(feature = "simd_x86", since = "1.27.0")]
1781pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1782 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1783}
1784
1785#[inline]
1790#[target_feature(enable = "sse2")]
1791#[cfg_attr(test, assert_instr(mulpd))]
1792#[stable(feature = "simd_x86", since = "1.27.0")]
1793pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1794 unsafe { simd_mul(a, b) }
1795}
1796
1797#[inline]
1802#[target_feature(enable = "sse2")]
1803#[cfg_attr(test, assert_instr(sqrtsd))]
1804#[stable(feature = "simd_x86", since = "1.27.0")]
1805pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1806 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1807}
1808
1809#[inline]
1813#[target_feature(enable = "sse2")]
1814#[cfg_attr(test, assert_instr(sqrtpd))]
1815#[stable(feature = "simd_x86", since = "1.27.0")]
1816pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1817 unsafe { simd_fsqrt(a) }
1818}
1819
1820#[inline]
1825#[target_feature(enable = "sse2")]
1826#[cfg_attr(test, assert_instr(subsd))]
1827#[stable(feature = "simd_x86", since = "1.27.0")]
1828pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1829 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1830}
1831
1832#[inline]
1837#[target_feature(enable = "sse2")]
1838#[cfg_attr(test, assert_instr(subpd))]
1839#[stable(feature = "simd_x86", since = "1.27.0")]
1840pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1841 unsafe { simd_sub(a, b) }
1842}
1843
1844#[inline]
1849#[target_feature(enable = "sse2")]
1850#[cfg_attr(test, assert_instr(andps))]
1851#[stable(feature = "simd_x86", since = "1.27.0")]
1852pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1853 unsafe {
1854 let a: __m128i = transmute(a);
1855 let b: __m128i = transmute(b);
1856 transmute(_mm_and_si128(a, b))
1857 }
1858}
1859
1860#[inline]
1864#[target_feature(enable = "sse2")]
1865#[cfg_attr(test, assert_instr(andnps))]
1866#[stable(feature = "simd_x86", since = "1.27.0")]
1867pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1868 unsafe {
1869 let a: __m128i = transmute(a);
1870 let b: __m128i = transmute(b);
1871 transmute(_mm_andnot_si128(a, b))
1872 }
1873}
1874
1875#[inline]
1879#[target_feature(enable = "sse2")]
1880#[cfg_attr(test, assert_instr(orps))]
1881#[stable(feature = "simd_x86", since = "1.27.0")]
1882pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1883 unsafe {
1884 let a: __m128i = transmute(a);
1885 let b: __m128i = transmute(b);
1886 transmute(_mm_or_si128(a, b))
1887 }
1888}
1889
1890#[inline]
1894#[target_feature(enable = "sse2")]
1895#[cfg_attr(test, assert_instr(xorps))]
1896#[stable(feature = "simd_x86", since = "1.27.0")]
1897pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1898 unsafe {
1899 let a: __m128i = transmute(a);
1900 let b: __m128i = transmute(b);
1901 transmute(_mm_xor_si128(a, b))
1902 }
1903}
1904
1905#[inline]
1910#[target_feature(enable = "sse2")]
1911#[cfg_attr(test, assert_instr(cmpeqsd))]
1912#[stable(feature = "simd_x86", since = "1.27.0")]
1913pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1914 unsafe { cmpsd(a, b, 0) }
1915}
1916
1917#[inline]
1922#[target_feature(enable = "sse2")]
1923#[cfg_attr(test, assert_instr(cmpltsd))]
1924#[stable(feature = "simd_x86", since = "1.27.0")]
1925pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1926 unsafe { cmpsd(a, b, 1) }
1927}
1928
1929#[inline]
1934#[target_feature(enable = "sse2")]
1935#[cfg_attr(test, assert_instr(cmplesd))]
1936#[stable(feature = "simd_x86", since = "1.27.0")]
1937pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1938 unsafe { cmpsd(a, b, 2) }
1939}
1940
1941#[inline]
1946#[target_feature(enable = "sse2")]
1947#[cfg_attr(test, assert_instr(cmpltsd))]
1948#[stable(feature = "simd_x86", since = "1.27.0")]
1949pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1950 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1951}
1952
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(cmplesd))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1962 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1963}
1964
1965#[inline]
1972#[target_feature(enable = "sse2")]
1973#[cfg_attr(test, assert_instr(cmpordsd))]
1974#[stable(feature = "simd_x86", since = "1.27.0")]
1975pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
1976 unsafe { cmpsd(a, b, 7) }
1977}
1978
1979#[inline]
1985#[target_feature(enable = "sse2")]
1986#[cfg_attr(test, assert_instr(cmpunordsd))]
1987#[stable(feature = "simd_x86", since = "1.27.0")]
1988pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
1989 unsafe { cmpsd(a, b, 3) }
1990}
1991
1992#[inline]
1997#[target_feature(enable = "sse2")]
1998#[cfg_attr(test, assert_instr(cmpneqsd))]
1999#[stable(feature = "simd_x86", since = "1.27.0")]
2000pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2001 unsafe { cmpsd(a, b, 4) }
2002}
2003
2004#[inline]
2009#[target_feature(enable = "sse2")]
2010#[cfg_attr(test, assert_instr(cmpnltsd))]
2011#[stable(feature = "simd_x86", since = "1.27.0")]
2012pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2013 unsafe { cmpsd(a, b, 5) }
2014}
2015
2016#[inline]
2021#[target_feature(enable = "sse2")]
2022#[cfg_attr(test, assert_instr(cmpnlesd))]
2023#[stable(feature = "simd_x86", since = "1.27.0")]
2024pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2025 unsafe { cmpsd(a, b, 6) }
2026}
2027
2028#[inline]
2033#[target_feature(enable = "sse2")]
2034#[cfg_attr(test, assert_instr(cmpnltsd))]
2035#[stable(feature = "simd_x86", since = "1.27.0")]
2036pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2037 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2038}
2039
2040#[inline]
2045#[target_feature(enable = "sse2")]
2046#[cfg_attr(test, assert_instr(cmpnlesd))]
2047#[stable(feature = "simd_x86", since = "1.27.0")]
2048pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2049 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2050}
2051
2052#[inline]
2056#[target_feature(enable = "sse2")]
2057#[cfg_attr(test, assert_instr(cmpeqpd))]
2058#[stable(feature = "simd_x86", since = "1.27.0")]
2059pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2060 unsafe { cmppd(a, b, 0) }
2061}
2062
2063#[inline]
2067#[target_feature(enable = "sse2")]
2068#[cfg_attr(test, assert_instr(cmpltpd))]
2069#[stable(feature = "simd_x86", since = "1.27.0")]
2070pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2071 unsafe { cmppd(a, b, 1) }
2072}
2073
2074#[inline]
2078#[target_feature(enable = "sse2")]
2079#[cfg_attr(test, assert_instr(cmplepd))]
2080#[stable(feature = "simd_x86", since = "1.27.0")]
2081pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2082 unsafe { cmppd(a, b, 2) }
2083}
2084
2085#[inline]
2089#[target_feature(enable = "sse2")]
2090#[cfg_attr(test, assert_instr(cmpltpd))]
2091#[stable(feature = "simd_x86", since = "1.27.0")]
2092pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2093 _mm_cmplt_pd(b, a)
2094}
2095
2096#[inline]
2100#[target_feature(enable = "sse2")]
2101#[cfg_attr(test, assert_instr(cmplepd))]
2102#[stable(feature = "simd_x86", since = "1.27.0")]
2103pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2104 _mm_cmple_pd(b, a)
2105}
2106
2107#[inline]
2111#[target_feature(enable = "sse2")]
2112#[cfg_attr(test, assert_instr(cmpordpd))]
2113#[stable(feature = "simd_x86", since = "1.27.0")]
2114pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2115 unsafe { cmppd(a, b, 7) }
2116}
2117
2118#[inline]
2122#[target_feature(enable = "sse2")]
2123#[cfg_attr(test, assert_instr(cmpunordpd))]
2124#[stable(feature = "simd_x86", since = "1.27.0")]
2125pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2126 unsafe { cmppd(a, b, 3) }
2127}
2128
2129#[inline]
2133#[target_feature(enable = "sse2")]
2134#[cfg_attr(test, assert_instr(cmpneqpd))]
2135#[stable(feature = "simd_x86", since = "1.27.0")]
2136pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2137 unsafe { cmppd(a, b, 4) }
2138}
2139
2140#[inline]
2144#[target_feature(enable = "sse2")]
2145#[cfg_attr(test, assert_instr(cmpnltpd))]
2146#[stable(feature = "simd_x86", since = "1.27.0")]
2147pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2148 unsafe { cmppd(a, b, 5) }
2149}
2150
2151#[inline]
2155#[target_feature(enable = "sse2")]
2156#[cfg_attr(test, assert_instr(cmpnlepd))]
2157#[stable(feature = "simd_x86", since = "1.27.0")]
2158pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2159 unsafe { cmppd(a, b, 6) }
2160}
2161
2162#[inline]
2166#[target_feature(enable = "sse2")]
2167#[cfg_attr(test, assert_instr(cmpnltpd))]
2168#[stable(feature = "simd_x86", since = "1.27.0")]
2169pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2170 _mm_cmpnlt_pd(b, a)
2171}
2172
2173#[inline]
2178#[target_feature(enable = "sse2")]
2179#[cfg_attr(test, assert_instr(cmpnlepd))]
2180#[stable(feature = "simd_x86", since = "1.27.0")]
2181pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2182 _mm_cmpnle_pd(b, a)
2183}
2184
2185#[inline]
2189#[target_feature(enable = "sse2")]
2190#[cfg_attr(test, assert_instr(comisd))]
2191#[stable(feature = "simd_x86", since = "1.27.0")]
2192pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2193 unsafe { comieqsd(a, b) }
2194}
2195
2196#[inline]
2200#[target_feature(enable = "sse2")]
2201#[cfg_attr(test, assert_instr(comisd))]
2202#[stable(feature = "simd_x86", since = "1.27.0")]
2203pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2204 unsafe { comiltsd(a, b) }
2205}
2206
2207#[inline]
2211#[target_feature(enable = "sse2")]
2212#[cfg_attr(test, assert_instr(comisd))]
2213#[stable(feature = "simd_x86", since = "1.27.0")]
2214pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2215 unsafe { comilesd(a, b) }
2216}
2217
2218#[inline]
2222#[target_feature(enable = "sse2")]
2223#[cfg_attr(test, assert_instr(comisd))]
2224#[stable(feature = "simd_x86", since = "1.27.0")]
2225pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2226 unsafe { comigtsd(a, b) }
2227}
2228
2229#[inline]
2233#[target_feature(enable = "sse2")]
2234#[cfg_attr(test, assert_instr(comisd))]
2235#[stable(feature = "simd_x86", since = "1.27.0")]
2236pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2237 unsafe { comigesd(a, b) }
2238}
2239
2240#[inline]
2244#[target_feature(enable = "sse2")]
2245#[cfg_attr(test, assert_instr(comisd))]
2246#[stable(feature = "simd_x86", since = "1.27.0")]
2247pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2248 unsafe { comineqsd(a, b) }
2249}
2250
2251#[inline]
2255#[target_feature(enable = "sse2")]
2256#[cfg_attr(test, assert_instr(ucomisd))]
2257#[stable(feature = "simd_x86", since = "1.27.0")]
2258pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2259 unsafe { ucomieqsd(a, b) }
2260}
2261
2262#[inline]
2266#[target_feature(enable = "sse2")]
2267#[cfg_attr(test, assert_instr(ucomisd))]
2268#[stable(feature = "simd_x86", since = "1.27.0")]
2269pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2270 unsafe { ucomiltsd(a, b) }
2271}
2272
2273#[inline]
2277#[target_feature(enable = "sse2")]
2278#[cfg_attr(test, assert_instr(ucomisd))]
2279#[stable(feature = "simd_x86", since = "1.27.0")]
2280pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2281 unsafe { ucomilesd(a, b) }
2282}
2283
2284#[inline]
2288#[target_feature(enable = "sse2")]
2289#[cfg_attr(test, assert_instr(ucomisd))]
2290#[stable(feature = "simd_x86", since = "1.27.0")]
2291pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2292 unsafe { ucomigtsd(a, b) }
2293}
2294
2295#[inline]
2299#[target_feature(enable = "sse2")]
2300#[cfg_attr(test, assert_instr(ucomisd))]
2301#[stable(feature = "simd_x86", since = "1.27.0")]
2302pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2303 unsafe { ucomigesd(a, b) }
2304}
2305
2306#[inline]
2310#[target_feature(enable = "sse2")]
2311#[cfg_attr(test, assert_instr(ucomisd))]
2312#[stable(feature = "simd_x86", since = "1.27.0")]
2313pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2314 unsafe { ucomineqsd(a, b) }
2315}
2316
2317#[inline]
2322#[target_feature(enable = "sse2")]
2323#[cfg_attr(test, assert_instr(cvtpd2ps))]
2324#[stable(feature = "simd_x86", since = "1.27.0")]
2325pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2326 unsafe {
2327 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2328 let zero = f32x2::ZERO;
2329 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2330 }
2331}
2332
2333#[inline]
2339#[target_feature(enable = "sse2")]
2340#[cfg_attr(test, assert_instr(cvtps2pd))]
2341#[stable(feature = "simd_x86", since = "1.27.0")]
2342pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2343 unsafe {
2344 let a = a.as_f32x4();
2345 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2346 }
2347}
2348
2349#[inline]
2354#[target_feature(enable = "sse2")]
2355#[cfg_attr(test, assert_instr(cvtpd2dq))]
2356#[stable(feature = "simd_x86", since = "1.27.0")]
2357pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2358 unsafe { transmute(cvtpd2dq(a)) }
2359}
2360
2361#[inline]
2366#[target_feature(enable = "sse2")]
2367#[cfg_attr(test, assert_instr(cvtsd2si))]
2368#[stable(feature = "simd_x86", since = "1.27.0")]
2369pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2370 unsafe { cvtsd2si(a) }
2371}
2372
2373#[inline]
2380#[target_feature(enable = "sse2")]
2381#[cfg_attr(test, assert_instr(cvtsd2ss))]
2382#[stable(feature = "simd_x86", since = "1.27.0")]
2383pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2384 unsafe { cvtsd2ss(a, b) }
2385}
2386
2387#[inline]
2391#[target_feature(enable = "sse2")]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2394 unsafe { simd_extract!(a, 0) }
2395}
2396
2397#[inline]
2404#[target_feature(enable = "sse2")]
2405#[cfg_attr(test, assert_instr(cvtss2sd))]
2406#[stable(feature = "simd_x86", since = "1.27.0")]
2407pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2408 unsafe { cvtss2sd(a, b) }
2409}
2410
2411#[inline]
2416#[target_feature(enable = "sse2")]
2417#[cfg_attr(test, assert_instr(cvttpd2dq))]
2418#[stable(feature = "simd_x86", since = "1.27.0")]
2419pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2420 unsafe { transmute(cvttpd2dq(a)) }
2421}
2422
2423#[inline]
2428#[target_feature(enable = "sse2")]
2429#[cfg_attr(test, assert_instr(cvttsd2si))]
2430#[stable(feature = "simd_x86", since = "1.27.0")]
2431pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2432 unsafe { cvttsd2si(a) }
2433}
2434
2435#[inline]
2440#[target_feature(enable = "sse2")]
2441#[cfg_attr(test, assert_instr(cvttps2dq))]
2442#[stable(feature = "simd_x86", since = "1.27.0")]
2443pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2444 unsafe { transmute(cvttps2dq(a)) }
2445}
2446
2447#[inline]
2452#[target_feature(enable = "sse2")]
2453#[stable(feature = "simd_x86", since = "1.27.0")]
2454pub fn _mm_set_sd(a: f64) -> __m128d {
2455 _mm_set_pd(0.0, a)
2456}
2457
2458#[inline]
2463#[target_feature(enable = "sse2")]
2464#[stable(feature = "simd_x86", since = "1.27.0")]
2465pub fn _mm_set1_pd(a: f64) -> __m128d {
2466 _mm_set_pd(a, a)
2467}
2468
2469#[inline]
2474#[target_feature(enable = "sse2")]
2475#[stable(feature = "simd_x86", since = "1.27.0")]
2476pub fn _mm_set_pd1(a: f64) -> __m128d {
2477 _mm_set_pd(a, a)
2478}
2479
2480#[inline]
2485#[target_feature(enable = "sse2")]
2486#[stable(feature = "simd_x86", since = "1.27.0")]
2487pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2488 __m128d([b, a])
2489}
2490
2491#[inline]
2496#[target_feature(enable = "sse2")]
2497#[stable(feature = "simd_x86", since = "1.27.0")]
2498pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2499 _mm_set_pd(b, a)
2500}
2501
2502#[inline]
2507#[target_feature(enable = "sse2")]
2508#[cfg_attr(test, assert_instr(xorp))]
2509#[stable(feature = "simd_x86", since = "1.27.0")]
2510pub fn _mm_setzero_pd() -> __m128d {
2511 const { unsafe { mem::zeroed() } }
2512}
2513
2514#[inline]
2521#[target_feature(enable = "sse2")]
2522#[cfg_attr(test, assert_instr(movmskpd))]
2523#[stable(feature = "simd_x86", since = "1.27.0")]
2524pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2525 unsafe {
2528 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2529 simd_bitmask::<i64x2, u8>(mask).into()
2530 }
2531}
2532
2533#[inline]
2540#[target_feature(enable = "sse2")]
2541#[cfg_attr(test, assert_instr(movaps))]
2542#[stable(feature = "simd_x86", since = "1.27.0")]
2543#[allow(clippy::cast_ptr_alignment)]
2544pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2545 *(mem_addr as *const __m128d)
2546}
2547
2548#[inline]
2553#[target_feature(enable = "sse2")]
2554#[cfg_attr(test, assert_instr(movsd))]
2555#[stable(feature = "simd_x86", since = "1.27.0")]
2556pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2557 _mm_setr_pd(*mem_addr, 0.)
2558}
2559
2560#[inline]
2566#[target_feature(enable = "sse2")]
2567#[cfg_attr(test, assert_instr(movhps))]
2568#[stable(feature = "simd_x86", since = "1.27.0")]
2569pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2570 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2571}
2572
2573#[inline]
2579#[target_feature(enable = "sse2")]
2580#[cfg_attr(test, assert_instr(movlps))]
2581#[stable(feature = "simd_x86", since = "1.27.0")]
2582pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2583 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2584}
2585
2586#[inline]
2602#[target_feature(enable = "sse2")]
2603#[cfg_attr(test, assert_instr(movntpd))]
2604#[stable(feature = "simd_x86", since = "1.27.0")]
2605#[allow(clippy::cast_ptr_alignment)]
2606pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
2607 crate::arch::asm!(
2608 vps!("movntpd", ",{a}"),
2609 p = in(reg) mem_addr,
2610 a = in(xmm_reg) a,
2611 options(nostack, preserves_flags),
2612 );
2613}
2614
2615#[inline]
2620#[target_feature(enable = "sse2")]
2621#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2622#[stable(feature = "simd_x86", since = "1.27.0")]
2623pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2624 *mem_addr = simd_extract!(a, 0)
2625}
2626
2627#[inline]
2633#[target_feature(enable = "sse2")]
2634#[cfg_attr(test, assert_instr(movaps))]
2635#[stable(feature = "simd_x86", since = "1.27.0")]
2636#[allow(clippy::cast_ptr_alignment)]
2637pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2638 *(mem_addr as *mut __m128d) = a;
2639}
2640
2641#[inline]
2647#[target_feature(enable = "sse2")]
2648#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2650pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2651 mem_addr.cast::<__m128d>().write_unaligned(a);
2652}
2653
2654#[inline]
2660#[target_feature(enable = "sse2")]
2661#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2662pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2663 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2664}
2665
2666#[inline]
2672#[target_feature(enable = "sse2")]
2673#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2674pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2675 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2676}
2677
2678#[inline]
2684#[target_feature(enable = "sse2")]
2685#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2686pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2687 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2688}
2689
2690#[inline]
2696#[target_feature(enable = "sse2")]
2697#[stable(feature = "simd_x86", since = "1.27.0")]
2698#[allow(clippy::cast_ptr_alignment)]
2699pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2700 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2701 *(mem_addr as *mut __m128d) = b;
2702}
2703
2704#[inline]
2710#[target_feature(enable = "sse2")]
2711#[stable(feature = "simd_x86", since = "1.27.0")]
2712#[allow(clippy::cast_ptr_alignment)]
2713pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2714 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2715 *(mem_addr as *mut __m128d) = b;
2716}
2717
2718#[inline]
2725#[target_feature(enable = "sse2")]
2726#[stable(feature = "simd_x86", since = "1.27.0")]
2727#[allow(clippy::cast_ptr_alignment)]
2728pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2729 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2730 *(mem_addr as *mut __m128d) = b;
2731}
2732
2733#[inline]
2738#[target_feature(enable = "sse2")]
2739#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhps))]
2740#[stable(feature = "simd_x86", since = "1.27.0")]
2741pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2742 *mem_addr = simd_extract!(a, 1);
2743}
2744
2745#[inline]
2750#[target_feature(enable = "sse2")]
2751#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2752#[stable(feature = "simd_x86", since = "1.27.0")]
2753pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2754 *mem_addr = simd_extract!(a, 0);
2755}
2756
2757#[inline]
2762#[target_feature(enable = "sse2")]
2763#[stable(feature = "simd_x86", since = "1.27.0")]
2765pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2766 let d = *mem_addr;
2767 _mm_setr_pd(d, d)
2768}
2769
2770#[inline]
2775#[target_feature(enable = "sse2")]
2776#[stable(feature = "simd_x86", since = "1.27.0")]
2778pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2779 _mm_load1_pd(mem_addr)
2780}
2781
2782#[inline]
2788#[target_feature(enable = "sse2")]
2789#[cfg_attr(test, assert_instr(movaps))]
2790#[stable(feature = "simd_x86", since = "1.27.0")]
2791pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2792 let a = _mm_load_pd(mem_addr);
2793 simd_shuffle!(a, a, [1, 0])
2794}
2795
2796#[inline]
2802#[target_feature(enable = "sse2")]
2803#[cfg_attr(test, assert_instr(movups))]
2804#[stable(feature = "simd_x86", since = "1.27.0")]
2805pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2806 let mut dst = _mm_undefined_pd();
2807 ptr::copy_nonoverlapping(
2808 mem_addr as *const u8,
2809 ptr::addr_of_mut!(dst) as *mut u8,
2810 mem::size_of::<__m128d>(),
2811 );
2812 dst
2813}
2814
2815#[inline]
2821#[target_feature(enable = "sse2")]
2822#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2823pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2824 transmute(i16x8::new(
2825 ptr::read_unaligned(mem_addr as *const i16),
2826 0,
2827 0,
2828 0,
2829 0,
2830 0,
2831 0,
2832 0,
2833 ))
2834}
2835
2836#[inline]
2842#[target_feature(enable = "sse2")]
2843#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2844pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2845 transmute(i32x4::new(
2846 ptr::read_unaligned(mem_addr as *const i32),
2847 0,
2848 0,
2849 0,
2850 ))
2851}
2852
2853#[inline]
2859#[target_feature(enable = "sse2")]
2860#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2861pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2862 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2863}
2864
2865#[inline]
2871#[target_feature(enable = "sse2")]
2872#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
2873#[rustc_legacy_const_generics(2)]
2874#[stable(feature = "simd_x86", since = "1.27.0")]
2875pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
2876 static_assert_uimm_bits!(MASK, 8);
2877 unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
2878}
2879
2880#[inline]
2886#[target_feature(enable = "sse2")]
2887#[cfg_attr(test, assert_instr(movsd))]
2888#[stable(feature = "simd_x86", since = "1.27.0")]
2889pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2890 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2891}
2892
2893#[inline]
2898#[target_feature(enable = "sse2")]
2899#[stable(feature = "simd_x86", since = "1.27.0")]
2900pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2901 unsafe { transmute(a) }
2902}
2903
2904#[inline]
2909#[target_feature(enable = "sse2")]
2910#[stable(feature = "simd_x86", since = "1.27.0")]
2911pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2912 unsafe { transmute(a) }
2913}
2914
2915#[inline]
2920#[target_feature(enable = "sse2")]
2921#[stable(feature = "simd_x86", since = "1.27.0")]
2922pub fn _mm_castps_pd(a: __m128) -> __m128d {
2923 unsafe { transmute(a) }
2924}
2925
2926#[inline]
2931#[target_feature(enable = "sse2")]
2932#[stable(feature = "simd_x86", since = "1.27.0")]
2933pub fn _mm_castps_si128(a: __m128) -> __m128i {
2934 unsafe { transmute(a) }
2935}
2936
2937#[inline]
2942#[target_feature(enable = "sse2")]
2943#[stable(feature = "simd_x86", since = "1.27.0")]
2944pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2945 unsafe { transmute(a) }
2946}
2947
2948#[inline]
2953#[target_feature(enable = "sse2")]
2954#[stable(feature = "simd_x86", since = "1.27.0")]
2955pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2956 unsafe { transmute(a) }
2957}
2958
2959#[inline]
2965#[target_feature(enable = "sse2")]
2966#[stable(feature = "simd_x86", since = "1.27.0")]
2967pub fn _mm_undefined_pd() -> __m128d {
2968 const { unsafe { mem::zeroed() } }
2969}
2970
2971#[inline]
2977#[target_feature(enable = "sse2")]
2978#[stable(feature = "simd_x86", since = "1.27.0")]
2979pub fn _mm_undefined_si128() -> __m128i {
2980 const { unsafe { mem::zeroed() } }
2981}
2982
2983#[inline]
2991#[target_feature(enable = "sse2")]
2992#[cfg_attr(test, assert_instr(unpckhpd))]
2993#[stable(feature = "simd_x86", since = "1.27.0")]
2994pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
2995 unsafe { simd_shuffle!(a, b, [1, 3]) }
2996}
2997
2998#[inline]
3006#[target_feature(enable = "sse2")]
3007#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
3008#[stable(feature = "simd_x86", since = "1.27.0")]
3009pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
3010 unsafe { simd_shuffle!(a, b, [0, 2]) }
3011}
3012
3013#[allow(improper_ctypes)]
3014unsafe extern "C" {
3015 #[link_name = "llvm.x86.sse2.pause"]
3016 fn pause();
3017 #[link_name = "llvm.x86.sse2.clflush"]
3018 fn clflush(p: *const u8);
3019 #[link_name = "llvm.x86.sse2.lfence"]
3020 fn lfence();
3021 #[link_name = "llvm.x86.sse2.mfence"]
3022 fn mfence();
3023 #[link_name = "llvm.x86.sse2.pmadd.wd"]
3024 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
3025 #[link_name = "llvm.x86.sse2.psad.bw"]
3026 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
3027 #[link_name = "llvm.x86.sse2.psll.w"]
3028 fn psllw(a: i16x8, count: i16x8) -> i16x8;
3029 #[link_name = "llvm.x86.sse2.psll.d"]
3030 fn pslld(a: i32x4, count: i32x4) -> i32x4;
3031 #[link_name = "llvm.x86.sse2.psll.q"]
3032 fn psllq(a: i64x2, count: i64x2) -> i64x2;
3033 #[link_name = "llvm.x86.sse2.psra.w"]
3034 fn psraw(a: i16x8, count: i16x8) -> i16x8;
3035 #[link_name = "llvm.x86.sse2.psra.d"]
3036 fn psrad(a: i32x4, count: i32x4) -> i32x4;
3037 #[link_name = "llvm.x86.sse2.psrl.w"]
3038 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
3039 #[link_name = "llvm.x86.sse2.psrl.d"]
3040 fn psrld(a: i32x4, count: i32x4) -> i32x4;
3041 #[link_name = "llvm.x86.sse2.psrl.q"]
3042 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
3043 #[link_name = "llvm.x86.sse2.cvtps2dq"]
3044 fn cvtps2dq(a: __m128) -> i32x4;
3045 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3046 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
3047 #[link_name = "llvm.x86.sse2.packsswb.128"]
3048 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
3049 #[link_name = "llvm.x86.sse2.packssdw.128"]
3050 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
3051 #[link_name = "llvm.x86.sse2.packuswb.128"]
3052 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
3053 #[link_name = "llvm.x86.sse2.max.sd"]
3054 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
3055 #[link_name = "llvm.x86.sse2.max.pd"]
3056 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
3057 #[link_name = "llvm.x86.sse2.min.sd"]
3058 fn minsd(a: __m128d, b: __m128d) -> __m128d;
3059 #[link_name = "llvm.x86.sse2.min.pd"]
3060 fn minpd(a: __m128d, b: __m128d) -> __m128d;
3061 #[link_name = "llvm.x86.sse2.cmp.sd"]
3062 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3063 #[link_name = "llvm.x86.sse2.cmp.pd"]
3064 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3065 #[link_name = "llvm.x86.sse2.comieq.sd"]
3066 fn comieqsd(a: __m128d, b: __m128d) -> i32;
3067 #[link_name = "llvm.x86.sse2.comilt.sd"]
3068 fn comiltsd(a: __m128d, b: __m128d) -> i32;
3069 #[link_name = "llvm.x86.sse2.comile.sd"]
3070 fn comilesd(a: __m128d, b: __m128d) -> i32;
3071 #[link_name = "llvm.x86.sse2.comigt.sd"]
3072 fn comigtsd(a: __m128d, b: __m128d) -> i32;
3073 #[link_name = "llvm.x86.sse2.comige.sd"]
3074 fn comigesd(a: __m128d, b: __m128d) -> i32;
3075 #[link_name = "llvm.x86.sse2.comineq.sd"]
3076 fn comineqsd(a: __m128d, b: __m128d) -> i32;
3077 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3078 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3079 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3080 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3081 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3082 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
3083 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3084 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3085 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3086 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
3087 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3088 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3089 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3090 fn cvtpd2dq(a: __m128d) -> i32x4;
3091 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3092 fn cvtsd2si(a: __m128d) -> i32;
3093 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3094 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3095 #[link_name = "llvm.x86.sse2.cvtss2sd"]
3096 fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
3097 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3098 fn cvttpd2dq(a: __m128d) -> i32x4;
3099 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3100 fn cvttsd2si(a: __m128d) -> i32;
3101 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3102 fn cvttps2dq(a: __m128) -> i32x4;
3103}
3104
3105#[cfg(test)]
3106mod tests {
3107 use crate::{
3108 core_arch::{simd::*, x86::*},
3109 hint::black_box,
3110 };
3111 use std::{
3112 boxed, f32, f64,
3113 mem::{self, transmute},
3114 ptr,
3115 };
3116 use stdarch_test::simd_test;
3117
3118 const NAN: f64 = f64::NAN;
3119
3120 #[test]
3121 fn test_mm_pause() {
3122 unsafe { _mm_pause() }
3123 }
3124
3125 #[simd_test(enable = "sse2")]
3126 unsafe fn test_mm_clflush() {
3127 let x = 0_u8;
3128 _mm_clflush(ptr::addr_of!(x));
3129 }
3130
3131 #[simd_test(enable = "sse2")]
3132 #[cfg_attr(miri, ignore)]
3134 unsafe fn test_mm_lfence() {
3135 _mm_lfence();
3136 }
3137
3138 #[simd_test(enable = "sse2")]
3139 #[cfg_attr(miri, ignore)]
3141 unsafe fn test_mm_mfence() {
3142 _mm_mfence();
3143 }
3144
3145 #[simd_test(enable = "sse2")]
3146 unsafe fn test_mm_add_epi8() {
3147 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3148 #[rustfmt::skip]
3149 let b = _mm_setr_epi8(
3150 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3151 );
3152 let r = _mm_add_epi8(a, b);
3153 #[rustfmt::skip]
3154 let e = _mm_setr_epi8(
3155 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3156 );
3157 assert_eq_m128i(r, e);
3158 }
3159
3160 #[simd_test(enable = "sse2")]
3161 unsafe fn test_mm_add_epi8_overflow() {
3162 let a = _mm_set1_epi8(0x7F);
3163 let b = _mm_set1_epi8(1);
3164 let r = _mm_add_epi8(a, b);
3165 assert_eq_m128i(r, _mm_set1_epi8(-128));
3166 }
3167
3168 #[simd_test(enable = "sse2")]
3169 unsafe fn test_mm_add_epi16() {
3170 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3171 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3172 let r = _mm_add_epi16(a, b);
3173 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3174 assert_eq_m128i(r, e);
3175 }
3176
3177 #[simd_test(enable = "sse2")]
3178 unsafe fn test_mm_add_epi32() {
3179 let a = _mm_setr_epi32(0, 1, 2, 3);
3180 let b = _mm_setr_epi32(4, 5, 6, 7);
3181 let r = _mm_add_epi32(a, b);
3182 let e = _mm_setr_epi32(4, 6, 8, 10);
3183 assert_eq_m128i(r, e);
3184 }
3185
3186 #[simd_test(enable = "sse2")]
3187 unsafe fn test_mm_add_epi64() {
3188 let a = _mm_setr_epi64x(0, 1);
3189 let b = _mm_setr_epi64x(2, 3);
3190 let r = _mm_add_epi64(a, b);
3191 let e = _mm_setr_epi64x(2, 4);
3192 assert_eq_m128i(r, e);
3193 }
3194
3195 #[simd_test(enable = "sse2")]
3196 unsafe fn test_mm_adds_epi8() {
3197 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3198 #[rustfmt::skip]
3199 let b = _mm_setr_epi8(
3200 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3201 );
3202 let r = _mm_adds_epi8(a, b);
3203 #[rustfmt::skip]
3204 let e = _mm_setr_epi8(
3205 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3206 );
3207 assert_eq_m128i(r, e);
3208 }
3209
3210 #[simd_test(enable = "sse2")]
3211 unsafe fn test_mm_adds_epi8_saturate_positive() {
3212 let a = _mm_set1_epi8(0x7F);
3213 let b = _mm_set1_epi8(1);
3214 let r = _mm_adds_epi8(a, b);
3215 assert_eq_m128i(r, a);
3216 }
3217
3218 #[simd_test(enable = "sse2")]
3219 unsafe fn test_mm_adds_epi8_saturate_negative() {
3220 let a = _mm_set1_epi8(-0x80);
3221 let b = _mm_set1_epi8(-1);
3222 let r = _mm_adds_epi8(a, b);
3223 assert_eq_m128i(r, a);
3224 }
3225
3226 #[simd_test(enable = "sse2")]
3227 unsafe fn test_mm_adds_epi16() {
3228 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3229 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3230 let r = _mm_adds_epi16(a, b);
3231 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3232 assert_eq_m128i(r, e);
3233 }
3234
3235 #[simd_test(enable = "sse2")]
3236 unsafe fn test_mm_adds_epi16_saturate_positive() {
3237 let a = _mm_set1_epi16(0x7FFF);
3238 let b = _mm_set1_epi16(1);
3239 let r = _mm_adds_epi16(a, b);
3240 assert_eq_m128i(r, a);
3241 }
3242
3243 #[simd_test(enable = "sse2")]
3244 unsafe fn test_mm_adds_epi16_saturate_negative() {
3245 let a = _mm_set1_epi16(-0x8000);
3246 let b = _mm_set1_epi16(-1);
3247 let r = _mm_adds_epi16(a, b);
3248 assert_eq_m128i(r, a);
3249 }
3250
3251 #[simd_test(enable = "sse2")]
3252 unsafe fn test_mm_adds_epu8() {
3253 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3254 #[rustfmt::skip]
3255 let b = _mm_setr_epi8(
3256 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3257 );
3258 let r = _mm_adds_epu8(a, b);
3259 #[rustfmt::skip]
3260 let e = _mm_setr_epi8(
3261 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3262 );
3263 assert_eq_m128i(r, e);
3264 }
3265
3266 #[simd_test(enable = "sse2")]
3267 unsafe fn test_mm_adds_epu8_saturate() {
3268 let a = _mm_set1_epi8(!0);
3269 let b = _mm_set1_epi8(1);
3270 let r = _mm_adds_epu8(a, b);
3271 assert_eq_m128i(r, a);
3272 }
3273
3274 #[simd_test(enable = "sse2")]
3275 unsafe fn test_mm_adds_epu16() {
3276 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3277 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3278 let r = _mm_adds_epu16(a, b);
3279 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3280 assert_eq_m128i(r, e);
3281 }
3282
3283 #[simd_test(enable = "sse2")]
3284 unsafe fn test_mm_adds_epu16_saturate() {
3285 let a = _mm_set1_epi16(!0);
3286 let b = _mm_set1_epi16(1);
3287 let r = _mm_adds_epu16(a, b);
3288 assert_eq_m128i(r, a);
3289 }
3290
3291 #[simd_test(enable = "sse2")]
3292 unsafe fn test_mm_avg_epu8() {
3293 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3294 let r = _mm_avg_epu8(a, b);
3295 assert_eq_m128i(r, _mm_set1_epi8(6));
3296 }
3297
3298 #[simd_test(enable = "sse2")]
3299 unsafe fn test_mm_avg_epu16() {
3300 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3301 let r = _mm_avg_epu16(a, b);
3302 assert_eq_m128i(r, _mm_set1_epi16(6));
3303 }
3304
3305 #[simd_test(enable = "sse2")]
3306 unsafe fn test_mm_madd_epi16() {
3307 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3308 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3309 let r = _mm_madd_epi16(a, b);
3310 let e = _mm_setr_epi32(29, 81, 149, 233);
3311 assert_eq_m128i(r, e);
3312
3313 let a = _mm_setr_epi16(
3316 i16::MAX,
3317 i16::MAX,
3318 i16::MIN,
3319 i16::MIN,
3320 i16::MIN,
3321 i16::MAX,
3322 0,
3323 0,
3324 );
3325 let b = _mm_setr_epi16(
3326 i16::MAX,
3327 i16::MAX,
3328 i16::MIN,
3329 i16::MIN,
3330 i16::MAX,
3331 i16::MIN,
3332 0,
3333 0,
3334 );
3335 let r = _mm_madd_epi16(a, b);
3336 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3337 assert_eq_m128i(r, e);
3338 }
3339
3340 #[simd_test(enable = "sse2")]
3341 unsafe fn test_mm_max_epi16() {
3342 let a = _mm_set1_epi16(1);
3343 let b = _mm_set1_epi16(-1);
3344 let r = _mm_max_epi16(a, b);
3345 assert_eq_m128i(r, a);
3346 }
3347
3348 #[simd_test(enable = "sse2")]
3349 unsafe fn test_mm_max_epu8() {
3350 let a = _mm_set1_epi8(1);
3351 let b = _mm_set1_epi8(!0);
3352 let r = _mm_max_epu8(a, b);
3353 assert_eq_m128i(r, b);
3354 }
3355
3356 #[simd_test(enable = "sse2")]
3357 unsafe fn test_mm_min_epi16() {
3358 let a = _mm_set1_epi16(1);
3359 let b = _mm_set1_epi16(-1);
3360 let r = _mm_min_epi16(a, b);
3361 assert_eq_m128i(r, b);
3362 }
3363
3364 #[simd_test(enable = "sse2")]
3365 unsafe fn test_mm_min_epu8() {
3366 let a = _mm_set1_epi8(1);
3367 let b = _mm_set1_epi8(!0);
3368 let r = _mm_min_epu8(a, b);
3369 assert_eq_m128i(r, a);
3370 }
3371
3372 #[simd_test(enable = "sse2")]
3373 unsafe fn test_mm_mulhi_epi16() {
3374 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3375 let r = _mm_mulhi_epi16(a, b);
3376 assert_eq_m128i(r, _mm_set1_epi16(-16));
3377 }
3378
3379 #[simd_test(enable = "sse2")]
3380 unsafe fn test_mm_mulhi_epu16() {
3381 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3382 let r = _mm_mulhi_epu16(a, b);
3383 assert_eq_m128i(r, _mm_set1_epi16(15));
3384 }
3385
3386 #[simd_test(enable = "sse2")]
3387 unsafe fn test_mm_mullo_epi16() {
3388 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3389 let r = _mm_mullo_epi16(a, b);
3390 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3391 }
3392
3393 #[simd_test(enable = "sse2")]
3394 unsafe fn test_mm_mul_epu32() {
3395 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3396 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3397 let r = _mm_mul_epu32(a, b);
3398 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3399 assert_eq_m128i(r, e);
3400 }
3401
3402 #[simd_test(enable = "sse2")]
3403 unsafe fn test_mm_sad_epu8() {
3404 #[rustfmt::skip]
3405 let a = _mm_setr_epi8(
3406 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3407 1, 2, 3, 4,
3408 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3409 1, 2, 3, 4,
3410 );
3411 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3412 let r = _mm_sad_epu8(a, b);
3413 let e = _mm_setr_epi64x(1020, 614);
3414 assert_eq_m128i(r, e);
3415 }
3416
3417 #[simd_test(enable = "sse2")]
3418 unsafe fn test_mm_sub_epi8() {
3419 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3420 let r = _mm_sub_epi8(a, b);
3421 assert_eq_m128i(r, _mm_set1_epi8(-1));
3422 }
3423
3424 #[simd_test(enable = "sse2")]
3425 unsafe fn test_mm_sub_epi16() {
3426 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3427 let r = _mm_sub_epi16(a, b);
3428 assert_eq_m128i(r, _mm_set1_epi16(-1));
3429 }
3430
3431 #[simd_test(enable = "sse2")]
3432 unsafe fn test_mm_sub_epi32() {
3433 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3434 let r = _mm_sub_epi32(a, b);
3435 assert_eq_m128i(r, _mm_set1_epi32(-1));
3436 }
3437
3438 #[simd_test(enable = "sse2")]
3439 unsafe fn test_mm_sub_epi64() {
3440 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3441 let r = _mm_sub_epi64(a, b);
3442 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3443 }
3444
3445 #[simd_test(enable = "sse2")]
3446 unsafe fn test_mm_subs_epi8() {
3447 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3448 let r = _mm_subs_epi8(a, b);
3449 assert_eq_m128i(r, _mm_set1_epi8(3));
3450 }
3451
3452 #[simd_test(enable = "sse2")]
3453 unsafe fn test_mm_subs_epi8_saturate_positive() {
3454 let a = _mm_set1_epi8(0x7F);
3455 let b = _mm_set1_epi8(-1);
3456 let r = _mm_subs_epi8(a, b);
3457 assert_eq_m128i(r, a);
3458 }
3459
3460 #[simd_test(enable = "sse2")]
3461 unsafe fn test_mm_subs_epi8_saturate_negative() {
3462 let a = _mm_set1_epi8(-0x80);
3463 let b = _mm_set1_epi8(1);
3464 let r = _mm_subs_epi8(a, b);
3465 assert_eq_m128i(r, a);
3466 }
3467
3468 #[simd_test(enable = "sse2")]
3469 unsafe fn test_mm_subs_epi16() {
3470 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3471 let r = _mm_subs_epi16(a, b);
3472 assert_eq_m128i(r, _mm_set1_epi16(3));
3473 }
3474
3475 #[simd_test(enable = "sse2")]
3476 unsafe fn test_mm_subs_epi16_saturate_positive() {
3477 let a = _mm_set1_epi16(0x7FFF);
3478 let b = _mm_set1_epi16(-1);
3479 let r = _mm_subs_epi16(a, b);
3480 assert_eq_m128i(r, a);
3481 }
3482
3483 #[simd_test(enable = "sse2")]
3484 unsafe fn test_mm_subs_epi16_saturate_negative() {
3485 let a = _mm_set1_epi16(-0x8000);
3486 let b = _mm_set1_epi16(1);
3487 let r = _mm_subs_epi16(a, b);
3488 assert_eq_m128i(r, a);
3489 }
3490
3491 #[simd_test(enable = "sse2")]
3492 unsafe fn test_mm_subs_epu8() {
3493 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3494 let r = _mm_subs_epu8(a, b);
3495 assert_eq_m128i(r, _mm_set1_epi8(3));
3496 }
3497
3498 #[simd_test(enable = "sse2")]
3499 unsafe fn test_mm_subs_epu8_saturate() {
3500 let a = _mm_set1_epi8(0);
3501 let b = _mm_set1_epi8(1);
3502 let r = _mm_subs_epu8(a, b);
3503 assert_eq_m128i(r, a);
3504 }
3505
3506 #[simd_test(enable = "sse2")]
3507 unsafe fn test_mm_subs_epu16() {
3508 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3509 let r = _mm_subs_epu16(a, b);
3510 assert_eq_m128i(r, _mm_set1_epi16(3));
3511 }
3512
3513 #[simd_test(enable = "sse2")]
3514 unsafe fn test_mm_subs_epu16_saturate() {
3515 let a = _mm_set1_epi16(0);
3516 let b = _mm_set1_epi16(1);
3517 let r = _mm_subs_epu16(a, b);
3518 assert_eq_m128i(r, a);
3519 }
3520
3521 #[simd_test(enable = "sse2")]
3522 unsafe fn test_mm_slli_si128() {
3523 #[rustfmt::skip]
3524 let a = _mm_setr_epi8(
3525 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3526 );
3527 let r = _mm_slli_si128::<1>(a);
3528 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3529 assert_eq_m128i(r, e);
3530
3531 #[rustfmt::skip]
3532 let a = _mm_setr_epi8(
3533 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3534 );
3535 let r = _mm_slli_si128::<15>(a);
3536 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3537 assert_eq_m128i(r, e);
3538
3539 #[rustfmt::skip]
3540 let a = _mm_setr_epi8(
3541 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3542 );
3543 let r = _mm_slli_si128::<16>(a);
3544 assert_eq_m128i(r, _mm_set1_epi8(0));
3545 }
3546
3547 #[simd_test(enable = "sse2")]
3548 unsafe fn test_mm_slli_epi16() {
3549 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3550 let r = _mm_slli_epi16::<4>(a);
3551 assert_eq_m128i(
3552 r,
3553 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3554 );
3555 let r = _mm_slli_epi16::<16>(a);
3556 assert_eq_m128i(r, _mm_set1_epi16(0));
3557 }
3558
3559 #[simd_test(enable = "sse2")]
3560 unsafe fn test_mm_sll_epi16() {
3561 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3562 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3563 assert_eq_m128i(
3564 r,
3565 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3566 );
3567 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3568 assert_eq_m128i(r, a);
3569 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3570 assert_eq_m128i(r, _mm_set1_epi16(0));
3571 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3572 assert_eq_m128i(r, _mm_set1_epi16(0));
3573 }
3574
3575 #[simd_test(enable = "sse2")]
3576 unsafe fn test_mm_slli_epi32() {
3577 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3578 let r = _mm_slli_epi32::<4>(a);
3579 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3580 let r = _mm_slli_epi32::<32>(a);
3581 assert_eq_m128i(r, _mm_set1_epi32(0));
3582 }
3583
3584 #[simd_test(enable = "sse2")]
3585 unsafe fn test_mm_sll_epi32() {
3586 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3587 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3588 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3589 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3590 assert_eq_m128i(r, a);
3591 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3592 assert_eq_m128i(r, _mm_set1_epi32(0));
3593 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3594 assert_eq_m128i(r, _mm_set1_epi32(0));
3595 }
3596
3597 #[simd_test(enable = "sse2")]
3598 unsafe fn test_mm_slli_epi64() {
3599 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3600 let r = _mm_slli_epi64::<4>(a);
3601 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3602 let r = _mm_slli_epi64::<64>(a);
3603 assert_eq_m128i(r, _mm_set1_epi64x(0));
3604 }
3605
3606 #[simd_test(enable = "sse2")]
3607 unsafe fn test_mm_sll_epi64() {
3608 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3609 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3610 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3611 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3612 assert_eq_m128i(r, a);
3613 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3614 assert_eq_m128i(r, _mm_set1_epi64x(0));
3615 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3616 assert_eq_m128i(r, _mm_set1_epi64x(0));
3617 }
3618
3619 #[simd_test(enable = "sse2")]
3620 unsafe fn test_mm_srai_epi16() {
3621 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3622 let r = _mm_srai_epi16::<4>(a);
3623 assert_eq_m128i(
3624 r,
3625 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3626 );
3627 let r = _mm_srai_epi16::<16>(a);
3628 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3629 }
3630
3631 #[simd_test(enable = "sse2")]
3632 unsafe fn test_mm_sra_epi16() {
3633 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3634 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3635 assert_eq_m128i(
3636 r,
3637 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3638 );
3639 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3640 assert_eq_m128i(r, a);
3641 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3642 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3643 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3644 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3645 }
3646
3647 #[simd_test(enable = "sse2")]
3648 unsafe fn test_mm_srai_epi32() {
3649 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3650 let r = _mm_srai_epi32::<4>(a);
3651 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3652 let r = _mm_srai_epi32::<32>(a);
3653 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3654 }
3655
3656 #[simd_test(enable = "sse2")]
3657 unsafe fn test_mm_sra_epi32() {
3658 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3659 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3660 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3661 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3662 assert_eq_m128i(r, a);
3663 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3664 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3665 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3666 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3667 }
3668
3669 #[simd_test(enable = "sse2")]
3670 unsafe fn test_mm_srli_si128() {
3671 #[rustfmt::skip]
3672 let a = _mm_setr_epi8(
3673 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3674 );
3675 let r = _mm_srli_si128::<1>(a);
3676 #[rustfmt::skip]
3677 let e = _mm_setr_epi8(
3678 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3679 );
3680 assert_eq_m128i(r, e);
3681
3682 #[rustfmt::skip]
3683 let a = _mm_setr_epi8(
3684 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3685 );
3686 let r = _mm_srli_si128::<15>(a);
3687 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3688 assert_eq_m128i(r, e);
3689
3690 #[rustfmt::skip]
3691 let a = _mm_setr_epi8(
3692 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3693 );
3694 let r = _mm_srli_si128::<16>(a);
3695 assert_eq_m128i(r, _mm_set1_epi8(0));
3696 }
3697
3698 #[simd_test(enable = "sse2")]
3699 unsafe fn test_mm_srli_epi16() {
3700 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3701 let r = _mm_srli_epi16::<4>(a);
3702 assert_eq_m128i(
3703 r,
3704 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3705 );
3706 let r = _mm_srli_epi16::<16>(a);
3707 assert_eq_m128i(r, _mm_set1_epi16(0));
3708 }
3709
3710 #[simd_test(enable = "sse2")]
3711 unsafe fn test_mm_srl_epi16() {
3712 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3713 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3714 assert_eq_m128i(
3715 r,
3716 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3717 );
3718 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3719 assert_eq_m128i(r, a);
3720 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3721 assert_eq_m128i(r, _mm_set1_epi16(0));
3722 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3723 assert_eq_m128i(r, _mm_set1_epi16(0));
3724 }
3725
3726 #[simd_test(enable = "sse2")]
3727 unsafe fn test_mm_srli_epi32() {
3728 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3729 let r = _mm_srli_epi32::<4>(a);
3730 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3731 let r = _mm_srli_epi32::<32>(a);
3732 assert_eq_m128i(r, _mm_set1_epi32(0));
3733 }
3734
3735 #[simd_test(enable = "sse2")]
3736 unsafe fn test_mm_srl_epi32() {
3737 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3738 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3739 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3740 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3741 assert_eq_m128i(r, a);
3742 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3743 assert_eq_m128i(r, _mm_set1_epi32(0));
3744 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3745 assert_eq_m128i(r, _mm_set1_epi32(0));
3746 }
3747
3748 #[simd_test(enable = "sse2")]
3749 unsafe fn test_mm_srli_epi64() {
3750 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3751 let r = _mm_srli_epi64::<4>(a);
3752 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3753 let r = _mm_srli_epi64::<64>(a);
3754 assert_eq_m128i(r, _mm_set1_epi64x(0));
3755 }
3756
3757 #[simd_test(enable = "sse2")]
3758 unsafe fn test_mm_srl_epi64() {
3759 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3760 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3761 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3762 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3763 assert_eq_m128i(r, a);
3764 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3765 assert_eq_m128i(r, _mm_set1_epi64x(0));
3766 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3767 assert_eq_m128i(r, _mm_set1_epi64x(0));
3768 }
3769
3770 #[simd_test(enable = "sse2")]
3771 unsafe fn test_mm_and_si128() {
3772 let a = _mm_set1_epi8(5);
3773 let b = _mm_set1_epi8(3);
3774 let r = _mm_and_si128(a, b);
3775 assert_eq_m128i(r, _mm_set1_epi8(1));
3776 }
3777
3778 #[simd_test(enable = "sse2")]
3779 unsafe fn test_mm_andnot_si128() {
3780 let a = _mm_set1_epi8(5);
3781 let b = _mm_set1_epi8(3);
3782 let r = _mm_andnot_si128(a, b);
3783 assert_eq_m128i(r, _mm_set1_epi8(2));
3784 }
3785
3786 #[simd_test(enable = "sse2")]
3787 unsafe fn test_mm_or_si128() {
3788 let a = _mm_set1_epi8(5);
3789 let b = _mm_set1_epi8(3);
3790 let r = _mm_or_si128(a, b);
3791 assert_eq_m128i(r, _mm_set1_epi8(7));
3792 }
3793
3794 #[simd_test(enable = "sse2")]
3795 unsafe fn test_mm_xor_si128() {
3796 let a = _mm_set1_epi8(5);
3797 let b = _mm_set1_epi8(3);
3798 let r = _mm_xor_si128(a, b);
3799 assert_eq_m128i(r, _mm_set1_epi8(6));
3800 }
3801
3802 #[simd_test(enable = "sse2")]
3803 unsafe fn test_mm_cmpeq_epi8() {
3804 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3805 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3806 let r = _mm_cmpeq_epi8(a, b);
3807 #[rustfmt::skip]
3808 assert_eq_m128i(
3809 r,
3810 _mm_setr_epi8(
3811 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3812 )
3813 );
3814 }
3815
3816 #[simd_test(enable = "sse2")]
3817 unsafe fn test_mm_cmpeq_epi16() {
3818 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3819 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3820 let r = _mm_cmpeq_epi16(a, b);
3821 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3822 }
3823
3824 #[simd_test(enable = "sse2")]
3825 unsafe fn test_mm_cmpeq_epi32() {
3826 let a = _mm_setr_epi32(0, 1, 2, 3);
3827 let b = _mm_setr_epi32(3, 2, 2, 0);
3828 let r = _mm_cmpeq_epi32(a, b);
3829 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3830 }
3831
3832 #[simd_test(enable = "sse2")]
3833 unsafe fn test_mm_cmpgt_epi8() {
3834 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3835 let b = _mm_set1_epi8(0);
3836 let r = _mm_cmpgt_epi8(a, b);
3837 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3838 assert_eq_m128i(r, e);
3839 }
3840
3841 #[simd_test(enable = "sse2")]
3842 unsafe fn test_mm_cmpgt_epi16() {
3843 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3844 let b = _mm_set1_epi16(0);
3845 let r = _mm_cmpgt_epi16(a, b);
3846 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3847 assert_eq_m128i(r, e);
3848 }
3849
3850 #[simd_test(enable = "sse2")]
3851 unsafe fn test_mm_cmpgt_epi32() {
3852 let a = _mm_set_epi32(5, 0, 0, 0);
3853 let b = _mm_set1_epi32(0);
3854 let r = _mm_cmpgt_epi32(a, b);
3855 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3856 }
3857
3858 #[simd_test(enable = "sse2")]
3859 unsafe fn test_mm_cmplt_epi8() {
3860 let a = _mm_set1_epi8(0);
3861 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3862 let r = _mm_cmplt_epi8(a, b);
3863 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3864 assert_eq_m128i(r, e);
3865 }
3866
3867 #[simd_test(enable = "sse2")]
3868 unsafe fn test_mm_cmplt_epi16() {
3869 let a = _mm_set1_epi16(0);
3870 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3871 let r = _mm_cmplt_epi16(a, b);
3872 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3873 assert_eq_m128i(r, e);
3874 }
3875
3876 #[simd_test(enable = "sse2")]
3877 unsafe fn test_mm_cmplt_epi32() {
3878 let a = _mm_set1_epi32(0);
3879 let b = _mm_set_epi32(5, 0, 0, 0);
3880 let r = _mm_cmplt_epi32(a, b);
3881 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3882 }
3883
3884 #[simd_test(enable = "sse2")]
3885 unsafe fn test_mm_cvtepi32_pd() {
3886 let a = _mm_set_epi32(35, 25, 15, 5);
3887 let r = _mm_cvtepi32_pd(a);
3888 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3889 }
3890
3891 #[simd_test(enable = "sse2")]
3892 unsafe fn test_mm_cvtsi32_sd() {
3893 let a = _mm_set1_pd(3.5);
3894 let r = _mm_cvtsi32_sd(a, 5);
3895 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3896 }
3897
3898 #[simd_test(enable = "sse2")]
3899 unsafe fn test_mm_cvtepi32_ps() {
3900 let a = _mm_setr_epi32(1, 2, 3, 4);
3901 let r = _mm_cvtepi32_ps(a);
3902 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3903 }
3904
3905 #[simd_test(enable = "sse2")]
3906 unsafe fn test_mm_cvtps_epi32() {
3907 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3908 let r = _mm_cvtps_epi32(a);
3909 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3910 }
3911
3912 #[simd_test(enable = "sse2")]
3913 unsafe fn test_mm_cvtsi32_si128() {
3914 let r = _mm_cvtsi32_si128(5);
3915 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3916 }
3917
3918 #[simd_test(enable = "sse2")]
3919 unsafe fn test_mm_cvtsi128_si32() {
3920 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3921 assert_eq!(r, 5);
3922 }
3923
3924 #[simd_test(enable = "sse2")]
3925 unsafe fn test_mm_set_epi64x() {
3926 let r = _mm_set_epi64x(0, 1);
3927 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3928 }
3929
3930 #[simd_test(enable = "sse2")]
3931 unsafe fn test_mm_set_epi32() {
3932 let r = _mm_set_epi32(0, 1, 2, 3);
3933 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3934 }
3935
3936 #[simd_test(enable = "sse2")]
3937 unsafe fn test_mm_set_epi16() {
3938 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3939 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3940 }
3941
3942 #[simd_test(enable = "sse2")]
3943 unsafe fn test_mm_set_epi8() {
3944 #[rustfmt::skip]
3945 let r = _mm_set_epi8(
3946 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3947 );
3948 #[rustfmt::skip]
3949 let e = _mm_setr_epi8(
3950 15, 14, 13, 12, 11, 10, 9, 8,
3951 7, 6, 5, 4, 3, 2, 1, 0,
3952 );
3953 assert_eq_m128i(r, e);
3954 }
3955
3956 #[simd_test(enable = "sse2")]
3957 unsafe fn test_mm_set1_epi64x() {
3958 let r = _mm_set1_epi64x(1);
3959 assert_eq_m128i(r, _mm_set1_epi64x(1));
3960 }
3961
3962 #[simd_test(enable = "sse2")]
3963 unsafe fn test_mm_set1_epi32() {
3964 let r = _mm_set1_epi32(1);
3965 assert_eq_m128i(r, _mm_set1_epi32(1));
3966 }
3967
3968 #[simd_test(enable = "sse2")]
3969 unsafe fn test_mm_set1_epi16() {
3970 let r = _mm_set1_epi16(1);
3971 assert_eq_m128i(r, _mm_set1_epi16(1));
3972 }
3973
3974 #[simd_test(enable = "sse2")]
3975 unsafe fn test_mm_set1_epi8() {
3976 let r = _mm_set1_epi8(1);
3977 assert_eq_m128i(r, _mm_set1_epi8(1));
3978 }
3979
3980 #[simd_test(enable = "sse2")]
3981 unsafe fn test_mm_setr_epi32() {
3982 let r = _mm_setr_epi32(0, 1, 2, 3);
3983 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3984 }
3985
3986 #[simd_test(enable = "sse2")]
3987 unsafe fn test_mm_setr_epi16() {
3988 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3989 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3990 }
3991
3992 #[simd_test(enable = "sse2")]
3993 unsafe fn test_mm_setr_epi8() {
3994 #[rustfmt::skip]
3995 let r = _mm_setr_epi8(
3996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3997 );
3998 #[rustfmt::skip]
3999 let e = _mm_setr_epi8(
4000 0, 1, 2, 3, 4, 5, 6, 7,
4001 8, 9, 10, 11, 12, 13, 14, 15,
4002 );
4003 assert_eq_m128i(r, e);
4004 }
4005
4006 #[simd_test(enable = "sse2")]
4007 unsafe fn test_mm_setzero_si128() {
4008 let r = _mm_setzero_si128();
4009 assert_eq_m128i(r, _mm_set1_epi64x(0));
4010 }
4011
4012 #[simd_test(enable = "sse2")]
4013 unsafe fn test_mm_loadl_epi64() {
4014 let a = _mm_setr_epi64x(6, 5);
4015 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4016 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4017 }
4018
4019 #[simd_test(enable = "sse2")]
4020 unsafe fn test_mm_load_si128() {
4021 let a = _mm_set_epi64x(5, 6);
4022 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4023 assert_eq_m128i(a, r);
4024 }
4025
4026 #[simd_test(enable = "sse2")]
4027 unsafe fn test_mm_loadu_si128() {
4028 let a = _mm_set_epi64x(5, 6);
4029 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4030 assert_eq_m128i(a, r);
4031 }
4032
4033 #[simd_test(enable = "sse2")]
4034 #[cfg_attr(miri, ignore)]
4037 unsafe fn test_mm_maskmoveu_si128() {
4038 let a = _mm_set1_epi8(9);
4039 #[rustfmt::skip]
4040 let mask = _mm_set_epi8(
4041 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4042 0, 0, 0, 0, 0, 0, 0, 0,
4043 );
4044 let mut r = _mm_set1_epi8(0);
4045 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4046 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4047 assert_eq_m128i(r, e);
4048 }
4049
4050 #[simd_test(enable = "sse2")]
4051 unsafe fn test_mm_store_si128() {
4052 let a = _mm_set1_epi8(9);
4053 let mut r = _mm_set1_epi8(0);
4054 _mm_store_si128(&mut r, a);
4055 assert_eq_m128i(r, a);
4056 }
4057
4058 #[simd_test(enable = "sse2")]
4059 unsafe fn test_mm_storeu_si128() {
4060 let a = _mm_set1_epi8(9);
4061 let mut r = _mm_set1_epi8(0);
4062 _mm_storeu_si128(&mut r, a);
4063 assert_eq_m128i(r, a);
4064 }
4065
4066 #[simd_test(enable = "sse2")]
4067 unsafe fn test_mm_storel_epi64() {
4068 let a = _mm_setr_epi64x(2, 9);
4069 let mut r = _mm_set1_epi8(0);
4070 _mm_storel_epi64(&mut r, a);
4071 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4072 }
4073
4074 #[simd_test(enable = "sse2")]
4075 #[cfg_attr(miri, ignore)]
4078 unsafe fn test_mm_stream_si128() {
4079 let a = _mm_setr_epi32(1, 2, 3, 4);
4080 let mut r = _mm_undefined_si128();
4081 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4082 assert_eq_m128i(r, a);
4083 }
4084
4085 #[simd_test(enable = "sse2")]
4086 #[cfg_attr(miri, ignore)]
4089 unsafe fn test_mm_stream_si32() {
4090 let a: i32 = 7;
4091 let mut mem = boxed::Box::<i32>::new(-1);
4092 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4093 assert_eq!(a, *mem);
4094 }
4095
4096 #[simd_test(enable = "sse2")]
4097 unsafe fn test_mm_move_epi64() {
4098 let a = _mm_setr_epi64x(5, 6);
4099 let r = _mm_move_epi64(a);
4100 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4101 }
4102
4103 #[simd_test(enable = "sse2")]
4104 unsafe fn test_mm_packs_epi16() {
4105 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4106 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4107 let r = _mm_packs_epi16(a, b);
4108 #[rustfmt::skip]
4109 assert_eq_m128i(
4110 r,
4111 _mm_setr_epi8(
4112 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4113 )
4114 );
4115 }
4116
4117 #[simd_test(enable = "sse2")]
4118 unsafe fn test_mm_packs_epi32() {
4119 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4120 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4121 let r = _mm_packs_epi32(a, b);
4122 assert_eq_m128i(
4123 r,
4124 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4125 );
4126 }
4127
4128 #[simd_test(enable = "sse2")]
4129 unsafe fn test_mm_packus_epi16() {
4130 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4131 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4132 let r = _mm_packus_epi16(a, b);
4133 assert_eq_m128i(
4134 r,
4135 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4136 );
4137 }
4138
4139 #[simd_test(enable = "sse2")]
4140 unsafe fn test_mm_extract_epi16() {
4141 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4142 let r1 = _mm_extract_epi16::<0>(a);
4143 let r2 = _mm_extract_epi16::<3>(a);
4144 assert_eq!(r1, 0xFFFF);
4145 assert_eq!(r2, 3);
4146 }
4147
4148 #[simd_test(enable = "sse2")]
4149 unsafe fn test_mm_insert_epi16() {
4150 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4151 let r = _mm_insert_epi16::<0>(a, 9);
4152 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4153 assert_eq_m128i(r, e);
4154 }
4155
4156 #[simd_test(enable = "sse2")]
4157 unsafe fn test_mm_movemask_epi8() {
4158 #[rustfmt::skip]
4159 let a = _mm_setr_epi8(
4160 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4161 0b0101, 0b1111_0000u8 as i8, 0, 0,
4162 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4163 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4164 );
4165 let r = _mm_movemask_epi8(a);
4166 assert_eq!(r, 0b10100110_00100101);
4167 }
4168
4169 #[simd_test(enable = "sse2")]
4170 unsafe fn test_mm_shuffle_epi32() {
4171 let a = _mm_setr_epi32(5, 10, 15, 20);
4172 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4173 let e = _mm_setr_epi32(20, 10, 10, 5);
4174 assert_eq_m128i(r, e);
4175 }
4176
4177 #[simd_test(enable = "sse2")]
4178 unsafe fn test_mm_shufflehi_epi16() {
4179 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4180 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4181 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4182 assert_eq_m128i(r, e);
4183 }
4184
4185 #[simd_test(enable = "sse2")]
4186 unsafe fn test_mm_shufflelo_epi16() {
4187 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4188 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4189 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4190 assert_eq_m128i(r, e);
4191 }
4192
4193 #[simd_test(enable = "sse2")]
4194 unsafe fn test_mm_unpackhi_epi8() {
4195 #[rustfmt::skip]
4196 let a = _mm_setr_epi8(
4197 0, 1, 2, 3, 4, 5, 6, 7,
4198 8, 9, 10, 11, 12, 13, 14, 15,
4199 );
4200 #[rustfmt::skip]
4201 let b = _mm_setr_epi8(
4202 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4203 );
4204 let r = _mm_unpackhi_epi8(a, b);
4205 #[rustfmt::skip]
4206 let e = _mm_setr_epi8(
4207 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4208 );
4209 assert_eq_m128i(r, e);
4210 }
4211
4212 #[simd_test(enable = "sse2")]
4213 unsafe fn test_mm_unpackhi_epi16() {
4214 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4215 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4216 let r = _mm_unpackhi_epi16(a, b);
4217 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4218 assert_eq_m128i(r, e);
4219 }
4220
4221 #[simd_test(enable = "sse2")]
4222 unsafe fn test_mm_unpackhi_epi32() {
4223 let a = _mm_setr_epi32(0, 1, 2, 3);
4224 let b = _mm_setr_epi32(4, 5, 6, 7);
4225 let r = _mm_unpackhi_epi32(a, b);
4226 let e = _mm_setr_epi32(2, 6, 3, 7);
4227 assert_eq_m128i(r, e);
4228 }
4229
4230 #[simd_test(enable = "sse2")]
4231 unsafe fn test_mm_unpackhi_epi64() {
4232 let a = _mm_setr_epi64x(0, 1);
4233 let b = _mm_setr_epi64x(2, 3);
4234 let r = _mm_unpackhi_epi64(a, b);
4235 let e = _mm_setr_epi64x(1, 3);
4236 assert_eq_m128i(r, e);
4237 }
4238
4239 #[simd_test(enable = "sse2")]
4240 unsafe fn test_mm_unpacklo_epi8() {
4241 #[rustfmt::skip]
4242 let a = _mm_setr_epi8(
4243 0, 1, 2, 3, 4, 5, 6, 7,
4244 8, 9, 10, 11, 12, 13, 14, 15,
4245 );
4246 #[rustfmt::skip]
4247 let b = _mm_setr_epi8(
4248 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4249 );
4250 let r = _mm_unpacklo_epi8(a, b);
4251 #[rustfmt::skip]
4252 let e = _mm_setr_epi8(
4253 0, 16, 1, 17, 2, 18, 3, 19,
4254 4, 20, 5, 21, 6, 22, 7, 23,
4255 );
4256 assert_eq_m128i(r, e);
4257 }
4258
4259 #[simd_test(enable = "sse2")]
4260 unsafe fn test_mm_unpacklo_epi16() {
4261 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4262 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4263 let r = _mm_unpacklo_epi16(a, b);
4264 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4265 assert_eq_m128i(r, e);
4266 }
4267
4268 #[simd_test(enable = "sse2")]
4269 unsafe fn test_mm_unpacklo_epi32() {
4270 let a = _mm_setr_epi32(0, 1, 2, 3);
4271 let b = _mm_setr_epi32(4, 5, 6, 7);
4272 let r = _mm_unpacklo_epi32(a, b);
4273 let e = _mm_setr_epi32(0, 4, 1, 5);
4274 assert_eq_m128i(r, e);
4275 }
4276
4277 #[simd_test(enable = "sse2")]
4278 unsafe fn test_mm_unpacklo_epi64() {
4279 let a = _mm_setr_epi64x(0, 1);
4280 let b = _mm_setr_epi64x(2, 3);
4281 let r = _mm_unpacklo_epi64(a, b);
4282 let e = _mm_setr_epi64x(0, 2);
4283 assert_eq_m128i(r, e);
4284 }
4285
4286 #[simd_test(enable = "sse2")]
4287 unsafe fn test_mm_add_sd() {
4288 let a = _mm_setr_pd(1.0, 2.0);
4289 let b = _mm_setr_pd(5.0, 10.0);
4290 let r = _mm_add_sd(a, b);
4291 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4292 }
4293
4294 #[simd_test(enable = "sse2")]
4295 unsafe fn test_mm_add_pd() {
4296 let a = _mm_setr_pd(1.0, 2.0);
4297 let b = _mm_setr_pd(5.0, 10.0);
4298 let r = _mm_add_pd(a, b);
4299 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4300 }
4301
4302 #[simd_test(enable = "sse2")]
4303 unsafe fn test_mm_div_sd() {
4304 let a = _mm_setr_pd(1.0, 2.0);
4305 let b = _mm_setr_pd(5.0, 10.0);
4306 let r = _mm_div_sd(a, b);
4307 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4308 }
4309
4310 #[simd_test(enable = "sse2")]
4311 unsafe fn test_mm_div_pd() {
4312 let a = _mm_setr_pd(1.0, 2.0);
4313 let b = _mm_setr_pd(5.0, 10.0);
4314 let r = _mm_div_pd(a, b);
4315 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4316 }
4317
4318 #[simd_test(enable = "sse2")]
4319 unsafe fn test_mm_max_sd() {
4320 let a = _mm_setr_pd(1.0, 2.0);
4321 let b = _mm_setr_pd(5.0, 10.0);
4322 let r = _mm_max_sd(a, b);
4323 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4324 }
4325
4326 #[simd_test(enable = "sse2")]
4327 unsafe fn test_mm_max_pd() {
4328 let a = _mm_setr_pd(1.0, 2.0);
4329 let b = _mm_setr_pd(5.0, 10.0);
4330 let r = _mm_max_pd(a, b);
4331 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4332
4333 let a = _mm_setr_pd(-0.0, 0.0);
4335 let b = _mm_setr_pd(0.0, 0.0);
4336 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4337 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4338 let a: [u8; 16] = transmute(a);
4339 let b: [u8; 16] = transmute(b);
4340 assert_eq!(r1, b);
4341 assert_eq!(r2, a);
4342 assert_ne!(a, b); }
4344
4345 #[simd_test(enable = "sse2")]
4346 unsafe fn test_mm_min_sd() {
4347 let a = _mm_setr_pd(1.0, 2.0);
4348 let b = _mm_setr_pd(5.0, 10.0);
4349 let r = _mm_min_sd(a, b);
4350 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4351 }
4352
4353 #[simd_test(enable = "sse2")]
4354 unsafe fn test_mm_min_pd() {
4355 let a = _mm_setr_pd(1.0, 2.0);
4356 let b = _mm_setr_pd(5.0, 10.0);
4357 let r = _mm_min_pd(a, b);
4358 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4359
4360 let a = _mm_setr_pd(-0.0, 0.0);
4362 let b = _mm_setr_pd(0.0, 0.0);
4363 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4364 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4365 let a: [u8; 16] = transmute(a);
4366 let b: [u8; 16] = transmute(b);
4367 assert_eq!(r1, b);
4368 assert_eq!(r2, a);
4369 assert_ne!(a, b); }
4371
4372 #[simd_test(enable = "sse2")]
4373 unsafe fn test_mm_mul_sd() {
4374 let a = _mm_setr_pd(1.0, 2.0);
4375 let b = _mm_setr_pd(5.0, 10.0);
4376 let r = _mm_mul_sd(a, b);
4377 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4378 }
4379
4380 #[simd_test(enable = "sse2")]
4381 unsafe fn test_mm_mul_pd() {
4382 let a = _mm_setr_pd(1.0, 2.0);
4383 let b = _mm_setr_pd(5.0, 10.0);
4384 let r = _mm_mul_pd(a, b);
4385 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4386 }
4387
4388 #[simd_test(enable = "sse2")]
4389 unsafe fn test_mm_sqrt_sd() {
4390 let a = _mm_setr_pd(1.0, 2.0);
4391 let b = _mm_setr_pd(5.0, 10.0);
4392 let r = _mm_sqrt_sd(a, b);
4393 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4394 }
4395
4396 #[simd_test(enable = "sse2")]
4397 unsafe fn test_mm_sqrt_pd() {
4398 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4399 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4400 }
4401
4402 #[simd_test(enable = "sse2")]
4403 unsafe fn test_mm_sub_sd() {
4404 let a = _mm_setr_pd(1.0, 2.0);
4405 let b = _mm_setr_pd(5.0, 10.0);
4406 let r = _mm_sub_sd(a, b);
4407 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4408 }
4409
4410 #[simd_test(enable = "sse2")]
4411 unsafe fn test_mm_sub_pd() {
4412 let a = _mm_setr_pd(1.0, 2.0);
4413 let b = _mm_setr_pd(5.0, 10.0);
4414 let r = _mm_sub_pd(a, b);
4415 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4416 }
4417
4418 #[simd_test(enable = "sse2")]
4419 unsafe fn test_mm_and_pd() {
4420 let a = transmute(u64x2::splat(5));
4421 let b = transmute(u64x2::splat(3));
4422 let r = _mm_and_pd(a, b);
4423 let e = transmute(u64x2::splat(1));
4424 assert_eq_m128d(r, e);
4425 }
4426
4427 #[simd_test(enable = "sse2")]
4428 unsafe fn test_mm_andnot_pd() {
4429 let a = transmute(u64x2::splat(5));
4430 let b = transmute(u64x2::splat(3));
4431 let r = _mm_andnot_pd(a, b);
4432 let e = transmute(u64x2::splat(2));
4433 assert_eq_m128d(r, e);
4434 }
4435
4436 #[simd_test(enable = "sse2")]
4437 unsafe fn test_mm_or_pd() {
4438 let a = transmute(u64x2::splat(5));
4439 let b = transmute(u64x2::splat(3));
4440 let r = _mm_or_pd(a, b);
4441 let e = transmute(u64x2::splat(7));
4442 assert_eq_m128d(r, e);
4443 }
4444
4445 #[simd_test(enable = "sse2")]
4446 unsafe fn test_mm_xor_pd() {
4447 let a = transmute(u64x2::splat(5));
4448 let b = transmute(u64x2::splat(3));
4449 let r = _mm_xor_pd(a, b);
4450 let e = transmute(u64x2::splat(6));
4451 assert_eq_m128d(r, e);
4452 }
4453
4454 #[simd_test(enable = "sse2")]
4455 unsafe fn test_mm_cmpeq_sd() {
4456 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4457 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4458 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4459 assert_eq_m128i(r, e);
4460 }
4461
4462 #[simd_test(enable = "sse2")]
4463 unsafe fn test_mm_cmplt_sd() {
4464 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4465 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4466 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4467 assert_eq_m128i(r, e);
4468 }
4469
4470 #[simd_test(enable = "sse2")]
4471 unsafe fn test_mm_cmple_sd() {
4472 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4473 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4474 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4475 assert_eq_m128i(r, e);
4476 }
4477
4478 #[simd_test(enable = "sse2")]
4479 unsafe fn test_mm_cmpgt_sd() {
4480 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4481 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4482 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4483 assert_eq_m128i(r, e);
4484 }
4485
4486 #[simd_test(enable = "sse2")]
4487 unsafe fn test_mm_cmpge_sd() {
4488 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4489 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4490 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4491 assert_eq_m128i(r, e);
4492 }
4493
4494 #[simd_test(enable = "sse2")]
4495 unsafe fn test_mm_cmpord_sd() {
4496 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4497 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4498 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4499 assert_eq_m128i(r, e);
4500 }
4501
4502 #[simd_test(enable = "sse2")]
4503 unsafe fn test_mm_cmpunord_sd() {
4504 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4505 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4506 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4507 assert_eq_m128i(r, e);
4508 }
4509
4510 #[simd_test(enable = "sse2")]
4511 unsafe fn test_mm_cmpneq_sd() {
4512 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4513 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4514 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4515 assert_eq_m128i(r, e);
4516 }
4517
4518 #[simd_test(enable = "sse2")]
4519 unsafe fn test_mm_cmpnlt_sd() {
4520 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4521 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4522 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4523 assert_eq_m128i(r, e);
4524 }
4525
4526 #[simd_test(enable = "sse2")]
4527 unsafe fn test_mm_cmpnle_sd() {
4528 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4529 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4530 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4531 assert_eq_m128i(r, e);
4532 }
4533
4534 #[simd_test(enable = "sse2")]
4535 unsafe fn test_mm_cmpngt_sd() {
4536 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4537 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4538 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4539 assert_eq_m128i(r, e);
4540 }
4541
4542 #[simd_test(enable = "sse2")]
4543 unsafe fn test_mm_cmpnge_sd() {
4544 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4545 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4546 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4547 assert_eq_m128i(r, e);
4548 }
4549
4550 #[simd_test(enable = "sse2")]
4551 unsafe fn test_mm_cmpeq_pd() {
4552 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4553 let e = _mm_setr_epi64x(!0, 0);
4554 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4555 assert_eq_m128i(r, e);
4556 }
4557
4558 #[simd_test(enable = "sse2")]
4559 unsafe fn test_mm_cmplt_pd() {
4560 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4561 let e = _mm_setr_epi64x(0, !0);
4562 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4563 assert_eq_m128i(r, e);
4564 }
4565
4566 #[simd_test(enable = "sse2")]
4567 unsafe fn test_mm_cmple_pd() {
4568 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4569 let e = _mm_setr_epi64x(!0, !0);
4570 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4571 assert_eq_m128i(r, e);
4572 }
4573
4574 #[simd_test(enable = "sse2")]
4575 unsafe fn test_mm_cmpgt_pd() {
4576 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4577 let e = _mm_setr_epi64x(0, 0);
4578 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4579 assert_eq_m128i(r, e);
4580 }
4581
4582 #[simd_test(enable = "sse2")]
4583 unsafe fn test_mm_cmpge_pd() {
4584 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4585 let e = _mm_setr_epi64x(!0, 0);
4586 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4587 assert_eq_m128i(r, e);
4588 }
4589
4590 #[simd_test(enable = "sse2")]
4591 unsafe fn test_mm_cmpord_pd() {
4592 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4593 let e = _mm_setr_epi64x(0, !0);
4594 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4595 assert_eq_m128i(r, e);
4596 }
4597
4598 #[simd_test(enable = "sse2")]
4599 unsafe fn test_mm_cmpunord_pd() {
4600 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4601 let e = _mm_setr_epi64x(!0, 0);
4602 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4603 assert_eq_m128i(r, e);
4604 }
4605
4606 #[simd_test(enable = "sse2")]
4607 unsafe fn test_mm_cmpneq_pd() {
4608 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4609 let e = _mm_setr_epi64x(!0, !0);
4610 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4611 assert_eq_m128i(r, e);
4612 }
4613
4614 #[simd_test(enable = "sse2")]
4615 unsafe fn test_mm_cmpnlt_pd() {
4616 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4617 let e = _mm_setr_epi64x(0, 0);
4618 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4619 assert_eq_m128i(r, e);
4620 }
4621
4622 #[simd_test(enable = "sse2")]
4623 unsafe fn test_mm_cmpnle_pd() {
4624 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4625 let e = _mm_setr_epi64x(0, 0);
4626 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4627 assert_eq_m128i(r, e);
4628 }
4629
4630 #[simd_test(enable = "sse2")]
4631 unsafe fn test_mm_cmpngt_pd() {
4632 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4633 let e = _mm_setr_epi64x(0, !0);
4634 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4635 assert_eq_m128i(r, e);
4636 }
4637
4638 #[simd_test(enable = "sse2")]
4639 unsafe fn test_mm_cmpnge_pd() {
4640 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4641 let e = _mm_setr_epi64x(0, !0);
4642 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4643 assert_eq_m128i(r, e);
4644 }
4645
4646 #[simd_test(enable = "sse2")]
4647 unsafe fn test_mm_comieq_sd() {
4648 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4649 assert!(_mm_comieq_sd(a, b) != 0);
4650
4651 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4652 assert!(_mm_comieq_sd(a, b) == 0);
4653 }
4654
4655 #[simd_test(enable = "sse2")]
4656 unsafe fn test_mm_comilt_sd() {
4657 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4658 assert!(_mm_comilt_sd(a, b) == 0);
4659 }
4660
4661 #[simd_test(enable = "sse2")]
4662 unsafe fn test_mm_comile_sd() {
4663 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4664 assert!(_mm_comile_sd(a, b) != 0);
4665 }
4666
4667 #[simd_test(enable = "sse2")]
4668 unsafe fn test_mm_comigt_sd() {
4669 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4670 assert!(_mm_comigt_sd(a, b) == 0);
4671 }
4672
4673 #[simd_test(enable = "sse2")]
4674 unsafe fn test_mm_comige_sd() {
4675 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4676 assert!(_mm_comige_sd(a, b) != 0);
4677 }
4678
4679 #[simd_test(enable = "sse2")]
4680 unsafe fn test_mm_comineq_sd() {
4681 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4682 assert!(_mm_comineq_sd(a, b) == 0);
4683 }
4684
4685 #[simd_test(enable = "sse2")]
4686 unsafe fn test_mm_ucomieq_sd() {
4687 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4688 assert!(_mm_ucomieq_sd(a, b) != 0);
4689
4690 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4691 assert!(_mm_ucomieq_sd(a, b) == 0);
4692 }
4693
4694 #[simd_test(enable = "sse2")]
4695 unsafe fn test_mm_ucomilt_sd() {
4696 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4697 assert!(_mm_ucomilt_sd(a, b) == 0);
4698 }
4699
4700 #[simd_test(enable = "sse2")]
4701 unsafe fn test_mm_ucomile_sd() {
4702 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4703 assert!(_mm_ucomile_sd(a, b) != 0);
4704 }
4705
4706 #[simd_test(enable = "sse2")]
4707 unsafe fn test_mm_ucomigt_sd() {
4708 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4709 assert!(_mm_ucomigt_sd(a, b) == 0);
4710 }
4711
4712 #[simd_test(enable = "sse2")]
4713 unsafe fn test_mm_ucomige_sd() {
4714 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4715 assert!(_mm_ucomige_sd(a, b) != 0);
4716 }
4717
4718 #[simd_test(enable = "sse2")]
4719 unsafe fn test_mm_ucomineq_sd() {
4720 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4721 assert!(_mm_ucomineq_sd(a, b) == 0);
4722 }
4723
4724 #[simd_test(enable = "sse2")]
4725 unsafe fn test_mm_movemask_pd() {
4726 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4727 assert_eq!(r, 0b01);
4728
4729 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4730 assert_eq!(r, 0b11);
4731 }
4732
    // 16-byte aligned buffer of four `f64` values, shared by the `_pd` load/store tests
    // in this module that need an aligned base address.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4737
4738 #[simd_test(enable = "sse2")]
4739 unsafe fn test_mm_load_pd() {
4740 let mem = Memory {
4741 data: [1.0f64, 2.0, 3.0, 4.0],
4742 };
4743 let vals = &mem.data;
4744 let d = vals.as_ptr();
4745
4746 let r = _mm_load_pd(d);
4747 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4748 }
4749
4750 #[simd_test(enable = "sse2")]
4751 unsafe fn test_mm_load_sd() {
4752 let a = 1.;
4753 let expected = _mm_setr_pd(a, 0.);
4754 let r = _mm_load_sd(&a);
4755 assert_eq_m128d(r, expected);
4756 }
4757
4758 #[simd_test(enable = "sse2")]
4759 unsafe fn test_mm_loadh_pd() {
4760 let a = _mm_setr_pd(1., 2.);
4761 let b = 3.;
4762 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4763 let r = _mm_loadh_pd(a, &b);
4764 assert_eq_m128d(r, expected);
4765 }
4766
4767 #[simd_test(enable = "sse2")]
4768 unsafe fn test_mm_loadl_pd() {
4769 let a = _mm_setr_pd(1., 2.);
4770 let b = 3.;
4771 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4772 let r = _mm_loadl_pd(a, &b);
4773 assert_eq_m128d(r, expected);
4774 }
4775
4776 #[simd_test(enable = "sse2")]
4777 #[cfg_attr(miri, ignore)]
4780 unsafe fn test_mm_stream_pd() {
4781 #[repr(align(128))]
4782 struct Memory {
4783 pub data: [f64; 2],
4784 }
4785 let a = _mm_set1_pd(7.0);
4786 let mut mem = Memory { data: [-1.0; 2] };
4787
4788 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4789 for i in 0..2 {
4790 assert_eq!(mem.data[i], get_m128d(a, i));
4791 }
4792 }
4793
4794 #[simd_test(enable = "sse2")]
4795 unsafe fn test_mm_store_sd() {
4796 let mut dest = 0.;
4797 let a = _mm_setr_pd(1., 2.);
4798 _mm_store_sd(&mut dest, a);
4799 assert_eq!(dest, _mm_cvtsd_f64(a));
4800 }
4801
4802 #[simd_test(enable = "sse2")]
4803 unsafe fn test_mm_store_pd() {
4804 let mut mem = Memory { data: [0.0f64; 4] };
4805 let vals = &mut mem.data;
4806 let a = _mm_setr_pd(1.0, 2.0);
4807 let d = vals.as_mut_ptr();
4808
4809 _mm_store_pd(d, *black_box(&a));
4810 assert_eq!(vals[0], 1.0);
4811 assert_eq!(vals[1], 2.0);
4812 }
4813
4814 #[simd_test(enable = "sse2")]
4815 unsafe fn test_mm_storeu_pd() {
4816 let mut mem = Memory { data: [0.0f64; 4] };
4817 let vals = &mut mem.data;
4818 let a = _mm_setr_pd(1.0, 2.0);
4819
4820 let mut ofs = 0;
4821 let mut p = vals.as_mut_ptr();
4822
4823 if (p as usize) & 0xf == 0 {
4825 ofs = 1;
4826 p = p.add(1);
4827 }
4828
4829 _mm_storeu_pd(p, *black_box(&a));
4830
4831 if ofs > 0 {
4832 assert_eq!(vals[ofs - 1], 0.0);
4833 }
4834 assert_eq!(vals[ofs + 0], 1.0);
4835 assert_eq!(vals[ofs + 1], 2.0);
4836 }
4837
4838 #[simd_test(enable = "sse2")]
4839 unsafe fn test_mm_storeu_si16() {
4840 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4841 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4842 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4843 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4844 assert_eq_m128i(r, e);
4845 }
4846
4847 #[simd_test(enable = "sse2")]
4848 unsafe fn test_mm_storeu_si32() {
4849 let a = _mm_setr_epi32(1, 2, 3, 4);
4850 let mut r = _mm_setr_epi32(5, 6, 7, 8);
4851 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4852 let e = _mm_setr_epi32(1, 6, 7, 8);
4853 assert_eq_m128i(r, e);
4854 }
4855
4856 #[simd_test(enable = "sse2")]
4857 unsafe fn test_mm_storeu_si64() {
4858 let a = _mm_setr_epi64x(1, 2);
4859 let mut r = _mm_setr_epi64x(3, 4);
4860 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4861 let e = _mm_setr_epi64x(1, 4);
4862 assert_eq_m128i(r, e);
4863 }
4864
4865 #[simd_test(enable = "sse2")]
4866 unsafe fn test_mm_store1_pd() {
4867 let mut mem = Memory { data: [0.0f64; 4] };
4868 let vals = &mut mem.data;
4869 let a = _mm_setr_pd(1.0, 2.0);
4870 let d = vals.as_mut_ptr();
4871
4872 _mm_store1_pd(d, *black_box(&a));
4873 assert_eq!(vals[0], 1.0);
4874 assert_eq!(vals[1], 1.0);
4875 }
4876
4877 #[simd_test(enable = "sse2")]
4878 unsafe fn test_mm_store_pd1() {
4879 let mut mem = Memory { data: [0.0f64; 4] };
4880 let vals = &mut mem.data;
4881 let a = _mm_setr_pd(1.0, 2.0);
4882 let d = vals.as_mut_ptr();
4883
4884 _mm_store_pd1(d, *black_box(&a));
4885 assert_eq!(vals[0], 1.0);
4886 assert_eq!(vals[1], 1.0);
4887 }
4888
4889 #[simd_test(enable = "sse2")]
4890 unsafe fn test_mm_storer_pd() {
4891 let mut mem = Memory { data: [0.0f64; 4] };
4892 let vals = &mut mem.data;
4893 let a = _mm_setr_pd(1.0, 2.0);
4894 let d = vals.as_mut_ptr();
4895
4896 _mm_storer_pd(d, *black_box(&a));
4897 assert_eq!(vals[0], 2.0);
4898 assert_eq!(vals[1], 1.0);
4899 }
4900
4901 #[simd_test(enable = "sse2")]
4902 unsafe fn test_mm_storeh_pd() {
4903 let mut dest = 0.;
4904 let a = _mm_setr_pd(1., 2.);
4905 _mm_storeh_pd(&mut dest, a);
4906 assert_eq!(dest, get_m128d(a, 1));
4907 }
4908
4909 #[simd_test(enable = "sse2")]
4910 unsafe fn test_mm_storel_pd() {
4911 let mut dest = 0.;
4912 let a = _mm_setr_pd(1., 2.);
4913 _mm_storel_pd(&mut dest, a);
4914 assert_eq!(dest, _mm_cvtsd_f64(a));
4915 }
4916
4917 #[simd_test(enable = "sse2")]
4918 unsafe fn test_mm_loadr_pd() {
4919 let mut mem = Memory {
4920 data: [1.0f64, 2.0, 3.0, 4.0],
4921 };
4922 let vals = &mut mem.data;
4923 let d = vals.as_ptr();
4924
4925 let r = _mm_loadr_pd(d);
4926 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4927 }
4928
4929 #[simd_test(enable = "sse2")]
4930 unsafe fn test_mm_loadu_pd() {
4931 let mut mem = Memory {
4932 data: [1.0f64, 2.0, 3.0, 4.0],
4933 };
4934 let vals = &mut mem.data;
4935 let mut d = vals.as_ptr();
4936
4937 let mut offset = 0;
4939 if (d as usize) & 0xf == 0 {
4940 offset = 1;
4941 d = d.add(offset);
4942 }
4943
4944 let r = _mm_loadu_pd(d);
4945 let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4946 assert_eq_m128d(r, e);
4947 }
4948
4949 #[simd_test(enable = "sse2")]
4950 unsafe fn test_mm_loadu_si16() {
4951 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4952 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4953 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4954 }
4955
4956 #[simd_test(enable = "sse2")]
4957 unsafe fn test_mm_loadu_si32() {
4958 let a = _mm_setr_epi32(1, 2, 3, 4);
4959 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4960 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4961 }
4962
4963 #[simd_test(enable = "sse2")]
4964 unsafe fn test_mm_loadu_si64() {
4965 let a = _mm_setr_epi64x(5, 6);
4966 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
4967 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4968 }
4969
4970 #[simd_test(enable = "sse2")]
4971 unsafe fn test_mm_cvtpd_ps() {
4972 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4973 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4974
4975 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4976 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
4977
4978 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
4979 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
4980
4981 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
4982 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
4983 }
4984
4985 #[simd_test(enable = "sse2")]
4986 unsafe fn test_mm_cvtps_pd() {
4987 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
4988 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
4989
4990 let r = _mm_cvtps_pd(_mm_setr_ps(
4991 f32::MAX,
4992 f32::INFINITY,
4993 f32::NEG_INFINITY,
4994 f32::MIN,
4995 ));
4996 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
4997 }
4998
4999 #[simd_test(enable = "sse2")]
5000 unsafe fn test_mm_cvtpd_epi32() {
5001 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5002 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5003
5004 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5005 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5006
5007 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5008 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5009
5010 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5011 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5012
5013 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5014 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5015 }
5016
5017 #[simd_test(enable = "sse2")]
// Only the low f64 lane is converted to i32. Values with no i32
// representation (out of range or NaN) are expected to yield `i32::MIN`.
unsafe fn test_mm_cvtsd_si32() {
    let in_range = _mm_setr_pd(-2.0, 5.0);
    assert_eq!(_mm_cvtsd_si32(in_range), -2);

    let unrepresentable = [
        _mm_setr_pd(f64::MAX, f64::MIN),
        _mm_setr_pd(f64::NAN, f64::NAN),
    ];
    for input in unrepresentable {
        assert_eq!(_mm_cvtsd_si32(input), i32::MIN);
    }
}
5028
5029 #[simd_test(enable = "sse2")]
5030 unsafe fn test_mm_cvtsd_ss() {
5031 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5032 let b = _mm_setr_pd(2.0, -5.0);
5033
5034 let r = _mm_cvtsd_ss(a, b);
5035
5036 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5037
5038 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5039 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5040
5041 let r = _mm_cvtsd_ss(a, b);
5042
5043 assert_eq_m128(
5044 r,
5045 _mm_setr_ps(
5046 f32::INFINITY,
5047 f32::NEG_INFINITY,
5048 f32::MAX,
5049 f32::NEG_INFINITY,
5050 ),
5051 );
5052 }
5053
5054 #[simd_test(enable = "sse2")]
// Extracting the scalar must return the low lane and ignore the high one.
unsafe fn test_mm_cvtsd_f64() {
    let source = _mm_setr_pd(-1.1, 2.2);
    let low_lane = _mm_cvtsd_f64(source);
    assert_eq!(low_lane, -1.1);
}
5059
5060 #[simd_test(enable = "sse2")]
5061 unsafe fn test_mm_cvtss_sd() {
5062 let a = _mm_setr_pd(-1.1, 2.2);
5063 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5064
5065 let r = _mm_cvtss_sd(a, b);
5066 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5067
5068 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5069 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5070
5071 let r = _mm_cvtss_sd(a, b);
5072 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5073 }
5074
5075 #[simd_test(enable = "sse2")]
5076 unsafe fn test_mm_cvttpd_epi32() {
5077 let a = _mm_setr_pd(-1.1, 2.2);
5078 let r = _mm_cvttpd_epi32(a);
5079 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5080
5081 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5082 let r = _mm_cvttpd_epi32(a);
5083 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5084 }
5085
5086 #[simd_test(enable = "sse2")]
// Only the low lane is converted, with truncation toward zero. A negative
// infinite low lane is expected to yield `i32::MIN`.
unsafe fn test_mm_cvttsd_si32() {
    let cases = [
        (_mm_setr_pd(-1.1, 2.2), -1),
        (_mm_setr_pd(f64::NEG_INFINITY, f64::NAN), i32::MIN),
    ];

    for (input, expected) in cases {
        assert_eq!(_mm_cvttsd_si32(input), expected);
    }
}
5096
5097 #[simd_test(enable = "sse2")]
5098 unsafe fn test_mm_cvttps_epi32() {
5099 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5100 let r = _mm_cvttps_epi32(a);
5101 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5102
5103 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5104 let r = _mm_cvttps_epi32(a);
5105 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5106 }
5107
5108 #[simd_test(enable = "sse2")]
5109 unsafe fn test_mm_set_sd() {
5110 let r = _mm_set_sd(-1.0_f64);
5111 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5112 }
5113
5114 #[simd_test(enable = "sse2")]
5115 unsafe fn test_mm_set1_pd() {
5116 let r = _mm_set1_pd(-1.0_f64);
5117 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5118 }
5119
5120 #[simd_test(enable = "sse2")]
5121 unsafe fn test_mm_set_pd1() {
5122 let r = _mm_set_pd1(-2.0_f64);
5123 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5124 }
5125
5126 #[simd_test(enable = "sse2")]
5127 unsafe fn test_mm_set_pd() {
5128 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5129 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5130 }
5131
5132 #[simd_test(enable = "sse2")]
// `_mm_setr_pd` takes the low lane first.
//
// Comparing the result against a second, identical `_mm_setr_pd` call can
// never fail, so each lane is read back on its own instead: the low lane via
// `_mm_cvtsd_f64`, the high lane by first moving it down with
// `_mm_unpackhi_pd`.
unsafe fn test_mm_setr_pd() {
    let r = _mm_setr_pd(1.0_f64, -5.0_f64);

    let low = _mm_cvtsd_f64(r);
    let high = _mm_cvtsd_f64(_mm_unpackhi_pd(r, r));

    assert_eq!(low, 1.0_f64);
    assert_eq!(high, -5.0_f64);
}
5137
5138 #[simd_test(enable = "sse2")]
5139 unsafe fn test_mm_setzero_pd() {
5140 let r = _mm_setzero_pd();
5141 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5142 }
5143
5144 #[simd_test(enable = "sse2")]
5145 unsafe fn test_mm_load1_pd() {
5146 let d = -5.0;
5147 let r = _mm_load1_pd(&d);
5148 assert_eq_m128d(r, _mm_setr_pd(d, d));
5149 }
5150
5151 #[simd_test(enable = "sse2")]
5152 unsafe fn test_mm_load_pd1() {
5153 let d = -5.0;
5154 let r = _mm_load_pd1(&d);
5155 assert_eq_m128d(r, _mm_setr_pd(d, d));
5156 }
5157
5158 #[simd_test(enable = "sse2")]
5159 unsafe fn test_mm_unpackhi_pd() {
5160 let a = _mm_setr_pd(1.0, 2.0);
5161 let b = _mm_setr_pd(3.0, 4.0);
5162 let r = _mm_unpackhi_pd(a, b);
5163 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5164 }
5165
5166 #[simd_test(enable = "sse2")]
5167 unsafe fn test_mm_unpacklo_pd() {
5168 let a = _mm_setr_pd(1.0, 2.0);
5169 let b = _mm_setr_pd(3.0, 4.0);
5170 let r = _mm_unpacklo_pd(a, b);
5171 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5172 }
5173
5174 #[simd_test(enable = "sse2")]
5175 unsafe fn test_mm_shuffle_pd() {
5176 let a = _mm_setr_pd(1., 2.);
5177 let b = _mm_setr_pd(3., 4.);
5178 let expected = _mm_setr_pd(1., 3.);
5179 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5180 assert_eq_m128d(r, expected);
5181 }
5182
5183 #[simd_test(enable = "sse2")]
5184 unsafe fn test_mm_move_sd() {
5185 let a = _mm_setr_pd(1., 2.);
5186 let b = _mm_setr_pd(3., 4.);
5187 let expected = _mm_setr_pd(3., 2.);
5188 let r = _mm_move_sd(a, b);
5189 assert_eq_m128d(r, expected);
5190 }
5191
5192 #[simd_test(enable = "sse2")]
5193 unsafe fn test_mm_castpd_ps() {
5194 let a = _mm_set1_pd(0.);
5195 let expected = _mm_set1_ps(0.);
5196 let r = _mm_castpd_ps(a);
5197 assert_eq_m128(r, expected);
5198 }
5199
5200 #[simd_test(enable = "sse2")]
5201 unsafe fn test_mm_castpd_si128() {
5202 let a = _mm_set1_pd(0.);
5203 let expected = _mm_set1_epi64x(0);
5204 let r = _mm_castpd_si128(a);
5205 assert_eq_m128i(r, expected);
5206 }
5207
5208 #[simd_test(enable = "sse2")]
5209 unsafe fn test_mm_castps_pd() {
5210 let a = _mm_set1_ps(0.);
5211 let expected = _mm_set1_pd(0.);
5212 let r = _mm_castps_pd(a);
5213 assert_eq_m128d(r, expected);
5214 }
5215
5216 #[simd_test(enable = "sse2")]
5217 unsafe fn test_mm_castps_si128() {
5218 let a = _mm_set1_ps(0.);
5219 let expected = _mm_set1_epi32(0);
5220 let r = _mm_castps_si128(a);
5221 assert_eq_m128i(r, expected);
5222 }
5223
5224 #[simd_test(enable = "sse2")]
5225 unsafe fn test_mm_castsi128_pd() {
5226 let a = _mm_set1_epi64x(0);
5227 let expected = _mm_set1_pd(0.);
5228 let r = _mm_castsi128_pd(a);
5229 assert_eq_m128d(r, expected);
5230 }
5231
5232 #[simd_test(enable = "sse2")]
5233 unsafe fn test_mm_castsi128_ps() {
5234 let a = _mm_set1_epi32(0);
5235 let expected = _mm_set1_ps(0.);
5236 let r = _mm_castsi128_ps(a);
5237 assert_eq_m128(r, expected);
5238 }
5239}