1use crate::{
4    core_arch::{simd::*, x86::*},
5    intrinsics::simd::*,
6    intrinsics::sqrtf32,
7    mem, ptr,
8};
9
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13#[inline]
18#[target_feature(enable = "sse")]
19#[cfg_attr(test, assert_instr(addss))]
20#[stable(feature = "simd_x86", since = "1.27.0")]
21pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
22    simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b))
23}
24
/// Lane-wise addition of `a` and `b` through `simd_add`.
///
/// The test build asserts that this lowers to the `addps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(addps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
    simd_add(a, b)
}
36
37#[inline]
42#[target_feature(enable = "sse")]
43#[cfg_attr(test, assert_instr(subss))]
44#[stable(feature = "simd_x86", since = "1.27.0")]
45pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
46    simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b))
47}
48
/// Lane-wise subtraction `a - b` through `simd_sub`.
///
/// The test build asserts that this lowers to the `subps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(subps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
    simd_sub(a, b)
}
60
61#[inline]
66#[target_feature(enable = "sse")]
67#[cfg_attr(test, assert_instr(mulss))]
68#[stable(feature = "simd_x86", since = "1.27.0")]
69pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
70    simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b))
71}
72
/// Lane-wise multiplication of `a` and `b` through `simd_mul`.
///
/// The test build asserts that this lowers to the `mulps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(mulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
    simd_mul(a, b)
}
84
85#[inline]
90#[target_feature(enable = "sse")]
91#[cfg_attr(test, assert_instr(divss))]
92#[stable(feature = "simd_x86", since = "1.27.0")]
93pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
94    simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b))
95}
96
/// Lane-wise division `a / b` through `simd_div`.
///
/// The test build asserts that this lowers to the `divps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(divps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
    simd_div(a, b)
}
108
109#[inline]
114#[target_feature(enable = "sse")]
115#[cfg_attr(test, assert_instr(sqrtss))]
116#[stable(feature = "simd_x86", since = "1.27.0")]
117pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 {
118    simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a)))
119}
120
/// Lane-wise square root of `a` through `simd_fsqrt`.
///
/// The test build asserts that this lowers to the `sqrtps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 {
    simd_fsqrt(a)
}
132
/// Forwards `a` to the `llvm.x86.sse.rcp.ss` intrinsic (`rcpss`, declared in
/// this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably an approximate reciprocal of lane 0 with the
/// upper lanes taken from `a` — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 {
    rcpss(a)
}
144
/// Forwards `a` to the `llvm.x86.sse.rcp.ps` intrinsic (`rcpps`, declared in
/// this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably an approximate per-lane reciprocal — confirm
/// against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 {
    rcpps(a)
}
156
/// Forwards `a` to the `llvm.x86.sse.rsqrt.ss` intrinsic (`rsqrtss`, declared
/// in this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably an approximate reciprocal square root of lane 0
/// with the upper lanes taken from `a` — confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 {
    rsqrtss(a)
}
168
/// Forwards `a` to the `llvm.x86.sse.rsqrt.ps` intrinsic (`rsqrtps`, declared
/// in this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably an approximate per-lane reciprocal square root —
/// confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(rsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 {
    rsqrtps(a)
}
180
/// Forwards `a` and `b` to the `llvm.x86.sse.min.ss` intrinsic (`minss`,
/// declared in this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably the minimum of the two lowest lanes, with the
/// upper lanes taken from `a`; NaN handling follows the hardware instruction
/// — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
    minss(a, b)
}
193
/// Forwards `a` and `b` to the `llvm.x86.sse.min.ps` intrinsic (`minps`,
/// declared in this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably a per-lane minimum whose NaN handling follows the
/// hardware instruction — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(minps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
    minps(a, b)
}
206
/// Forwards `a` and `b` to the `llvm.x86.sse.max.ss` intrinsic (`maxss`,
/// declared in this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably the maximum of the two lowest lanes, with the
/// upper lanes taken from `a`; NaN handling follows the hardware instruction
/// — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
    maxss(a, b)
}
219
/// Forwards `a` and `b` to the `llvm.x86.sse.max.ps` intrinsic (`maxps`,
/// declared in this file's `extern` block) and returns its result.
///
/// NOTE(review): presumably a per-lane maximum whose NaN handling follows the
/// hardware instruction — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(maxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
    maxps(a, b)
}
232
233#[inline]
237#[target_feature(enable = "sse")]
238#[cfg_attr(
240    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
241    assert_instr(andps)
242)]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
245    let a: __m128i = mem::transmute(a);
246    let b: __m128i = mem::transmute(b);
247    mem::transmute(simd_and(a, b))
248}
249
250#[inline]
257#[target_feature(enable = "sse")]
258#[cfg_attr(
261    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
262    assert_instr(andnps)
263)]
264#[stable(feature = "simd_x86", since = "1.27.0")]
265pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
266    let a: __m128i = mem::transmute(a);
267    let b: __m128i = mem::transmute(b);
268    let mask: __m128i = mem::transmute(i32x4::splat(-1));
269    mem::transmute(simd_and(simd_xor(mask, a), b))
270}
271
272#[inline]
276#[target_feature(enable = "sse")]
277#[cfg_attr(
279    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
280    assert_instr(orps)
281)]
282#[stable(feature = "simd_x86", since = "1.27.0")]
283pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
284    let a: __m128i = mem::transmute(a);
285    let b: __m128i = mem::transmute(b);
286    mem::transmute(simd_or(a, b))
287}
288
289#[inline]
294#[target_feature(enable = "sse")]
295#[cfg_attr(
297    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
298    assert_instr(xorps)
299)]
300#[stable(feature = "simd_x86", since = "1.27.0")]
301pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
302    let a: __m128i = mem::transmute(a);
303    let b: __m128i = mem::transmute(b);
304    mem::transmute(simd_xor(a, b))
305}
306
/// Scalar "equal" comparison: calls `cmpss(a, b, 0)`.
///
/// The test build asserts the `cmpeqss` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 becomes an all-ones/all-zeros mask
/// and lanes 1..=3 come from `a`; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 0)
}
319
/// Scalar "less than" comparison: calls `cmpss(a, b, 1)`.
///
/// The test build asserts the `cmpltss` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 becomes an all-ones/all-zeros mask
/// and lanes 1..=3 come from `a`; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 1)
}
333
/// Scalar "less than or equal" comparison: calls `cmpss(a, b, 2)`.
///
/// The test build asserts the `cmpless` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 becomes an all-ones/all-zeros mask
/// and lanes 1..=3 come from `a`; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 2)
}
347
348#[inline]
355#[target_feature(enable = "sse")]
356#[cfg_attr(test, assert_instr(cmpltss))]
357#[stable(feature = "simd_x86", since = "1.27.0")]
358pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 {
359    simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3])
360}
361
362#[inline]
369#[target_feature(enable = "sse")]
370#[cfg_attr(test, assert_instr(cmpless))]
371#[stable(feature = "simd_x86", since = "1.27.0")]
372pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 {
373    simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3])
374}
375
/// Scalar "not equal" comparison: calls `cmpss(a, b, 4)`.
///
/// The test build asserts the `cmpneqss` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 becomes an all-ones/all-zeros mask
/// and lanes 1..=3 come from `a`; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 4)
}
389
/// Scalar "not less than" comparison: calls `cmpss(a, b, 5)`.
///
/// The test build asserts the `cmpnltss` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 becomes an all-ones/all-zeros mask
/// and lanes 1..=3 come from `a`; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 5)
}
403
/// Scalar "not less than or equal" comparison: calls `cmpss(a, b, 6)`.
///
/// The test build asserts the `cmpnless` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 becomes an all-ones/all-zeros mask
/// and lanes 1..=3 come from `a`; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnless))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 6)
}
417
418#[inline]
425#[target_feature(enable = "sse")]
426#[cfg_attr(test, assert_instr(cmpnltss))]
427#[stable(feature = "simd_x86", since = "1.27.0")]
428pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 {
429    simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3])
430}
431
432#[inline]
439#[target_feature(enable = "sse")]
440#[cfg_attr(test, assert_instr(cmpnless))]
441#[stable(feature = "simd_x86", since = "1.27.0")]
442pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 {
443    simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3])
444}
445
/// Scalar "ordered" comparison: calls `cmpss(a, b, 7)`.
///
/// The test build asserts the `cmpordss` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 is all-ones when neither lowest
/// lane is NaN, and lanes 1..=3 come from `a`; confirm against Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 7)
}
459
/// Scalar "unordered" comparison: calls `cmpss(a, b, 3)`.
///
/// The test build asserts the `cmpunordss` instruction. `cmpss` is declared
/// outside this chunk — presumably lane 0 is all-ones when either lowest
/// lane is NaN, and lanes 1..=3 come from `a`; confirm against Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 {
    cmpss(a, b, 3)
}
473
/// Packed "equal" comparison: calls `cmpps(a, b, 0)`
/// (`llvm.x86.sse.cmp.ps` with predicate immediate 0).
///
/// The test build asserts the `cmpeqps` instruction. Presumably each result
/// lane is an all-ones or all-zeros mask — confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpeqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 0)
}
486
/// Packed "less than" comparison: calls `cmpps(a, b, 1)`
/// (`llvm.x86.sse.cmp.ps` with predicate immediate 1).
///
/// The test build asserts the `cmpltps` instruction. Presumably each result
/// lane is an all-ones or all-zeros mask — confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 1)
}
499
/// Packed "less than or equal" comparison: calls `cmpps(a, b, 2)`
/// (`llvm.x86.sse.cmp.ps` with predicate immediate 2).
///
/// The test build asserts the `cmpleps` instruction. Presumably each result
/// lane is an all-ones or all-zeros mask — confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 2)
}
513
/// Packed "greater than" comparison, expressed as "less than" with the
/// operands swapped: `cmpps(b, a, 1)`.
///
/// The test build therefore asserts the `cmpltps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 1)
}
526
/// Packed "greater than or equal" comparison, expressed as "less than or
/// equal" with the operands swapped: `cmpps(b, a, 2)`.
///
/// The test build therefore asserts the `cmpleps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 2)
}
540
/// Packed "not equal" comparison: calls `cmpps(a, b, 4)`
/// (`llvm.x86.sse.cmp.ps` with predicate immediate 4).
///
/// The test build asserts the `cmpneqps` instruction. Presumably each result
/// lane is an all-ones or all-zeros mask — confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpneqps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 4)
}
553
/// Packed "not less than" comparison: calls `cmpps(a, b, 5)`
/// (`llvm.x86.sse.cmp.ps` with predicate immediate 5).
///
/// The test build asserts the `cmpnltps` instruction. Presumably each result
/// lane is an all-ones or all-zeros mask — confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 5)
}
567
/// Packed "not less than or equal" comparison: calls `cmpps(a, b, 6)`
/// (`llvm.x86.sse.cmp.ps` with predicate immediate 6).
///
/// The test build asserts the `cmpnleps` instruction. Presumably each result
/// lane is an all-ones or all-zeros mask — confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(a, b, 6)
}
581
/// Packed "not greater than" comparison, expressed as "not less than" with
/// the operands swapped: `cmpps(b, a, 5)`.
///
/// The test build therefore asserts the `cmpnltps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnltps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 5)
}
595
/// Packed "not greater than or equal" comparison, expressed as "not less
/// than or equal" with the operands swapped: `cmpps(b, a, 6)`.
///
/// The test build therefore asserts the `cmpnleps` instruction.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpnleps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 6)
}
609
/// Packed "ordered" comparison: calls `cmpps(b, a, 7)`.
///
/// Note the operands are passed as `(b, a)`; presumably the predicate is
/// symmetric (true when neither lane is NaN), so the order does not affect
/// the result — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 7)
}
623
/// Packed "unordered" comparison: calls `cmpps(b, a, 3)`.
///
/// Note the operands are passed as `(b, a)`; presumably the predicate is
/// symmetric (true when either lane is NaN), so the order does not affect
/// the result — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cmpunordps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 {
    cmpps(b, a, 3)
}
637
/// Forwards to `comieq_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `comiss` instruction. `comieq_ss` is declared
/// outside this chunk — presumably returns 1 when the lowest lanes compare
/// equal and 0 otherwise; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 {
    comieq_ss(a, b)
}
649
/// Forwards to `comilt_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `comiss` instruction. `comilt_ss` is declared
/// outside this chunk — presumably returns 1 when the lowest lane of `a` is
/// less than that of `b` and 0 otherwise; confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 {
    comilt_ss(a, b)
}
661
/// Forwards to `comile_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `comiss` instruction. `comile_ss` is declared
/// outside this chunk — presumably returns 1 when the lowest lane of `a` is
/// less than or equal to that of `b` and 0 otherwise; confirm against the
/// Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 {
    comile_ss(a, b)
}
674
/// Forwards to `comigt_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `comiss` instruction. `comigt_ss` is declared
/// outside this chunk — presumably returns 1 when the lowest lane of `a` is
/// greater than that of `b` and 0 otherwise; confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 {
    comigt_ss(a, b)
}
687
/// Forwards to `comige_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `comiss` instruction. `comige_ss` is declared
/// outside this chunk — presumably returns 1 when the lowest lane of `a` is
/// greater than or equal to that of `b` and 0 otherwise; confirm against the
/// Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 {
    comige_ss(a, b)
}
700
/// Forwards to `comineq_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `comiss` instruction. `comineq_ss` is declared
/// outside this chunk — presumably returns 1 when the lowest lanes are not
/// equal and 0 otherwise; confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(comiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 {
    comineq_ss(a, b)
}
712
/// Forwards to `ucomieq_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `ucomiss` instruction. `ucomieq_ss` is declared
/// outside this chunk — presumably the equality test of `_mm_comieq_ss` with
/// different (quieter) NaN signalling; confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 {
    ucomieq_ss(a, b)
}
725
/// Forwards to `ucomilt_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `ucomiss` instruction. `ucomilt_ss` is declared
/// outside this chunk — presumably the "less than" test of `_mm_comilt_ss`
/// with different (quieter) NaN signalling; confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 {
    ucomilt_ss(a, b)
}
739
/// Forwards to `ucomile_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `ucomiss` instruction. `ucomile_ss` is declared
/// outside this chunk — presumably the "less than or equal" test of
/// `_mm_comile_ss` with different (quieter) NaN signalling; confirm against
/// the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 {
    ucomile_ss(a, b)
}
753
/// Forwards to `ucomigt_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `ucomiss` instruction. `ucomigt_ss` is declared
/// outside this chunk — presumably the "greater than" test of
/// `_mm_comigt_ss` with different (quieter) NaN signalling; confirm against
/// the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 {
    ucomigt_ss(a, b)
}
767
/// Forwards to `ucomige_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `ucomiss` instruction. `ucomige_ss` is declared
/// outside this chunk — presumably the "greater than or equal" test of
/// `_mm_comige_ss` with different (quieter) NaN signalling; confirm against
/// the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 {
    ucomige_ss(a, b)
}
781
/// Forwards to `ucomineq_ss(a, b)` and returns its `i32` result.
///
/// The test build asserts the `ucomiss` instruction. `ucomineq_ss` is
/// declared outside this chunk — presumably the "not equal" test of
/// `_mm_comineq_ss` with different (quieter) NaN signalling; confirm against
/// the Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ucomiss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 {
    ucomineq_ss(a, b)
}
794
/// Forwards `a` to `cvtss2si` and returns the resulting `i32`.
///
/// The test build asserts the `cvtss2si` instruction. The helper is declared
/// outside this chunk — presumably converts the lowest `f32` lane to an
/// integer using the current MXCSR rounding mode; confirm against Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 {
    cvtss2si(a)
}
811
/// Alias for [`_mm_cvtss_si32`]: forwards `a` to it unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 {
    _mm_cvtss_si32(a)
}
822
/// Forwards `a` to `cvttss2si` and returns the resulting `i32`.
///
/// The test build asserts the `cvttss2si` instruction. The helper is declared
/// outside this chunk — presumably converts the lowest `f32` lane to an
/// integer with truncation toward zero; confirm against the Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 {
    cvttss2si(a)
}
841
/// Alias for [`_mm_cvttss_si32`]: forwards `a` to it unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 {
    _mm_cvttss_si32(a)
}
852
/// Returns lane 0 of `a` as an `f32` (`simd_extract!(a, 0)`).
#[inline]
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 {
    simd_extract!(a, 0)
}
864
/// Forwards `a` and the integer `b` to `cvtsi2ss` and returns its result.
///
/// The test build asserts the `cvtsi2ss` instruction. The helper is declared
/// outside this chunk — presumably `b` is converted to `f32` and placed in
/// lane 0 while lanes 1..=3 are copied from `a`; confirm against Intel docs.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
    cvtsi2ss(a, b)
}
879
/// Alias for [`_mm_cvtsi32_ss`]: forwards `a` and `b` to it unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
    _mm_cvtsi32_ss(a, b)
}
890
/// Builds a vector with `a` in lane 0 and `0.0` in lanes 1..=3.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ss(a: f32) -> __m128 {
    __m128([a, 0.0, 0.0, 0.0])
}
902
903#[inline]
907#[target_feature(enable = "sse")]
908#[cfg_attr(test, assert_instr(shufps))]
909#[stable(feature = "simd_x86", since = "1.27.0")]
910pub unsafe fn _mm_set1_ps(a: f32) -> __m128 {
911    __m128([a, a, a, a])
912}
913
/// Alias for [`_mm_set1_ps`]: forwards `a` to it unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ps1(a: f32) -> __m128 {
    _mm_set1_ps(a)
}
924
/// Builds a vector from four values given highest lane first.
///
/// The arguments are stored in reverse order: `d` lands in lane 0, `c` in
/// lane 1, `b` in lane 2 and `a` in lane 3. Use [`_mm_setr_ps`] for the
/// lane-0-first ordering.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([d, c, b, a])
}
951
/// Builds a vector from four values given lowest lane first: `a` lands in
/// lane 0, `b` in lane 1, `c` in lane 2 and `d` in lane 3.
///
/// The instruction asserted by the test build depends on the target
/// (`unpcklps` on MSVC or x86_64, `movaps` on non-MSVC 32-bit x86).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(
    all(test, any(target_env = "msvc", target_arch = "x86_64")),
    assert_instr(unpcklps)
)]
#[cfg_attr(
    all(test, all(not(target_env = "msvc"), target_arch = "x86")),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
    __m128([a, b, c, d])
}
977
/// Returns a vector whose bits are all zero, produced at compile time with
/// `const { mem::zeroed() }`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_ps() -> __m128 {
    const { mem::zeroed() }
}
988
989#[inline]
992#[allow(non_snake_case)]
993#[unstable(feature = "stdarch_x86_mm_shuffle", issue = "111147")]
994pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
995    ((z << 6) | (y << 4) | (x << 2) | w) as i32
996}
997
/// Shuffles lanes of `a` and `b` according to the 8-bit immediate `MASK`.
///
/// Result lanes 0 and 1 are picked from `a` by `MASK` bits 1:0 and 3:2;
/// result lanes 2 and 3 are picked from `b` by bits 5:4 and 7:6 (the `+ 4`
/// offset addresses the second shuffle operand).
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(shufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
    // Presumably rejects, at compile time, any MASK that does not fit in 8 unsigned bits.
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11) + 4,
            ((MASK as u32 >> 6) & 0b11) + 4,
        ],
    )
}
1029
/// Interleaves the upper halves of `a` and `b`.
///
/// With shuffle indices `[2, 6, 3, 7]` the result is `[a2, b2, a3, b3]`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [2, 6, 3, 7])
}
1041
/// Interleaves the lower halves of `a` and `b`.
///
/// With shuffle indices `[0, 4, 1, 5]` the result is `[a0, b0, a1, b1]`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [0, 4, 1, 5])
}
1053
/// Combines the upper half of `b` with the upper half of `a`.
///
/// With shuffle indices `[6, 7, 2, 3]` the result is `[b2, b3, a2, a3]`.
/// The `movhlps` instruction assertion is skipped on MSVC targets.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [6, 7, 2, 3])
}
1066
/// Combines the lower half of `a` with the lower half of `b`.
///
/// With shuffle indices `[0, 1, 4, 5]` the result is `[a0, a1, b0, b1]`.
/// The `movlhps` instruction assertion is skipped on MSVC targets.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [0, 1, 4, 5])
}
1078
1079#[inline]
1086#[target_feature(enable = "sse")]
1087#[cfg_attr(test, assert_instr(movmskps))]
1088#[stable(feature = "simd_x86", since = "1.27.0")]
1089pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
1090    let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO);
1093    simd_bitmask::<i32x4, u8>(mask).into()
1094}
1095
/// Reads one `f32` from `p` into lane 0 and sets lanes 1..=3 to `0.0`.
///
/// # Safety
///
/// `p` is dereferenced directly, so it must be valid for an `f32` read.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
    __m128([*p, 0.0, 0.0, 0.0])
}
1109
1110#[inline]
1118#[target_feature(enable = "sse")]
1119#[cfg_attr(test, assert_instr(movss))]
1120#[stable(feature = "simd_x86", since = "1.27.0")]
1121pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
1122    let a = *p;
1123    __m128([a, a, a, a])
1124}
1125
/// Alias for [`_mm_load1_ps`]: forwards `p` to it unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
    _mm_load1_ps(p)
}
1136
/// Loads a whole `__m128` by dereferencing `p` as `*const __m128`.
///
/// # Safety
///
/// Because the pointer is dereferenced as `__m128`, `p` must be valid for a
/// read of a full `__m128` and aligned for that type. Use [`_mm_loadu_ps`]
/// when the alignment is not guaranteed.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
    *(p as *const __m128)
}
1155
1156#[inline]
1166#[target_feature(enable = "sse")]
1167#[cfg_attr(test, assert_instr(movups))]
1168#[stable(feature = "simd_x86", since = "1.27.0")]
1169pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
1170    let mut dst = _mm_undefined_ps();
1173    ptr::copy_nonoverlapping(
1174        p as *const u8,
1175        ptr::addr_of_mut!(dst) as *mut u8,
1176        mem::size_of::<__m128>(),
1177    );
1178    dst
1179}
1180
1181#[inline]
1203#[target_feature(enable = "sse")]
1204#[cfg_attr(test, assert_instr(movaps))]
1205#[stable(feature = "simd_x86", since = "1.27.0")]
1206pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
1207    let a = _mm_load_ps(p);
1208    simd_shuffle!(a, a, [3, 2, 1, 0])
1209}
1210
/// Writes lane 0 of `a` to `*p`.
///
/// # Safety
///
/// `p` is written through directly, so it must be valid for an `f32` write.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
    *p = simd_extract!(a, 0);
}
1223
1224#[inline]
1243#[target_feature(enable = "sse")]
1244#[cfg_attr(test, assert_instr(movaps))]
1245#[stable(feature = "simd_x86", since = "1.27.0")]
1246#[allow(clippy::cast_ptr_alignment)]
1247pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
1248    let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]);
1249    *(p as *mut __m128) = b;
1250}
1251
/// Alias for [`_mm_store1_ps`]: forwards `p` and `a` to it unchanged.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
    _mm_store1_ps(p, a);
}
1262
/// Stores the whole vector `a` at `p` by writing through `p` as
/// `*mut __m128`.
///
/// # Safety
///
/// `p` must be valid for a write of a full `__m128` and aligned for that
/// type. Use [`_mm_storeu_ps`] when the alignment is not guaranteed.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
    *(p as *mut __m128) = a;
}
1282
1283#[inline]
1291#[target_feature(enable = "sse")]
1292#[cfg_attr(test, assert_instr(movups))]
1293#[stable(feature = "simd_x86", since = "1.27.0")]
1294pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
1295    ptr::copy_nonoverlapping(
1296        ptr::addr_of!(a) as *const u8,
1297        p as *mut u8,
1298        mem::size_of::<__m128>(),
1299    );
1300}
1301
1302#[inline]
1319#[target_feature(enable = "sse")]
1320#[cfg_attr(test, assert_instr(movaps))]
1321#[stable(feature = "simd_x86", since = "1.27.0")]
1322#[allow(clippy::cast_ptr_alignment)]
1323pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
1324    let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]);
1325    *(p as *mut __m128) = b;
1326}
1327
/// Returns `a` with its lowest lane replaced by the lowest lane of `b`.
///
/// With shuffle indices `[4, 1, 2, 3]` the result is `[b0, a1, a2, a3]`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
    simd_shuffle!(a, b, [4, 1, 2, 3])
}
1344
/// Calls the `sfence` helper (declared outside this chunk).
///
/// The test build asserts the `sfence` instruction — presumably a store
/// fence ordering earlier stores before later ones; confirm against the
/// Intel documentation.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(sfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sfence() {
    sfence()
}
1419
1420#[inline]
1435#[target_feature(enable = "sse")]
1436#[cfg_attr(test, assert_instr(stmxcsr))]
1437#[stable(feature = "simd_x86", since = "1.27.0")]
1438#[deprecated(
1439    since = "1.75.0",
1440    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1441)]
1442pub unsafe fn _mm_getcsr() -> u32 {
1443    let mut result = 0_i32;
1444    stmxcsr(ptr::addr_of_mut!(result) as *mut i8);
1445    result as u32
1446}
1447
/// Writes `val` to the MXCSR control/status register by passing a pointer to
/// it to `ldmxcsr` (declared outside this chunk).
///
/// Deprecated: see the deprecation note.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(ldmxcsr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[deprecated(
    since = "1.75.0",
    note = "see `_mm_setcsr` documentation - use inline assembly instead"
)]
pub unsafe fn _mm_setcsr(val: u32) {
    ldmxcsr(ptr::addr_of!(val) as *const i8);
}
1592
/// Exception-state bit 0 of the control/status word (presumably "invalid
/// operation", per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INVALID: u32 = 0x0001;
/// Exception-state bit 1 (presumably "denormal operand", per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DENORM: u32 = 0x0002;
/// Exception-state bit 2 (presumably "divide by zero", per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004;
/// Exception-state bit 3 (presumably "overflow", per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008;
/// Exception-state bit 4 (presumably "underflow", per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010;
/// Exception-state bit 5 (presumably "inexact result", per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_INEXACT: u32 = 0x0020;
/// Union of the six `_MM_EXCEPT_*` bits (bits 0..=5); used by
/// `_MM_GET_EXCEPTION_STATE` / `_MM_SET_EXCEPTION_STATE` to isolate them.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_EXCEPT_MASK: u32 = 0x003f;
1614
/// Exception-mask bit 7 of the control/status word (presumably masks the
/// "invalid operation" exception, per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INVALID: u32 = 0x0080;
/// Exception-mask bit 8 (presumably masks "denormal operand").
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DENORM: u32 = 0x0100;
/// Exception-mask bit 9 (presumably masks "divide by zero").
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_DIV_ZERO: u32 = 0x0200;
/// Exception-mask bit 10 (presumably masks "overflow").
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_OVERFLOW: u32 = 0x0400;
/// Exception-mask bit 11 (presumably masks "underflow").
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_UNDERFLOW: u32 = 0x0800;
/// Exception-mask bit 12 (presumably masks "inexact result").
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_INEXACT: u32 = 0x1000;
/// Union of the six `_MM_MASK_*` bits (bits 7..=12); used by
/// `_MM_GET_EXCEPTION_MASK` / `_MM_SET_EXCEPTION_MASK` to isolate them.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_MASK_MASK: u32 = 0x1f80;
1636
/// Rounding-mode field value `0b00` (bits 13..=14 clear); presumably
/// "round to nearest", per the name.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_NEAREST: u32 = 0x0000;
/// Rounding-mode field value with bit 13 set; presumably "round down".
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_DOWN: u32 = 0x2000;
/// Rounding-mode field value with bit 14 set; presumably "round up".
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_UP: u32 = 0x4000;
/// Rounding-mode field value with bits 13 and 14 set; presumably "round
/// toward zero". Numerically equal to `_MM_ROUND_MASK`.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000;

/// Mask covering the rounding-mode field (bits 13..=14); used by
/// `_MM_GET_ROUNDING_MODE` / `_MM_SET_ROUNDING_MODE` to isolate it.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_ROUND_MASK: u32 = 0x6000;
1653
/// Mask covering the flush-to-zero bit (bit 15); used by
/// `_MM_GET_FLUSH_ZERO_MODE` / `_MM_SET_FLUSH_ZERO_MODE` to isolate it.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000;
/// Flush-to-zero field value with bit 15 set (mode on, per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_ON: u32 = 0x8000;
/// Flush-to-zero field value with bit 15 clear (mode off, per the name).
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000;
1663
1664#[inline]
1668#[allow(deprecated)] #[allow(non_snake_case)]
1670#[target_feature(enable = "sse")]
1671#[stable(feature = "simd_x86", since = "1.27.0")]
1672#[deprecated(
1673    since = "1.75.0",
1674    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1675)]
1676pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 {
1677    _mm_getcsr() & _MM_MASK_MASK
1678}
1679
1680#[inline]
1684#[allow(deprecated)] #[allow(non_snake_case)]
1686#[target_feature(enable = "sse")]
1687#[stable(feature = "simd_x86", since = "1.27.0")]
1688#[deprecated(
1689    since = "1.75.0",
1690    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1691)]
1692pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 {
1693    _mm_getcsr() & _MM_EXCEPT_MASK
1694}
1695
1696#[inline]
1700#[allow(deprecated)] #[allow(non_snake_case)]
1702#[target_feature(enable = "sse")]
1703#[stable(feature = "simd_x86", since = "1.27.0")]
1704#[deprecated(
1705    since = "1.75.0",
1706    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1707)]
1708pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 {
1709    _mm_getcsr() & _MM_FLUSH_ZERO_MASK
1710}
1711
1712#[inline]
1716#[allow(deprecated)] #[allow(non_snake_case)]
1718#[target_feature(enable = "sse")]
1719#[stable(feature = "simd_x86", since = "1.27.0")]
1720#[deprecated(
1721    since = "1.75.0",
1722    note = "see `_mm_getcsr` documentation - use inline assembly instead"
1723)]
1724pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 {
1725    _mm_getcsr() & _MM_ROUND_MASK
1726}
1727
1728#[inline]
1732#[allow(deprecated)] #[allow(non_snake_case)]
1734#[target_feature(enable = "sse")]
1735#[stable(feature = "simd_x86", since = "1.27.0")]
1736#[deprecated(
1737    since = "1.75.0",
1738    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1739)]
1740pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) {
1741    _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x)
1742}
1743
1744#[inline]
1748#[allow(deprecated)] #[allow(non_snake_case)]
1750#[target_feature(enable = "sse")]
1751#[stable(feature = "simd_x86", since = "1.27.0")]
1752#[deprecated(
1753    since = "1.75.0",
1754    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1755)]
1756pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) {
1757    _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x)
1758}
1759
1760#[inline]
1764#[allow(deprecated)] #[allow(non_snake_case)]
1766#[target_feature(enable = "sse")]
1767#[stable(feature = "simd_x86", since = "1.27.0")]
1768#[deprecated(
1769    since = "1.75.0",
1770    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1771)]
1772pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) {
1773    let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x;
1774    _mm_setcsr(val)
1776}
1777
1778#[inline]
1782#[allow(deprecated)] #[allow(non_snake_case)]
1784#[target_feature(enable = "sse")]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[deprecated(
1787    since = "1.75.0",
1788    note = "see `_mm_setcsr` documentation - use inline assembly instead"
1789)]
1790pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) {
1791    _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | x)
1792}
1793
/// Prefetch strategy 3 for `_mm_prefetch`; the test build expects it to
/// produce the `prefetcht0` instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T0: i32 = 3;

/// Prefetch strategy 2 for `_mm_prefetch`; the test build expects it to
/// produce the `prefetcht1` instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T1: i32 = 2;

/// Prefetch strategy 1 for `_mm_prefetch`; the test build expects it to
/// produce the `prefetcht2` instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_T2: i32 = 1;

/// Prefetch strategy 0 for `_mm_prefetch`; the test build expects it to
/// produce the `prefetchnta` instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_NTA: i32 = 0;

/// Prefetch strategy 7: the locality bits of `_MM_HINT_T0` with bit 2 set.
/// Presumably signals an intent to write — confirm against the Intel docs.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET0: i32 = 7;

/// Prefetch strategy 6: the locality bits of `_MM_HINT_T1` with bit 2 set.
/// Presumably signals an intent to write — confirm against the Intel docs.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_HINT_ET1: i32 = 6;
1817
/// Issues a prefetch for the address `p` using the 3-bit `STRATEGY`
/// immediate (one of the `_MM_HINT_*` constants).
///
/// `STRATEGY` is split into bit 2 and bits 1:0 before being handed to the
/// `prefetch` helper, whose declaration is outside this chunk.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(prefetcht0, STRATEGY = _MM_HINT_T0))]
#[cfg_attr(test, assert_instr(prefetcht1, STRATEGY = _MM_HINT_T1))]
#[cfg_attr(test, assert_instr(prefetcht2, STRATEGY = _MM_HINT_T2))]
#[cfg_attr(test, assert_instr(prefetchnta, STRATEGY = _MM_HINT_NTA))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
    // Presumably rejects, at compile time, any STRATEGY that does not fit in 3 unsigned bits.
    static_assert_uimm_bits!(STRATEGY, 3);
    // NOTE(review): the arguments after `p` are presumably the read/write
    // flag (bit 2), the locality (bits 1:0) and the cache type (1 = data) of
    // the LLVM prefetch intrinsic — confirm against the `prefetch` declaration.
    prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1);
}
1874
1875#[inline]
1881#[target_feature(enable = "sse")]
1882#[stable(feature = "simd_x86", since = "1.27.0")]
1883pub unsafe fn _mm_undefined_ps() -> __m128 {
1884    const { mem::zeroed() }
1885}
1886
1887#[inline]
1891#[allow(non_snake_case)]
1892#[target_feature(enable = "sse")]
1893#[stable(feature = "simd_x86", since = "1.27.0")]
1894pub unsafe fn _MM_TRANSPOSE4_PS(
1895    row0: &mut __m128,
1896    row1: &mut __m128,
1897    row2: &mut __m128,
1898    row3: &mut __m128,
1899) {
1900    let tmp0 = _mm_unpacklo_ps(*row0, *row1);
1901    let tmp2 = _mm_unpacklo_ps(*row2, *row3);
1902    let tmp1 = _mm_unpackhi_ps(*row0, *row1);
1903    let tmp3 = _mm_unpackhi_ps(*row2, *row3);
1904
1905    *row0 = _mm_movelh_ps(tmp0, tmp2);
1906    *row1 = _mm_movehl_ps(tmp2, tmp0);
1907    *row2 = _mm_movelh_ps(tmp1, tmp3);
1908    *row3 = _mm_movehl_ps(tmp3, tmp1);
1909}
1910
// Raw bindings to the LLVM intrinsics used by the SSE functions in this
// module. Each `link_name` selects the LLVM intrinsic; the Rust-side function
// name is only a local alias for it.
#[allow(improper_ctypes)]
extern "C" {
    // Reciprocal and reciprocal-square-root intrinsics (scalar `ss` / packed `ps`).
    #[link_name = "llvm.x86.sse.rcp.ss"]
    fn rcpss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rcp.ps"]
    fn rcpps(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ss"]
    fn rsqrtss(a: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.rsqrt.ps"]
    fn rsqrtps(a: __m128) -> __m128;
    // Minimum / maximum intrinsics (scalar and packed).
    #[link_name = "llvm.x86.sse.min.ss"]
    fn minss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.min.ps"]
    fn minps(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ss"]
    fn maxss(a: __m128, b: __m128) -> __m128;
    #[link_name = "llvm.x86.sse.max.ps"]
    fn maxps(a: __m128, b: __m128) -> __m128;
    // Packed comparison; `imm8` selects the predicate.
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    // `comi*` scalar comparisons returning an `i32` result.
    #[link_name = "llvm.x86.sse.comieq.ss"]
    fn comieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comilt.ss"]
    fn comilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comile.ss"]
    fn comile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comigt.ss"]
    fn comigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comige.ss"]
    fn comige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.comineq.ss"]
    fn comineq_ss(a: __m128, b: __m128) -> i32;
    // `ucomi*` scalar comparisons returning an `i32` result.
    #[link_name = "llvm.x86.sse.ucomieq.ss"]
    fn ucomieq_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomilt.ss"]
    fn ucomilt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomile.ss"]
    fn ucomile_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomigt.ss"]
    fn ucomigt_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomige.ss"]
    fn ucomige_ss(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.sse.ucomineq.ss"]
    fn ucomineq_ss(a: __m128, b: __m128) -> i32;
    // Conversions between the lowest `f32` lane and `i32`.
    #[link_name = "llvm.x86.sse.cvtss2si"]
    fn cvtss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvttss2si"]
    fn cvttss2si(a: __m128) -> i32;
    #[link_name = "llvm.x86.sse.cvtsi2ss"]
    fn cvtsi2ss(a: __m128, b: i32) -> __m128;
    // Store fence.
    #[link_name = "llvm.x86.sse.sfence"]
    fn sfence();
    // Store / load of the MXCSR register through a pointer.
    #[link_name = "llvm.x86.sse.stmxcsr"]
    fn stmxcsr(p: *mut i8);
    #[link_name = "llvm.x86.sse.ldmxcsr"]
    fn ldmxcsr(p: *const i8);
    // Generic LLVM prefetch intrinsic; used by `_mm_prefetch`.
    #[link_name = "llvm.prefetch"]
    fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
    // Scalar comparison; `imm8` selects the predicate.
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
}
1972
/// Stores `a` to the memory at `mem_addr` using the `movntps` instruction.
///
/// # Safety
///
/// `mem_addr` must be valid for a 16-byte write.
/// NOTE(review): `movntps` is documented to require a 16-byte aligned
/// address - confirm against the ISA reference before relying on this.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
    // Emitted as inline assembly rather than through an intrinsic call.
    // `vps!` (defined elsewhere) presumably builds the instruction string
    // around the `{p}` address operand - confirm at its definition.
    crate::arch::asm!(
        vps!("movntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        // No stack use and flags preserved are declared; memory is written,
        // so neither `nomem` nor `readonly` is given.
        options(nostack, preserves_flags),
    );
}
2001
2002#[cfg(test)]
2003mod tests {
2004    use crate::{hint::black_box, mem::transmute, ptr};
2005    use std::boxed;
2006    use stdarch_test::simd_test;
2007
2008    use crate::core_arch::{simd::*, x86::*};
2009
    // Local shorthand so the test vectors below can spell NaN lanes compactly.
    const NAN: f32 = f32::NAN;
2011
2012    #[simd_test(enable = "sse")]
2013    unsafe fn test_mm_add_ps() {
2014        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2015        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2016        let r = _mm_add_ps(a, b);
2017        assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0));
2018    }
2019
2020    #[simd_test(enable = "sse")]
2021    unsafe fn test_mm_add_ss() {
2022        let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
2023        let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
2024        let r = _mm_add_ss(a, b);
2025        assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
2026    }
2027
2028    #[simd_test(enable = "sse")]
2029    unsafe fn test_mm_sub_ps() {
2030        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2031        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2032        let r = _mm_sub_ps(a, b);
2033        assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0));
2034    }
2035
2036    #[simd_test(enable = "sse")]
2037    unsafe fn test_mm_sub_ss() {
2038        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2039        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2040        let r = _mm_sub_ss(a, b);
2041        assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
2042    }
2043
2044    #[simd_test(enable = "sse")]
2045    unsafe fn test_mm_mul_ps() {
2046        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2047        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2048        let r = _mm_mul_ps(a, b);
2049        assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0));
2050    }
2051
2052    #[simd_test(enable = "sse")]
2053    unsafe fn test_mm_mul_ss() {
2054        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2055        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2056        let r = _mm_mul_ss(a, b);
2057        assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
2058    }
2059
2060    #[simd_test(enable = "sse")]
2061    unsafe fn test_mm_div_ps() {
2062        let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
2063        let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
2064        let r = _mm_div_ps(a, b);
2065        assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0));
2066    }
2067
2068    #[simd_test(enable = "sse")]
2069    unsafe fn test_mm_div_ss() {
2070        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2071        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2072        let r = _mm_div_ss(a, b);
2073        assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
2074    }
2075
2076    #[simd_test(enable = "sse")]
2077    unsafe fn test_mm_sqrt_ss() {
2078        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2079        let r = _mm_sqrt_ss(a);
2080        let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
2081        assert_eq_m128(r, e);
2082    }
2083
2084    #[simd_test(enable = "sse")]
2085    unsafe fn test_mm_sqrt_ps() {
2086        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2087        let r = _mm_sqrt_ps(a);
2088        let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
2089        assert_eq_m128(r, e);
2090    }
2091
2092    #[simd_test(enable = "sse")]
2093    unsafe fn test_mm_rcp_ss() {
2094        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2095        let r = _mm_rcp_ss(a);
2096        let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
2097        let rel_err = 0.00048828125;
2098        assert_approx_eq!(get_m128(r, 0), get_m128(e, 0), 2. * rel_err);
2099        for i in 1..4 {
2100            assert_eq!(get_m128(r, i), get_m128(e, i));
2101        }
2102    }
2103
2104    #[simd_test(enable = "sse")]
2105    unsafe fn test_mm_rcp_ps() {
2106        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2107        let r = _mm_rcp_ps(a);
2108        let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
2109        let rel_err = 0.00048828125;
2110        for i in 0..4 {
2111            assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2112        }
2113    }
2114
2115    #[simd_test(enable = "sse")]
2116    unsafe fn test_mm_rsqrt_ss() {
2117        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2118        let r = _mm_rsqrt_ss(a);
2119        let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
2120        let rel_err = 0.00048828125;
2121        for i in 0..4 {
2122            assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2123        }
2124    }
2125
2126    #[simd_test(enable = "sse")]
2127    unsafe fn test_mm_rsqrt_ps() {
2128        let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
2129        let r = _mm_rsqrt_ps(a);
2130        let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
2131        let rel_err = 0.00048828125;
2132        for i in 0..4 {
2133            assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err);
2134        }
2135    }
2136
2137    #[simd_test(enable = "sse")]
2138    unsafe fn test_mm_min_ss() {
2139        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2140        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2141        let r = _mm_min_ss(a, b);
2142        assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2143    }
2144
2145    #[simd_test(enable = "sse")]
2146    unsafe fn test_mm_min_ps() {
2147        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2148        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2149        let r = _mm_min_ps(a, b);
2150        assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
2151
2152        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2158        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2159        let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
2160        let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
2161        let a: [u8; 16] = transmute(a);
2162        let b: [u8; 16] = transmute(b);
2163        assert_eq!(r1, b);
2164        assert_eq!(r2, a);
2165        assert_ne!(a, b); }
2167
2168    #[simd_test(enable = "sse")]
2169    unsafe fn test_mm_max_ss() {
2170        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2171        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2172        let r = _mm_max_ss(a, b);
2173        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
2174    }
2175
2176    #[simd_test(enable = "sse")]
2177    unsafe fn test_mm_max_ps() {
2178        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
2179        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
2180        let r = _mm_max_ps(a, b);
2181        assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
2182
2183        let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
2185        let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
2186        let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
2187        let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
2188        let a: [u8; 16] = transmute(a);
2189        let b: [u8; 16] = transmute(b);
2190        assert_eq!(r1, b);
2191        assert_eq!(r2, a);
2192        assert_ne!(a, b); }
2194
2195    #[simd_test(enable = "sse")]
2196    unsafe fn test_mm_and_ps() {
2197        let a = transmute(u32x4::splat(0b0011));
2198        let b = transmute(u32x4::splat(0b0101));
2199        let r = _mm_and_ps(*black_box(&a), *black_box(&b));
2200        let e = transmute(u32x4::splat(0b0001));
2201        assert_eq_m128(r, e);
2202    }
2203
2204    #[simd_test(enable = "sse")]
2205    unsafe fn test_mm_andnot_ps() {
2206        let a = transmute(u32x4::splat(0b0011));
2207        let b = transmute(u32x4::splat(0b0101));
2208        let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
2209        let e = transmute(u32x4::splat(0b0100));
2210        assert_eq_m128(r, e);
2211    }
2212
2213    #[simd_test(enable = "sse")]
2214    unsafe fn test_mm_or_ps() {
2215        let a = transmute(u32x4::splat(0b0011));
2216        let b = transmute(u32x4::splat(0b0101));
2217        let r = _mm_or_ps(*black_box(&a), *black_box(&b));
2218        let e = transmute(u32x4::splat(0b0111));
2219        assert_eq_m128(r, e);
2220    }
2221
2222    #[simd_test(enable = "sse")]
2223    unsafe fn test_mm_xor_ps() {
2224        let a = transmute(u32x4::splat(0b0011));
2225        let b = transmute(u32x4::splat(0b0101));
2226        let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
2227        let e = transmute(u32x4::splat(0b0110));
2228        assert_eq_m128(r, e);
2229    }
2230
2231    #[simd_test(enable = "sse")]
2232    unsafe fn test_mm_cmpeq_ss() {
2233        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2234        let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
2235        let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
2236        let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
2237        assert_eq!(r, e);
2238
2239        let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2240        let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
2241        let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
2242        assert_eq!(r2, e2);
2243    }
2244
2245    #[simd_test(enable = "sse")]
2246    unsafe fn test_mm_cmplt_ss() {
2247        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2248        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2249        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2250        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2251
2252        let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
2257        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2258        assert_eq!(rb, eb);
2259
2260        let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
2261        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2262        assert_eq!(rc, ec);
2263
2264        let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
2265        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2266        assert_eq!(rd, ed);
2267    }
2268
2269    #[simd_test(enable = "sse")]
2270    unsafe fn test_mm_cmple_ss() {
2271        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2272        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2273        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2274        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2275
2276        let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
2281        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2282        assert_eq!(rb, eb);
2283
2284        let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
2285        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2286        assert_eq!(rc, ec);
2287
2288        let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
2289        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2290        assert_eq!(rd, ed);
2291    }
2292
2293    #[simd_test(enable = "sse")]
2294    unsafe fn test_mm_cmpgt_ss() {
2295        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2296        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2297        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2298        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2299
2300        let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
2305        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2306        assert_eq!(rb, eb);
2307
2308        let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
2309        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2310        assert_eq!(rc, ec);
2311
2312        let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
2313        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2314        assert_eq!(rd, ed);
2315    }
2316
2317    #[simd_test(enable = "sse")]
2318    unsafe fn test_mm_cmpge_ss() {
2319        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2320        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2321        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2322        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2323
2324        let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
2329        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2330        assert_eq!(rb, eb);
2331
2332        let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
2333        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2334        assert_eq!(rc, ec);
2335
2336        let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
2337        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2338        assert_eq!(rd, ed);
2339    }
2340
2341    #[simd_test(enable = "sse")]
2342    unsafe fn test_mm_cmpneq_ss() {
2343        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2344        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2345        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2346        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2347
2348        let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
2353        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2354        assert_eq!(rb, eb);
2355
2356        let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
2357        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2358        assert_eq!(rc, ec);
2359
2360        let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
2361        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2362        assert_eq!(rd, ed);
2363    }
2364
2365    #[simd_test(enable = "sse")]
2366    unsafe fn test_mm_cmpnlt_ss() {
2367        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2373        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2374        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2375        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2376
2377        let b1 = !0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
2382        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2383        assert_eq!(rb, eb);
2384
2385        let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
2386        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2387        assert_eq!(rc, ec);
2388
2389        let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
2390        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2391        assert_eq!(rd, ed);
2392    }
2393
2394    #[simd_test(enable = "sse")]
2395    unsafe fn test_mm_cmpnle_ss() {
2396        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2402        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2403        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2404        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2405
2406        let b1 = !0u32; let c1 = 0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
2411        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2412        assert_eq!(rb, eb);
2413
2414        let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
2415        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2416        assert_eq!(rc, ec);
2417
2418        let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
2419        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2420        assert_eq!(rd, ed);
2421    }
2422
2423    #[simd_test(enable = "sse")]
2424    unsafe fn test_mm_cmpngt_ss() {
2425        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2431        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2432        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2433        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2434
2435        let b1 = 0u32; let c1 = !0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
2440        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2441        assert_eq!(rb, eb);
2442
2443        let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
2444        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2445        assert_eq!(rc, ec);
2446
2447        let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
2448        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2449        assert_eq!(rd, ed);
2450    }
2451
2452    #[simd_test(enable = "sse")]
2453    unsafe fn test_mm_cmpnge_ss() {
2454        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2460        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2461        let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
2462        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2463
2464        let b1 = 0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
2469        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2470        assert_eq!(rb, eb);
2471
2472        let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
2473        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2474        assert_eq!(rc, ec);
2475
2476        let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
2477        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2478        assert_eq!(rd, ed);
2479    }
2480
2481    #[simd_test(enable = "sse")]
2482    unsafe fn test_mm_cmpord_ss() {
2483        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2484        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2485        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2486        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2487
2488        let b1 = !0u32; let c1 = 0u32; let d1 = !0u32; let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
2493        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2494        assert_eq!(rb, eb);
2495
2496        let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
2497        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2498        assert_eq!(rc, ec);
2499
2500        let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
2501        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2502        assert_eq!(rd, ed);
2503    }
2504
2505    #[simd_test(enable = "sse")]
2506    unsafe fn test_mm_cmpunord_ss() {
2507        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
2508        let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
2509        let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
2510        let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
2511
2512        let b1 = 0u32; let c1 = !0u32; let d1 = 0u32; let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
2517        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
2518        assert_eq!(rb, eb);
2519
2520        let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
2521        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
2522        assert_eq!(rc, ec);
2523
2524        let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
2525        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
2526        assert_eq!(rd, ed);
2527    }
2528
2529    #[simd_test(enable = "sse")]
2530    unsafe fn test_mm_cmpeq_ps() {
2531        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2532        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2533        let tru = !0u32;
2534        let fls = 0u32;
2535
2536        let e = u32x4::new(fls, fls, tru, fls);
2537        let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
2538        assert_eq!(r, e);
2539    }
2540
2541    #[simd_test(enable = "sse")]
2542    unsafe fn test_mm_cmplt_ps() {
2543        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2544        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2545        let tru = !0u32;
2546        let fls = 0u32;
2547
2548        let e = u32x4::new(tru, fls, fls, fls);
2549        let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
2550        assert_eq!(r, e);
2551    }
2552
2553    #[simd_test(enable = "sse")]
2554    unsafe fn test_mm_cmple_ps() {
2555        let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
2556        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2557        let tru = !0u32;
2558        let fls = 0u32;
2559
2560        let e = u32x4::new(tru, fls, tru, fls);
2561        let r: u32x4 = transmute(_mm_cmple_ps(a, b));
2562        assert_eq!(r, e);
2563    }
2564
2565    #[simd_test(enable = "sse")]
2566    unsafe fn test_mm_cmpgt_ps() {
2567        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2568        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2569        let tru = !0u32;
2570        let fls = 0u32;
2571
2572        let e = u32x4::new(fls, tru, fls, fls);
2573        let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
2574        assert_eq!(r, e);
2575    }
2576
2577    #[simd_test(enable = "sse")]
2578    unsafe fn test_mm_cmpge_ps() {
2579        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2580        let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
2581        let tru = !0u32;
2582        let fls = 0u32;
2583
2584        let e = u32x4::new(fls, tru, tru, fls);
2585        let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
2586        assert_eq!(r, e);
2587    }
2588
2589    #[simd_test(enable = "sse")]
2590    unsafe fn test_mm_cmpneq_ps() {
2591        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2592        let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
2593        let tru = !0u32;
2594        let fls = 0u32;
2595
2596        let e = u32x4::new(tru, tru, fls, tru);
2597        let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
2598        assert_eq!(r, e);
2599    }
2600
2601    #[simd_test(enable = "sse")]
2602    unsafe fn test_mm_cmpnlt_ps() {
2603        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2604        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2605        let tru = !0u32;
2606        let fls = 0u32;
2607
2608        let e = u32x4::new(fls, tru, tru, tru);
2609        let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
2610        assert_eq!(r, e);
2611    }
2612
2613    #[simd_test(enable = "sse")]
2614    unsafe fn test_mm_cmpnle_ps() {
2615        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2616        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2617        let tru = !0u32;
2618        let fls = 0u32;
2619
2620        let e = u32x4::new(fls, tru, fls, tru);
2621        let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
2622        assert_eq!(r, e);
2623    }
2624
2625    #[simd_test(enable = "sse")]
2626    unsafe fn test_mm_cmpngt_ps() {
2627        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2628        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2629        let tru = !0u32;
2630        let fls = 0u32;
2631
2632        let e = u32x4::new(tru, fls, tru, tru);
2633        let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
2634        assert_eq!(r, e);
2635    }
2636
2637    #[simd_test(enable = "sse")]
2638    unsafe fn test_mm_cmpnge_ps() {
2639        let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
2640        let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
2641        let tru = !0u32;
2642        let fls = 0u32;
2643
2644        let e = u32x4::new(tru, fls, fls, tru);
2645        let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
2646        assert_eq!(r, e);
2647    }
2648
2649    #[simd_test(enable = "sse")]
2650    unsafe fn test_mm_cmpord_ps() {
2651        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2652        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2653        let tru = !0u32;
2654        let fls = 0u32;
2655
2656        let e = u32x4::new(tru, fls, fls, fls);
2657        let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
2658        assert_eq!(r, e);
2659    }
2660
2661    #[simd_test(enable = "sse")]
2662    unsafe fn test_mm_cmpunord_ps() {
2663        let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
2664        let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
2665        let tru = !0u32;
2666        let fls = 0u32;
2667
2668        let e = u32x4::new(fls, tru, tru, tru);
2669        let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
2670        assert_eq!(r, e);
2671    }
2672
2673    #[simd_test(enable = "sse")]
2674    unsafe fn test_mm_comieq_ss() {
2675        let aa = &[3.0f32, 12.0, 23.0, NAN];
2676        let bb = &[3.0f32, 47.5, 1.5, NAN];
2677
2678        let ee = &[1i32, 0, 0, 0];
2679
2680        for i in 0..4 {
2681            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2682            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2683
2684            let r = _mm_comieq_ss(a, b);
2685
2686            assert_eq!(
2687                ee[i], r,
2688                "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2689                a, b, r, ee[i], i
2690            );
2691        }
2692    }
2693
2694    #[simd_test(enable = "sse")]
2695    unsafe fn test_mm_comilt_ss() {
2696        let aa = &[3.0f32, 12.0, 23.0, NAN];
2697        let bb = &[3.0f32, 47.5, 1.5, NAN];
2698
2699        let ee = &[0i32, 1, 0, 0];
2700
2701        for i in 0..4 {
2702            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2703            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2704
2705            let r = _mm_comilt_ss(a, b);
2706
2707            assert_eq!(
2708                ee[i], r,
2709                "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2710                a, b, r, ee[i], i
2711            );
2712        }
2713    }
2714
2715    #[simd_test(enable = "sse")]
2716    unsafe fn test_mm_comile_ss() {
2717        let aa = &[3.0f32, 12.0, 23.0, NAN];
2718        let bb = &[3.0f32, 47.5, 1.5, NAN];
2719
2720        let ee = &[1i32, 1, 0, 0];
2721
2722        for i in 0..4 {
2723            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2724            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2725
2726            let r = _mm_comile_ss(a, b);
2727
2728            assert_eq!(
2729                ee[i], r,
2730                "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2731                a, b, r, ee[i], i
2732            );
2733        }
2734    }
2735
2736    #[simd_test(enable = "sse")]
2737    unsafe fn test_mm_comigt_ss() {
2738        let aa = &[3.0f32, 12.0, 23.0, NAN];
2739        let bb = &[3.0f32, 47.5, 1.5, NAN];
2740
2741        let ee = &[1i32, 0, 1, 0];
2742
2743        for i in 0..4 {
2744            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2745            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2746
2747            let r = _mm_comige_ss(a, b);
2748
2749            assert_eq!(
2750                ee[i], r,
2751                "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2752                a, b, r, ee[i], i
2753            );
2754        }
2755    }
2756
2757    #[simd_test(enable = "sse")]
2758    unsafe fn test_mm_comineq_ss() {
2759        let aa = &[3.0f32, 12.0, 23.0, NAN];
2760        let bb = &[3.0f32, 47.5, 1.5, NAN];
2761
2762        let ee = &[0i32, 1, 1, 1];
2763
2764        for i in 0..4 {
2765            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2766            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2767
2768            let r = _mm_comineq_ss(a, b);
2769
2770            assert_eq!(
2771                ee[i], r,
2772                "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2773                a, b, r, ee[i], i
2774            );
2775        }
2776    }
2777
2778    #[simd_test(enable = "sse")]
2779    unsafe fn test_mm_ucomieq_ss() {
2780        let aa = &[3.0f32, 12.0, 23.0, NAN];
2781        let bb = &[3.0f32, 47.5, 1.5, NAN];
2782
2783        let ee = &[1i32, 0, 0, 0];
2784
2785        for i in 0..4 {
2786            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2787            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2788
2789            let r = _mm_ucomieq_ss(a, b);
2790
2791            assert_eq!(
2792                ee[i], r,
2793                "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2794                a, b, r, ee[i], i
2795            );
2796        }
2797    }
2798
2799    #[simd_test(enable = "sse")]
2800    unsafe fn test_mm_ucomilt_ss() {
2801        let aa = &[3.0f32, 12.0, 23.0, NAN];
2802        let bb = &[3.0f32, 47.5, 1.5, NAN];
2803
2804        let ee = &[0i32, 1, 0, 0];
2805
2806        for i in 0..4 {
2807            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2808            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2809
2810            let r = _mm_ucomilt_ss(a, b);
2811
2812            assert_eq!(
2813                ee[i], r,
2814                "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2815                a, b, r, ee[i], i
2816            );
2817        }
2818    }
2819
2820    #[simd_test(enable = "sse")]
2821    unsafe fn test_mm_ucomile_ss() {
2822        let aa = &[3.0f32, 12.0, 23.0, NAN];
2823        let bb = &[3.0f32, 47.5, 1.5, NAN];
2824
2825        let ee = &[1i32, 1, 0, 0];
2826
2827        for i in 0..4 {
2828            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2829            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2830
2831            let r = _mm_ucomile_ss(a, b);
2832
2833            assert_eq!(
2834                ee[i], r,
2835                "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
2836                a, b, r, ee[i], i
2837            );
2838        }
2839    }
2840
2841    #[simd_test(enable = "sse")]
2842    unsafe fn test_mm_ucomigt_ss() {
2843        let aa = &[3.0f32, 12.0, 23.0, NAN];
2844        let bb = &[3.0f32, 47.5, 1.5, NAN];
2845
2846        let ee = &[0i32, 0, 1, 0];
2847
2848        for i in 0..4 {
2849            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2850            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2851
2852            let r = _mm_ucomigt_ss(a, b);
2853
2854            assert_eq!(
2855                ee[i], r,
2856                "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
2857                a, b, r, ee[i], i
2858            );
2859        }
2860    }
2861
2862    #[simd_test(enable = "sse")]
2863    unsafe fn test_mm_ucomige_ss() {
2864        let aa = &[3.0f32, 12.0, 23.0, NAN];
2865        let bb = &[3.0f32, 47.5, 1.5, NAN];
2866
2867        let ee = &[1i32, 0, 1, 0];
2868
2869        for i in 0..4 {
2870            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2871            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2872
2873            let r = _mm_ucomige_ss(a, b);
2874
2875            assert_eq!(
2876                ee[i], r,
2877                "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
2878                a, b, r, ee[i], i
2879            );
2880        }
2881    }
2882
2883    #[simd_test(enable = "sse")]
2884    unsafe fn test_mm_ucomineq_ss() {
2885        let aa = &[3.0f32, 12.0, 23.0, NAN];
2886        let bb = &[3.0f32, 47.5, 1.5, NAN];
2887
2888        let ee = &[0i32, 1, 1, 1];
2889
2890        for i in 0..4 {
2891            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2892            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2893
2894            let r = _mm_ucomineq_ss(a, b);
2895
2896            assert_eq!(
2897                ee[i], r,
2898                "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2899                a, b, r, ee[i], i
2900            );
2901        }
2902    }
2903
2904    #[allow(deprecated)] #[simd_test(enable = "sse")]
2906    #[cfg_attr(miri, ignore)] unsafe fn test_mm_comieq_ss_vs_ucomieq_ss() {
2908        let aa = &[3.0f32, NAN, 23.0, NAN];
2911        let bb = &[3.0f32, 47.5, NAN, NAN];
2912
2913        let ee = &[1i32, 0, 0, 0];
2914        let exc = &[0u32, 1, 1, 1]; for i in 0..4 {
2917            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
2918            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
2919
2920            _MM_SET_EXCEPTION_STATE(0);
2921            let r1 = _mm_comieq_ss(*black_box(&a), b);
2922            let s1 = _MM_GET_EXCEPTION_STATE();
2923
2924            _MM_SET_EXCEPTION_STATE(0);
2925            let r2 = _mm_ucomieq_ss(*black_box(&a), b);
2926            let s2 = _MM_GET_EXCEPTION_STATE();
2927
2928            assert_eq!(
2929                ee[i], r1,
2930                "_mm_comeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2931                a, b, r1, ee[i], i
2932            );
2933            assert_eq!(
2934                ee[i], r2,
2935                "_mm_ucomeq_ss({:?}, {:?}) = {}, expected: {} (i={})",
2936                a, b, r2, ee[i], i
2937            );
2938            assert_eq!(
2939                s1,
2940                exc[i] * _MM_EXCEPT_INVALID,
2941                "_mm_comieq_ss() set exception flags: {} (i={})",
2942                s1,
2943                i
2944            );
2945            assert_eq!(
2946                s2,
2947                0, "_mm_ucomieq_ss() set exception flags: {} (i={})",
2949                s2,
2950                i
2951            );
2952        }
2953    }
2954
2955    #[simd_test(enable = "sse")]
2956    unsafe fn test_mm_cvtss_si32() {
2957        let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
2958        let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
2959        for i in 0..inputs.len() {
2960            let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
2961            let e = result[i];
2962            let r = _mm_cvtss_si32(x);
2963            assert_eq!(
2964                e, r,
2965                "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
2966                i, x, r, e
2967            );
2968        }
2969    }
2970
2971    #[simd_test(enable = "sse")]
2972    unsafe fn test_mm_cvttss_si32() {
2973        let inputs = &[
2974            (42.0f32, 42i32),
2975            (-31.4, -31),
2976            (-33.5, -33),
2977            (-34.5, -34),
2978            (10.999, 10),
2979            (-5.99, -5),
2980            (4.0e10, i32::MIN),
2981            (4.0e-10, 0),
2982            (NAN, i32::MIN),
2983            (2147483500.1, 2147483520),
2984        ];
2985        for (i, &(xi, e)) in inputs.iter().enumerate() {
2986            let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
2987            let r = _mm_cvttss_si32(x);
2988            assert_eq!(
2989                e, r,
2990                "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
2991                i, x, r, e
2992            );
2993        }
2994    }
2995
2996    #[simd_test(enable = "sse")]
2997    unsafe fn test_mm_cvtsi32_ss() {
2998        let inputs = &[
2999            (4555i32, 4555.0f32),
3000            (322223333, 322223330.0),
3001            (-432, -432.0),
3002            (-322223333, -322223330.0),
3003        ];
3004
3005        for &(x, f) in inputs.iter() {
3006            let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3007            let r = _mm_cvtsi32_ss(a, x);
3008            let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
3009            assert_eq_m128(e, r);
3010        }
3011    }
3012
3013    #[simd_test(enable = "sse")]
3014    unsafe fn test_mm_cvtss_f32() {
3015        let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
3016        assert_eq!(_mm_cvtss_f32(a), 312.0134);
3017    }
3018
3019    #[simd_test(enable = "sse")]
3020    unsafe fn test_mm_set_ss() {
3021        let r = _mm_set_ss(black_box(4.25));
3022        assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
3023    }
3024
3025    #[simd_test(enable = "sse")]
3026    unsafe fn test_mm_set1_ps() {
3027        let r1 = _mm_set1_ps(black_box(4.25));
3028        let r2 = _mm_set_ps1(black_box(4.25));
3029        assert_eq!(get_m128(r1, 0), 4.25);
3030        assert_eq!(get_m128(r1, 1), 4.25);
3031        assert_eq!(get_m128(r1, 2), 4.25);
3032        assert_eq!(get_m128(r1, 3), 4.25);
3033        assert_eq!(get_m128(r2, 0), 4.25);
3034        assert_eq!(get_m128(r2, 1), 4.25);
3035        assert_eq!(get_m128(r2, 2), 4.25);
3036        assert_eq!(get_m128(r2, 3), 4.25);
3037    }
3038
3039    #[simd_test(enable = "sse")]
3040    unsafe fn test_mm_set_ps() {
3041        let r = _mm_set_ps(
3042            black_box(1.0),
3043            black_box(2.0),
3044            black_box(3.0),
3045            black_box(4.0),
3046        );
3047        assert_eq!(get_m128(r, 0), 4.0);
3048        assert_eq!(get_m128(r, 1), 3.0);
3049        assert_eq!(get_m128(r, 2), 2.0);
3050        assert_eq!(get_m128(r, 3), 1.0);
3051    }
3052
3053    #[simd_test(enable = "sse")]
3054    unsafe fn test_mm_setr_ps() {
3055        let r = _mm_setr_ps(
3056            black_box(1.0),
3057            black_box(2.0),
3058            black_box(3.0),
3059            black_box(4.0),
3060        );
3061        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3062    }
3063
3064    #[simd_test(enable = "sse")]
3065    unsafe fn test_mm_setzero_ps() {
3066        let r = *black_box(&_mm_setzero_ps());
3067        assert_eq_m128(r, _mm_set1_ps(0.0));
3068    }
3069
3070    #[simd_test(enable = "sse")]
3071    unsafe fn test_mm_shuffle() {
3072        assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
3073        assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
3074        assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
3075    }
3076
3077    #[simd_test(enable = "sse")]
3078    unsafe fn test_mm_shuffle_ps() {
3079        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3080        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3081        let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
3082        assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
3083    }
3084
3085    #[simd_test(enable = "sse")]
3086    unsafe fn test_mm_unpackhi_ps() {
3087        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3088        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3089        let r = _mm_unpackhi_ps(a, b);
3090        assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0));
3091    }
3092
3093    #[simd_test(enable = "sse")]
3094    unsafe fn test_mm_unpacklo_ps() {
3095        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3096        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3097        let r = _mm_unpacklo_ps(a, b);
3098        assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0));
3099    }
3100
3101    #[simd_test(enable = "sse")]
3102    unsafe fn test_mm_movehl_ps() {
3103        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3104        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3105        let r = _mm_movehl_ps(a, b);
3106        assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0));
3107    }
3108
3109    #[simd_test(enable = "sse")]
3110    unsafe fn test_mm_movelh_ps() {
3111        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3112        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3113        let r = _mm_movelh_ps(a, b);
3114        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
3115    }
3116
3117    #[simd_test(enable = "sse")]
3118    unsafe fn test_mm_load_ss() {
3119        let a = 42.0f32;
3120        let r = _mm_load_ss(ptr::addr_of!(a));
3121        assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
3122    }
3123
3124    #[simd_test(enable = "sse")]
3125    unsafe fn test_mm_load1_ps() {
3126        let a = 42.0f32;
3127        let r = _mm_load1_ps(ptr::addr_of!(a));
3128        assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
3129    }
3130
3131    #[simd_test(enable = "sse")]
3132    unsafe fn test_mm_load_ps() {
3133        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3134
3135        let mut p = vals.as_ptr();
3136        let mut fixup = 0.0f32;
3137
3138        let unalignment = (p as usize) & 0xf;
3142        if unalignment != 0 {
3143            let delta = (16 - unalignment) >> 2;
3144            fixup = delta as f32;
3145            p = p.add(delta);
3146        }
3147
3148        let r = _mm_load_ps(p);
3149        let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
3150        assert_eq_m128(r, e);
3151    }
3152
3153    #[simd_test(enable = "sse")]
3154    unsafe fn test_mm_loadu_ps() {
3155        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3156        let p = vals.as_ptr().add(3);
3157        let r = _mm_loadu_ps(black_box(p));
3158        assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
3159    }
3160
3161    #[simd_test(enable = "sse")]
3162    unsafe fn test_mm_loadr_ps() {
3163        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
3164
3165        let mut p = vals.as_ptr();
3166        let mut fixup = 0.0f32;
3167
3168        let unalignment = (p as usize) & 0xf;
3172        if unalignment != 0 {
3173            let delta = (16 - unalignment) >> 2;
3174            fixup = delta as f32;
3175            p = p.add(delta);
3176        }
3177
3178        let r = _mm_loadr_ps(p);
3179        let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
3180        assert_eq_m128(r, e);
3181    }
3182
3183    #[simd_test(enable = "sse")]
3184    unsafe fn test_mm_store_ss() {
3185        let mut vals = [0.0f32; 8];
3186        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3187        _mm_store_ss(vals.as_mut_ptr().add(1), a);
3188
3189        assert_eq!(vals[0], 0.0);
3190        assert_eq!(vals[1], 1.0);
3191        assert_eq!(vals[2], 0.0);
3192    }
3193
3194    #[simd_test(enable = "sse")]
3195    unsafe fn test_mm_store1_ps() {
3196        let mut vals = [0.0f32; 8];
3197        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3198
3199        let mut ofs = 0;
3200        let mut p = vals.as_mut_ptr();
3201
3202        if (p as usize) & 0xf != 0 {
3203            ofs = (16 - ((p as usize) & 0xf)) >> 2;
3204            p = p.add(ofs);
3205        }
3206
3207        _mm_store1_ps(p, *black_box(&a));
3208
3209        if ofs > 0 {
3210            assert_eq!(vals[ofs - 1], 0.0);
3211        }
3212        assert_eq!(vals[ofs + 0], 1.0);
3213        assert_eq!(vals[ofs + 1], 1.0);
3214        assert_eq!(vals[ofs + 2], 1.0);
3215        assert_eq!(vals[ofs + 3], 1.0);
3216        assert_eq!(vals[ofs + 4], 0.0);
3217    }
3218
3219    #[simd_test(enable = "sse")]
3220    unsafe fn test_mm_store_ps() {
3221        let mut vals = [0.0f32; 8];
3222        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3223
3224        let mut ofs = 0;
3225        let mut p = vals.as_mut_ptr();
3226
3227        if (p as usize) & 0xf != 0 {
3229            ofs = (16 - ((p as usize) & 0xf)) >> 2;
3230            p = p.add(ofs);
3231        }
3232
3233        _mm_store_ps(p, *black_box(&a));
3234
3235        if ofs > 0 {
3236            assert_eq!(vals[ofs - 1], 0.0);
3237        }
3238        assert_eq!(vals[ofs + 0], 1.0);
3239        assert_eq!(vals[ofs + 1], 2.0);
3240        assert_eq!(vals[ofs + 2], 3.0);
3241        assert_eq!(vals[ofs + 3], 4.0);
3242        assert_eq!(vals[ofs + 4], 0.0);
3243    }
3244
3245    #[simd_test(enable = "sse")]
3246    unsafe fn test_mm_storer_ps() {
3247        let mut vals = [0.0f32; 8];
3248        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3249
3250        let mut ofs = 0;
3251        let mut p = vals.as_mut_ptr();
3252
3253        if (p as usize) & 0xf != 0 {
3255            ofs = (16 - ((p as usize) & 0xf)) >> 2;
3256            p = p.add(ofs);
3257        }
3258
3259        _mm_storer_ps(p, *black_box(&a));
3260
3261        if ofs > 0 {
3262            assert_eq!(vals[ofs - 1], 0.0);
3263        }
3264        assert_eq!(vals[ofs + 0], 4.0);
3265        assert_eq!(vals[ofs + 1], 3.0);
3266        assert_eq!(vals[ofs + 2], 2.0);
3267        assert_eq!(vals[ofs + 3], 1.0);
3268        assert_eq!(vals[ofs + 4], 0.0);
3269    }
3270
3271    #[simd_test(enable = "sse")]
3272    unsafe fn test_mm_storeu_ps() {
3273        let mut vals = [0.0f32; 8];
3274        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3275
3276        let mut ofs = 0;
3277        let mut p = vals.as_mut_ptr();
3278
3279        if (p as usize) & 0xf == 0 {
3281            ofs = 1;
3282            p = p.add(1);
3283        }
3284
3285        _mm_storeu_ps(p, *black_box(&a));
3286
3287        if ofs > 0 {
3288            assert_eq!(vals[ofs - 1], 0.0);
3289        }
3290        assert_eq!(vals[ofs + 0], 1.0);
3291        assert_eq!(vals[ofs + 1], 2.0);
3292        assert_eq!(vals[ofs + 2], 3.0);
3293        assert_eq!(vals[ofs + 3], 4.0);
3294        assert_eq!(vals[ofs + 4], 0.0);
3295    }
3296
3297    #[simd_test(enable = "sse")]
3298    unsafe fn test_mm_move_ss() {
3299        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3300        let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3301
3302        let r = _mm_move_ss(a, b);
3303        let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0);
3304        assert_eq_m128(e, r);
3305    }
3306
3307    #[simd_test(enable = "sse")]
3308    unsafe fn test_mm_movemask_ps() {
3309        let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
3310        assert_eq!(r, 0b0101);
3311
3312        let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0));
3313        assert_eq!(r, 0b0111);
3314    }
3315
3316    #[simd_test(enable = "sse")]
3317    #[cfg_attr(miri, ignore)]
3319    unsafe fn test_mm_sfence() {
3320        _mm_sfence();
3321    }
3322
3323    #[allow(deprecated)] #[simd_test(enable = "sse")]
3325    #[cfg_attr(miri, ignore)] unsafe fn test_mm_getcsr_setcsr_1() {
3327        let saved_csr = _mm_getcsr();
3328
3329        let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
3330        let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0);
3331
3332        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
3333        let r = _mm_mul_ps(*black_box(&a), *black_box(&b));
3334
3335        _mm_setcsr(saved_csr);
3336
3337        let exp = _mm_setr_ps(0.0, 0.0, 0.0, 1.0);
3338        assert_eq_m128(r, exp); }
3340
3341    #[allow(deprecated)] #[simd_test(enable = "sse")]
3343    #[cfg_attr(miri, ignore)] unsafe fn test_mm_getcsr_setcsr_2() {
3345        let saved_csr = _mm_getcsr();
3348
3349        let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
3350        let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0);
3351
3352        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
3353        let r = _mm_mul_ps(*black_box(&a), *black_box(&b));
3354
3355        _mm_setcsr(saved_csr);
3356
3357        let exp = _mm_setr_ps(1.1e-39, 0.0, 0.0, 1.0);
3358        assert_eq_m128(r, exp); }
3360
3361    #[allow(deprecated)] #[simd_test(enable = "sse")]
3363    #[cfg_attr(miri, ignore)] unsafe fn test_mm_getcsr_setcsr_underflow() {
3365        _MM_SET_EXCEPTION_STATE(0);
3366
3367        let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0);
3368        let b = _mm_setr_ps(1e-5, 0.0, 0.0, 1.0);
3369
3370        assert_eq!(_MM_GET_EXCEPTION_STATE(), 0); let r = _mm_mul_ps(*black_box(&a), *black_box(&b));
3373
3374        let exp = _mm_setr_ps(1.1e-41, 0.0, 0.0, 1.0);
3375        assert_eq_m128(r, exp);
3376
3377        let underflow = _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_UNDERFLOW != 0;
3378        assert!(underflow);
3379    }
3380
3381    #[simd_test(enable = "sse")]
3382    unsafe fn test_MM_TRANSPOSE4_PS() {
3383        let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3384        let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
3385        let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
3386        let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0);
3387
3388        _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d);
3389
3390        assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0));
3391        assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0));
3392        assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0));
3393        assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0));
3394    }
3395
3396    #[repr(align(16))]
3397    struct Memory {
3398        pub data: [f32; 4],
3399    }
3400
3401    #[simd_test(enable = "sse")]
3402    #[cfg_attr(miri, ignore)]
3405    unsafe fn test_mm_stream_ps() {
3406        let a = _mm_set1_ps(7.0);
3407        let mut mem = Memory { data: [-1.0; 4] };
3408
3409        _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
3410        for i in 0..4 {
3411            assert_eq!(mem.data[i], get_m128(a, i));
3412        }
3413    }
3414}