1use crate::{
17    core_arch::{simd::*, x86::*},
18    intrinsics::simd::*,
19    mem, ptr,
20};
21
22#[cfg(test)]
23use stdarch_test::assert_instr;
24
/// Adds packed double-precision (64-bit) floating-point elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_add(a, b)
}
36
/// Adds packed single-precision (32-bit) floating-point elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    simd_add(a, b)
}
48
/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
// NOTE(review): the pattern is `vandp` (no suffix) — presumably so the test
// accepts either `vandps` or `vandpd`, whichever LLVM emits; confirm upstream.
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
    // The AND is performed on the raw bit patterns, so reinterpret as integers.
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_and(a, b))
}
63
/// Computes the bitwise AND of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
    // Bitwise operation on the raw bit patterns; reinterpret as integers.
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_and(a, b))
}
77
/// Computes the bitwise OR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
// NOTE(review): suffix-less `vorp` pattern — presumably accepts vorps/vorpd.
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
    // Bitwise operation on the raw bit patterns; reinterpret as integers.
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_or(a, b))
}
92
/// Computes the bitwise OR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
    // Bitwise operation on the raw bit patterns; reinterpret as integers.
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_or(a, b))
}
106
/// Shuffles double-precision (64-bit) floating-point elements within 128-bit
/// lanes using the control in `MASK`.
///
/// One mask bit per output element selects the low or high double of the
/// corresponding source within each 128-bit lane.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    // simd_shuffle indices 0..4 pick from `a`, 4..8 pick from `b`;
    // elements 0/1 come from the low lanes, 2/3 from the high lanes.
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b1,
            ((MASK as u32 >> 1) & 0b1) + 4,
            ((MASK as u32 >> 2) & 0b1) + 2,
            ((MASK as u32 >> 3) & 0b1) + 6,
        ],
    )
}
129
/// Shuffles single-precision (32-bit) floating-point elements within 128-bit
/// lanes using the control in `MASK`.
///
/// Each 2-bit field of the mask selects one of the four floats of a 128-bit
/// lane; the same mask is applied to the low and high lanes.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(MASK, 8);
    // simd_shuffle indices 0..8 pick from `a`, 8..16 pick from `b`;
    // the `+ 4` / `+ 12` offsets address the high 128-bit lane.
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11) + 8,
            ((MASK as u32 >> 6) & 0b11) + 8,
            (MASK as u32 & 0b11) + 4,
            ((MASK as u32 >> 2) & 0b11) + 4,
            ((MASK as u32 >> 4) & 0b11) + 12,
            ((MASK as u32 >> 6) & 0b11) + 12,
        ],
    )
}
156
/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
/// elements in `a`, and then AND with `b`.
#[inline]
#[target_feature(enable = "avx")]
// NOTE(review): suffix-less `vandnp` pattern — presumably accepts vandnps/vandnpd.
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    // NOT is expressed as XOR with an all-ones vector, then AND with `b`.
    transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
}
170
/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point
/// elements in `a`, and then AND with `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    // NOT is expressed as XOR with an all-ones vector, then AND with `b`.
    transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
}
185
/// Compares packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and returns packed maximum values.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    // Uses the platform intrinsic (not simd_fmax) to keep the exact
    // vmaxpd NaN/zero-sign semantics.
    vmaxpd(a, b)
}
197
/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed maximum values.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    vmaxps(a, b)
}
209
/// Compares packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and returns packed minimum values.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    vminpd(a, b)
}
221
/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed minimum values.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    vminps(a, b)
}
233
/// Multiplies packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_mul(a, b)
}
245
/// Multiplies packed single-precision (32-bit) floating-point elements
/// in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    simd_mul(a, b)
}
257
/// Alternatively adds and subtracts packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`:
/// even-indexed results are `a - b`, odd-indexed results are `a + b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    let a = a.as_f64x4();
    let b = b.as_f64x4();
    // Compute both full results, then interleave: indices 0..4 pick from
    // `add`, 4..8 pick from `sub`, so [4,1,6,3] = [sub0, add1, sub2, add3].
    let add = simd_add(a, b);
    let sub = simd_sub(a, b);
    simd_shuffle!(add, sub, [4, 1, 6, 3])
}
273
/// Alternatively adds and subtracts packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`:
/// even-indexed results are `a - b`, odd-indexed results are `a + b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    let a = a.as_f32x8();
    let b = b.as_f32x8();
    // Compute both full results, then interleave: indices 0..8 pick from
    // `add`, 8..16 pick from `sub` (sub at even positions, add at odd).
    let add = simd_add(a, b);
    let sub = simd_sub(a, b);
    simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
}
289
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from packed elements in `a`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_sub(a, b)
}
301
/// Subtracts packed single-precision (32-bit) floating-point elements in `b`
/// from packed elements in `a`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    simd_sub(a, b)
}
313
/// Divides packed single-precision (32-bit) floating-point elements in `a`
/// by packed elements in `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    simd_div(a, b)
}
325
/// Divides packed double-precision (64-bit) floating-point elements in `a`
/// by packed elements in `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_div(a, b)
}
337
/// Rounds packed double-precision (64-bit) floating-point elements in `a`
/// according to the flag `ROUNDING` (4-bit immediate: rounding mode plus
/// exception-suppression bits, as encoded by `vroundpd`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundpd256(a, ROUNDING)
}
360
/// Rounds packed double-precision (64-bit) floating-point elements in `a`
/// toward positive infinity.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
    simd_ceil(a)
}
372
/// Rounds packed double-precision (64-bit) floating-point elements in `a`
/// toward negative infinity.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
    simd_floor(a)
}
384
/// Rounds packed single-precision (32-bit) floating-point elements in `a`
/// according to the flag `ROUNDING` (4-bit immediate: rounding mode plus
/// exception-suppression bits, as encoded by `vroundps`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundps256(a, ROUNDING)
}
407
/// Rounds packed single-precision (32-bit) floating-point elements in `a`
/// toward positive infinity.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
    simd_ceil(a)
}
419
/// Rounds packed single-precision (32-bit) floating-point elements in `a`
/// toward negative infinity.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
    simd_floor(a)
}
431
/// Returns the square root of packed single-precision (32-bit) floating-point
/// elements in `a`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
    simd_fsqrt(a)
}
443
/// Returns the square root of packed double-precision (64-bit) floating-point
/// elements in `a`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
    simd_fsqrt(a)
}
455
/// Blends packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `IMM4`: for each output element, a set mask bit
/// selects the element from `b`, a clear bit selects from `a`.
#[inline]
#[target_feature(enable = "avx")]
// NOTE(review): asserts vblendps rather than vblendpd — presumably LLVM
// prefers the single-precision blend encoding here; confirm against codegen.
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    // Mask bit i = 0 selects a[i] (index i), = 1 selects b[i] (index i + 4).
    simd_shuffle!(
        a,
        b,
        [
            ((IMM4 as u32 >> 0) & 1) * 4 + 0,
            ((IMM4 as u32 >> 1) & 1) * 4 + 1,
            ((IMM4 as u32 >> 2) & 1) * 4 + 2,
            ((IMM4 as u32 >> 3) & 1) * 4 + 3,
        ],
    )
}
481
/// Blends packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using control mask `IMM8`: for each output element, a set mask bit
/// selects the element from `b`, a clear bit selects from `a`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    // Mask bit i = 0 selects a[i] (index i), = 1 selects b[i] (index i + 8).
    simd_shuffle!(
        a,
        b,
        [
            ((IMM8 as u32 >> 0) & 1) * 8 + 0,
            ((IMM8 as u32 >> 1) & 1) * 8 + 1,
            ((IMM8 as u32 >> 2) & 1) * 8 + 2,
            ((IMM8 as u32 >> 3) & 1) * 8 + 3,
            ((IMM8 as u32 >> 4) & 1) * 8 + 4,
            ((IMM8 as u32 >> 5) & 1) * 8 + 5,
            ((IMM8 as u32 >> 6) & 1) * 8 + 6,
            ((IMM8 as u32 >> 7) & 1) * 8 + 7,
        ],
    )
}
508
/// Blends packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `c` as a mask: for each element, the sign bit of `c` selects
/// `b` when set, `a` when clear.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    // `c < 0` as a signed integer is exactly "sign bit set".
    let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
    transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
}
521
/// Blends packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `c` as a mask: for each element, the sign bit of `c` selects
/// `b` when set, `a` when clear.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    // `c < 0` as a signed integer is exactly "sign bit set".
    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
    transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
}
534
/// Conditionally multiplies packed single-precision (32-bit) floating-point
/// elements in `a` and `b` using the high 4 bits of `IMM8` as a per-lane
/// product mask, sums the products, and conditionally stores the sum per the
/// low 4 bits of `IMM8` (the `vdpps` dot-product encoding).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vdpps(a, b, IMM8)
}
550
/// Horizontally adds adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    vhaddpd(a, b)
}
564
/// Horizontally adds adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    vhaddps(a, b)
}
579
/// Horizontally subtracts adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    vhsubpd(a, b)
}
593
/// Horizontally subtracts adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    vhsubps(a, b)
}
608
/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
// NOTE(review): suffix-less `vxorp` pattern — presumably accepts vxorps/vxorpd.
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    // Bitwise operation on the raw bit patterns; reinterpret as integers.
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_xor(a, b))
}
622
/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    // Bitwise operation on the raw bit patterns; reinterpret as integers.
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_xor(a, b))
}
636
// Comparison predicates for the `_mm*_cmp_*` intrinsics (the 5-bit immediate
// of vcmpps/vcmppd/vcmpss/vcmpsd). Naming: O = ordered, U = unordered,
// Q = quiet (non-signaling on QNaN), S = signaling.

/// Equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OQ: i32 = 0x00;
/// Less-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OS: i32 = 0x01;
/// Less-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OS: i32 = 0x02;
/// Unordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_Q: i32 = 0x03;
/// Not-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_UQ: i32 = 0x04;
/// Not-less-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_US: i32 = 0x05;
/// Not-less-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_US: i32 = 0x06;
/// Ordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_Q: i32 = 0x07;
/// Equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_UQ: i32 = 0x08;
/// Not-greater-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_US: i32 = 0x09;
/// Not-greater-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_US: i32 = 0x0a;
/// False (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OQ: i32 = 0x0b;
/// Not-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OQ: i32 = 0x0c;
/// Greater-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OS: i32 = 0x0d;
/// Greater-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OS: i32 = 0x0e;
/// True (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_UQ: i32 = 0x0f;
/// Equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OS: i32 = 0x10;
/// Less-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OQ: i32 = 0x11;
/// Less-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OQ: i32 = 0x12;
/// Unordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_S: i32 = 0x13;
/// Not-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_US: i32 = 0x14;
/// Not-less-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_UQ: i32 = 0x15;
/// Not-less-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_UQ: i32 = 0x16;
/// Ordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_S: i32 = 0x17;
/// Equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_US: i32 = 0x18;
/// Not-greater-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_UQ: i32 = 0x19;
/// Not-greater-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_UQ: i32 = 0x1a;
/// False (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OS: i32 = 0x1b;
/// Not-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OS: i32 = 0x1c;
/// Greater-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OQ: i32 = 0x1d;
/// Greater-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OQ: i32 = 0x1e;
/// True (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_US: i32 = 0x1f;
733
/// Compares packed double-precision (64-bit) floating-point elements in `a`
/// and `b` based on the comparison operand `IMM5` (one of the `_CMP_*`
/// predicates above); each result element is all-ones or all-zeros.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] #[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd(a, b, const { IMM5 as i8 })
}
748
/// Compares packed double-precision (64-bit) floating-point elements in `a`
/// and `b` based on the comparison operand `IMM5` (one of the `_CMP_*`
/// predicates above); each result element is all-ones or all-zeros.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] #[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd256(a, b, IMM5 as u8)
}
763
/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b` based on the comparison operand `IMM5` (one of the `_CMP_*`
/// predicates above); each result element is all-ones or all-zeros.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] #[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps(a, b, const { IMM5 as i8 })
}
778
/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b` based on the comparison operand `IMM5` (one of the `_CMP_*`
/// predicates above); each result element is all-ones or all-zeros.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] #[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps256(a, b, const { IMM5 as u8 })
}
793
/// Compares the lowest double-precision (64-bit) floating-point element of
/// `a` and `b` based on the comparison operand `IMM5`, storing the all-ones /
/// all-zeros result in the low element and copying the upper element from `a`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] #[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpsd(a, b, IMM5 as i8)
}
810
/// Compares the lowest single-precision (32-bit) floating-point element of
/// `a` and `b` based on the comparison operand `IMM5`, storing the all-ones /
/// all-zeros result in the low element and copying the upper elements from `a`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] #[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpss(a, b, IMM5 as i8)
}
827
/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit)
/// floating-point elements.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
    simd_cast(a.as_i32x4())
}
839
/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
    simd_cast(a.as_i32x8())
}
851
/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed single-precision (32-bit) floating-point elements.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
    simd_cast(a)
}
863
/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers, rounding per the current rounding mode
/// (uses the platform intrinsic, not `simd_cast`, for that reason).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    transmute(vcvtps2dq(a))
}
875
/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed double-precision (64-bit) floating-point elements.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
    simd_cast(a)
}
887
/// Returns the lowest double-precision (64-bit) floating-point element of `a`.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
    simd_extract!(a, 0)
}
898
/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvttpd2dq(a))
}
910
/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers, rounding per the current rounding mode.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvtpd2dq(a))
}
922
/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    transmute(vcvttps2dq(a))
}
934
/// Extracts 128 bits (composed of 4 packed single-precision floating-point
/// elements) from `a`, selected with `IMM1` (0 = low half, 1 = high half).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
    static_assert_uimm_bits!(IMM1, 1);
    // Only indices from `a` are used; the second operand is a don't-care.
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
    )
}
955
/// Extracts 128 bits (composed of 2 packed double-precision floating-point
/// elements) from `a`, selected with `IMM1` (0 = low half, 1 = high half).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
    static_assert_uimm_bits!(IMM1, 1);
    // Only indices from `a` are used; the second operand is a don't-care.
    simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize])
}
972
/// Extracts 128 bits (composed of integer data) from `a`, selected with
/// `IMM1` (0 = low half, 1 = high half).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    // Only indices from `a` are used; the zero vector is a don't-care.
    let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
    transmute(dst)
}
989
/// Extracts a 32-bit integer from `a`, selected with `INDEX` (0..=7).
#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 3);
    simd_extract!(a.as_i32x8(), INDEX as u32)
}
1002
/// Returns the first (lowest) 32-bit element of `a`.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
    simd_extract!(a.as_i32x8(), 0)
}
1012
/// Zeroes the contents of all XMM or YMM registers.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroall() {
    vzeroall()
}
1023
/// Zeroes the upper 128 bits of all YMM registers;
/// the lower 128 bits of the registers are unmodified.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroupper() {
    vzeroupper()
}
1035
/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
    vpermilps256(a, b.as_i32x8())
}
1047
/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// using the control in `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
    vpermilps(a, b.as_i32x4())
}
1059
/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `IMM8`; the same 2-bit
/// selector pattern is applied to both lanes.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    // All indices stay within `a`; `+ 4` addresses the high 128-bit lane.
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            ((IMM8 as u32 >> 0) & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    )
}
1086
/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// using the control in `IMM8` (one 2-bit selector per output element).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    // All indices stay within `a`; the second operand is a don't-care.
    simd_shuffle!(
        a,
        _mm_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    )
}
1109
/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
    vpermilpd256(a, b.as_i64x4())
}
1121
/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// using the control in `b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
    vpermilpd(a, b.as_i64x2())
}
1133
/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `IMM4` (one bit per element).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    // All indices stay within `a`; `+ 2` addresses the high 128-bit lane.
    simd_shuffle!(
        a,
        _mm256_undefined_pd(),
        [
            ((IMM4 as u32 >> 0) & 1),
            ((IMM4 as u32 >> 1) & 1),
            ((IMM4 as u32 >> 2) & 1) + 2,
            ((IMM4 as u32 >> 3) & 1) + 2,
        ],
    )
}
1156
/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// using the control in `IMM2` (one bit per element).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    // All indices stay within `a`; the second operand is a don't-care.
    simd_shuffle!(
        a,
        _mm_undefined_pd(),
        [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
    )
}
1174
/// Shuffles 128-bit halves selected from `a` and `b` (single-precision view)
/// using the control in `IMM8` (per the `vperm2f128` encoding, including the
/// zeroing bits).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128ps256(a, b, IMM8 as i8)
}
1188
/// Shuffles 128-bit halves selected from `a` and `b` (double-precision view)
/// using the control in `IMM8` (per the `vperm2f128` encoding, including the
/// zeroing bits).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128pd256(a, b, IMM8 as i8)
}
1202
/// Shuffles 128-bit halves selected from `a` and `b` (integer view) using the
/// control in `IMM8` (per the `vperm2f128` encoding, including the zeroing
/// bits).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8))
}
1216
/// Broadcasts a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
}
1229
/// Broadcasts a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
}
1242
/// Broadcasts a double-precision (64-bit) floating-point element from memory
/// to all elements of the returned vector.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
}
1255
/// Broadcasts 128 bits from memory (composed of 4 packed single-precision
/// floating-point elements) to both halves of the returned vector.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
    // Only indices from `*a` are used; the zero vector is a don't-care.
    simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3])
}
1267
/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
/// floating-point elements) to both halves of the returned vector.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
    // Only indices from `*a` are used; the zero vector is a don't-care.
    simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1])
}
1279
/// Copies `a`, then inserts 128 bits (composed of 4 packed single-precision
/// floating-point elements) from `b` at the half selected with `IMM1`
/// (0 = low half, 1 = high half).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
    static_assert_uimm_bits!(IMM1, 1);
    // Indices 0..8 pick from `a`, 8..12 pick from `b` (widened to 256 bits).
    simd_shuffle!(
        a,
        _mm256_castps128_ps256(b),
        [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
    )
}
1301
/// Copies `a`, then inserts 128 bits (composed of 2 packed double-precision
/// floating-point elements) from `b` at the half selected with `IMM1`
/// (0 = low half, 1 = high half).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
    static_assert_uimm_bits!(IMM1, 1);
    // Indices 0..4 pick from `a`, 4..6 pick from `b` (widened to 256 bits).
    simd_shuffle!(
        a,
        _mm256_castpd128_pd256(b),
        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
    )
}
1323
/// Copies `a`, then inserts 128 bits of integer data from `b` at the half
/// selected with `IMM1` (0 = low half, 1 = high half).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    // Indices 0..4 pick from `a`, 4..6 pick from `b` (widened to 256 bits).
    let dst: i64x4 = simd_shuffle!(
        a.as_i64x4(),
        _mm256_castsi128_si256(b).as_i64x4(),
        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
    );
    transmute(dst)
}
1345
/// Copies `a`, then inserts the 8-bit integer `i` at the element position
/// selected with `INDEX` (0..=31).
#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
    static_assert_uimm_bits!(INDEX, 5);
    transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i))
}
1359
/// Copies `a`, then inserts the 16-bit integer `i` at the element position
/// selected with `INDEX` (0..=15).
#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
    static_assert_uimm_bits!(INDEX, 4);
    transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i))
}
1373
/// Copies `a`, then inserts the 32-bit integer `i` at the element position
/// selected with `INDEX` (0..=7).
#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
    static_assert_uimm_bits!(INDEX, 3);
    transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i))
}
1387
/// Loads 256 bits (4 x f64) from memory. `mem_addr` must be 32-byte aligned:
/// the raw dereference of `*const __m256d` requires the type's alignment.
// NOTE: `vmovap` (no suffix) matches both vmovaps/vmovapd in the instruction
// assertion — codegen may emit either for a plain aligned load.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
    *(mem_addr as *const __m256d)
}

/// Stores 256 bits (4 x f64) to memory. `mem_addr` must be 32-byte aligned
/// (raw dereference of `*mut __m256d`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
    *(mem_addr as *mut __m256d) = a;
}

/// Loads 256 bits (8 x f32) from memory. `mem_addr` must be 32-byte aligned.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
    *(mem_addr as *const __m256)
}

/// Stores 256 bits (8 x f32) to memory. `mem_addr` must be 32-byte aligned.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
    *(mem_addr as *mut __m256) = a;
}
1447
/// Loads 256 bits (4 x f64) from memory with no alignment requirement:
/// the value is assembled with a byte-wise `copy_nonoverlapping` into a
/// stack temporary, which imposes alignment 1 on `mem_addr`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
    let mut dst = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256d>(),
    );
    dst
}

/// Stores 256 bits (4 x f64) to memory with no alignment requirement
/// (`write_unaligned`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
}

/// Loads 256 bits (8 x f32) from memory with no alignment requirement
/// (byte-wise copy into a stack temporary).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
    let mut dst = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256>(),
    );
    dst
}

/// Stores 256 bits (8 x f32) to memory with no alignment requirement
/// (`write_unaligned`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
}
1511
/// Loads 256 bits of integer data from memory. `mem_addr` must be 32-byte
/// aligned (plain dereference of `*const __m256i`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] #[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
    *mem_addr
}

/// Stores 256 bits of integer data to memory. `mem_addr` must be 32-byte
/// aligned (plain dereference of `*mut __m256i`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))] #[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
    *mem_addr = a;
}

/// Loads 256 bits of integer data from memory with no alignment requirement
/// (byte-wise copy into a stack temporary).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))] #[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
    let mut dst = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256i>(),
    );
    dst
}

/// Stores 256 bits of integer data to memory with no alignment requirement
/// (`write_unaligned`).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))] #[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
}
1567
// Masked load/store family: each function forwards to an LLVM vmaskmov
// intrinsic (declared in the extern block later in this file). Per the
// VMASKMOV instructions, only lanes whose mask element has its sign bit set
// are loaded/stored — TODO(review): semantics come from the instruction, the
// wrappers themselves just cast the pointer to `*[const|mut] i8` and forward.

/// Masked load of 4 x f64; lane `k` is loaded only when `mask` lane `k`'s
/// high bit is set (otherwise zeroed, per VMASKMOVPD).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
}

/// Masked store of 4 x f64; lane `k` of `a` is written only when `mask`
/// lane `k`'s high bit is set.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
}

/// 128-bit variant of `_mm256_maskload_pd` (2 x f64).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
}

/// 128-bit variant of `_mm256_maskstore_pd` (2 x f64).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
}

/// Masked load of 8 x f32; lane `k` is loaded only when `mask` lane `k`'s
/// high bit is set (otherwise zeroed, per VMASKMOVPS).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}

/// Masked store of 8 x f32; lane `k` of `a` is written only when `mask`
/// lane `k`'s high bit is set.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}

/// 128-bit variant of `_mm256_maskload_ps` (4 x f32).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
}

/// 128-bit variant of `_mm256_maskstore_ps` (4 x f32).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
}
1667
/// Duplicates odd-indexed f32 lanes of `a`: result lanes are
/// [1,1,3,3,5,5,7,7] (VMOVSHDUP).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
    simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
}

/// Duplicates even-indexed f32 lanes of `a`: result lanes are
/// [0,0,2,2,4,4,6,6] (VMOVSLDUP).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
    simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
}

/// Duplicates even-indexed f64 lanes of `a`: result lanes are [0,0,2,2]
/// (VMOVDDUP).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
    simd_shuffle!(a, a, [0, 0, 2, 2])
}
1703
/// Loads 256 bits of integer data from unaligned memory via the VLDDQU
/// instruction (forwarded to the LLVM `vlddqu` intrinsic declared later in
/// this file); may perform better than `_mm256_loadu_si256` when the data
/// crosses a cache-line boundary.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vlddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
}
1716
// Non-temporal (streaming) store family. These are implemented with inline
// asm rather than a compiler intrinsic so the non-temporal hint cannot be
// optimized away. The `vps!` macro (defined elsewhere in this crate —
// presumably expands the `{p}` memory operand into the template; verify at
// its definition) builds the full instruction string. `mem_addr` must be
// 32-byte aligned per the VMOVNT* instructions.

/// Non-temporal store of 256 bits of integer data (VMOVNTDQ), bypassing
/// the cache hierarchy.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Non-temporal store of 4 x f64 (VMOVNTPD), bypassing the cache hierarchy.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Non-temporal store of 8 x f32 (VMOVNTPS), bypassing the cache hierarchy.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}
1800
/// Approximate per-lane reciprocal of 8 x f32 via VRCPPS (forwarded to the
/// LLVM intrinsic). The hardware approximation has limited precision —
/// roughly 12 bits per the instruction's definition.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
    vrcpps(a)
}

/// Approximate per-lane reciprocal square root of 8 x f32 via VRSQRTPS
/// (forwarded to the LLVM intrinsic); limited-precision approximation.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
    vrsqrtps(a)
}
1826
// Unpack family: AVX unpack instructions interleave *within each 128-bit
// lane*, which is why the shuffle indices are not a simple global
// interleave.

/// Interleaves the high f64 of each 128-bit half of `a` and `b`:
/// result = [a1, b1, a3, b3] (VUNPCKHPD).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_shuffle!(a, b, [1, 5, 3, 7])
}

/// Interleaves the high two f32 of each 128-bit half of `a` and `b`:
/// result = [a2, b2, a3, b3, a6, b6, a7, b7] (VUNPCKHPS).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
    simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
}

/// Interleaves the low f64 of each 128-bit half of `a` and `b`:
/// result = [a0, b0, a2, b2] (VUNPCKLPD).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_shuffle!(a, b, [0, 4, 2, 6])
}

/// Interleaves the low two f32 of each 128-bit half of `a` and `b`:
/// result = [a0, b0, a1, b1, a4, b4, a5, b5] (VUNPCKLPS).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
    simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
}
1874
// Test family: thin wrappers over LLVM ptest/vtest intrinsics (declared in
// the extern block later in this file). Per the VPTEST/VTESTPD/VTESTPS
// instructions: `testz` returns the ZF result (1 iff `a AND b` is zero, or
// for the FP variants, iff all sign bits of `a AND b` are zero); `testc`
// returns the CF result (same test on `NOT(a) AND b`); `testnzc` returns 1
// iff both ZF and CF would be 0.

/// Returns 1 iff the bitwise AND of `a` and `b` is all-zero (VPTEST ZF).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    ptestz256(a.as_i64x4(), b.as_i64x4())
}

/// Returns 1 iff the bitwise AND of `NOT(a)` and `b` is all-zero (VPTEST CF).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestc256(a.as_i64x4(), b.as_i64x4())
}

/// Returns 1 iff neither the ZF nor the CF condition of VPTEST holds.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestnzc256(a.as_i64x4(), b.as_i64x4())
}

/// VTESTPD ZF on 4 x f64 sign bits of `a AND b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
    vtestzpd256(a, b)
}

/// VTESTPD CF on 4 x f64 sign bits of `NOT(a) AND b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
    vtestcpd256(a, b)
}

/// 1 iff neither ZF nor CF of 256-bit VTESTPD holds.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
    vtestnzcpd256(a, b)
}

/// 128-bit VTESTPD ZF (2 x f64).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
    vtestzpd(a, b)
}

/// 128-bit VTESTPD CF (2 x f64).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
    vtestcpd(a, b)
}

/// 1 iff neither ZF nor CF of 128-bit VTESTPD holds.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
    vtestnzcpd(a, b)
}

/// VTESTPS ZF on 8 x f32 sign bits of `a AND b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
    vtestzps256(a, b)
}

/// VTESTPS CF on 8 x f32 sign bits of `NOT(a) AND b`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
    vtestcps256(a, b)
}

/// 1 iff neither ZF nor CF of 256-bit VTESTPS holds.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
    vtestnzcps256(a, b)
}

/// 128-bit VTESTPS ZF (4 x f32).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
    vtestzps(a, b)
}

/// 128-bit VTESTPS CF (4 x f32).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
    vtestcps(a, b)
}

/// 1 iff neither ZF nor CF of 128-bit VTESTPS holds.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
    vtestnzcps(a, b)
}
2125
/// Collects the sign bit of each of the 4 f64 lanes of `a` into the low
/// 4 bits of the result (VMOVMSKPD). The sign test is done as a signed
/// `< 0` comparison on the i64 bit pattern, then packed via `simd_bitmask`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
    let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
    simd_bitmask::<i64x4, u8>(mask).into()
}

/// Collects the sign bit of each of the 8 f32 lanes of `a` into the low
/// 8 bits of the result (VMOVMSKPS); same signed-compare + bitmask scheme
/// as `_mm256_movemask_pd`.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
    let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
    simd_bitmask::<i32x8, u8>(mask).into()
}
2157
/// Returns an all-zero vector of 4 x f64; the `const` block lets the value
/// be materialized at compile time (typically emitted as a register XOR).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_pd() -> __m256d {
    const { mem::zeroed() }
}

/// Returns an all-zero vector of 8 x f32 (compile-time zero).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_ps() -> __m256 {
    const { mem::zeroed() }
}

/// Returns an all-zero 256-bit integer vector (compile-time zero).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_si256() -> __m256i {
    const { mem::zeroed() }
}
2190
// `set_*` family: Intel's `set` intrinsics take arguments highest-lane
// first, so each simply reverses its arguments and delegates to the
// corresponding lowest-lane-first `setr_*` constructor.

/// Builds a 4 x f64 vector with `a` as the highest lane and `d` the lowest.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    _mm256_setr_pd(d, c, b, a)
}

/// Builds an 8 x f32 vector with `a` as the highest lane and `h` the lowest.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_ps(
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
    g: f32,
    h: f32,
) -> __m256 {
    _mm256_setr_ps(h, g, f, e, d, c, b, a)
}

/// Builds a 32 x i8 vector; `e00` is the highest lane, `e31` the lowest.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi8(
    e00: i8,
    e01: i8,
    e02: i8,
    e03: i8,
    e04: i8,
    e05: i8,
    e06: i8,
    e07: i8,
    e08: i8,
    e09: i8,
    e10: i8,
    e11: i8,
    e12: i8,
    e13: i8,
    e14: i8,
    e15: i8,
    e16: i8,
    e17: i8,
    e18: i8,
    e19: i8,
    e20: i8,
    e21: i8,
    e22: i8,
    e23: i8,
    e24: i8,
    e25: i8,
    e26: i8,
    e27: i8,
    e28: i8,
    e29: i8,
    e30: i8,
    e31: i8,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e09, e08,
        e07, e06, e05, e04, e03, e02, e01, e00,
    )
}

/// Builds a 16 x i16 vector; `e00` is the highest lane, `e15` the lowest.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi16(
    e00: i16,
    e01: i16,
    e02: i16,
    e03: i16,
    e04: i16,
    e05: i16,
    e06: i16,
    e07: i16,
    e08: i16,
    e09: i16,
    e10: i16,
    e11: i16,
    e12: i16,
    e13: i16,
    e14: i16,
    e15: i16,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi16(
        e15, e14, e13, e12,
        e11, e10, e09, e08,
        e07, e06, e05, e04,
        e03, e02, e01, e00,
    )
}

/// Builds an 8 x i32 vector; `e0` is the highest lane, `e7` the lowest.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi32(
    e0: i32,
    e1: i32,
    e2: i32,
    e3: i32,
    e4: i32,
    e5: i32,
    e6: i32,
    e7: i32,
) -> __m256i {
    _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
}

/// Builds a 4 x i64 vector; `a` is the highest lane, `d` the lowest.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    _mm256_setr_epi64x(d, c, b, a)
}
2339
// `setr_*` family: "reverse" order relative to `set_*` — arguments are in
// memory/lane order, lowest lane first, and construct the vector directly.

/// Builds a 4 x f64 vector in lane order: `a` is lane 0 (lowest).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    __m256d([a, b, c, d])
}

/// Builds an 8 x f32 vector in lane order: `a` is lane 0 (lowest).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_ps(
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
    g: f32,
    h: f32,
) -> __m256 {
    __m256([a, b, c, d, e, f, g, h])
}

/// Builds a 32 x i8 vector in lane order: `e00` is lane 0 (lowest).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi8(
    e00: i8,
    e01: i8,
    e02: i8,
    e03: i8,
    e04: i8,
    e05: i8,
    e06: i8,
    e07: i8,
    e08: i8,
    e09: i8,
    e10: i8,
    e11: i8,
    e12: i8,
    e13: i8,
    e14: i8,
    e15: i8,
    e16: i8,
    e17: i8,
    e18: i8,
    e19: i8,
    e20: i8,
    e21: i8,
    e22: i8,
    e23: i8,
    e24: i8,
    e25: i8,
    e26: i8,
    e27: i8,
    e28: i8,
    e29: i8,
    e30: i8,
    e31: i8,
) -> __m256i {
    #[rustfmt::skip]
    transmute(i8x32::new(
        e00, e01, e02, e03, e04, e05, e06, e07,
        e08, e09, e10, e11, e12, e13, e14, e15,
        e16, e17, e18, e19, e20, e21, e22, e23,
        e24, e25, e26, e27, e28, e29, e30, e31,
    ))
}

/// Builds a 16 x i16 vector in lane order: `e00` is lane 0 (lowest).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi16(
    e00: i16,
    e01: i16,
    e02: i16,
    e03: i16,
    e04: i16,
    e05: i16,
    e06: i16,
    e07: i16,
    e08: i16,
    e09: i16,
    e10: i16,
    e11: i16,
    e12: i16,
    e13: i16,
    e14: i16,
    e15: i16,
) -> __m256i {
    #[rustfmt::skip]
    transmute(i16x16::new(
        e00, e01, e02, e03,
        e04, e05, e06, e07,
        e08, e09, e10, e11,
        e12, e13, e14, e15,
    ))
}

/// Builds an 8 x i32 vector in lane order: `e0` is lane 0 (lowest).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi32(
    e0: i32,
    e1: i32,
    e2: i32,
    e3: i32,
    e4: i32,
    e5: i32,
    e6: i32,
    e7: i32,
) -> __m256i {
    transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

/// Builds a 4 x i64 vector in lane order: `a` is lane 0 (lowest).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    transmute(i64x4::new(a, b, c, d))
}
2491
// `set1_*` family: broadcast a single scalar to every lane by delegating to
// the corresponding `setr_*` constructor with the value repeated.

/// Broadcasts `a` to all 4 f64 lanes.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
    _mm256_setr_pd(a, a, a, a)
}

/// Broadcasts `a` to all 8 f32 lanes.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
    _mm256_setr_ps(a, a, a, a, a, a, a, a)
}

/// Broadcasts `a` to all 32 i8 lanes.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
    )
}

/// Broadcasts `a` to all 16 i16 lanes.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Broadcasts `a` to all 8 i32 lanes.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
}

/// Broadcasts `a` to all 4 i64 lanes. Codegen differs by target: 64-bit
/// builds use VINSERTF128, 32-bit builds use VBROADCASTSD (per assertions).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
    _mm256_setr_epi64x(a, a, a, a)
}
2573
// Same-width bit-cast family: pure reinterpretations of the 256 bits
// between float/double/integer vector views; no instructions are emitted.

/// Reinterprets 4 x f64 as 8 x f32 (bit cast, zero cost).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
    transmute(a)
}

/// Reinterprets 8 x f32 as 4 x f64 (bit cast, zero cost).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
    transmute(a)
}

/// Reinterprets 8 x f32 as a 256-bit integer vector (bit cast, zero cost).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
    transmute(a)
}

/// Reinterprets a 256-bit integer vector as 8 x f32 (bit cast, zero cost).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
    transmute(a)
}

/// Reinterprets 4 x f64 as a 256-bit integer vector (bit cast, zero cost).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
    transmute(a)
}

/// Reinterprets a 256-bit integer vector as 4 x f64 (bit cast, zero cost).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
    transmute(a)
}
2645
// Width-changing cast family. Narrowing casts keep the low 128 bits;
// widening casts leave the upper 128 bits unspecified (the shuffle must
// name *some* source lane, so a lane index is repeated as a placeholder —
// callers must not rely on the upper half's contents).

/// Returns the low 128 bits (4 x f32) of `a`.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
    simd_shuffle!(a, a, [0, 1, 2, 3])
}

/// Returns the low 128 bits (2 x f64) of `a`.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
    simd_shuffle!(a, a, [0, 1])
}

/// Returns the low 128 bits of the 256-bit integer vector `a`.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
    let a = a.as_i64x4();
    let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(dst)
}

/// Widens `a` to 256 bits; the upper 4 f32 lanes are undefined.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
    simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4])
}

/// Widens `a` to 256 bits; the upper 2 f64 lanes are undefined.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
    simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2])
}

/// Widens the 128-bit integer vector `a` to 256 bits; the upper half is
/// undefined by contract (here filled from a zero vector).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
    let a = a.as_i64x2();
    let undefined = i64x2::ZERO;
    let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
    transmute(dst)
}
2725
// `zext` family: unlike the `cast*128_*256` widening casts above, these
// *guarantee* the upper 128 bits are zero by shuffling in lanes from a
// zero vector.

/// Widens `a` to 256 bits with the upper 4 f32 lanes zeroed.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
    simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Widens the 128-bit integer vector `a` to 256 bits with the upper half
/// zeroed.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
    let b = i64x2::ZERO;
    let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
    transmute(dst)
}

/// Widens `a` to 256 bits with the upper 2 f64 lanes zeroed.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
    simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3])
}
2770
// `undefined` family: the Intel contract allows any value, but returning
// actual uninitialized memory would be UB in Rust, so these return a
// compile-time zero. Callers must still not rely on the zero value.

/// Returns a 256-bit f32 vector with unspecified contents (implemented as
/// zero for soundness).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_ps() -> __m256 {
    const { mem::zeroed() }
}

/// Returns a 256-bit f64 vector with unspecified contents (implemented as
/// zero for soundness).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_pd() -> __m256d {
    const { mem::zeroed() }
}

/// Returns a 256-bit integer vector with unspecified contents (implemented
/// as zero for soundness).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_si256() -> __m256i {
    const { mem::zeroed() }
}
2809
/// Concatenates two 128-bit f32 vectors: `lo` becomes the low half of the
/// result and `hi` the high half (emits VINSERTF128).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
    simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Concatenates two 128-bit f64 vectors (`lo` low, `hi` high) by
/// bit-casting through the f32 variant.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
    let hi: __m128 = transmute(hi);
    let lo: __m128 = transmute(lo);
    transmute(_mm256_set_m128(hi, lo))
}

/// Concatenates two 128-bit integer vectors (`lo` low, `hi` high) by
/// bit-casting through the f32 variant.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
    let hi: __m128 = transmute(hi);
    let lo: __m128 = transmute(lo);
    transmute(_mm256_set_m128(hi, lo))
}

/// `_mm256_set_m128` with the argument order reversed (`lo` first).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
    _mm256_set_m128(hi, lo)
}

/// `_mm256_set_m128d` with the argument order reversed (`lo` first).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
    _mm256_set_m128d(hi, lo)
}

/// `_mm256_set_m128i` with the argument order reversed (`lo` first).
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
    _mm256_set_m128i(hi, lo)
}
2879
// `loadu2`/`storeu2` family: split 256-bit unaligned load/store across two
// independent (possibly non-adjacent) 128-bit memory locations. No
// alignment requirement on either address.

/// Loads 4 x f32 from `loaddr` into the low half and 4 x f32 from `hiaddr`
/// into the high half of the result.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
    let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
    _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
}

/// Loads 2 x f64 from `loaddr` into the low half and 2 x f64 from `hiaddr`
/// into the high half of the result.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
    let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
    _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
}

/// Loads 128 bits of integer data from `loaddr` (low half) and `hiaddr`
/// (high half).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
    let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
    _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
}

/// Stores the low half of `a` to `loaddr` and the high half to `hiaddr`
/// (both unaligned).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
    let lo = _mm256_castps256_ps128(a);
    _mm_storeu_ps(loaddr, lo);
    let hi = _mm256_extractf128_ps::<1>(a);
    _mm_storeu_ps(hiaddr, hi);
}

/// Stores the low half of `a` to `loaddr` and the high half to `hiaddr`
/// (both unaligned).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
    let lo = _mm256_castpd256_pd128(a);
    _mm_storeu_pd(loaddr, lo);
    let hi = _mm256_extractf128_pd::<1>(a);
    _mm_storeu_pd(hiaddr, hi);
}

/// Stores the low half of `a` to `loaddr` and the high half to `hiaddr`
/// (both unaligned).
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
    let lo = _mm256_castsi256_si128(a);
    _mm_storeu_si128(loaddr, lo);
    let hi = _mm256_extractf128_si256::<1>(a);
    _mm_storeu_si128(hiaddr, hi);
}
2973
/// Returns the lowest f32 lane (lane 0) of `a` as a scalar.
#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
    simd_extract!(a, 0)
}
2984
// Raw bindings to the LLVM intrinsics that back the AVX operations in this
// module. Each `link_name` names an LLVM x86 intrinsic; the Rust signature
// must match the shape LLVM expects for that intrinsic.
#[allow(improper_ctypes)]
extern "C" {
    // Rounding with an immediate rounding-control operand.
    #[link_name = "llvm.x86.avx.round.pd.256"]
    fn roundpd256(a: __m256d, b: i32) -> __m256d;
    #[link_name = "llvm.x86.avx.round.ps.256"]
    fn roundps256(a: __m256, b: i32) -> __m256;
    // Dot product with an immediate lane-selection mask.
    #[link_name = "llvm.x86.avx.dp.ps.256"]
    fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
    // Horizontal add/subtract across adjacent element pairs.
    #[link_name = "llvm.x86.avx.hadd.pd.256"]
    fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
    #[link_name = "llvm.x86.avx.hadd.ps.256"]
    fn vhaddps(a: __m256, b: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.hsub.pd.256"]
    fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
    #[link_name = "llvm.x86.avx.hsub.ps.256"]
    fn vhsubps(a: __m256, b: __m256) -> __m256;
    // Comparisons with an immediate predicate (128-bit forms come from the
    // SSE/SSE2 intrinsic namespaces, 256-bit forms from AVX).
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.avx.cmp.pd.256"]
    fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
    #[link_name = "llvm.x86.sse.cmp.ps"]
    fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
    #[link_name = "llvm.x86.avx.cmp.ps.256"]
    fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse.cmp.ss"]
    fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
    // Float <-> integer conversions ("cvtt" variants truncate).
    #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
    fn vcvtps2dq(a: __m256) -> i32x8;
    #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
    fn vcvttpd2dq(a: __m256d) -> i32x4;
    #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
    fn vcvtpd2dq(a: __m256d) -> i32x4;
    #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
    fn vcvttps2dq(a: __m256) -> i32x8;
    // Register-state management.
    #[link_name = "llvm.x86.avx.vzeroall"]
    fn vzeroall();
    #[link_name = "llvm.x86.avx.vzeroupper"]
    fn vzeroupper();
    // Variable (vector-controlled) in-lane permutes.
    #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
    fn vpermilps256(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx.vpermilvar.ps"]
    fn vpermilps(a: __m128, b: i32x4) -> __m128;
    #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
    fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
    #[link_name = "llvm.x86.avx.vpermilvar.pd"]
    fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
    // 128-bit-lane permutes with an immediate selector.
    #[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
    fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
    #[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
    fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
    #[link_name = "llvm.x86.avx.vperm2f128.si.256"]
    fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
    // Masked loads/stores (per-element, mask sign bit selects).
    #[link_name = "llvm.x86.avx.maskload.pd.256"]
    fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
    #[link_name = "llvm.x86.avx.maskstore.pd.256"]
    fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
    #[link_name = "llvm.x86.avx.maskload.pd"]
    fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
    #[link_name = "llvm.x86.avx.maskstore.pd"]
    fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
    #[link_name = "llvm.x86.avx.maskload.ps.256"]
    fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx.maskstore.ps.256"]
    fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
    #[link_name = "llvm.x86.avx.maskload.ps"]
    fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
    #[link_name = "llvm.x86.avx.maskstore.ps"]
    fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
    // Unaligned load intended for data that crosses cache lines.
    #[link_name = "llvm.x86.avx.ldu.dq.256"]
    fn vlddqu(mem_addr: *const i8) -> i8x32;
    // Approximate reciprocal and reciprocal square root.
    #[link_name = "llvm.x86.avx.rcp.ps.256"]
    fn vrcpps(a: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
    fn vrsqrtps(a: __m256) -> __m256;
    // Bit-test intrinsics returning ZF/CF-style flags as i32.
    #[link_name = "llvm.x86.avx.ptestz.256"]
    fn ptestz256(a: i64x4, b: i64x4) -> i32;
    #[link_name = "llvm.x86.avx.ptestc.256"]
    fn ptestc256(a: i64x4, b: i64x4) -> i32;
    #[link_name = "llvm.x86.avx.ptestnzc.256"]
    fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.pd.256"]
    fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.pd.256"]
    fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
    fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.pd"]
    fn vtestzpd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.pd"]
    fn vtestcpd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.pd"]
    fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.ps.256"]
    fn vtestzps256(a: __m256, b: __m256) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.ps.256"]
    fn vtestcps256(a: __m256, b: __m256) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
    fn vtestnzcps256(a: __m256, b: __m256) -> i32;
    #[link_name = "llvm.x86.avx.vtestz.ps"]
    fn vtestzps(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.avx.vtestc.ps"]
    fn vtestcps(a: __m128, b: __m128) -> i32;
    #[link_name = "llvm.x86.avx.vtestnzc.ps"]
    fn vtestnzcps(a: __m128, b: __m128) -> i32;
    // Packed min/max.
    #[link_name = "llvm.x86.avx.min.ps.256"]
    fn vminps(a: __m256, b: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.max.ps.256"]
    fn vmaxps(a: __m256, b: __m256) -> __m256;
    #[link_name = "llvm.x86.avx.min.pd.256"]
    fn vminpd(a: __m256d, b: __m256d) -> __m256d;
    #[link_name = "llvm.x86.avx.max.pd.256"]
    fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
}
3101
3102#[cfg(test)]
3103mod tests {
3104    use crate::hint::black_box;
3105    use crate::ptr;
3106    use stdarch_test::simd_test;
3107
3108    use crate::core_arch::x86::*;
3109
    // Element-wise addition: each result lane is a[i] + b[i].
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_add_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_add_pd(a, b);
        let e = _mm256_setr_pd(6., 8., 10., 12.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_add_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_add_ps(a, b);
        let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
        assert_eq_m256(r, e);
    }

    // Bitwise AND of the IEEE-754 bit patterns: 1.0 & 0.6 == 0.5 because
    // the AND keeps only the bits the two encodings share.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_and_pd() {
        let a = _mm256_set1_pd(1.);
        let b = _mm256_set1_pd(0.6);
        let r = _mm256_and_pd(a, b);
        let e = _mm256_set1_pd(0.5);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_and_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(0.6);
        let r = _mm256_and_ps(a, b);
        let e = _mm256_set1_ps(0.5);
        assert_eq_m256(r, e);
    }

    // Bitwise OR of the bit patterns: 1.0 | 0.6 == 1.2.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_or_pd() {
        let a = _mm256_set1_pd(1.);
        let b = _mm256_set1_pd(0.6);
        let r = _mm256_or_pd(a, b);
        let e = _mm256_set1_pd(1.2);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_or_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(0.6);
        let r = _mm256_or_ps(a, b);
        let e = _mm256_set1_ps(1.2);
        assert_eq_m256(r, e);
    }

    // Shuffle with an all-ones mask picks the high element of each pair in
    // each 128-bit lane (a supplies even lanes, b supplies odd lanes).
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_shuffle_pd() {
        let a = _mm256_setr_pd(1., 4., 5., 8.);
        let b = _mm256_setr_pd(2., 3., 6., 7.);
        let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
        let e = _mm256_setr_pd(4., 3., 8., 7.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_shuffle_ps() {
        let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
        let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
        assert_eq_m256(r, e);
    }

    // andnot computes (!a) & b; with a == 0 the result is b unchanged.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_andnot_pd() {
        let a = _mm256_set1_pd(0.);
        let b = _mm256_set1_pd(0.6);
        let r = _mm256_andnot_pd(a, b);
        assert_eq_m256d(r, b);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_andnot_ps() {
        let a = _mm256_set1_ps(0.);
        let b = _mm256_set1_ps(0.6);
        let r = _mm256_andnot_ps(a, b);
        assert_eq_m256(r, b);
    }
3197
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_max_pd() {
        let a = _mm256_setr_pd(1., 4., 5., 8.);
        let b = _mm256_setr_pd(2., 3., 6., 7.);
        let r = _mm256_max_pd(a, b);
        let e = _mm256_setr_pd(2., 4., 6., 8.);
        assert_eq_m256d(r, e);
        // Signed-zero ties: the asserts below show the result takes the sign
        // of the SECOND operand (bit patterns checked exactly).
        let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
        let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
        let wu: [u64; 4] = transmute(w);
        let xu: [u64; 4] = transmute(x);
        assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
        assert_eq!(xu, [0u64; 4]);
        // NaN handling: the asserts show the SECOND operand is returned, so
        // NaN-first yields 0.0 while NaN-second yields NaN.
        let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
        let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
        let yf: [f64; 4] = transmute(y);
        let zf: [f64; 4] = transmute(z);
        assert_eq!(yf, [0.0; 4]);
        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_max_ps() {
        let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_max_ps(a, b);
        let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
        assert_eq_m256(r, e);
        // Same signed-zero and NaN behavior as the _pd variant above: the
        // second operand wins on ties and NaN inputs.
        let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
        let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
        let wu: [u32; 8] = transmute(w);
        let xu: [u32; 8] = transmute(x);
        assert_eq!(wu, [0x8000_0000u32; 8]);
        assert_eq!(xu, [0u32; 8]);
        let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
        let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
        let yf: [f32; 8] = transmute(y);
        let zf: [f32; 8] = transmute(z);
        assert_eq!(yf, [0.0; 8]);
        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_min_pd() {
        let a = _mm256_setr_pd(1., 4., 5., 8.);
        let b = _mm256_setr_pd(2., 3., 6., 7.);
        let r = _mm256_min_pd(a, b);
        let e = _mm256_setr_pd(1., 3., 5., 7.);
        assert_eq_m256d(r, e);
        // min has the same tie-breaking: second operand on signed-zero ties
        // and on NaN inputs (verified through the bit patterns below).
        let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
        let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
        let wu: [u64; 4] = transmute(w);
        let xu: [u64; 4] = transmute(x);
        assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
        assert_eq!(xu, [0u64; 4]);
        let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
        let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
        let yf: [f64; 4] = transmute(y);
        let zf: [f64; 4] = transmute(z);
        assert_eq!(yf, [0.0; 4]);
        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_min_ps() {
        let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_min_ps(a, b);
        let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
        assert_eq_m256(r, e);
        // Same signed-zero and NaN tie-breaking as the _pd variant above.
        let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
        let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
        let wu: [u32; 8] = transmute(w);
        let xu: [u32; 8] = transmute(x);
        assert_eq!(wu, [0x8000_0000u32; 8]);
        assert_eq!(xu, [0u32; 8]);
        let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
        let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
        let yf: [f32; 8] = transmute(y);
        let zf: [f32; 8] = transmute(z);
        assert_eq!(yf, [0.0; 8]);
        assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
    }
3301
    // Element-wise multiplication.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_mul_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_mul_pd(a, b);
        let e = _mm256_setr_pd(5., 12., 21., 32.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_mul_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_mul_ps(a, b);
        let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
        assert_eq_m256(r, e);
    }

    // addsub alternates: even lanes subtract (a - b), odd lanes add (a + b).
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_addsub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_addsub_pd(a, b);
        let e = _mm256_setr_pd(-4., 8., -4., 12.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_addsub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
        let r = _mm256_addsub_ps(a, b);
        let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
        assert_eq_m256(r, e);
    }

    // Element-wise subtraction.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_sub_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_sub_pd(a, b);
        let e = _mm256_setr_pd(-4., -4., -4., -4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_sub_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
        let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
        let r = _mm256_sub_ps(a, b);
        let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
        assert_eq_m256(r, e);
    }
3355
    // Rounding with immediate control: 0b0000 = nearest, 0b0001 = down
    // (floor), 0b0010 = up (ceil).
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_round_pd() {
        let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
        let result_closest = _mm256_round_pd::<0b0000>(a);
        let result_down = _mm256_round_pd::<0b0001>(a);
        let result_up = _mm256_round_pd::<0b0010>(a);
        let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
        let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
        let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
        assert_eq_m256d(result_closest, expected_closest);
        assert_eq_m256d(result_down, expected_down);
        assert_eq_m256d(result_up, expected_up);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_floor_pd() {
        let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
        let result_down = _mm256_floor_pd(a);
        let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
        assert_eq_m256d(result_down, expected_down);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_ceil_pd() {
        let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
        let result_up = _mm256_ceil_pd(a);
        let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
        assert_eq_m256d(result_up, expected_up);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_round_ps() {
        let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
        let result_closest = _mm256_round_ps::<0b0000>(a);
        let result_down = _mm256_round_ps::<0b0001>(a);
        let result_up = _mm256_round_ps::<0b0010>(a);
        let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
        let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
        let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
        assert_eq_m256(result_closest, expected_closest);
        assert_eq_m256(result_down, expected_down);
        assert_eq_m256(result_up, expected_up);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_floor_ps() {
        let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
        let result_down = _mm256_floor_ps(a);
        let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
        assert_eq_m256(result_down, expected_down);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_ceil_ps() {
        let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
        let result_up = _mm256_ceil_ps(a);
        let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
        assert_eq_m256(result_up, expected_up);
    }

    // Perfect squares are used so the square root is exact.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_sqrt_pd() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let r = _mm256_sqrt_pd(a);
        let e = _mm256_setr_pd(2., 3., 4., 5.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_sqrt_ps() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let r = _mm256_sqrt_ps(a);
        let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
        assert_eq_m256(r, e);
    }

    // Element-wise division with exactly representable quotients.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_div_ps() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let r = _mm256_div_ps(a, b);
        let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_div_pd() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let b = _mm256_setr_pd(4., 3., 2., 5.);
        let r = _mm256_div_pd(a, b);
        let e = _mm256_setr_pd(1., 3., 8., 5.);
        assert_eq_m256d(r, e);
    }
3449
    // Immediate blend: mask bit i set selects lane i from b, clear keeps a.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_blend_pd() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let b = _mm256_setr_pd(4., 3., 2., 5.);
        let r = _mm256_blend_pd::<0x0>(a, b);
        assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
        let r = _mm256_blend_pd::<0x3>(a, b);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
        let r = _mm256_blend_pd::<0xF>(a, b);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_blend_ps() {
        let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_blend_ps::<0x0>(a, b);
        assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
        let r = _mm256_blend_ps::<0x3>(a, b);
        assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
        let r = _mm256_blend_ps::<0xF>(a, b);
        assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
    }

    // Variable blend: the sign bit of each mask lane selects b over a.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_blendv_pd() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let b = _mm256_setr_pd(4., 3., 2., 5.);
        let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
        let r = _mm256_blendv_pd(a, b, c);
        let e = _mm256_setr_pd(4., 9., 2., 5.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_blendv_ps() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        #[rustfmt::skip]
        let c = _mm256_setr_ps(
            0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
        );
        let r = _mm256_blendv_ps(a, b, c);
        let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
        assert_eq_m256(r, e);
    }

    // Dot product with mask 0xFF: every lane of each 128-bit half holds
    // the 4-element dot product of that half.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_dp_ps() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let r = _mm256_dp_ps::<0xFF>(a, b);
        let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
        assert_eq_m256(r, e);
    }
3505
    // Horizontal add: within each 128-bit lane, adjacent pairs of a then of
    // b are summed and interleaved.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_hadd_pd() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let b = _mm256_setr_pd(4., 3., 2., 5.);
        let r = _mm256_hadd_pd(a, b);
        let e = _mm256_setr_pd(13., 7., 41., 7.);
        assert_eq_m256d(r, e);

        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_hadd_pd(a, b);
        let e = _mm256_setr_pd(3., 11., 7., 15.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_hadd_ps() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let r = _mm256_hadd_ps(a, b);
        let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
        assert_eq_m256(r, e);

        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
        let r = _mm256_hadd_ps(a, b);
        let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
        assert_eq_m256(r, e);
    }

    // Horizontal subtract: same lane layout as hadd but each pair computes
    // (even - odd).
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_hsub_pd() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let b = _mm256_setr_pd(4., 3., 2., 5.);
        let r = _mm256_hsub_pd(a, b);
        let e = _mm256_setr_pd(-5., 1., -9., -3.);
        assert_eq_m256d(r, e);

        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_hsub_pd(a, b);
        let e = _mm256_setr_pd(-1., -1., -1., -1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_hsub_ps() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let r = _mm256_hsub_ps(a, b);
        let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
        assert_eq_m256(r, e);

        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
        let r = _mm256_hsub_ps(a, b);
        let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
        assert_eq_m256(r, e);
    }

    // XOR against all-zero bits leaves the input unchanged.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_xor_pd() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let b = _mm256_set1_pd(0.);
        let r = _mm256_xor_pd(a, b);
        assert_eq_m256d(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_xor_ps() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let b = _mm256_set1_ps(0.);
        let r = _mm256_xor_ps(a, b);
        assert_eq_m256(r, a);
    }
3581
    // Comparisons produce all-ones bits for true lanes; reinterpreted as a
    // float, an all-ones pattern is NaN — hence the is_nan() checks below.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm_cmp_pd() {
        let a = _mm_setr_pd(4., 9.);
        let b = _mm_setr_pd(4., 3.);
        let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
        assert!(get_m128d(r, 0).is_nan());
        assert!(get_m128d(r, 1).is_nan());
    }

    // All lanes compare false (a < b everywhere), so all bits are zero.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cmp_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
        let e = _mm256_set1_pd(0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_cmp_ps() {
        let a = _mm_setr_ps(4., 3., 2., 5.);
        let b = _mm_setr_ps(4., 9., 16., 25.);
        let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
        assert!(get_m128(r, 0).is_nan());
        assert_eq!(get_m128(r, 1), 0.);
        assert_eq!(get_m128(r, 2), 0.);
        assert_eq!(get_m128(r, 3), 0.);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cmp_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
        let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
        let e = _mm256_set1_ps(0.);
        assert_eq_m256(r, e);
    }

    // Scalar compare: only lane 0 gets the comparison result; the remaining
    // lanes are copied through from `a` unchanged.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm_cmp_sd() {
        let a = _mm_setr_pd(4., 9.);
        let b = _mm_setr_pd(4., 3.);
        let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
        assert!(get_m128d(r, 0).is_nan());
        assert_eq!(get_m128d(r, 1), 9.);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_cmp_ss() {
        let a = _mm_setr_ps(4., 3., 2., 5.);
        let b = _mm_setr_ps(4., 9., 16., 25.);
        let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
        assert!(get_m128(r, 0).is_nan());
        assert_eq!(get_m128(r, 1), 3.);
        assert_eq!(get_m128(r, 2), 2.);
        assert_eq!(get_m128(r, 3), 5.);
    }
3639
    // Conversions between integer and floating-point vectors; all values
    // here are exactly representable so the round trips are lossless.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtepi32_pd() {
        let a = _mm_setr_epi32(4, 9, 16, 25);
        let r = _mm256_cvtepi32_pd(a);
        let e = _mm256_setr_pd(4., 9., 16., 25.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtepi32_ps() {
        let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
        let r = _mm256_cvtepi32_ps(a);
        let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        assert_eq_m256(r, e);
    }

    // Four f64 narrow to four f32 in a 128-bit result.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtpd_ps() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let r = _mm256_cvtpd_ps(a);
        let e = _mm_setr_ps(4., 9., 16., 25.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtps_epi32() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let r = _mm256_cvtps_epi32(a);
        let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtps_pd() {
        let a = _mm_setr_ps(4., 9., 16., 25.);
        let r = _mm256_cvtps_pd(a);
        let e = _mm256_setr_pd(4., 9., 16., 25.);
        assert_eq_m256d(r, e);
    }

    // cvtsd_f64 returns the lowest f64 lane.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtsd_f64() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_cvtsd_f64(a);
        assert_eq!(r, 1.);
    }

    // "cvtt" variants truncate toward zero (values here are integral, so
    // the result matches the plain conversion).
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvttpd_epi32() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let r = _mm256_cvttpd_epi32(a);
        let e = _mm_setr_epi32(4, 9, 16, 25);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtpd_epi32() {
        let a = _mm256_setr_pd(4., 9., 16., 25.);
        let r = _mm256_cvtpd_epi32(a);
        let e = _mm_setr_epi32(4, 9, 16, 25);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvttps_epi32() {
        let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
        let r = _mm256_cvttps_epi32(a);
        let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
        assert_eq_m256i(r, e);
    }
3710
    // extractf128 with index 0 returns the low 128-bit half.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_extractf128_ps() {
        let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let r = _mm256_extractf128_ps::<0>(a);
        let e = _mm_setr_ps(4., 3., 2., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_extractf128_pd() {
        let a = _mm256_setr_pd(4., 3., 2., 5.);
        let r = _mm256_extractf128_pd::<0>(a);
        let e = _mm_setr_pd(4., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_extractf128_si256() {
        let a = _mm256_setr_epi64x(4, 3, 2, 5);
        let r = _mm256_extractf128_si256::<0>(a);
        let e = _mm_setr_epi64x(4, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_extract_epi32() {
        let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm256_extract_epi32::<0>(a);
        let r2 = _mm256_extract_epi32::<3>(a);
        assert_eq!(r1, -1);
        assert_eq!(r2, 3);
    }

    // cvtsi256_si32 returns the lowest 32-bit lane.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtsi256_si32() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtsi256_si32(a);
        assert_eq!(r, 1);
    }

    // No observable result to assert on; these just exercise the
    // register-zeroing intrinsics. Skipped under miri, which cannot run
    // these register-state instructions.
    #[simd_test(enable = "avx")]
    #[cfg_attr(miri, ignore)] unsafe fn test_mm256_zeroall() {
        _mm256_zeroall();
    }

    #[simd_test(enable = "avx")]
    #[cfg_attr(miri, ignore)] unsafe fn test_mm256_zeroupper() {
        _mm256_zeroupper();
    }
3762
    // Variable permute: each index selects within its own 128-bit lane
    // (indices are taken modulo 4 per lane for ps).
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_permutevar_ps() {
        let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_permutevar_ps(a, b);
        let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_permutevar_ps() {
        let a = _mm_setr_ps(4., 3., 2., 5.);
        let b = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_permutevar_ps(a, b);
        let e = _mm_setr_ps(3., 2., 5., 4.);
        assert_eq_m128(r, e);
    }

    // Immediate permute 0x1b = 0b00_01_10_11 reverses each 4-element lane.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_permute_ps() {
        let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let r = _mm256_permute_ps::<0x1b>(a);
        let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_permute_ps() {
        let a = _mm_setr_ps(4., 3., 2., 5.);
        let r = _mm_permute_ps::<0x1b>(a);
        let e = _mm_setr_ps(5., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    // For pd, bit 1 of each 64-bit control lane selects within the lane.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_permutevar_pd() {
        let a = _mm256_setr_pd(4., 3., 2., 5.);
        let b = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_permutevar_pd(a, b);
        let e = _mm256_setr_pd(4., 3., 5., 2.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_permutevar_pd() {
        let a = _mm_setr_pd(4., 3.);
        let b = _mm_setr_epi64x(3, 0);
        let r = _mm_permutevar_pd(a, b);
        let e = _mm_setr_pd(3., 4.);
        assert_eq_m128d(r, e);
    }

    // Immediate 5 = 0b01_01 swaps the pair within each 128-bit lane.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_permute_pd() {
        let a = _mm256_setr_pd(4., 3., 2., 5.);
        let r = _mm256_permute_pd::<5>(a);
        let e = _mm256_setr_pd(3., 4., 5., 2.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_permute_pd() {
        let a = _mm_setr_pd(4., 3.);
        let r = _mm_permute_pd::<1>(a);
        let e = _mm_setr_pd(3., 4.);
        assert_eq_m128d(r, e);
    }
3830
    // permute2f128 tests: the immediate's low and high nibbles each select a
    // 128-bit half from the concatenation of `a` and `b` for the result's low
    // and high halves respectively.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_permute2f128_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
        // 0x13: low result half <- b's high half, high result half <- a's high half.
        let r = _mm256_permute2f128_ps::<0x13>(a, b);
        let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_permute2f128_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        // 0x31: low <- a's high half, high <- b's high half.
        let r = _mm256_permute2f128_pd::<0x31>(a, b);
        let e = _mm256_setr_pd(3., 4., 7., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_permute2f128_si256() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
        let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
        // 0x20: low <- a's low half, high <- b's low half.
        let r = _mm256_permute2f128_si256::<0x20>(a, b);
        let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }
3857
    // Broadcast tests: a single scalar (or a whole 128-bit vector) loaded from
    // memory is replicated across all lanes (or both halves) of the result.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_broadcast_ss() {
        let r = _mm256_broadcast_ss(&3.);
        let e = _mm256_set1_ps(3.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_broadcast_ss() {
        let r = _mm_broadcast_ss(&3.);
        let e = _mm_set1_ps(3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_broadcast_sd() {
        let r = _mm256_broadcast_sd(&3.);
        let e = _mm256_set1_pd(3.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_broadcast_ps() {
        let a = _mm_setr_ps(4., 3., 2., 5.);
        // The 128-bit source appears in both halves of the 256-bit result.
        let r = _mm256_broadcast_ps(&a);
        let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_broadcast_pd() {
        let a = _mm_setr_pd(4., 3.);
        let r = _mm256_broadcast_pd(&a);
        let e = _mm256_setr_pd(4., 3., 4., 3.);
        assert_eq_m256d(r, e);
    }

    // insertf128 tests: index 0 replaces the low 128-bit half, leaving the
    // high half of `a` untouched.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_insertf128_ps() {
        let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let b = _mm_setr_ps(4., 9., 16., 25.);
        let r = _mm256_insertf128_ps::<0>(a, b);
        let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_insertf128_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm256_insertf128_pd::<0>(a, b);
        let e = _mm256_setr_pd(5., 6., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_insertf128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(5, 6);
        let r = _mm256_insertf128_si256::<0>(a, b);
        let e = _mm256_setr_epi64x(5, 6, 3, 4);
        assert_eq_m256i(r, e);
    }
3921
    // insert_epi tests: replace a single lane (here always the last one) with
    // a new value, leaving every other lane unchanged.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_insert_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        // Replace lane 31 (the last i8 lane) with 0.
        let r = _mm256_insert_epi8::<31>(a, 0);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_insert_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        // Replace lane 15 (the last i16 lane) with 0.
        let r = _mm256_insert_epi16::<15>(a, 0);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_insert_epi32() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        // Replace lane 7 (the last i32 lane) with 0.
        let r = _mm256_insert_epi32::<7>(a, 0);
        let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
        assert_eq_m256i(r, e);
    }
3965
    // Load/store round-trip tests. Aligned variants (`load`/`store`) take the
    // address of an existing __m256* value, which is naturally 32-byte
    // aligned; unaligned variants (`loadu`/`storeu`) go through plain slices
    // or `black_box`ed pointers so the compiler cannot assume alignment.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_load_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let p = ptr::addr_of!(a) as *const f64;
        let r = _mm256_load_pd(p);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_store_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut r = _mm256_undefined_pd();
        _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
        assert_eq_m256d(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_load_ps() {
        let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let p = ptr::addr_of!(a) as *const f32;
        let r = _mm256_load_ps(p);
        let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_store_ps() {
        let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        let mut r = _mm256_undefined_ps();
        _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
        assert_eq_m256(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        // black_box prevents the load from being const-folded away.
        let r = _mm256_loadu_pd(black_box(p));
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu_pd() {
        let a = _mm256_set1_pd(9.);
        let mut r = _mm256_undefined_pd();
        _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
        assert_eq_m256d(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu_ps() {
        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
        let p = a.as_ptr();
        let r = _mm256_loadu_ps(black_box(p));
        let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu_ps() {
        let a = _mm256_set1_ps(9.);
        let mut r = _mm256_undefined_ps();
        _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
        assert_eq_m256(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_load_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let p = ptr::addr_of!(a);
        let r = _mm256_load_si256(p);
        let e = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_store_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let mut r = _mm256_undefined_si256();
        _mm256_store_si256(ptr::addr_of_mut!(r), a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let p = ptr::addr_of!(a);
        let r = _mm256_loadu_si256(black_box(p));
        let e = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu_si256() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_undefined_si256();
        _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
        assert_eq_m256i(r, a);
    }
4067
    // Masked load/store tests. A mask lane of !0 (all bits set, i.e. the sign
    // bit set) selects the element; a mask lane of 0 leaves a zero (load) or
    // skips the write (store). The alternating 0/!0 masks below therefore
    // produce results with every other lane zeroed.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_maskload_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let mask = _mm256_setr_epi64x(0, !0, 0, !0);
        let r = _mm256_maskload_pd(black_box(p), mask);
        let e = _mm256_setr_pd(0., 2., 0., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_maskstore_pd() {
        let mut r = _mm256_set1_pd(0.);
        let mask = _mm256_setr_epi64x(0, !0, 0, !0);
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
        let e = _mm256_setr_pd(0., 2., 0., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_maskload_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let mask = _mm_setr_epi64x(0, !0);
        let r = _mm_maskload_pd(black_box(p), mask);
        let e = _mm_setr_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_maskstore_pd() {
        let mut r = _mm_set1_pd(0.);
        let mask = _mm_setr_epi64x(0, !0);
        let a = _mm_setr_pd(1., 2.);
        _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
        let e = _mm_setr_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_maskload_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
        let r = _mm256_maskload_ps(black_box(p), mask);
        let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_maskstore_ps() {
        let mut r = _mm256_set1_ps(0.);
        let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
        let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_maskload_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let mask = _mm_setr_epi32(0, !0, 0, !0);
        let r = _mm_maskload_ps(black_box(p), mask);
        let e = _mm_setr_ps(0., 2., 0., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_maskstore_ps() {
        let mut r = _mm_set1_ps(0.);
        let mask = _mm_setr_epi32(0, !0, 0, !0);
        let a = _mm_setr_ps(1., 2., 3., 4.);
        _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
        let e = _mm_setr_ps(0., 2., 0., 4.);
        assert_eq_m128(r, e);
    }
4147
    // Duplication tests: movehdup copies the odd lanes downward, moveldup
    // copies the even lanes upward, movedup duplicates each even f64 lane.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movehdup_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_movehdup_ps(a);
        let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_moveldup_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_moveldup_ps(a);
        let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movedup_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_movedup_pd(a);
        let e = _mm256_setr_pd(1., 1., 3., 3.);
        assert_eq_m256d(r, e);
    }

    // lddqu is an unaligned 256-bit integer load; the round trip must return
    // the stored bytes exactly.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_lddqu_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let p = ptr::addr_of!(a);
        let r = _mm256_lddqu_si256(black_box(p));
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }
4192
4193    #[simd_test(enable = "avx")]
4194    #[cfg_attr(miri, ignore)] unsafe fn test_mm256_stream_si256() {
4196        let a = _mm256_setr_epi64x(1, 2, 3, 4);
4197        let mut r = _mm256_undefined_si256();
4198        _mm256_stream_si256(ptr::addr_of_mut!(r), a);
4199        assert_eq_m256i(r, a);
4200    }
4201
4202    #[simd_test(enable = "avx")]
4203    #[cfg_attr(miri, ignore)] unsafe fn test_mm256_stream_pd() {
4205        #[repr(align(32))]
4206        struct Memory {
4207            pub data: [f64; 4],
4208        }
4209        let a = _mm256_set1_pd(7.0);
4210        let mut mem = Memory { data: [-1.0; 4] };
4211
4212        _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4213        for i in 0..4 {
4214            assert_eq!(mem.data[i], get_m256d(a, i));
4215        }
4216    }
4217
4218    #[simd_test(enable = "avx")]
4219    #[cfg_attr(miri, ignore)] unsafe fn test_mm256_stream_ps() {
4221        #[repr(align(32))]
4222        struct Memory {
4223            pub data: [f32; 8],
4224        }
4225        let a = _mm256_set1_ps(7.0);
4226        let mut mem = Memory { data: [-1.0; 8] };
4227
4228        _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
4229        for i in 0..8 {
4230            assert_eq!(mem.data[i], get_m256(a, i));
4231        }
4232    }
4233
    // Approximate-reciprocal tests. The hardware results are only
    // approximations, so lanes are compared against reference values with a
    // relative tolerance (rel_err = 2^-11, doubled for slack).

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_rcp_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_rcp_ps(a);
        #[rustfmt::skip]
        let e = _mm256_setr_ps(
            0.99975586, 0.49987793, 0.33325195, 0.24993896,
            0.19995117, 0.16662598, 0.14282227, 0.12496948,
        );
        let rel_err = 0.00048828125;
        for i in 0..8 {
            assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
        }
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_rsqrt_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_rsqrt_ps(a);
        #[rustfmt::skip]
        let e = _mm256_setr_ps(
            0.99975586, 0.7069092, 0.5772705, 0.49987793,
            0.44714355, 0.40820313, 0.3779297, 0.3534546,
        );
        let rel_err = 0.00048828125;
        for i in 0..8 {
            assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
        }
    }
4263
    // Unpack tests: interleave the high (unpackhi) or low (unpacklo) lanes of
    // `a` and `b` within each 128-bit half.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpackhi_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_unpackhi_pd(a, b);
        let e = _mm256_setr_pd(2., 6., 4., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpackhi_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_unpackhi_ps(a, b);
        let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpacklo_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_unpacklo_pd(a, b);
        let e = _mm256_setr_pd(1., 5., 3., 7.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpacklo_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_unpacklo_ps(a, b);
        let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
        assert_eq_m256(r, e);
    }
4299
    // Integer test (vptest) tests: testz checks `a AND b == 0`, testc checks
    // `NOT(a) AND b == 0`, testnzc checks that both of those are nonzero.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        // a AND b has common bits -> 0.
        let r = _mm256_testz_si256(a, b);
        assert_eq!(r, 0);
        // a AND 0 == 0 -> 1.
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_testz_si256(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testc_si256(a, b);
        assert_eq!(r, 0);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_testc_si256(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testnzc_si256(a, b);
        assert_eq!(r, 1);
        // All-zero inputs: both ZF and CF conditions hold -> 0.
        let a = _mm256_setr_epi64x(0, 0, 0, 0);
        let b = _mm256_setr_epi64x(0, 0, 0, 0);
        let r = _mm256_testnzc_si256(a, b);
        assert_eq!(r, 0);
    }
4333
    // Floating-point test (vtestpd) tests: these operate only on the sign
    // bits of each lane, so the positive inputs below behave like all-zero
    // masks and the negative inputs like all-ones masks.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        // All sign bits clear -> AND of signs is zero -> 1.
        let r = _mm256_testz_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm256_set1_pd(-1.);
        let r = _mm256_testz_pd(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testc_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm256_set1_pd(1.);
        let b = _mm256_set1_pd(-1.);
        let r = _mm256_testc_pd(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testnzc_pd(a, b);
        assert_eq!(r, 0);
        // Mixed signs so that both the AND and ANDN of the sign masks are
        // nonzero -> 1.
        let a = _mm256_setr_pd(1., -1., -1., -1.);
        let b = _mm256_setr_pd(-1., -1., 1., 1.);
        let r = _mm256_testnzc_pd(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testz_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testz_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm_set1_pd(-1.);
        let r = _mm_testz_pd(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testc_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testc_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(-1.);
        let r = _mm_testc_pd(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testnzc_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testnzc_pd(a, b);
        assert_eq!(r, 0);
        let a = _mm_setr_pd(1., -1.);
        let b = _mm_setr_pd(-1., -1.);
        let r = _mm_testnzc_pd(a, b);
        assert_eq!(r, 1);
    }
4403
    // Single-precision variants of the sign-bit test intrinsics; same
    // structure as the _pd tests above, with 8 (or 4) f32 lanes.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testz_ps(a, a);
        assert_eq!(r, 1);
        let a = _mm256_set1_ps(-1.);
        let r = _mm256_testz_ps(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testc_ps(a, a);
        assert_eq!(r, 1);
        let b = _mm256_set1_ps(-1.);
        let r = _mm256_testc_ps(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testnzc_ps(a, a);
        assert_eq!(r, 0);
        // Mixed signs make both tested conditions nonzero -> 1.
        let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
        let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
        let r = _mm256_testnzc_ps(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testz_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testz_ps(a, a);
        assert_eq!(r, 1);
        let a = _mm_set1_ps(-1.);
        let r = _mm_testz_ps(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testc_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testc_ps(a, a);
        assert_eq!(r, 1);
        let b = _mm_set1_ps(-1.);
        let r = _mm_testc_ps(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testnzc_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testnzc_ps(a, a);
        assert_eq!(r, 0);
        let a = _mm_setr_ps(1., -1., -1., -1.);
        let b = _mm_setr_ps(-1., -1., 1., 1.);
        let r = _mm_testnzc_ps(a, b);
        assert_eq!(r, 1);
    }
4465
    // movemask tests: collect the sign bit of each lane into an integer.
    // Negative lanes at odd positions give 0b1010 (0xA) / 0b10101010 (0xAA).

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_pd() {
        let a = _mm256_setr_pd(1., -2., 3., -4.);
        let r = _mm256_movemask_pd(a);
        assert_eq!(r, 0xA);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_ps() {
        let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
        let r = _mm256_movemask_ps(a);
        assert_eq!(r, 0xAA);
    }

    // setzero tests: a freshly zeroed vector must equal an all-zero splat.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_pd() {
        let r = _mm256_setzero_pd();
        assert_eq_m256d(r, _mm256_set1_pd(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_ps() {
        let r = _mm256_setzero_ps();
        assert_eq_m256(r, _mm256_set1_ps(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_si256() {
        let r = _mm256_setzero_si256();
        assert_eq_m256i(r, _mm256_set1_epi8(0));
    }
4497
    // `set_*` constructor tests: the `set` family takes arguments in
    // highest-lane-first order, so each result must equal the `setr`
    // (lowest-first) constructor called with the reversed argument list.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_pd() {
        let r = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_ps() {
        let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi8() {
        #[rustfmt::skip]
        let r = _mm256_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 31, 30, 29, 28, 27, 26, 25,
            24, 23, 22, 21, 20, 19, 18, 17,
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi16() {
        #[rustfmt::skip]
        let r = _mm256_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            16, 15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi32() {
        let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi64x() {
        let r = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
    }
4555
    // `setr_*` constructor tests: lowest-lane-first order; each constructor is
    // checked for self-consistency against an identical invocation.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_pd() {
        let r = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_ps() {
        let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi16() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi32() {
        let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi64x() {
        let r = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
    }
4614
    // `set1_*` splat-constructor tests: self-consistency checks for each
    // element width.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_pd() {
        let r = _mm256_set1_pd(1.);
        assert_eq_m256d(r, _mm256_set1_pd(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_ps() {
        let r = _mm256_set1_ps(1.);
        assert_eq_m256(r, _mm256_set1_ps(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi8() {
        let r = _mm256_set1_epi8(1);
        assert_eq_m256i(r, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi16() {
        let r = _mm256_set1_epi16(1);
        assert_eq_m256i(r, _mm256_set1_epi16(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi32() {
        let r = _mm256_set1_epi32(1);
        assert_eq_m256i(r, _mm256_set1_epi32(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi64x() {
        let r = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, _mm256_set1_epi64x(1));
    }
4650
    // Cast tests: the `cast*` intrinsics reinterpret the 256 bits without
    // converting values, so the expected vectors are the raw bit patterns of
    // the inputs viewed through the other element type (note the pd<->ps
    // pairs below are exact inverses of each other).

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_ps() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_ps(a);
        // The f32 view of each f64's bit pattern.
        let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_pd() {
        let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        let r = _mm256_castps_pd(a);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_si256() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps_si256(a);
        // Little-endian byte view of the eight f32 bit patterns.
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_ps() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        let r = _mm256_castsi256_ps(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_si256() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_si256(a);
        // Round-trip via transmute: the bits must be unchanged.
        assert_eq_m256d(transmute(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_pd() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_pd(a);
        assert_eq_m256d(r, transmute(a));
    }

    // Narrowing casts keep only the low 128 bits.

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps256_ps128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps256_ps128(a);
        assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd256_pd128() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd256_pd128(a);
        assert_eq_m128d(r, _mm_setr_pd(1., 2.));
    }
4722
4723    #[simd_test(enable = "avx")]
4724    unsafe fn test_mm256_castsi256_si128() {
4725        let a = _mm256_setr_epi64x(1, 2, 3, 4);
4726        let r = _mm256_castsi256_si128(a);
4727        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
4728    }
4729
4730    #[simd_test(enable = "avx")]
4731    unsafe fn test_mm256_castps128_ps256() {
4732        let a = _mm_setr_ps(1., 2., 3., 4.);
4733        let r = _mm256_castps128_ps256(a);
4734        assert_eq_m128(_mm256_castps256_ps128(r), a);
4735    }
4736
4737    #[simd_test(enable = "avx")]
4738    unsafe fn test_mm256_castpd128_pd256() {
4739        let a = _mm_setr_pd(1., 2.);
4740        let r = _mm256_castpd128_pd256(a);
4741        assert_eq_m128d(_mm256_castpd256_pd128(r), a);
4742    }
4743
4744    #[simd_test(enable = "avx")]
4745    unsafe fn test_mm256_castsi128_si256() {
4746        let a = _mm_setr_epi32(1, 2, 3, 4);
4747        let r = _mm256_castsi128_si256(a);
4748        assert_eq_m128i(_mm256_castsi256_si128(r), a);
4749    }
4750
4751    #[simd_test(enable = "avx")]
4752    unsafe fn test_mm256_zextps128_ps256() {
4753        let a = _mm_setr_ps(1., 2., 3., 4.);
4754        let r = _mm256_zextps128_ps256(a);
4755        let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
4756        assert_eq_m256(r, e);
4757    }
4758
4759    #[simd_test(enable = "avx")]
4760    unsafe fn test_mm256_zextsi128_si256() {
4761        let a = _mm_setr_epi64x(1, 2);
4762        let r = _mm256_zextsi128_si256(a);
4763        let e = _mm256_setr_epi64x(1, 2, 0, 0);
4764        assert_eq_m256i(r, e);
4765    }
4766
4767    #[simd_test(enable = "avx")]
4768    unsafe fn test_mm256_zextpd128_pd256() {
4769        let a = _mm_setr_pd(1., 2.);
4770        let r = _mm256_zextpd128_pd256(a);
4771        let e = _mm256_setr_pd(1., 2., 0., 0.);
4772        assert_eq_m256d(r, e);
4773    }
4774
4775    #[simd_test(enable = "avx")]
4776    unsafe fn test_mm256_set_m128() {
4777        let hi = _mm_setr_ps(5., 6., 7., 8.);
4778        let lo = _mm_setr_ps(1., 2., 3., 4.);
4779        let r = _mm256_set_m128(hi, lo);
4780        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4781        assert_eq_m256(r, e);
4782    }
4783
4784    #[simd_test(enable = "avx")]
4785    unsafe fn test_mm256_set_m128d() {
4786        let hi = _mm_setr_pd(3., 4.);
4787        let lo = _mm_setr_pd(1., 2.);
4788        let r = _mm256_set_m128d(hi, lo);
4789        let e = _mm256_setr_pd(1., 2., 3., 4.);
4790        assert_eq_m256d(r, e);
4791    }
4792
4793    #[simd_test(enable = "avx")]
4794    unsafe fn test_mm256_set_m128i() {
4795        #[rustfmt::skip]
4796        let hi = _mm_setr_epi8(
4797            17, 18, 19, 20,
4798            21, 22, 23, 24,
4799            25, 26, 27, 28,
4800            29, 30, 31, 32,
4801        );
4802        #[rustfmt::skip]
4803        let lo = _mm_setr_epi8(
4804            1, 2, 3, 4,
4805            5, 6, 7, 8,
4806            9, 10, 11, 12,
4807            13, 14, 15, 16,
4808        );
4809        let r = _mm256_set_m128i(hi, lo);
4810        #[rustfmt::skip]
4811        let e = _mm256_setr_epi8(
4812            1, 2, 3, 4, 5, 6, 7, 8,
4813            9, 10, 11, 12, 13, 14, 15, 16,
4814            17, 18, 19, 20, 21, 22, 23, 24,
4815            25, 26, 27, 28, 29, 30, 31, 32,
4816        );
4817        assert_eq_m256i(r, e);
4818    }
4819
4820    #[simd_test(enable = "avx")]
4821    unsafe fn test_mm256_setr_m128() {
4822        let lo = _mm_setr_ps(1., 2., 3., 4.);
4823        let hi = _mm_setr_ps(5., 6., 7., 8.);
4824        let r = _mm256_setr_m128(lo, hi);
4825        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4826        assert_eq_m256(r, e);
4827    }
4828
4829    #[simd_test(enable = "avx")]
4830    unsafe fn test_mm256_setr_m128d() {
4831        let lo = _mm_setr_pd(1., 2.);
4832        let hi = _mm_setr_pd(3., 4.);
4833        let r = _mm256_setr_m128d(lo, hi);
4834        let e = _mm256_setr_pd(1., 2., 3., 4.);
4835        assert_eq_m256d(r, e);
4836    }
4837
4838    #[simd_test(enable = "avx")]
4839    unsafe fn test_mm256_setr_m128i() {
4840        #[rustfmt::skip]
4841        let lo = _mm_setr_epi8(
4842            1, 2, 3, 4,
4843            5, 6, 7, 8,
4844            9, 10, 11, 12,
4845            13, 14, 15, 16,
4846        );
4847        #[rustfmt::skip]
4848        let hi = _mm_setr_epi8(
4849            17, 18, 19, 20, 21, 22, 23, 24,
4850            25, 26, 27, 28, 29, 30, 31, 32,
4851        );
4852        let r = _mm256_setr_m128i(lo, hi);
4853        #[rustfmt::skip]
4854        let e = _mm256_setr_epi8(
4855            1, 2, 3, 4, 5, 6, 7, 8,
4856            9, 10, 11, 12, 13, 14, 15, 16,
4857            17, 18, 19, 20, 21, 22, 23, 24,
4858            25, 26, 27, 28, 29, 30, 31, 32,
4859        );
4860        assert_eq_m256i(r, e);
4861    }
4862
4863    #[simd_test(enable = "avx")]
4864    unsafe fn test_mm256_loadu2_m128() {
4865        let hi = &[5., 6., 7., 8.];
4866        let hiaddr = hi.as_ptr();
4867        let lo = &[1., 2., 3., 4.];
4868        let loaddr = lo.as_ptr();
4869        let r = _mm256_loadu2_m128(hiaddr, loaddr);
4870        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4871        assert_eq_m256(r, e);
4872    }
4873
4874    #[simd_test(enable = "avx")]
4875    unsafe fn test_mm256_loadu2_m128d() {
4876        let hi = &[3., 4.];
4877        let hiaddr = hi.as_ptr();
4878        let lo = &[1., 2.];
4879        let loaddr = lo.as_ptr();
4880        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
4881        let e = _mm256_setr_pd(1., 2., 3., 4.);
4882        assert_eq_m256d(r, e);
4883    }
4884
4885    #[simd_test(enable = "avx")]
4886    unsafe fn test_mm256_loadu2_m128i() {
4887        #[rustfmt::skip]
4888        let hi = _mm_setr_epi8(
4889            17, 18, 19, 20, 21, 22, 23, 24,
4890            25, 26, 27, 28, 29, 30, 31, 32,
4891        );
4892        #[rustfmt::skip]
4893        let lo = _mm_setr_epi8(
4894            1, 2, 3, 4, 5, 6, 7, 8,
4895            9, 10, 11, 12, 13, 14, 15, 16,
4896        );
4897        let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
4898        #[rustfmt::skip]
4899        let e = _mm256_setr_epi8(
4900            1, 2, 3, 4, 5, 6, 7, 8,
4901            9, 10, 11, 12, 13, 14, 15, 16,
4902            17, 18, 19, 20, 21, 22, 23, 24,
4903            25, 26, 27, 28, 29, 30, 31, 32,
4904        );
4905        assert_eq_m256i(r, e);
4906    }
4907
4908    #[simd_test(enable = "avx")]
4909    unsafe fn test_mm256_storeu2_m128() {
4910        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4911        let mut hi = _mm_undefined_ps();
4912        let mut lo = _mm_undefined_ps();
4913        _mm256_storeu2_m128(
4914            ptr::addr_of_mut!(hi) as *mut f32,
4915            ptr::addr_of_mut!(lo) as *mut f32,
4916            a,
4917        );
4918        assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
4919        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
4920    }
4921
4922    #[simd_test(enable = "avx")]
4923    unsafe fn test_mm256_storeu2_m128d() {
4924        let a = _mm256_setr_pd(1., 2., 3., 4.);
4925        let mut hi = _mm_undefined_pd();
4926        let mut lo = _mm_undefined_pd();
4927        _mm256_storeu2_m128d(
4928            ptr::addr_of_mut!(hi) as *mut f64,
4929            ptr::addr_of_mut!(lo) as *mut f64,
4930            a,
4931        );
4932        assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
4933        assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
4934    }
4935
4936    #[simd_test(enable = "avx")]
4937    unsafe fn test_mm256_storeu2_m128i() {
4938        #[rustfmt::skip]
4939        let a = _mm256_setr_epi8(
4940            1, 2, 3, 4, 5, 6, 7, 8,
4941            9, 10, 11, 12, 13, 14, 15, 16,
4942            17, 18, 19, 20, 21, 22, 23, 24,
4943            25, 26, 27, 28, 29, 30, 31, 32,
4944        );
4945        let mut hi = _mm_undefined_si128();
4946        let mut lo = _mm_undefined_si128();
4947        _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
4948        #[rustfmt::skip]
4949        let e_hi = _mm_setr_epi8(
4950            17, 18, 19, 20, 21, 22, 23, 24,
4951            25, 26, 27, 28, 29, 30, 31, 32
4952        );
4953        #[rustfmt::skip]
4954        let e_lo = _mm_setr_epi8(
4955            1, 2, 3, 4, 5, 6, 7, 8,
4956            9, 10, 11, 12, 13, 14, 15, 16
4957        );
4958
4959        assert_eq_m128i(hi, e_hi);
4960        assert_eq_m128i(lo, e_lo);
4961    }
4962
4963    #[simd_test(enable = "avx")]
4964    unsafe fn test_mm256_cvtss_f32() {
4965        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4966        let r = _mm256_cvtss_f32(a);
4967        assert_eq!(r, 1.);
4968    }
4969}