use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;
#[cfg(test)]
use stdarch_test::assert_instr;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
    let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
    transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute::<i16x8, _>(simd_shuffle!(
        a.as_i16x8(),
        b.as_i16x8(),
        [
            [0, 8][IMM8 as usize & 1],
            [1, 9][(IMM8 >> 1) as usize & 1],
            [2, 10][(IMM8 >> 2) as usize & 1],
            [3, 11][(IMM8 >> 3) as usize & 1],
            [4, 12][(IMM8 >> 4) as usize & 1],
            [5, 13][(IMM8 >> 5) as usize & 1],
            [6, 14][(IMM8 >> 6) as usize & 1],
            [7, 15][(IMM8 >> 7) as usize & 1],
        ]
    ))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
    let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
    transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
    let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
    transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    transmute::<f64x2, _>(simd_shuffle!(
        a.as_f64x2(),
        b.as_f64x2(),
        [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
    ))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM4, 4);
    transmute::<f32x4, _>(simd_shuffle!(
        a.as_f32x4(),
        b.as_f32x4(),
        [
            [0, 4][IMM4 as usize & 1],
            [1, 5][(IMM4 >> 1) as usize & 1],
            [2, 6][(IMM4 >> 2) as usize & 1],
            [3, 7][(IMM4 >> 3) as usize & 1],
        ]
    ))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
    static_assert_uimm_bits!(IMM8, 2);
    simd_extract!(a, IMM8 as u32, f32).to_bits() as i32
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_uimm_bits!(IMM8, 4);
    simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_uimm_bits!(IMM8, 2);
    simd_extract!(a.as_i32x4(), IMM8 as u32, i32)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    insertps(a, b, IMM8 as u8)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 4);
    transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_uimm_bits!(IMM8, 2);
    transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u32x4();
    let b = b.as_u32x4();
    transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_u32x4();
    let b = b.as_u32x4();
    transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
    let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
    let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    let a = a.as_u32x4();
    let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    dppd(a, b, IMM8 as u8)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    dpps(a, b, IMM8 as u8)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
    simd_floor(a)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
    simd_floor(a)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
    roundsd(a, b, _MM_FROUND_FLOOR)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
    roundss(a, b, _MM_FROUND_FLOOR)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
    simd_ceil(a)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
    simd_ceil(a)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
    roundsd(a, b, _MM_FROUND_CEIL)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
    roundss(a, b, _MM_FROUND_CEIL)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundpd(a, ROUNDING)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundps(a, ROUNDING)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundsd(a, b, ROUNDING)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundss(a, b, ROUNDING)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
    transmute(phminposuw(a.as_u16x8()))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
    let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
    transmute(simd_mul(a, b))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 3);
    transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestz(a.as_i64x2(), mask.as_i64x2())
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestc(a.as_i64x2(), mask.as_i64x2())
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestnzc(a.as_i64x2(), mask.as_i64x2())
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    _mm_testz_si128(a, mask)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
    _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
    _mm_testnzc_si128(a, mask)
}
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(movntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
    let dst: __m128i;
    crate::arch::asm!(
        vpl!("movntdqa {a}"),
        a = out(xmm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse41.insertps"]
    fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.packusdw"]
    fn packusdw(a: i32x4, b: i32x4) -> u16x8;
    #[link_name = "llvm.x86.sse41.dppd"]
    fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
    #[link_name = "llvm.x86.sse41.dpps"]
    fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.round.pd"]
    fn roundpd(a: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ps"]
    fn roundps(a: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.round.sd"]
    fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ss"]
    fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.phminposuw"]
    fn phminposuw(a: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.mpsadbw"]
    fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
    #[link_name = "llvm.x86.sse41.ptestz"]
    fn ptestz(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestc"]
    fn ptestc(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestnzc"]
    fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
}
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use std::mem;
    use stdarch_test::simd_test;
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let mask = _mm_setr_epi8(
            0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
        );
        assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_pd() {
        let a = _mm_set1_pd(0.0);
        let b = _mm_set1_pd(1.0);
        let mask = transmute(_mm_setr_epi64x(0, -1));
        let r = _mm_blendv_pd(a, b, mask);
        let e = _mm_setr_pd(0.0, 1.0);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_ps() {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set1_ps(1.0);
        let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_blendv_ps(a, b, mask);
        let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_pd() {
        let a = _mm_set1_pd(0.0);
        let b = _mm_set1_pd(1.0);
        let r = _mm_blend_pd::<0b10>(a, b);
        let e = _mm_setr_pd(0.0, 1.0);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_ps() {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set1_ps(1.0);
        let r = _mm_blend_ps::<0b1010>(a, b);
        let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_blend_epi16::<0b1010_1100>(a, b);
        let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_ps() {
        let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
        let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
        assert_eq!(r, 1.0);
        let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
        assert_eq!(r, 3.0);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15
        );
        let r1 = _mm_extract_epi8::<0>(a);
        let r2 = _mm_extract_epi8::<3>(a);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm_extract_epi32::<1>(a);
        assert_eq!(r, 1);
        let r = _mm_extract_epi32::<3>(a);
        assert_eq!(r, 3);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_ps() {
        let a = _mm_set1_ps(1.0);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_insert_ps::<0b11_00_1100>(a, b);
        let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
        assert_eq_m128(r, e);
        let a = _mm_set1_ps(1.0);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_insert_ps::<0b11_00_0001>(a, b);
        let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_epi8() {
        let a = _mm_set1_epi8(0);
        let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_insert_epi8::<1>(a, 32);
        assert_eq_m128i(r, e);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
        let r = _mm_insert_epi8::<14>(a, 32);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_epi32() {
        let a = _mm_set1_epi32(0);
        let e = _mm_setr_epi32(0, 32, 0, 0);
        let r = _mm_insert_epi32::<1>(a, 32);
        assert_eq_m128i(r, e);
        let e = _mm_setr_epi32(0, 0, 0, 32);
        let r = _mm_insert_epi32::<3>(a, 32);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 4, 6, 8, 10, 12, 14, 16,
            18, 20, 22, 24, 26, 28, 30, 32,
        );
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epu16() {
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_max_epu16(a, b);
        let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epi32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epi32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epu32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epu32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi8_1() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            1, 3, 5, 7, 9, 11, 13, 15,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi8_2() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, -4, -5, 8, -9, -12, 13, -16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, -3, -6, 7, -10, -11, 14, -15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            1, -4, -6, 7, -10, -12, 13, -16,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epu16() {
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_min_epu16(a, b);
        let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi32_1() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi32_2() {
        let a = _mm_setr_epi32(-1, 4, 5, -7);
        let b = _mm_setr_epi32(-2, 3, -6, 8);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(-2, 3, -6, -7);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epu32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epu32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_packus_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(-1, -2, -3, -4);
        let r = _mm_packus_epi32(a, b);
        let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cmpeq_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(0, 0);
        let r = _mm_cmpeq_epi64(a, b);
        let e = _mm_setr_epi64x(-1, 0);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi16() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(-10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi32() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi64() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi16_epi32() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi16_epi64() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi32_epi64() {
        let a = _mm_set1_epi32(10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi32(-10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu8_epi16() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi16(a);
        let e = _mm_set1_epi16(10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu8_epi32() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu8_epi64() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu16_epi32() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepu16_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu16_epi64() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepu16_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu32_epi64() {
        let a = _mm_set1_epi32(10);
        let r = _mm_cvtepu32_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_dp_pd() {
        let a = _mm_setr_pd(2.0, 3.0);
        let b = _mm_setr_pd(1.0, 4.0);
        let e = _mm_setr_pd(14.0, 0.0);
        assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_dp_ps() {
        let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
        let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
        let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
        assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_pd() {
        let a = _mm_setr_pd(2.5, 4.5);
        let r = _mm_floor_pd(a);
        let e = _mm_setr_pd(2.0, 4.0);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_ps() {
        let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
        let r = _mm_floor_ps(a);
        let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_sd() {
        let a = _mm_setr_pd(2.5, 4.5);
        let b = _mm_setr_pd(-1.5, -3.5);
        let r = _mm_floor_sd(a, b);
        let e = _mm_setr_pd(-2.0, 4.5);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_ss() {
        let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
        let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
        let r = _mm_floor_ss(a, b);
        let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_pd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let r = _mm_ceil_pd(a);
        let e = _mm_setr_pd(2.0, 4.0);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_ps() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let r = _mm_ceil_ps(a);
        let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_sd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_ceil_sd(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_ss() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
        let r = _mm_ceil_ss(a, b);
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_pd() {
        let a = _mm_setr_pd(1.25, 3.75);
        let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
        let e = _mm_setr_pd(1.0, 4.0);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_ps() {
        let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
        let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
        let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_sd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
        let e = _mm_setr_pd(-3.0, 3.5);
        assert_eq_m128d(r, e);
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_ss() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
        let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_minpos_epu16_1() {
        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_minpos_epu16_2() {
        let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_minpos_epu16_3() {
        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_mul_epi32() {
        {
            let a = _mm_setr_epi32(1, 1, 1, 1);
            let b = _mm_setr_epi32(1, 2, 3, 4);
            let r = _mm_mul_epi32(a, b);
            let e = _mm_setr_epi64x(1, 3);
            assert_eq_m128i(r, e);
        }
        {
            let a = _mm_setr_epi32(15, 2 , 1234567, 4 );
            let b = _mm_setr_epi32(
                -20, -256, 666666, 666666, );
            let r = _mm_mul_epi32(a, b);
            let e = _mm_setr_epi64x(-300, 823043843622);
            assert_eq_m128i(r, e);
        }
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_mullo_epi32() {
        {
            let a = _mm_setr_epi32(1, 1, 1, 1);
            let b = _mm_setr_epi32(1, 2, 3, 4);
            let r = _mm_mullo_epi32(a, b);
            let e = _mm_setr_epi32(1, 2, 3, 4);
            assert_eq_m128i(r, e);
        }
        {
            let a = _mm_setr_epi32(15, -2, 1234567, 99999);
            let b = _mm_setr_epi32(-20, -256, 666666, -99999);
            let r = _mm_mullo_epi32(a, b);
            let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
            assert_eq_m128i(r, e);
        }
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_minpos_epu16() {
        let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
        let r = _mm_minpos_epu16(a);
        let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_mpsadbw_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm_mpsadbw_epu8::<0b000>(a, a);
        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
        assert_eq_m128i(r, e);
        let r = _mm_mpsadbw_epu8::<0b001>(a, a);
        let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
        assert_eq_m128i(r, e);
        let r = _mm_mpsadbw_epu8::<0b100>(a, a);
        let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
        assert_eq_m128i(r, e);
        let r = _mm_mpsadbw_epu8::<0b101>(a, a);
        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
        assert_eq_m128i(r, e);
        let r = _mm_mpsadbw_epu8::<0b111>(a, a);
        let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
        assert_eq_m128i(r, e);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_testz_si128() {
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_testc_si128() {
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_testnzc_si128() {
        let a = _mm_set1_epi8(0);
        let mask = _mm_set1_epi8(1);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b101);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_test_all_zeros() {
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_test_all_ones() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 0);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_test_mix_ones_zeros() {
        let a = _mm_set1_epi8(0);
        let mask = _mm_set1_epi8(1);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b101);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_stream_load_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _);
        assert_eq_m128i(a, r);
    }
}