use crate::{
core_arch::{simd::*, simd_llvm::*, x86::*},
mem::{self, transmute},
ptr,
};
#[cfg(test)]
use stdarch_test::assert_instr;
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
let a = a.as_i32x16();
let zero: i32x16 = mem::zeroed();
let sub = simd_sub(zero, a);
let cmp: i32x16 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, abs, zero))
}
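// Illustrative sketch of the unmasked/mask/maskz pattern used throughout this
// module, exercised on the abs family above. This is a hypothetical extra
// test, not part of the canonical suite; it assumes an AVX512F-capable CPU at
// runtime (`simd_test` skips it otherwise).
#[cfg(test)]
mod abs_epi32_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask_vs_maskz() {
        let a = _mm512_set1_epi32(-7);
        // Mask bit i set: lane i gets |a|; clear: lane i keeps `src` / becomes 0.
        let masked: [i32; 16] = transmute(_mm512_mask_abs_epi32(a, 0b00000000_11111111, a));
        let zeroed: [i32; 16] = transmute(_mm512_maskz_abs_epi32(0b00000000_11111111, a));
        assert_eq!(masked, [7, 7, 7, 7, 7, 7, 7, 7, -7, -7, -7, -7, -7, -7, -7, -7]);
        assert_eq!(zeroed, [7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}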
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
let a = a.as_i64x8();
let zero: i64x8 = mem::zeroed();
let sub = simd_sub(zero, a);
let cmp: i64x8 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, abs, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
let a = _mm512_set1_epi32(0x7FFFFFFF);
let b = transmute::<f32x16, __m512i>(v2.as_f32x16());
let abs = _mm512_and_epi32(a, b);
transmute(abs)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
let abs = _mm512_abs_ps(v2).as_f32x16();
transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
let a = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF);
let b = transmute::<f64x8, __m512i>(v2.as_f64x8());
let abs = _mm512_and_epi64(a, b);
transmute(abs)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
let abs = _mm512_abs_pd(v2).as_f64x8();
transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
}
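// The float-abs functions above clear the IEEE-754 sign bit with a bitwise
// AND against 0x7FFF... rather than using a dedicated instruction. A minimal
// sketch of that behavior (hypothetical test, assuming AVX512F hardware):
#[cfg(test)]
mod abs_ps_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn sign_bit_is_cleared() {
        let v = _mm512_set1_ps(-2.5);
        let r: [f32; 16] = transmute(_mm512_abs_ps(v));
        assert_eq!(r, [2.5; 16]);
    }
}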
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
let mov = a.as_i32x16();
transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
let mov = a.as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
let mov = a.as_i64x8();
transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
let mov = a.as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let mov = a.as_f32x16();
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
let mov = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
let mov = a.as_f64x8();
transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
let mov = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mov, zero))
}
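// Masked `mov` is effectively a per-lane blend: lane i takes `a` when mask
// bit i is set and `src` (or zero) otherwise. A sketch (hypothetical test,
// assuming AVX512F hardware):
#[cfg(test)]
mod mask_mov_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn blend_lanes() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(2);
        let r: [i32; 16] = transmute(_mm512_mask_mov_epi32(src, 0b10101010_10101010, a));
        for (i, lane) in r.iter().enumerate() {
            assert_eq!(*lane, if i % 2 == 1 { 2 } else { 1 });
        }
    }
}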
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let add = _mm512_add_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let add = _mm512_add_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let add = _mm512_add_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let add = _mm512_add_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, add, zero))
}
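// All of the arithmetic intrinsics in this file follow the same masking
// convention as `add` above: compute the full-width result, then blend it
// with `src` (mask) or zero (maskz). One sketch for the whole group
// (hypothetical test, assuming AVX512F hardware):
#[cfg(test)]
mod mask_add_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn masked_add() {
        let a = _mm512_set1_epi32(10);
        let b = _mm512_set1_epi32(5);
        // Only lanes 0..4 are selected; the rest keep `src` (here `a`).
        let r: [i32; 16] = transmute(_mm512_mask_add_epi32(a, 0b00000000_00001111, a, b));
        assert_eq!(r, [15, 15, 15, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]);
    }
}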
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_sub(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let sub = _mm512_sub_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let sub = _mm512_sub_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_sub(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let sub = _mm512_sub_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_sub(a.as_f32x16(), b.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let sub = _mm512_sub_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let sub = _mm512_sub_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_sub(a.as_f64x8(), b.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let sub = _mm512_sub_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let sub = _mm512_sub_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epi32(a, b).as_i64x8();
transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epi32(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, mul, zero))
}
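// `_mm512_mul_epi32` is a widening multiply: it takes the low (even-indexed)
// 32-bit element of each 64-bit pair in `a` and `b` and produces eight 64-bit
// products, matching `vpmuldq`. A sketch (hypothetical test, assuming AVX512F
// hardware):
#[cfg(test)]
mod mul_epi32_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn widening_multiply() {
        // For small x, each 64-bit lane of set1_epi64(x) has its low 32 bits = x.
        let a = _mm512_set1_epi64(3);
        let b = _mm512_set1_epi64(-4);
        let r: [i64; 8] = transmute(_mm512_mul_epi32(a, b));
        assert_eq!(r, [-12; 8]);
    }
}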
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_mul(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mask_mullo_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let mul = _mm512_mullo_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mullo_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, mul, zero))
}
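// Unlike `_mm512_mullo_epi32` above, a 64-bit low multiply has no dedicated
// AVX512F instruction (`vpmullq` requires AVX512DQ), so `_mm512_mullox_epi64`
// below is lowered to an instruction sequence and carries no `assert_instr`.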
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_mullox_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let mul = _mm512_mullox_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epu32(a, b).as_u64x8();
transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epu32(a, b).as_u64x8();
let zero = _mm512_setzero_si512().as_u64x8();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let mul = _mm512_mul_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let mul = _mm512_mul_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let mul = _mm512_mul_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let mul = _mm512_mul_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let div = _mm512_div_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, div, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let div = _mm512_div_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, div, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let div = _mm512_div_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, div, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let div = _mm512_div_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, div, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, max, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, max, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
transmute(vmaxps(
a.as_f32x16(),
b.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let max = _mm512_max_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let max = _mm512_max_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let max = _mm512_max_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let max = _mm512_max_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu32(a, b).as_u32x16();
transmute(simd_select_bitmask(k, max, src.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu32(a, b).as_u32x16();
let zero = _mm512_setzero_si512().as_u32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu64(a, b).as_u64x8();
transmute(simd_select_bitmask(k, max, src.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu64(a, b).as_u64x8();
let zero = _mm512_setzero_si512().as_u64x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, min, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, min, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
transmute(vminps(
a.as_f32x16(),
b.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let min = _mm512_min_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, min, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let min = _mm512_min_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let min = _mm512_min_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, min, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let min = _mm512_min_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu32(a, b).as_u32x16();
    transmute(simd_select_bitmask(k, min, src.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu32(a, b).as_u32x16();
    let zero = _mm512_setzero_si512().as_u32x16();
    transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu64(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, min, src.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu64(a, b).as_u64x8();
    let zero = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, min, zero))
}
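// Sketch of signed vs. unsigned 64-bit min (hypothetical test, assuming
// AVX512F hardware): the same bit pattern orders differently under
// `min_epi64` and `min_epu64`.
#[cfg(test)]
mod min_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn signed_vs_unsigned() {
        let a = _mm512_set1_epi64(-1); // all bits set: -1 signed, u64::MAX unsigned
        let b = _mm512_set1_epi64(1);
        let signed: [i64; 8] = transmute(_mm512_min_epi64(a, b));
        let unsigned: [i64; 8] = transmute(_mm512_min_epu64(a, b));
        assert_eq!(signed, [-1; 8]);
        assert_eq!(unsigned, [1; 8]);
    }
}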
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let sqrt = _mm512_sqrt_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
let sqrt = _mm512_sqrt_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, sqrt, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
let sqrt = _mm512_sqrt_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
let sqrt = _mm512_sqrt_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, sqrt, zero))
}
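// Sketch for the sqrt family (hypothetical test, assuming AVX512F hardware);
// 4.0 is exactly representable, so the comparison is exact.
#[cfg(test)]
mod sqrt_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn exact_square() {
        let r: [f32; 16] = transmute(_mm512_sqrt_ps(_mm512_set1_ps(4.0)));
        assert_eq!(r, [2.0; 16]);
    }
}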
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
transmute(vfmadd132ps(
a.as_f32x16(),
b.as_f32x16(),
c.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
transmute(vfmadd132pd(
a.as_f64x8(),
b.as_f64x8(),
c.as_f64x8(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
}
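// The three masked FMA variants differ only in what fills the unselected
// lanes: `mask` keeps `a`, `mask3` keeps `c`, and `maskz` zeroes them. A
// sketch (hypothetical test, assuming AVX512F hardware):
#[cfg(test)]
mod fmadd_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask_vs_mask3() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        // a * b + c = 7.0 in selected lanes; the mask here selects none.
        let m: [f32; 16] = transmute(_mm512_mask_fmadd_ps(a, 0, b, c));
        let m3: [f32; 16] = transmute(_mm512_mask3_fmadd_ps(a, b, c, 0));
        assert_eq!(m, [2.0; 16]); // every lane falls back to `a`
        assert_eq!(m3, [1.0; 16]); // every lane falls back to `c`
    }
}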
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
transmute(vfmadd132ps(
a.as_f32x16(),
b.as_f32x16(),
sub,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
transmute(vfmadd132pd(
a.as_f64x8(),
b.as_f64x8(),
sub,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
transmute(vfmaddsub213ps(
a.as_f32x16(),
b.as_f32x16(),
c.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
transmute(vfmaddsub213pd(
a.as_f64x8(),
b.as_f64x8(),
c.as_f64x8(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
transmute(vfmaddsub213ps(
a.as_f32x16(),
b.as_f32x16(),
sub,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
transmute(vfmaddsub213pd(
a.as_f64x8(),
b.as_f64x8(),
sub,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
}
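// `fmaddsub` alternates per lane: even lanes compute a * b - c and odd lanes
// a * b + c (`fmsubadd` is the reverse). A sketch (hypothetical test,
// assuming AVX512F hardware):
#[cfg(test)]
mod fmaddsub_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn alternating_lanes() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        let r: [f32; 16] = transmute(_mm512_fmaddsub_ps(a, b, c));
        for (i, lane) in r.iter().enumerate() {
            assert_eq!(*lane, if i % 2 == 0 { 5.0 } else { 7.0 });
        }
    }
}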
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
transmute(vfmadd132ps(
sub,
b.as_f32x16(),
c.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
transmute(vfmadd132pd(
sub,
b.as_f64x8(),
c.as_f64x8(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
transmute(vfmadd132ps(
suba,
b.as_f32x16(),
subc,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
transmute(vfmadd132pd(
suba,
b.as_f64x8(),
subc,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 {
transmute(vrcp14ps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vrcp14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
transmute(vrcp14pd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vrcp14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
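// `rcp14` is an approximate reciprocal with a maximum relative error of
// 2^-14, so sketches should compare with a tolerance rather than exactly
// (hypothetical test, assuming AVX512F hardware):
#[cfg(test)]
mod rcp14_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn approximate_reciprocal() {
        let r: [f32; 16] = transmute(_mm512_rcp14_ps(_mm512_set1_ps(4.0)));
        for &lane in &r {
            // 1/4 = 0.25; a 2^-14 relative error stays well inside this band.
            assert!(lane > 0.2499 && lane < 0.2501);
        }
    }
}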
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
transmute(vrsqrt14ps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vrsqrt14ps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
transmute(vrsqrt14pd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 {
transmute(vgetexpps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vgetexpps(
a.as_f32x16(),
src.as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vgetexpps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d {
transmute(vgetexppd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vgetexppd(
a.as_f64x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vgetexppd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
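// `getexp` extracts the unbiased exponent as a float: getexp(x) is
// floor(log2(|x|)) for normal inputs. A sketch (hypothetical test, assuming
// AVX512F hardware):
#[cfg(test)]
mod getexp_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn exponent_of_eight() {
        let r: [f32; 16] = transmute(_mm512_getexp_ps(_mm512_set1_ps(8.0)));
        assert_eq!(r, [3.0; 16]); // 8.0 = 1.0 * 2^3
    }
}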
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_roundscale_ps(a: __m512, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr) => {
vrndscaleps(
a,
$imm8,
zero,
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm8:expr) => {
vrndscaleps(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr) => {
vrndscaleps(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_roundscale_pd(a: __m512d, imm8: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr) => {
vrndscalepd(a, $imm8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_roundscale_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
imm8: i32,
) -> __m512d {
let a = a.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm8:expr) => {
vrndscalepd(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr) => {
vrndscalepd(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
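// For `roundscale`, imm8[7:4] selects how many fraction bits to keep (the
// scale M) and the low bits select the rounding mode; imm8 = 0 rounds to the
// nearest integer. A sketch (hypothetical test, assuming AVX512F hardware):
#[cfg(test)]
mod roundscale_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn round_to_integer() {
        let r: [f32; 16] = transmute(_mm512_roundscale_ps(_mm512_set1_ps(1.25), 0));
        assert_eq!(r, [1.0; 16]);
    }
}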
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
transmute(vscalefps(
a.as_f32x16(),
b.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
transmute(vscalefps(
a.as_f32x16(),
b.as_f32x16(),
src.as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
transmute(vscalefps(
a.as_f32x16(),
b.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
transmute(vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
transmute(vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
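// `scalef` computes a * 2^floor(b) per lane. A sketch (hypothetical test,
// assuming AVX512F hardware):
#[cfg(test)]
mod scalef_sketch {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn scale_by_power_of_two() {
        let a = _mm512_set1_ps(3.0);
        let b = _mm512_set1_ps(2.0);
        let r: [f32; 16] = transmute(_mm512_scalef_ps(a, b));
        assert_eq!(r, [12.0; 16]); // 3.0 * 2^2
    }
}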
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vfixupimmps(
a,
b,
c,
$imm8,
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fixupimm_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512i,
imm8: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vfixupimmps(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fixupimm_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512i,
imm8: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vfixupimmpsz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i, imm8: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fixupimm_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512i,
imm8: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fixupimm_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512i,
imm8: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmpdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpternlogd(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_ternarylogic_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
let src = src.as_i32x16();
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpternlogd(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_ternarylogic_epi32(
k: __mmask16,
a: __m512i,
b: __m512i,
c: __m512i,
imm8: i32,
) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpternlogd(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
let b = b.as_i64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpternlogq(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_ternarylogic_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
let src = src.as_i64x8();
let a = a.as_i64x8();
let b = b.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpternlogq(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_ternarylogic_epi64(
k: __mmask8,
a: __m512i,
b: __m512i,
c: __m512i,
imm8: i32,
) -> __m512i {
let a = a.as_i64x8();
let b = b.as_i64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpternlogq(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
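// A minimal usage sketch (the helper name is ours): the imm8 of vpternlog is
// an 8-entry truth table indexed by (a_bit << 2) | (b_bit << 1) | c_bit, so
// f(a, b, c) = (a & b) | c reads off as 0b1110_1010 = 0xEA.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_and_then_or() -> __m512i {
let a = _mm512_set1_epi32(0b1100);
let b = _mm512_set1_epi32(0b1010);
let c = _mm512_set1_epi32(0b0001);
// Every lane becomes (0b1100 & 0b1010) | 0b0001 = 0b1001.
_mm512_ternarylogic_epi32(a, b, c, 0xEA)
}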
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_getmant_ps(
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps(
a.as_f32x16(),
$imm2 << 2 | $imm4,
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_getmant_ps(
src: __m512,
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps(
a.as_f32x16(),
$imm2 << 2 | $imm4,
src.as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_getmant_ps(
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps(
a.as_f32x16(),
$imm2 << 2 | $imm4,
_mm512_setzero_ps().as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_getmant_pd(
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd(
a.as_f64x8(),
$imm2 << 2 | $imm4,
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_getmant_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd(
a.as_f64x8(),
$imm2 << 2 | $imm4,
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_getmant_pd(
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd(
a.as_f64x8(),
$imm2 << 2 | $imm4,
_mm512_setzero_pd().as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
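// A minimal usage sketch (the helper name is ours): getmant splits x into
// mant * 2^exp and returns the mantissa normalized into the interval chosen
// by `norm`, with the sign chosen by `sign`. Since 12.0 = 1.5 * 2^3, every
// lane below ends up as 1.5.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getmant() -> __m512 {
let a = _mm512_set1_ps(12.0);
_mm512_getmant_ps(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC)
}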
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vaddps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_add_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vaddps(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, addround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_add_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vaddps(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, addround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vaddpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_add_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vaddpd(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, addround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_add_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vaddpd(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, addround, zero))
}
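// A minimal usage sketch (the helper name is ours): the rounding argument
// must be a compile-time constant, either _MM_FROUND_CUR_DIRECTION or one of
// the four directed modes OR'ed with _MM_FROUND_NO_EXC; the sub/mul/div/sqrt
// _round variants below share the same contract.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_add_round() -> __m512 {
let a = _mm512_set1_ps(1.5);
let b = _mm512_set1_ps(0.1);
// Round each lane's sum toward zero without raising exceptions.
_mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
}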
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_sub_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsubps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_sub_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsubps(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, subround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_sub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsubps(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, subround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_sub_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsubpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_sub_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsubpd(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, subround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_sub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsubpd(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, subround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_mul_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmulps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_mul_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmulps(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, mulround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_mul_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmulps(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mulround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_mul_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmulpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_mul_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmulpd(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, mulround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_mul_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmulpd(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mulround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_div_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vdivps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_div_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vdivps(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, divround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_div_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vdivps(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, divround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_div_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vdivpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_div_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vdivpd(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, divround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_div_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vdivpd(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, divround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_sqrt_round_ps(a: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsqrtps(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_sqrt_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsqrtps(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, sqrtround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsqrtps(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, sqrtround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_sqrt_round_pd(a: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsqrtpd(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_sqrt_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsqrtpd(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, sqrtround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsqrtpd(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, sqrtround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmadd_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmadd_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmadd_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmadd_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmadd_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
}
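// A minimal usage sketch (the helper name is ours) of how the masked fmadd
// forms differ: lanes cleared in `k` receive `a` from the mask form, `c`
// from the mask3 form, and zero from the maskz form.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmadd_round_masks() -> __m512 {
let a = _mm512_set1_ps(2.0);
let b = _mm512_set1_ps(3.0);
let c = _mm512_set1_ps(1.0);
let k: __mmask16 = 0b00000000_11111111;
// The low eight lanes are 2.0 * 3.0 + 1.0 = 7.0 in all three results; they
// differ only in what the masked-off high lanes receive.
let keep_a = _mm512_mask_fmadd_round_ps(a, k, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let keep_c = _mm512_mask3_fmadd_round_ps(a, b, c, k, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
// keep_a's high lanes are 2.0 and keep_c's are 1.0; here they are zeroed.
let _ = (keep_a, keep_c);
_mm512_maskz_fmadd_round_ps(k, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}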
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsub_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsub_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsub_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmaddsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmaddsub_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmaddsub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmaddsub_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmaddsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmaddsub_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmaddsub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
}
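// A minimal usage sketch (the helper name is ours): fmaddsub alternates per
// lane, subtracting c on even-indexed lanes and adding it on odd-indexed
// lanes; fmsubadd below is the mirror image.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmaddsub_round() -> __m512 {
let a = _mm512_set1_ps(2.0);
let b = _mm512_set1_ps(3.0);
let c = _mm512_set1_ps(1.0);
// Lanes alternate 5.0 (even, 2*3 - 1) and 7.0 (odd, 2*3 + 1).
_mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}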
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsubadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsubadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmadd_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmadd_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmsub_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
macro_rules! call {
($imm4:expr) => {
vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmsub_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
macro_rules! call {
($imm4:expr) => {
vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
}
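// A minimal usage sketch (the helper name is ours): fnmadd computes
// -(a * b) + c and fnmsub computes -(a * b) - c; both are lowered above as a
// plain fmadd with the relevant operands negated.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fnmadd_round() -> __m512 {
let a = _mm512_set1_ps(2.0);
let b = _mm512_set1_ps(3.0);
let c = _mm512_set1_ps(10.0);
// Every lane becomes -(2.0 * 3.0) + 10.0 = 4.0.
_mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}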
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_max_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_max_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
sae: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_max_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_max_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_max_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
sae: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_max_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_min_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_min_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
sae: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, min, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_min_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_min_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_min_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
sae: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, min, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_min_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, min, zero))
}
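// A minimal usage sketch (the helper name is ours): for max/min the final
// argument only controls exception suppression (SAE), not rounding; pass
// _MM_FROUND_CUR_DIRECTION for the default behavior or _MM_FROUND_NO_EXC to
// suppress faults.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_max_round() -> __m512 {
let a = _mm512_set1_ps(1.0);
let b = _mm512_set1_ps(2.0);
// Every lane becomes 2.0.
_mm512_max_round_ps(a, b, _MM_FROUND_CUR_DIRECTION)
}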
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_getexp_round_ps(a: __m512, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vgetexpps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_getexp_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vgetexpps(a.as_f32x16(), src.as_f32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vgetexpps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_getexp_round_pd(a: __m512d, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vgetexppd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_getexp_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
sae: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vgetexppd(a.as_f64x8(), src.as_f64x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vgetexppd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
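// A minimal usage sketch (the helper name is ours): getexp returns each
// lane's unbiased exponent, floor(log2(|x|)), as a float; the sae argument
// again only suppresses exceptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getexp_round() -> __m512 {
let a = _mm512_set1_ps(8.5);
// floor(log2(8.5)) = 3.0 in every lane.
_mm512_getexp_round_ps(a, _MM_FROUND_CUR_DIRECTION)
}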
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_roundscale_round_ps(a: __m512, imm8: i32, sae: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaleps(a, $imm8, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaleps(a, $imm8, src, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_ps(
k: __mmask16,
a: __m512,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaleps(a, $imm8, zero, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_roundscale_round_pd(a: __m512d, imm8: i32, sae: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalepd(a, $imm8, zero, 0b11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalepd(a, $imm8, src, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_pd(
k: __mmask8,
a: __m512d,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalepd(a, $imm8, zero, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
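// A minimal usage sketch (the helper name is ours; the imm8 layout follows
// Intel's VRNDSCALE encoding): imm8[7:4] is the number of fraction bits to
// keep and imm8[1:0] the rounding mode (0 nearest, 1 down, 2 up, 3
// truncate), so 0x11 rounds down to multiples of 0.5.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_roundscale_round() -> __m512 {
let a = _mm512_set1_ps(1.3);
// 1.3 rounds down to 1.0 when quantized to halves.
_mm512_roundscale_round_ps(a, 0x11, _MM_FROUND_CUR_DIRECTION)
}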
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_scalef_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vscalefps(
a.as_f32x16(),
b.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_scalef_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vscalefps(a.as_f32x16(), b.as_f32x16(), src.as_f32x16(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_scalef_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vscalefps(
a.as_f32x16(),
b.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_scalef_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_scalef_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vscalefpd(a.as_f64x8(), b.as_f64x8(), src.as_f64x8(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_scalef_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
k,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
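// A minimal usage sketch (the helper name is ours): scalef computes
// a * 2^floor(b) per lane, the vector analogue of ldexp.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_scalef_round() -> __m512 {
let a = _mm512_set1_ps(3.0);
let b = _mm512_set1_ps(2.0);
// Every lane becomes 3.0 * 2^2 = 12.0.
_mm512_scalef_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}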
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_fixupimm_round_ps(
a: __m512,
b: __m512,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmps(a, b, c, $imm8, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmps(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpsz(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_fixupimm_round_pd(
a: __m512d,
b: __m512d,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpd(a, b, c, $imm8, 0b11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpd(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpdz(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
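/// Normalizes the mantissas of packed single-precision (32-bit)
/// floating-point elements in `a`. `norm` selects the normalization
/// interval (an `_MM_MANT_NORM_*` constant) and `sign` the sign handling
/// (an `_MM_MANT_SIGN_*` constant); `sae` controls exception suppression.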
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_ps(
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512 {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantps(
a.as_f32x16(),
$imm2 << 2 | $imm4_1,
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512 {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantps(
a.as_f32x16(),
$imm2 << 2 | $imm4_1,
src.as_f32x16(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_ps(
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512 {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantps(
a.as_f32x16(),
$imm2 << 2 | $imm4_1,
_mm512_setzero_ps().as_f32x16(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_pd(
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512d {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantpd(
a.as_f64x8(),
$imm2 << 2 | $imm4_1,
_mm512_setzero_pd().as_f64x8(),
0b11111111,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512d {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantpd(
a.as_f64x8(),
$imm2 << 2 | $imm4_1,
src.as_f64x8(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_pd(
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512d {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantpd(
a.as_f64x8(),
$imm2 << 2 | $imm4_1,
_mm512_setzero_pd().as_f64x8(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
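/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers, rounding according to the current MXCSR
/// rounding mode (`_MM_FROUND_CUR_DIRECTION`).
///
/// Illustrative sketch (not run as a doc-test; requires AVX-512F at
/// runtime):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// // Under the default round-to-nearest-even mode every lane becomes 2.
/// let r = _mm512_cvtps_epi32(a);
/// ```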
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
transmute(vcvtps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2dq(
a.as_f32x16(),
src.as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
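/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed unsigned 32-bit integers using the current rounding mode.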
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
transmute(vcvtps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_u32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2udq(
a.as_f32x16(),
src.as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
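/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed double-precision (64-bit) floating-point elements.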
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d {
transmute(vcvtps2pd(
a.as_f32x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
transmute(vcvtps2pd(
a.as_f32x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
transmute(vcvtps2pd(
a.as_f32x8(),
_mm512_setzero_pd().as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
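/// Converts the lower eight single-precision (32-bit) floating-point
/// elements of `v2` to packed double-precision (64-bit) elements.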
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
transmute(vcvtps2pd(
_mm512_castps512_ps256(v2).as_f32x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
transmute(vcvtps2pd(
_mm512_castps512_ps256(v2).as_f32x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
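/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed single-precision (32-bit) floating-point elements.
/// `_mm512_cvtpd_pslo` below additionally widens the result to a full
/// `__m512`, zeroing the upper eight lanes.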
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
transmute(vcvtpd2ps(
a.as_f64x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
transmute(vcvtpd2ps(
a.as_f64x8(),
src.as_f32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
transmute(vcvtpd2ps(
a.as_f64x8(),
_mm256_setzero_ps().as_f32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
let r: f32x8 = vcvtpd2ps(
v2.as_f64x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
);
simd_shuffle16(
r,
_mm256_setzero_ps().as_f32x8(),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
let r: f32x8 = vcvtpd2ps(
v2.as_f64x8(),
_mm512_castps512_ps256(src).as_f32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
);
simd_shuffle16(
r,
_mm256_setzero_ps().as_f32x8(),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
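/// Sign-extends packed 8-bit integers in `a` to packed 32-bit integers. The
/// remaining `cvtepi*` widening intrinsics below follow the same pattern
/// for wider source and destination element types.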
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
let a = a.as_i8x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
let a = a.as_i8x16();
let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
transmute::<i64x8, _>(simd_cast(v64))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
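/// Zero-extends packed unsigned 8-bit integers in `a` to packed 32-bit
/// integers (the unsigned counterpart of `_mm512_cvtepi8_epi32`).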
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
let a = a.as_u8x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
let a = a.as_u8x16();
let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
transmute::<i64x8, _>(simd_cast(v64))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
let a = a.as_i16x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
let a = a.as_i16x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
let a = a.as_u16x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
let a = a.as_u16x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
let a = a.as_i32x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
let a = a.as_u32x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
let a = a.as_i32x16();
transmute::<f32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepi32_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepi32_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
let a = a.as_i32x8();
transmute::<f64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepi32_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepi32_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
let a = a.as_u32x16();
transmute::<f32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepu32_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepu32_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
let a = a.as_u32x8();
transmute::<f64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepu32_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepu32_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
let v2 = v2.as_i32x16();
let v256: i32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
transmute::<f64x8, _>(simd_cast(v256))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
let v2 = v2.as_u32x16();
let v256: u32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
transmute::<f64x8, _>(simd_cast(v256))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
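/// Truncates packed 32-bit integers in `a` to packed 16-bit integers,
/// keeping only the low bits of each element; the other `cvtepi32/64_*`
/// narrowing intrinsics below truncate the same way.
///
/// Illustrative sketch (not run as a doc-test):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x0001_2345);
/// // Plain truncation keeps the low 16 bits: every lane becomes 0x2345.
/// let r = _mm512_cvtepi32_epi16(a);
/// ```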
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
let a = a.as_i32x16();
transmute::<i16x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
let zero = _mm256_setzero_si256().as_i16x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
let a = a.as_i32x16();
transmute::<i8x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
let zero = _mm_setzero_si128().as_i8x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
let a = a.as_i64x8();
transmute::<i32x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
let a = a.as_i64x8();
transmute::<i16x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
let zero = _mm_setzero_si128().as_i16x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
transmute(vpmovqb(
a.as_i64x8(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}
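/// Saturating signed narrowing: converts packed signed 32-bit integers in
/// `a` to packed 16-bit integers, clamping out-of-range values to
/// `i16::MIN`/`i16::MAX` instead of truncating.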
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
transmute(vpmovsdw(
a.as_i32x16(),
_mm256_setzero_si256().as_i16x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovsdw(
a.as_i32x16(),
_mm256_setzero_si256().as_i16x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
transmute(vpmovsdb(
a.as_i32x16(),
_mm_setzero_si128().as_i8x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
transmute(vpmovsqd(
a.as_i64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
transmute(vpmovsqw(
a.as_i64x8(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
transmute(vpmovsqb(
a.as_i64x8(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}
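/// Saturating unsigned narrowing: converts packed unsigned 32-bit integers
/// in `a` to packed unsigned 16-bit integers, clamping values above
/// `u16::MAX`.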
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
transmute(vpmovusdw(
a.as_u32x16(),
_mm256_setzero_si256().as_u16x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovusdw(
a.as_u32x16(),
_mm256_setzero_si256().as_u16x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
transmute(vpmovusdb(
a.as_u32x16(),
_mm_setzero_si128().as_u8x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
transmute(vpmovusqd(
a.as_u64x8(),
_mm256_setzero_si256().as_u32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovusqd(
a.as_u64x8(),
_mm256_setzero_si256().as_u32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
transmute(vpmovusqw(
a.as_u64x8(),
_mm_setzero_si128().as_u16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
transmute(vpmovusqb(
a.as_u64x8(),
_mm_setzero_si128().as_u8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k))
}
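/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers with an explicit rounding mode. `rounding` is
/// one of `_MM_FROUND_TO_NEAREST_INT`, `_MM_FROUND_TO_NEG_INF`,
/// `_MM_FROUND_TO_POS_INF` or `_MM_FROUND_TO_ZERO`, each combined with
/// `_MM_FROUND_NO_EXC`, or `_MM_FROUND_CUR_DIRECTION` to use the current
/// MXCSR mode. The remaining `cvt_round*` intrinsics take the same
/// constants.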
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvtps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epi32(
src: __m512i,
k: __mmask16,
a: __m512,
rounding: i32,
) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvtps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvtps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvtps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_u32x16(),
0b11111111_11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epu32(
src: __m512i,
k: __mmask16,
a: __m512,
rounding: i32,
) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvtps2udq(a.as_f32x16(), src.as_u32x16(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvtps2udq(a.as_f32x16(), _mm512_setzero_si512().as_u32x16(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vcvtps2pd(
a.as_f32x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_pd(
src: __m512d,
k: __mmask8,
a: __m256,
sae: i32,
) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vcvtps2pd(a.as_f32x8(), src.as_f64x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> __m512d {
macro_rules! call {
($imm4:expr) => {
vcvtps2pd(a.as_f32x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_epi32(
src: __m256i,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtpd2dq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtpd2dq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_u32x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_epu32(
src: __m256i,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtpd2udq(a.as_f64x8(), src.as_u32x8(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtpd2udq(a.as_f64x8(), _mm256_setzero_si256().as_u32x8(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 {
macro_rules! call {
($imm4:expr) => {
vcvtpd2ps(
a.as_f64x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_ps(
src: __m256,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m256 {
macro_rules! call {
($imm4:expr) => {
vcvtpd2ps(a.as_f64x8(), src.as_f32x8(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32) -> __m256 {
macro_rules! call {
($imm4:expr) => {
vcvtpd2ps(a.as_f64x8(), _mm256_setzero_ps().as_f32x8(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtdq2ps(a.as_i32x16(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundepi32_ps(
src: __m512,
k: __mmask16,
a: __m512i,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtdq2ps(a.as_i32x16(), $imm4)
};
}
let r: f32x16 = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtdq2ps(a.as_i32x16(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtudq2ps(a.as_u32x16(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundepu32_ps(
src: __m512,
k: __mmask16,
a: __m512i,
rounding: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtudq2ps(a.as_u32x16(), $imm4)
};
}
let r: f32x16 = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtudq2ps(a.as_u32x16(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r, zero))
}
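/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed half-precision (16-bit) floating-point elements, returned as a
/// `__m256i` holding the raw 16-bit encodings. `_mm512_cvtps_ph` below is
/// the same operation under its non-`round` name.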
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(
a.as_f32x16(),
$imm4,
_mm256_setzero_si256().as_i16x16(),
0b11111111_11111111,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_ph(
src: __m256i,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a.as_f32x16(), $imm4, src.as_i16x16(), k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a.as_f32x16(), $imm4, _mm256_setzero_si256().as_i16x16(), k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(
a.as_f32x16(),
$imm4,
_mm256_setzero_si256().as_i16x16(),
0b11111111_11111111,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a.as_f32x16(), $imm4, src.as_i16x16(), k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a.as_f32x16(), $imm4, _mm256_setzero_si256().as_i16x16(), k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtph2ps(
a.as_i16x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundph_ps(
src: __m512,
k: __mmask16,
a: __m256i,
sae: i32,
) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtph2ps(a.as_i16x16(), src.as_f32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i, sae: i32) -> __m512 {
macro_rules! call {
($imm4:expr) => {
vcvtph2ps(a.as_i16x16(), _mm512_setzero_ps().as_f32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
transmute(vcvtph2ps(
a.as_i16x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
transmute(vcvtph2ps(
a.as_i16x16(),
src.as_f32x16(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
transmute(vcvtph2ps(
a.as_i16x16(),
_mm512_setzero_ps().as_f32x16(),
k,
_MM_FROUND_NO_EXC,
))
}
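/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation (round toward zero); `sae` of
/// `_MM_FROUND_NO_EXC` suppresses exceptions. The `cvtt*` intrinsics below
/// all truncate rather than honor a rounding mode.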
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvttps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundps_epi32(
src: __m512i,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvttps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvttps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundps_epu32(
src: __m512i,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvttpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundpd_epi32(
src: __m256i,
k: __mmask8,
a: __m512d,
sae: i32,
) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvttpd2dq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvttpd2dq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvttpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundpd_epu32(
src: __m256i,
k: __mmask8,
a: __m512d,
sae: i32,
) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvttpd2udq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
macro_rules! call {
($imm4:expr) => {
vcvttpd2udq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
transmute(vcvttps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2dq(
a.as_f32x16(),
src.as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
src.as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
transmute(vcvttpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2dq(
a.as_f64x8(),
src.as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
transmute(vcvttpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2udq(
a.as_f64x8(),
src.as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
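/// Returns an all-zero vector. The `setzero` intrinsics below differ only
/// in the nominal element type of the returned register.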
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_pd() -> __m512d {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_ps() -> __m512 {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero() -> __m512 {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_si512() -> __m512i {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_epi32() -> __m512i {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi32(
e15: i32,
e14: i32,
e13: i32,
e12: i32,
e11: i32,
e10: i32,
e9: i32,
e8: i32,
e7: i32,
e6: i32,
e5: i32,
e4: i32,
e3: i32,
e2: i32,
e1: i32,
e0: i32,
) -> __m512i {
let r = i32x16(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi8(
e63: i8,
e62: i8,
e61: i8,
e60: i8,
e59: i8,
e58: i8,
e57: i8,
e56: i8,
e55: i8,
e54: i8,
e53: i8,
e52: i8,
e51: i8,
e50: i8,
e49: i8,
e48: i8,
e47: i8,
e46: i8,
e45: i8,
e44: i8,
e43: i8,
e42: i8,
e41: i8,
e40: i8,
e39: i8,
e38: i8,
e37: i8,
e36: i8,
e35: i8,
e34: i8,
e33: i8,
e32: i8,
e31: i8,
e30: i8,
e29: i8,
e28: i8,
e27: i8,
e26: i8,
e25: i8,
e24: i8,
e23: i8,
e22: i8,
e21: i8,
e20: i8,
e19: i8,
e18: i8,
e17: i8,
e16: i8,
e15: i8,
e14: i8,
e13: i8,
e12: i8,
e11: i8,
e10: i8,
e9: i8,
e8: i8,
e7: i8,
e6: i8,
e5: i8,
e4: i8,
e3: i8,
e2: i8,
e1: i8,
e0: i8,
) -> __m512i {
let r = i8x64(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37,
e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55,
e56, e57, e58, e59, e60, e61, e62, e63,
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi16(
e31: i16,
e30: i16,
e29: i16,
e28: i16,
e27: i16,
e26: i16,
e25: i16,
e24: i16,
e23: i16,
e22: i16,
e21: i16,
e20: i16,
e19: i16,
e18: i16,
e17: i16,
e16: i16,
e15: i16,
e14: i16,
e13: i16,
e12: i16,
e11: i16,
e10: i16,
e9: i16,
e8: i16,
e7: i16,
e6: i16,
e5: i16,
e4: i16,
e3: i16,
e2: i16,
e1: i16,
e0: i16,
) -> __m512i {
let r = i16x32(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
);
transmute(r)
}
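// The `set4`/`setr4` helpers repeat a four-element pattern across the
// register, e.g. `_mm512_set4_epi32(4, 3, 2, 1)` produces
// `1, 2, 3, 4, 1, 2, 3, 4, ...` counting up from element 0.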
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
_mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
_mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
_mm512_set_pd(d, c, b, a, d, c, b, a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
_mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
_mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
_mm512_set_pd(a, b, c, d, a, b, c, d)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi64(
    e7: i64,
    e6: i64,
    e5: i64,
    e4: i64,
    e3: i64,
    e2: i64,
    e1: i64,
    e0: i64,
) -> __m512i {
    _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi64(
e0: i64,
e1: i64,
e2: i64,
e3: i64,
e4: i64,
e5: i64,
e6: i64,
e7: i64,
) -> __m512i {
let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
transmute(r)
}
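// Gathers: `slice` is an untyped base address, and each 32- or 64-bit
// offset is scaled by `scale`, which the hardware requires to be 1, 2, 4,
// or 8. The masked forms load only the lanes whose mask bit is set and
// take the remaining lanes from `src`.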
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8, scale: i32) -> __m512d {
let zero = _mm512_setzero_pd().as_f64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vgatherdpd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_pd(
src: __m512d,
mask: __mmask8,
offsets: __m256i,
slice: *const u8,
scale: i32,
) -> __m512d {
let src = src.as_f64x8();
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vgatherdpd(src, slice, offsets, mask as i8, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8, scale: i32) -> __m512d {
let zero = _mm512_setzero_pd().as_f64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqpd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_pd(
src: __m512d,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512d {
let src = src.as_f64x8();
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqpd(src, slice, offsets, mask as i8, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m256 {
let zero = _mm256_setzero_ps().as_f32x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqps(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_ps(
src: __m256,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m256 {
let src = src.as_f32x8();
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqps(src, slice, offsets, mask as i8, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m512 {
let zero = _mm512_setzero_ps().as_f32x16();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vgatherdps(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_ps(
src: __m512,
mask: __mmask16,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512 {
let src = src.as_f32x16();
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vgatherdps(src, slice, offsets, mask as i16, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
let zero = _mm512_setzero_si512().as_i32x16();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpgatherdd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_epi32(
src: __m512i,
mask: __mmask16,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512i {
let src = src.as_i32x16();
let mask = mask as i16;
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpgatherdd(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale: i32) -> __m512i {
let zero = _mm512_setzero_si512().as_i64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpgatherdq(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_epi64(
src: __m512i,
mask: __mmask8,
offsets: __m256i,
slice: *const u8,
scale: i32,
) -> __m512i {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpgatherdq(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_epi64(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
let zero = _mm512_setzero_si512().as_i64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqq(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_epi64(
src: __m512i,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512i {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqq(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m256i {
let zero = _mm256_setzero_si256().as_i32x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_epi32(
src: __m256i,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m256i {
let src = src.as_i32x8();
let mask = mask as i8;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqd(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
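// Scatters: the store-side counterparts of the gathers above. Masked forms
// store only the lanes whose mask bit is set; elements are written in index
// order, so when offsets overlap the highest-indexed lane wins.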
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) {
let src = src.as_f64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vscatterdpd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_pd(
slice: *mut u8,
mask: __mmask8,
offsets: __m256i,
src: __m512d,
scale: i32,
) {
let src = src.as_f64x8();
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vscatterdpd(slice, mask as i8, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_pd(slice: *mut u8, offsets: __m512i, src: __m512d, scale: i32) {
let src = src.as_f64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqpd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_pd(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m512d,
scale: i32,
) {
let src = src.as_f64x8();
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqpd(slice, mask as i8, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_ps(slice: *mut u8, offsets: __m512i, src: __m512, scale: i32) {
let src = src.as_f32x16();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vscatterdps(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_ps(
slice: *mut u8,
mask: __mmask16,
offsets: __m512i,
src: __m512,
scale: i32,
) {
let src = src.as_f32x16();
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vscatterdps(slice, mask as i16, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_ps(slice: *mut u8, offsets: __m512i, src: __m256, scale: i32) {
let src = src.as_f32x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqps(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_ps(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m256,
scale: i32,
) {
let src = src.as_f32x8();
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqps(slice, mask as i8, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) {
let src = src.as_i64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpscatterdq(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_epi64(
slice: *mut u8,
mask: __mmask8,
offsets: __m256i,
src: __m512i,
scale: i32,
) {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpscatterdq(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_epi64(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
let src = src.as_i64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqq(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_epi64(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m512i,
scale: i32,
) {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqq(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
let src = src.as_i32x16();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpscatterdd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_epi32(
slice: *mut u8,
mask: __mmask16,
offsets: __m512i,
src: __m512i,
scale: i32,
) {
let src = src.as_i32x16();
let mask = mask as i16;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpscatterdd(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m256i, scale: i32) {
let src = src.as_i32x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_epi32(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m256i,
scale: i32,
) {
let src = src.as_i32x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqd(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
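// Compress: the active (mask-selected) elements of `a` are packed
// contiguously into the low elements of the result; the remainder is
// filled from `src` (or zeroed in the `maskz` form).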
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
transmute(vpcompressd(
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
transmute(vpcompressq(
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vcompressps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vcompresspd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
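// Expand: the inverse of compress. Consecutive low elements of `a` are
// placed at the positions whose mask bit is set; the other positions come
// from `src` (or zero).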
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
transmute(vpexpandd(
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
transmute(vpexpandq(
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vexpandps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vexpandpd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
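// Rotates by an immediate; the count is taken modulo the element width.
// The `ror` variants assert on vprold/vprolq, presumably because LLVM
// canonicalizes a right rotate into a left rotate by `width - imm8`.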
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprold(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprold(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprold(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprord(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprord(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprord(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprolq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprolq(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprolq(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprorq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprorq(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprorq(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, ror, zero))
}
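// Logical shifts by an immediate. Per Intel's definition, a count greater
// than 31 (63 for the `epi64` forms) yields zero.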
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsllid(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsllid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsllid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsrlid(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsrlid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsrlid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpslliq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpslliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpslliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsrliq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsrliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsrliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
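// Shifts by a count vector: the count is the low 64 bits of `count`,
// applied uniformly to every element; counts of the element width or more
// zero the result.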
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_mask_sll_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sll_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sll_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_mask_srl_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_srl_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_srl_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_mask_sll_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sll_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sll_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_mask_srl_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_srl_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_srl_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
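// Arithmetic right shifts: the sign bit is replicated, so counts of the
// element width or more fill the result with the sign bit instead of
// zeroing it.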
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_mask_sra_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sra_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sra_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_mask_sra_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sra_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sra_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsraid(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsraid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsraid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsraiq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsraiq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsraiq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
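// From here on the counts are per-element: the vpsravd/vpsravq,
// vprolvd/vprolvq, vprorvd/vprorvq, vpsllvd/vpsllvq and vpsrlvd/vpsrlvq
// families below read a separate shift or rotate count from each lane.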
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_mask_srav_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srav_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srav_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_mask_srav_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srav_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srav_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vprolvd(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_mask_rolv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let rol = _mm512_rolv_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let rol = _mm512_rolv_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_mask_rorv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let ror = _mm512_rorv_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let ror = _mm512_rorv_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let rol = _mm512_rolv_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let rol = _mm512_rolv_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let ror = _mm512_rorv_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let ror = _mm512_rorv_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_mask_sllv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_sllv_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_sllv_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_mask_srlv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srlv_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srlv_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_mask_sllv_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_sllv_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_sllv_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_mask_srlv_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srlv_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srlv_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
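// vpermilps/vpermilpd permute within each 128-bit lane. The immediate
// `permute` forms below are emulated with the variable-control
// instruction, so imm8 first has to be expanded into a per-element
// control vector.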
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permute_ps(a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            // The variable form of vpermilps reads only the low two bits of
            // each control dword, so spread the four 2-bit fields of imm8
            // across each group of four elements.
            vpermilps(
                a,
                _mm512_set4_epi32(($imm8 >> 6) & 3, ($imm8 >> 4) & 3, ($imm8 >> 2) & 3, $imm8 & 3)
                    .as_i32x16(),
            )
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permute_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            // Spread imm8's 2-bit fields per element, as in _mm512_permute_ps.
            vpermilps(
                a,
                _mm512_set4_epi32(($imm8 >> 6) & 3, ($imm8 >> 4) & 3, ($imm8 >> 2) & 3, $imm8 & 3)
                    .as_i32x16(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            // Spread imm8's 2-bit fields per element, as in _mm512_permute_ps.
            vpermilps(
                a,
                _mm512_set4_epi32(($imm8 >> 6) & 3, ($imm8 >> 4) & 3, ($imm8 >> 2) & 3, $imm8 & 3)
                    .as_i32x16(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permute_pd(a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // Variable vpermilpd selects with bit 1 of each control qword.
            vpermilpd(
                a,
                _mm512_setr_epi64(
                    (($imm8 as i64) & 1) << 1, ((($imm8 as i64) >> 1) & 1) << 1,
                    ((($imm8 as i64) >> 2) & 1) << 1, ((($imm8 as i64) >> 3) & 1) << 1,
                    ((($imm8 as i64) >> 4) & 1) << 1, ((($imm8 as i64) >> 5) & 1) << 1,
                    ((($imm8 as i64) >> 6) & 1) << 1, ((($imm8 as i64) >> 7) & 1) << 1,
                )
                .as_i64x8(),
            )
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permute_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // Expand imm8 into a control vector, as in _mm512_permute_pd.
            vpermilpd(
                a,
                _mm512_setr_epi64(
                    (($imm8 as i64) & 1) << 1, ((($imm8 as i64) >> 1) & 1) << 1,
                    ((($imm8 as i64) >> 2) & 1) << 1, ((($imm8 as i64) >> 3) & 1) << 1,
                    ((($imm8 as i64) >> 4) & 1) << 1, ((($imm8 as i64) >> 5) & 1) << 1,
                    ((($imm8 as i64) >> 6) & 1) << 1, ((($imm8 as i64) >> 7) & 1) << 1,
                )
                .as_i64x8(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // Expand imm8 into a control vector, as in _mm512_permute_pd.
            vpermilpd(
                a,
                _mm512_setr_epi64(
                    (($imm8 as i64) & 1) << 1, ((($imm8 as i64) >> 1) & 1) << 1,
                    ((($imm8 as i64) >> 2) & 1) << 1, ((($imm8 as i64) >> 3) & 1) << 1,
                    ((($imm8 as i64) >> 4) & 1) << 1, ((($imm8 as i64) >> 5) & 1) << 1,
                    ((($imm8 as i64) >> 6) & 1) << 1, ((($imm8 as i64) >> 7) & 1) << 1,
                )
                .as_i64x8(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
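// `permutex` permutes 64-bit elements within each 256-bit half, applying
// the same four 2-bit selectors to both halves.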
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permutex_epi64(a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // vpermq with a vector control permutes across the whole register,
            // so offset the upper 256-bit lane's selectors by 4.
            vpermq(
                a,
                _mm512_setr_epi64(
                    ($imm8 as i64) & 3, (($imm8 as i64) >> 2) & 3,
                    (($imm8 as i64) >> 4) & 3, (($imm8 as i64) >> 6) & 3,
                    (($imm8 as i64) & 3) + 4, ((($imm8 as i64) >> 2) & 3) + 4,
                    ((($imm8 as i64) >> 4) & 3) + 4, ((($imm8 as i64) >> 6) & 3) + 4,
                )
                .as_i64x8(),
            )
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permutex_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    imm8: i32,
) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // Build the lane-offset control vector, as in _mm512_permutex_epi64.
            vpermq(
                a,
                _mm512_setr_epi64(
                    ($imm8 as i64) & 3, (($imm8 as i64) >> 2) & 3,
                    (($imm8 as i64) >> 4) & 3, (($imm8 as i64) >> 6) & 3,
                    (($imm8 as i64) & 3) + 4, ((($imm8 as i64) >> 2) & 3) + 4,
                    ((($imm8 as i64) >> 4) & 3) + 4, ((($imm8 as i64) >> 6) & 3) + 4,
                )
                .as_i64x8(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // Build the lane-offset control vector, as in _mm512_permutex_epi64.
            vpermq(
                a,
                _mm512_setr_epi64(
                    ($imm8 as i64) & 3, (($imm8 as i64) >> 2) & 3,
                    (($imm8 as i64) >> 4) & 3, (($imm8 as i64) >> 6) & 3,
                    (($imm8 as i64) & 3) + 4, ((($imm8 as i64) >> 2) & 3) + 4,
                    ((($imm8 as i64) >> 4) & 3) + 4, ((($imm8 as i64) >> 6) & 3) + 4,
                )
                .as_i64x8(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permutex_pd(a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // vpermpd with a vector control permutes across the whole register,
            // so offset the upper 256-bit lane's selectors by 4.
            vpermpd(
                a,
                _mm512_setr_epi64(
                    ($imm8 as i64) & 3, (($imm8 as i64) >> 2) & 3,
                    (($imm8 as i64) >> 4) & 3, (($imm8 as i64) >> 6) & 3,
                    (($imm8 as i64) & 3) + 4, ((($imm8 as i64) >> 2) & 3) + 4,
                    ((($imm8 as i64) >> 4) & 3) + 4, ((($imm8 as i64) >> 6) & 3) + 4,
                )
                .as_i64x8(),
            )
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permutex_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // Build the lane-offset control vector, as in _mm512_permutex_pd.
            vpermpd(
                a,
                _mm512_setr_epi64(
                    ($imm8 as i64) & 3, (($imm8 as i64) >> 2) & 3,
                    (($imm8 as i64) >> 4) & 3, (($imm8 as i64) >> 6) & 3,
                    (($imm8 as i64) & 3) + 4, ((($imm8 as i64) >> 2) & 3) + 4,
                    ((($imm8 as i64) >> 4) & 3) + 4, ((($imm8 as i64) >> 6) & 3) + 4,
                )
                .as_i64x8(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            // Build the lane-offset control vector, as in _mm512_permutex_pd.
            vpermpd(
                a,
                _mm512_setr_epi64(
                    ($imm8 as i64) & 3, (($imm8 as i64) >> 2) & 3,
                    (($imm8 as i64) >> 4) & 3, (($imm8 as i64) >> 6) & 3,
                    (($imm8 as i64) & 3) + 4, ((($imm8 as i64) >> 2) & 3) + 4,
                    ((($imm8 as i64) >> 4) & 3) + 4, ((($imm8 as i64) >> 6) & 3) + 4,
                )
                .as_i64x8(),
            )
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
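// Despite its name, `_mm512_permutevar_epi32` shuffles across the whole
// register using the indices in `idx`; it is the same operation as
// `_mm512_permutexvar_epi32` below.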
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutevar_epi32(
src: __m512i,
k: __mmask16,
idx: __m512i,
a: __m512i,
) -> __m512i {
let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
transmute(vpermilps(a.as_f32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_mask_permutevar_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512i,
) -> __m512 {
let permute = _mm512_permutevar_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
let permute = _mm512_permutevar_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
transmute(vpermilpd(a.as_f64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_mask_permutevar_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512i,
) -> __m512d {
let permute = _mm512_permutevar_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
let permute = _mm512_permutevar_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutexvar_epi32(
src: __m512i,
k: __mmask16,
idx: __m512i,
a: __m512i,
) -> __m512i {
let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermq(a.as_i64x8(), idx.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_mask_permutexvar_epi64(
src: __m512i,
k: __mmask8,
idx: __m512i,
a: __m512i,
) -> __m512i {
let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
transmute(vpermps(a.as_f32x16(), idx.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_mask_permutexvar_ps(
src: __m512,
k: __mmask16,
idx: __m512i,
a: __m512,
) -> __m512 {
let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
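/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the
/// corresponding index in idx, and store the results in dst.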
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
transmute(vpermpd(a.as_f64x8(), idx.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_mask_permutexvar_pd(
src: __m512d,
k: __mmask8,
idx: __m512i,
a: __m512d,
) -> __m512d {
let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
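/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in
/// idx, and store the results in dst.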
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2d; only the shared vperm prefix is asserted
pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16()))
}
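/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in
/// idx, and store the results in dst using writemask k (elements are copied from a when the
/// corresponding mask bit is not set).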
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub unsafe fn _mm512_mask_permutex2var_epi32(
a: __m512i,
k: __mmask16,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2d; only the shared vperm prefix is asserted
pub unsafe fn _mm512_maskz_permutex2var_epi32(
k: __mmask16,
a: __m512i,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
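/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in
/// idx, and store the results in dst using writemask k (elements are copied from idx when the
/// corresponding mask bit is not set).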
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub unsafe fn _mm512_mask2_permutex2var_epi32(
a: __m512i,
idx: __m512i,
k: __mmask16,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
}
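/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in
/// idx, and store the results in dst.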
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2q; only the shared vperm prefix is asserted
pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub unsafe fn _mm512_mask_permutex2var_epi64(
a: __m512i,
k: __mmask8,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2q; only the shared vperm prefix is asserted
pub unsafe fn _mm512_maskz_permutex2var_epi64(
k: __mmask8,
a: __m512i,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub unsafe fn _mm512_mask2_permutex2var_epi64(
a: __m512i,
idx: __m512i,
k: __mmask8,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
}
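/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the
/// corresponding selector and index in idx, and store the results in dst.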
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps; only the shared vperm prefix is asserted
pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub unsafe fn _mm512_mask_permutex2var_ps(
a: __m512,
k: __mmask16,
idx: __m512i,
b: __m512,
) -> __m512 {
let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps; only the shared vperm prefix is asserted
pub unsafe fn _mm512_maskz_permutex2var_ps(
k: __mmask16,
a: __m512,
idx: __m512i,
b: __m512,
) -> __m512 {
let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
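/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the
/// corresponding selector and index in idx, and store the results in dst using writemask k
/// (elements are copied from idx when the corresponding mask bit is not set).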
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps; only the shared vperm prefix is asserted
pub unsafe fn _mm512_mask2_permutex2var_ps(
a: __m512,
idx: __m512i,
k: __mmask16,
b: __m512,
) -> __m512 {
let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
// the mask2 variant copies from idx (reinterpreted as float) when a mask bit is not set,
// matching the mask2 epi32/epi64 variants above
let idx = _mm512_castsi512_ps(idx).as_f32x16();
transmute(simd_select_bitmask(k, permute, idx))
}
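/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the
/// corresponding selector and index in idx, and store the results in dst.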
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd; only the shared vperm prefix is asserted
pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub unsafe fn _mm512_mask_permutex2var_pd(
a: __m512d,
k: __mmask8,
idx: __m512i,
b: __m512d,
) -> __m512d {
let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd; only the shared vperm prefix is asserted
pub unsafe fn _mm512_maskz_permutex2var_pd(
k: __mmask8,
a: __m512d,
idx: __m512i,
b: __m512d,
) -> __m512d {
let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
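/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the
/// corresponding selector and index in idx, and store the results in dst using writemask k
/// (elements are copied from idx when the corresponding mask bit is not set).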
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd; only the shared vperm prefix is asserted
pub unsafe fn _mm512_mask2_permutex2var_pd(
a: __m512d,
idx: __m512i,
k: __mmask8,
b: __m512d,
) -> __m512d {
let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
// the mask2 variant copies from idx (reinterpreted as float) when a mask bit is not set,
// matching the mask2 epi32/epi64 variants above
let idx = _mm512_castsi512_pd(idx).as_f64x8();
transmute(simd_select_bitmask(k, permute, idx))
}
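/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the
/// results in dst.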
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] // should be vpshufd; the unmasked shuffle may compile to vpermilps
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
a,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
let r: i32x16 = match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_shuffle_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
imm8: _MM_PERM_ENUM,
) -> __m512i {
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
a,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
let shuffle: i32x16 = match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
};
transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
a,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
let shuffle: i32x16 = match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
};
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shuffle, zero))
}
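/// Shuffle single-precision (32-bit) floating-point elements in a and b within 128-bit lanes using
/// the control in imm8, and store the results in dst.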
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
imm8: i32,
) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
};
transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
};
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, shuffle, zero))
}
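/// Shuffle double-precision (64-bit) floating-point elements in a and b within 128-bit lanes using
/// the control in imm8, and store the results in dst.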
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle8 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle7 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 7) & 0x1 {
0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
_ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
}
};
}
macro_rules! shuffle6 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
match (imm8 >> 6) & 0x1 {
0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
_ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
}
};
}
macro_rules! shuffle5 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
match (imm8 >> 5) & 0x1 {
0 => shuffle6!($a, $b, $c, $d, $e, 12),
_ => shuffle6!($a, $b, $c, $d, $e, 13),
}
};
}
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
match (imm8 >> 4) & 0x1 {
0 => shuffle5!($a, $b, $c, $d, 4),
_ => shuffle5!($a, $b, $c, $d, 5),
}
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 3) & 0x1 {
0 => shuffle4!($a, $b, $c, 10),
_ => shuffle4!($a, $b, $c, 11),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 2) & 0x1 {
0 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, 8),
_ => shuffle2!($a, 9),
}
};
}
match imm8 & 0x1 {
0 => shuffle1!(0),
_ => shuffle1!(1),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle8 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle7 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 7) & 0x1 {
0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
_ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
}
};
}
macro_rules! shuffle6 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
match (imm8 >> 6) & 0x1 {
0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
_ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
}
};
}
macro_rules! shuffle5 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
match (imm8 >> 5) & 0x1 {
0 => shuffle6!($a, $b, $c, $d, $e, 12),
_ => shuffle6!($a, $b, $c, $d, $e, 13),
}
};
}
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
match (imm8 >> 4) & 0x1 {
0 => shuffle5!($a, $b, $c, $d, 4),
_ => shuffle5!($a, $b, $c, $d, 5),
}
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 3) & 0x1 {
0 => shuffle4!($a, $b, $c, 10),
_ => shuffle4!($a, $b, $c, 11),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 2) & 0x1 {
0 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, 8),
_ => shuffle2!($a, 9),
}
};
}
let shuffle = match imm8 & 0x1 {
0 => shuffle1!(0),
_ => shuffle1!(1),
};
transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle8 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle7 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 7) & 0x1 {
0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
_ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
}
};
}
macro_rules! shuffle6 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
match (imm8 >> 6) & 0x1 {
0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
_ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
}
};
}
macro_rules! shuffle5 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
match (imm8 >> 5) & 0x1 {
0 => shuffle6!($a, $b, $c, $d, $e, 12),
_ => shuffle6!($a, $b, $c, $d, $e, 13),
}
};
}
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
match (imm8 >> 4) & 0x1 {
0 => shuffle5!($a, $b, $c, $d, 4),
_ => shuffle5!($a, $b, $c, $d, 5),
}
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 3) & 0x1 {
0 => shuffle4!($a, $b, $c, 10),
_ => shuffle4!($a, $b, $c, 11),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 2) & 0x1 {
0 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, 8),
_ => shuffle2!($a, 9),
}
};
}
let shuffle = match imm8 & 0x1 {
0 => shuffle1!(0),
_ => shuffle1!(1),
};
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, shuffle, zero))
}
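/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the
/// results in dst.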
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] // should be vshufi32x4; the unmasked 128-bit lane shuffle is bit-identical to vshufi64x2
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
let r: i32x16 = match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_i32x4(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
};
transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_i32x4(
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
};
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shuffle, zero))
}
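/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the
/// results in dst.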
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_i64x2(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
};
transmute(simd_select_bitmask(k, shuffle, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_i64x2(
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
};
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shuffle, zero))
}
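/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by
/// imm8 from a and b, and store the results in dst.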
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] // should be vshuff32x4; the unmasked 128-bit lane shuffle is bit-identical to vshuff64x2
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_f32x4(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
imm8: i32,
) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
};
transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
};
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, shuffle, zero))
}
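/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by
/// imm8 from a and b, and store the results in dst.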
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_f64x2(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
};
transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_f64x2(
k: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
let shuffle = match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
};
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, shuffle, zero))
}
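/// Extract 128 bits (composed of 4 single-precision (32-bit) floating-point elements) from a,
/// selected with imm8, and store the result in dst.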
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extractf32x4_ps(a: __m512, imm8: i32) -> __m128 {
assert!(imm8 >= 0 && imm8 <= 3);
match imm8 & 0x3 {
0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
_ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extractf32x4_ps(
src: __m128,
k: __mmask8,
a: __m512,
imm8: i32,
) -> __m128 {
assert!(imm8 >= 0 && imm8 <= 3);
let extract: __m128 = match imm8 & 0x3 {
0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
_ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
};
transmute(simd_select_bitmask(k, extract.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512, imm8: i32) -> __m128 {
assert!(imm8 >= 0 && imm8 <= 3);
let extract: __m128 = match imm8 & 0x3 {
0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
_ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
};
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, extract.as_f32x4(), zero))
}
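/// Extract 256 bits (composed of 4 64-bit integers) from a, selected with imm8, and store the
/// result in dst.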
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)] // should be vextracti64x4; without masking the extract is bit-identical to the f64x4 form
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 1);
match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti64x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extracti64x4_epi64(
src: __m256i,
k: __mmask8,
a: __m512i,
imm8: i32,
) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 1);
let extract = match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
};
transmute(simd_select_bitmask(k, extract, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti64x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 1);
let extract: __m256i = match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
};
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, extract.as_i64x4(), zero))
}
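/// Extract 256 bits (composed of 4 double-precision (64-bit) floating-point elements) from a,
/// selected with imm8, and store the result in dst.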
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extractf64x4_pd(a: __m512d, imm8: i32) -> __m256d {
assert!(imm8 >= 0 && imm8 <= 1);
match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extractf64x4_pd(
src: __m256d,
k: __mmask8,
a: __m512d,
imm8: i32,
) -> __m256d {
assert!(imm8 >= 0 && imm8 <= 1);
let extract = match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
};
transmute(simd_select_bitmask(k, extract, src))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m256d {
assert!(imm8 >= 0 && imm8 <= 1);
let extract = match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
};
let zero = _mm256_setzero_pd();
transmute(simd_select_bitmask(k, extract, zero))
}
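/// Extract 128 bits (composed of 4 32-bit integers) from a, selected with imm8, and store the
/// result in dst.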
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)] // should be vextracti32x4; without masking the extract is bit-identical to the f32x4 form
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let undefined = _mm512_undefined_epi32().as_i32x16();
let extract: i32x4 = match imm8 & 0x3 {
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
_ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
};
transmute(extract)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti32x4, imm8 = 3)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extracti32x4_epi32(
src: __m128i,
k: __mmask8,
a: __m512i,
imm8: i32,
) -> __m128i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let undefined = _mm512_undefined_epi32().as_i32x16();
let extract: i32x4 = match imm8 & 0x3 {
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
_ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
};
transmute(simd_select_bitmask(k, extract, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti32x4, imm8 = 3)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i, imm8: i32) -> __m128i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let undefined = _mm512_undefined_epi32().as_i32x16();
let extract: i32x4 = match imm8 & 0x3 {
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
_ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
};
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, extract, zero))
}
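/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the
/// results in dst.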
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
let r: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
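/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the
/// results in dst.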
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
let r: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
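/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the
/// results in dst.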
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
let r: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mov, zero))
}
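/// Copy a to dst, then insert 128 bits (composed of 4 32-bit integers) from b into dst at the
/// location specified by imm8.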
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))] // should be vinserti32x4; without masking the insert is bit-identical to the f32x4 form
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let b = _mm512_castsi128_si512(b).as_i32x16();
let ret: i32x16 = match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
};
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_inserti32x4(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m128i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let b = _mm512_castsi128_si512(b).as_i32x16();
let insert: i32x16 = match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
};
transmute(simd_select_bitmask(k, insert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_inserti32x4(k: __mmask16, a: __m512i, b: __m128i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let b = _mm512_castsi128_si512(b).as_i32x16();
let insert = match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
};
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, insert, zero))
}
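/// Copy a to dst, then insert 256 bits (composed of 4 64-bit integers) from b into dst at the
/// location specified by imm8.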
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))] // should be vinserti64x4; without masking the insert is bit-identical to the f64x4 form
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castsi256_si512(b);
match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_inserti64x4(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m256i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castsi256_si512(b);
let insert = match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
};
transmute(simd_select_bitmask(k, insert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_inserti64x4(k: __mmask8, a: __m512i, b: __m256i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castsi256_si512(b);
let insert = match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
};
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, insert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 3);
let b = _mm512_castps128_ps512(b);
match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_insertf32x4(
src: __m512,
k: __mmask16,
a: __m512,
b: __m128,
imm8: i32,
) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 3);
let b = _mm512_castps128_ps512(b);
let insert = match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
};
transmute(simd_select_bitmask(k, insert, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_insertf32x4(k: __mmask16, a: __m512, b: __m128, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 3);
let b = _mm512_castps128_ps512(b);
let insert = match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
};
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, insert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castpd256_pd512(b);
match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_insertf64x4(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m256d,
imm8: i32,
) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castpd256_pd512(b);
let insert = match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
};
transmute(simd_select_bitmask(k, insert, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_insertf64x4(k: __mmask8, a: __m512d, b: __m256d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castpd256_pd512(b);
let insert = match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
};
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, insert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))] // should be vpunpckhdq
pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
let r: i32x16 = simd_shuffle16(
a,
b,
[
2, 18, 3, 19, 2 + 4, 18 + 4, 3 + 4, 19 + 4,
2 + 8, 18 + 8, 3 + 8, 19 + 8, 2 + 12, 18 + 12, 3 + 12, 19 + 12,
],
);
transmute(r)
}
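// Worked example: the unpack interleaves the high halves of each 128-bit lane,
// so lane 0 of the result is [a2, b2, a3, b3] (shuffle indices 2, 18, 3, 19) and
// the `+ 4`, `+ 8`, `+ 12` offsets repeat that pattern in lanes 1..=3. The
// epi64/ps/pd unpack intrinsics below follow the same per-lane scheme.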
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm512_mask_unpackhi_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))] // should be vpunpckhqdq
pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm512_mask_unpackhi_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
simd_shuffle16(
a,
b,
[
2, 18, 3, 19, 2 + 4, 18 + 4, 3 + 4, 19 + 4,
2 + 8, 18 + 8, 3 + 8, 19 + 8, 2 + 12, 18 + 12, 3 + 12, 19 + 12,
],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_mask_unpackhi_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
) -> __m512d {
let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))] // should be vpunpckldq
pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
let r: i32x16 = simd_shuffle16(
a,
b,
[
0, 16, 1, 17, 0 + 4, 16 + 4, 1 + 4, 17 + 4,
0 + 8, 16 + 8, 1 + 8, 17 + 8, 0 + 12, 16 + 12, 1 + 12, 17 + 12,
],
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm512_mask_unpacklo_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))] // should be vpunpcklqdq
pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm512_mask_unpacklo_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
simd_shuffle16(
a,
b,
[
0, 16, 1, 17, 0 + 4, 16 + 4, 1 + 4, 17 + 4,
0 + 8, 16 + 8, 1 + 8, 17 + 8, 0 + 12, 16 + 12, 1 + 12, 17 + 12,
],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_mask_unpacklo_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
) -> __m512d {
let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
simd_shuffle16(
a,
_mm_set1_ps(-1.),
[0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
simd_shuffle16(
a,
_mm256_set1_ps(-1.),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
simd_shuffle16(
a,
_mm_set1_ps(0.),
[0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
simd_shuffle16(
a,
_mm256_set1_ps(0.),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
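// Note the difference between the `cast*` and `zext*` widenings: Intel leaves
// the upper lanes of a cast undefined (this implementation happens to fill them
// with -1.0), while `zext*` guarantees zeroed upper lanes. A minimal sketch of
// when that matters (illustrative helper, not part of the API):
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn widen_for_reduction(a: __m128) -> __m512 {
// Zero-extend so the undefined upper lanes cannot pollute a later
// horizontal reduction over all 16 elements.
_mm512_zextps128_ps512(a)
}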
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
simd_shuffle4(a, a, [0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 {
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d {
transmute(a.as_m512())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i {
transmute(a.as_m512())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
simd_shuffle8(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
simd_shuffle8(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
simd_shuffle8(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
simd_shuffle8(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
simd_shuffle2(a, a, [0, 1])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
simd_shuffle4(a, a, [0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 {
transmute(a.as_m512d())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i {
transmute(a.as_m512d())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
simd_shuffle8(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
simd_shuffle8(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
simd_shuffle8(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
simd_shuffle8(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
simd_shuffle2(a, a, [0, 1])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
simd_shuffle4(a, a, [0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
transmute(a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
transmute(a)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast))] // should be vpbroadcastd
pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
let a = _mm512_castsi128_si512(a).as_i32x16();
let ret: i32x16 = simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast))] // should be vpbroadcastq
pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
// Broadcast the low element (index 0), matching vbroadcastsd.
simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
let a = _mm512_castsi128_si512(a).as_i32x16();
let ret: i32x16 = simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
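// The `broadcast_*x4` family tiles a whole 128-bit (or, for the 64-bit element
// forms, 256-bit) block rather than a single element. Illustrative sketch
// (helper name is not part of the API):
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn tile_i32x4(a: __m128i) -> __m512i {
// The result holds four back-to-back copies of `a`.
_mm512_broadcast_i32x4(a)
}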
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8()))
}
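// Blend sketch (mask value is illustrative): bit i of `k` selects lane i of `b`,
// while a cleared bit keeps lane i of `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn blend_low_half(a: __m512i, b: __m512i) -> __m512i {
// Take lanes 0..=7 from `b` and lanes 8..=15 from `a`.
_mm512_mask_blend_epi32(0b00000000_11111111, a, b)
}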
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_alignr_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let a = a.as_i32x16();
let b = b.as_i32x16();
let imm8: i32 = imm8 % 16;
let r: i32x16 = match imm8 {
0 => simd_shuffle16(
a,
b,
[
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
],
),
1 => simd_shuffle16(
a,
b,
[
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
],
),
2 => simd_shuffle16(
a,
b,
[18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
),
3 => simd_shuffle16(
a,
b,
[19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
),
4 => simd_shuffle16(
a,
b,
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
),
5 => simd_shuffle16(
a,
b,
[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
),
6 => simd_shuffle16(
a,
b,
[22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
),
7 => simd_shuffle16(
a,
b,
[23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
),
8 => simd_shuffle16(
a,
b,
[24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
),
9 => simd_shuffle16(
a,
b,
[25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
),
10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
_ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
};
transmute(r)
}
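// Worked example: valignd treats the pair as one 32-element vector with `a` in
// the high half, shifted right by `imm8` elements. Sketch (helper name is
// illustrative):
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn shift_in_one_element(a: __m512i, b: __m512i) -> __m512i {
// Produces [b1, b2, ..., b15, a0], i.e. the `1 => ...` arm above.
_mm512_alignr_epi32(a, b, 1)
}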
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_alignr_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let a = a.as_i32x16();
let b = b.as_i32x16();
let imm8: i32 = imm8 % 16;
let r: i32x16 = match imm8 {
0 => simd_shuffle16(
a,
b,
[
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
],
),
1 => simd_shuffle16(
a,
b,
[
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
],
),
2 => simd_shuffle16(
a,
b,
[18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
),
3 => simd_shuffle16(
a,
b,
[19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
),
4 => simd_shuffle16(
a,
b,
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
),
5 => simd_shuffle16(
a,
b,
[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
),
6 => simd_shuffle16(
a,
b,
[22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
),
7 => simd_shuffle16(
a,
b,
[23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
),
8 => simd_shuffle16(
a,
b,
[24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
),
9 => simd_shuffle16(
a,
b,
[25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
),
10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
_ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
};
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_alignr_epi32(
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let a = a.as_i32x16();
let b = b.as_i32x16();
let imm8: i32 = imm8 % 16;
let r: i32x16 = match imm8 {
0 => simd_shuffle16(
a,
b,
[
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
],
),
1 => simd_shuffle16(
a,
b,
[
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
],
),
2 => simd_shuffle16(
a,
b,
[18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
),
3 => simd_shuffle16(
a,
b,
[19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
),
4 => simd_shuffle16(
a,
b,
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
),
5 => simd_shuffle16(
a,
b,
[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
),
6 => simd_shuffle16(
a,
b,
[22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
),
7 => simd_shuffle16(
a,
b,
[23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
),
8 => simd_shuffle16(
a,
b,
[24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
),
9 => simd_shuffle16(
a,
b,
[25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
),
10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
_ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
};
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_alignr_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8: i32 = imm8 % 8;
let r: i64x8 = match imm8 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
_ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
};
transmute(r)
}
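// The 64-bit form is the same construction over 8 lanes: for example `imm8 = 4`
// yields [b4, b5, b6, b7, a0, a1, a2, a3], swapping the 256-bit halves of the
// concatenation.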
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_alignr_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8: i32 = imm8 % 8;
let r: i64x8 = match imm8 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
_ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
};
transmute(simd_select_bitmask(k, r, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_alignr_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8: i32 = imm8 % 8;
let r: i64x8 = match imm8 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
_ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
};
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, and, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, and, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, or, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, or, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
_mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm512_mask_andnot_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
_mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_mask_andnot_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
_mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
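// andnot computes `(!a) & b` (note the operand order). Sketch with concrete bit
// patterns (values are illustrative):
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn andnot_sketch() -> __m512i {
let a = _mm512_set1_epi32(0b1100);
let b = _mm512_set1_epi32(0b1010);
// (!0b1100) & 0b1010 == 0b0010 in every lane.
_mm512_andnot_epi32(a, b)
}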
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(and))]
pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a & b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(and))]
pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a & b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(or))]
pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a | b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(or))]
pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a | b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a ^ b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a ^ b)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 {
transmute(a ^ 0b11111111_11111111)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 {
transmute(a ^ 0b11111111_11111111)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(not))]
pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_kand(_mm512_knot(a), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(not))]
pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_kand(_mm512_knot(a), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_knot(_mm512_kxor(a, b))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_knot(_mm512_kxor(a, b))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 {
let r: u16 = a;
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 {
let r: u16 = mask as u16;
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 {
let r: i32 = k1 as i32;
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
// Per Intel: dst[15:8] := a[7:0] and dst[7:0] := b[7:0].
let a = (a & 0b00000000_11111111) << 8;
let b = b & 0b00000000_11111111;
transmute(a | b)
}
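// Worked example (with the byte order fixed above):
// _mm512_kunpackb(0x00AA, 0x0055) == 0xAA55, since the low byte of `a` becomes
// the high byte of the result and the low byte of `b` the low byte.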
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(cmp))]
pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
let r = a | b;
if r == 0b11111111_11111111 {
1
} else {
0
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpneq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpneq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpneq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpneq_epi64_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpeq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpeq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpeq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpeq_epi64_mask(k, and, zero)
}
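// test/testn sketch: bit i of the result reports whether `a[i] & b[i]` is
// nonzero (test) or zero (testn). Illustrative helper (not part of the API):
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn lanes_with_low_bit_set(v: __m512i) -> __mmask16 {
// Mask bit i is set when lane i of `v` has its least significant bit set.
_mm512_test_epi32_mask(v, _mm512_set1_epi32(1))
}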
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
intrinsics::nontemporal_store(mem_addr as *mut __m512, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
intrinsics::nontemporal_store(mem_addr as *mut __m512d, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut i64, a: __m512i) {
intrinsics::nontemporal_store(mem_addr as *mut __m512i, a);
}
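// Note: like the underlying non-temporal store instructions, the three stream
// stores above require `mem_addr` to be 64-byte aligned, and a fence (e.g.
// `_mm_sfence`) is needed before another thread may safely read the data.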
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_ps(
e15: f32,
e14: f32,
e13: f32,
e12: f32,
e11: f32,
e10: f32,
e9: f32,
e8: f32,
e7: f32,
e6: f32,
e5: f32,
e4: f32,
e3: f32,
e2: f32,
e1: f32,
e0: f32,
) -> __m512 {
// Arguments are given highest element first, matching _mm512_set_epi32 below.
_mm512_setr_ps(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_ps(
e0: f32,
e1: f32,
e2: f32,
e3: f32,
e4: f32,
e5: f32,
e6: f32,
e7: f32,
e8: f32,
e9: f32,
e10: f32,
e11: f32,
e12: f32,
e13: f32,
e14: f32,
e15: f32,
) -> __m512 {
let r = f32x16::new(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
transmute(f64x8::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
transmute(f32x16::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi32(
e15: i32,
e14: i32,
e13: i32,
e12: i32,
e11: i32,
e10: i32,
e9: i32,
e8: i32,
e7: i32,
e6: i32,
e5: i32,
e4: i32,
e3: i32,
e2: i32,
e1: i32,
e0: i32,
) -> __m512i {
_mm512_setr_epi32(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i {
transmute(i8x64::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i {
transmute(i16x32::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
transmute(i32x16::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
let r = _mm512_set1_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
let r = _mm512_set1_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
transmute(i64x8::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
let r = _mm512_set1_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, r, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
let r = _mm512_set1_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
// Element 0 is `a`, per Intel's definition of _mm512_set4_epi64.
let r = i64x8::new(a, b, c, d, a, b, c, d);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
// Element 0 is `d`: the `r` form stores the arguments in the order written.
let r = i64x8::new(d, c, b, a, d, c, b, a);
transmute(r)
}
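// With the element order fixed above, `_mm512_set4_epi64(d, c, b, a)` places `a`
// in element 0 and repeats [a, b, c, d] twice, while `_mm512_setr4_epi64` stores
// the arguments in the order written, starting with `d` in element 0.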
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NEQ_UQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, imm8: i32) -> __mmask16 {
let neg_one = -1;
macro_rules! call {
($imm5:expr) => {
vcmpps(
a.as_f32x16(),
b.as_f32x16(),
$imm5,
neg_one,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
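// The named comparisons above are thin wrappers over this imm8-driven form; a
// direct call looks like the following (predicate constants come from this
// crate, helper name is illustrative):
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn lanes_less_than(a: __m512, b: __m512) -> __mmask16 {
// Equivalent to _mm512_cmplt_ps_mask(a, b).
_mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
}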
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_ps_mask(k1: __mmask16, a: __m512, b: __m512, imm8: i32) -> __mmask16 {
macro_rules! call {
($imm5:expr) => {
vcmpps(
a.as_f32x16(),
b.as_f32x16(),
$imm5,
k1 as i16,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, imm8: i32, sae: i32) -> __mmask16 {
let neg_one = -1;
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_ps_mask(
m: __mmask16,
a: __m512,
b: __m512,
imm8: i32,
sae: i32,
) -> __mmask16 {
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_UNORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NEQ_UQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr) => {
vcmppd(
a.as_f64x8(),
b.as_f64x8(),
$imm5,
neg_one,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_pd_mask(k1: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
macro_rules! call {
($imm5:expr) => {
vcmppd(
a.as_f64x8(),
b.as_f64x8(),
$imm5,
k1 as i8,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, imm8: i32, sae: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_pd_mask(
k1: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
sae: i32,
) -> __mmask8 {
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, k1 as i8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_UNORD_Q)
}
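// Scalar (`ss`/`sd`) comparisons operate on element 0 only: bit 0 of the
// returned mask holds the result and the remaining bits are zeroed.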
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, imm8: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr) => {
vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_ss_mask(k1: __mmask8, a: __m128, b: __m128, imm8: i32) -> __mmask8 {
macro_rules! call {
($imm5:expr) => {
vcmpss(a, b, $imm5, k1 as i8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, imm8: i32, sae: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpss(a, b, $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_round_ss_mask(
k1: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
sae: i32,
) -> __mmask8 {
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpss(a, b, $imm5, k1 as i8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr) => {
vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_sd_mask(k1: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
macro_rules! call {
($imm5:expr) => {
vcmpsd(a, b, $imm5, k1 as i8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpsd(a, b, $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_round_sd_mask(
k1: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
sae: i32,
) -> __mmask8 {
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpsd(a, b, $imm5, k1 as i8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
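// Unsigned 32-bit integer comparisons. Each predicate lowers to a vector
// compare plus `simd_bitmask`, and every masked variant is simply
// `_mm512_cmpOP_epu32_mask(a, b) & k1`, so masked-off lanes report 0.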
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmplt_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpgt_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmple_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpge_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpeq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpneq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
let neg_one = -1;
macro_rules! call {
($imm3:expr) => {
vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu32_mask(
k1: __mmask16,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask16 {
macro_rules! call {
($imm3:expr) => {
vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, k1 as i16)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmplt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpgt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmple_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpge_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpeq_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpneq_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
let neg_one = -1;
macro_rules! call {
($imm3:expr) => {
vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi32_mask(
k1: __mmask16,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask16 {
macro_rules! call {
($imm3:expr) => {
vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, k1 as i16)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
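// The 64-bit comparison families below mirror the 32-bit ones above, but they
// operate on eight lanes and therefore return an `__mmask8`.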
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmple_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpge_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpneq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm3:expr) => {
vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu64_mask(
k1: __mmask8,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
macro_rules! call {
($imm3:expr) => {
vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmple_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpge_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpneq_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm3:expr) => {
vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi64_mask(
k1: __mmask8,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
macro_rules! call {
($imm3:expr) => {
vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
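// Horizontal reductions. The `*_unordered` LLVM reductions may reassociate, so
// floating-point results can differ from a strict left-to-right fold. Masked
// variants substitute the operation's identity (0 for add, 1 for mul) for
// lanes whose mask bit is clear. A usage sketch (hypothetical values, assuming
// an AVX512F-capable CPU):
//
// let v = _mm512_set1_epi32(2);
// assert_eq!(_mm512_reduce_add_epi32(v), 32); // 16 lanes x 2
// assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_1111_1111, v), 16);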
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
simd_reduce_add_unordered(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
simd_reduce_add_unordered(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
simd_reduce_add_unordered(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
simd_reduce_add_unordered(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
simd_reduce_mul_unordered(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_set1_epi32(1).as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
simd_reduce_mul_unordered(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(1).as_i64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
simd_reduce_mul_unordered(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_f32x16(),
_mm512_set1_ps(1.).as_f32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
simd_reduce_mul_unordered(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_f64x8(),
_mm512_set1_pd(1.).as_f64x8(),
))
}
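// Masked max/min reductions replace masked-off lanes with the operation's
// identity so they can never win the reduction: the type's MIN (or negative
// infinity) for a max, the type's MAX (or positive infinity) for a min, 0 for
// an unsigned max, and all-ones for an unsigned min.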
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
simd_reduce_max(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_i32x16(),
// i32::MIN is the identity for a signed max reduction.
_mm512_set1_epi32(i32::MIN).as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
simd_reduce_max(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_i64x8(),
// i64::MIN is the identity for a signed max reduction.
_mm512_set1_epi64(i64::MIN).as_i64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
simd_reduce_max(a.as_u32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_u32x16(),
// Zero is the identity for an unsigned max reduction.
_mm512_setzero_si512().as_u32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
simd_reduce_max(a.as_u64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_u64x8(),
_mm512_set1_epi64(0).as_u64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 {
simd_reduce_max(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_f32x16(),
// Negative infinity is the identity for a floating-point max reduction.
_mm512_set1_ps(f32::NEG_INFINITY).as_f32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
simd_reduce_max(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_f64x8(),
// Negative infinity is the identity for a floating-point max reduction.
_mm512_set1_pd(f64::NEG_INFINITY).as_f64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
simd_reduce_min(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_i32x16(),
// i32::MAX is the identity for a signed min reduction.
_mm512_set1_epi32(i32::MAX).as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
simd_reduce_min(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_i64x8(),
// i64::MAX is the identity for a signed min reduction.
_mm512_set1_epi64(i64::MAX).as_i64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
simd_reduce_min(a.as_u32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_u32x16(),
// All-ones (u32::MAX in every lane) is the identity for an unsigned min reduction.
_mm512_set1_epi32(-1).as_u32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
simd_reduce_min(a.as_u64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_u64x8(),
// All-ones (u64::MAX in every lane) is the identity for an unsigned min reduction.
_mm512_set1_epi64(-1).as_u64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 {
simd_reduce_min(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_f32x16(),
// Positive infinity is the identity for a floating-point min reduction.
_mm512_set1_ps(f32::INFINITY).as_f32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
simd_reduce_min(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_f64x8(),
// Positive infinity is the identity for a floating-point min reduction.
_mm512_set1_pd(f64::INFINITY).as_f64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
simd_reduce_and(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_and(simd_select_bitmask(
k,
a.as_i32x16(),
// All-ones is the identity element for a bitwise AND reduction.
_mm512_set1_epi32(-1).as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
simd_reduce_and(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_and(simd_select_bitmask(
k,
a.as_i64x8(),
// All-ones is the identity element for a bitwise AND reduction.
_mm512_set1_epi64(-1).as_i64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
simd_reduce_or(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_or(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
simd_reduce_or(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_or(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
))
}
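// The `undefined` constructors below intentionally return all-zero vectors
// rather than genuinely uninitialized values, trading a negligible cost for
// freedom from undefined behavior.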
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined_pd() -> __m512d {
_mm512_set1_pd(0.0)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined_ps() -> __m512 {
_mm512_set1_ps(0.0)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined_epi32() -> __m512i {
_mm512_set1_epi32(0)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined() -> __m512 {
_mm512_set1_ps(0.0)
}
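// Unaligned loads and stores: `mem_addr` may point anywhere; `ptr::read_unaligned`
// and `ptr::write_unaligned` carry no alignment requirement.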
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
ptr::read_unaligned(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
ptr::read_unaligned(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
ptr::read_unaligned(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_si512(mem_addr: *mut i32, a: __m512i) {
ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
ptr::read_unaligned(mem_addr as *const __m512d)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
ptr::write_unaligned(mem_addr as *mut __m512d, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
ptr::read_unaligned(mem_addr as *const __m512)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
ptr::write_unaligned(mem_addr as *mut __m512, a);
}
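// Aligned loads and stores: here `mem_addr` must be 64-byte aligned, otherwise
// the access is undefined behavior. A sketch of obtaining a suitably aligned
// buffer (hypothetical helper type):
//
// #[repr(align(64))]
// struct Aligned([f32; 16]);
// let data = Aligned([1.0; 16]);
// let v = _mm512_load_ps(data.0.as_ptr());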
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_si512(mem_addr: *mut i32, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
ptr::read(mem_addr as *const __m512)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
ptr::write(mem_addr as *mut __m512, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
ptr::read(mem_addr as *const __m512d)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
ptr::write(mem_addr as *mut __m512d, a);
}
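// `_mm512_setr_pd(e0, ..., e7)` places `e0` in lane 0 (the lowest lane);
// `_mm512_set_pd` takes its arguments highest-lane first and is implemented by
// reversing them into `setr`.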
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
}
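// Masked scalar operations: each reads bit 0 of `k` to decide whether to
// compute on element 0, copies the upper elements from `a`, and on a clear
// mask bit falls back to `src` (`_mask_`) or to zero (`_maskz_`). A usage
// sketch (hypothetical values):
//
// let a = _mm_set_ss(2.0);
// let b = _mm_set_ss(3.0);
// let r = _mm_maskz_add_ss(0b1, a, b); // lane 0 = 5.0
// let z = _mm_maskz_add_ss(0b0, a, b); // lane 0 = 0.0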
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovss))]
pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut mov: f32 = extractsrc;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovss))]
pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut mov: f32 = 0.;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut mov: f64 = extractsrc;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut mov: f64 = 0.;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss))]
pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut add: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss))]
pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut add: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut add: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut add: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss))]
pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut sub: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss))]
pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut sub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut sub: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut sub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss))]
pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut mul: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss))]
pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut mul: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut mul: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut mul: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss))]
pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut div: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss))]
pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut div: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut div: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut div: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vmaxss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vmaxss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vmaxsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vmaxsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss))]
pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vminss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss))]
pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vminss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd))]
pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vminsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd))]
pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vminsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vsqrtss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vsqrtss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vsqrtsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vsqrtsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
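// vrsqrt14/vrcp14 compute approximate reciprocal square roots and reciprocals
// with a maximum relative error of 2^-14, trading accuracy for latency; use
// the exact `sqrt`/`div` forms when full precision is required.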
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
transmute(vrsqrt14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrsqrt14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vrsqrt14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrsqrt14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
transmute(vrcp14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrcp14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vrcp14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrcp14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
transmute(vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_NO_EXC,
))
}
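// getmant extracts the mantissa, normalized into the interval chosen by `norm`
// (immediate bits 1:0) with the sign handling chosen by `sign` (immediate bits
// 3:2); the `$imm2 << 2 | $imm4_1` below assembles that 4-bit immediate.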
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_getmant_ss(
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantss(
a.as_f32x4(),
b.as_f32x4(),
$imm2 << 2 | $imm4_1,
_mm_setzero_ps().as_f32x4(),
0b1,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_getmant_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantss(
a.as_f32x4(),
b.as_f32x4(),
$imm2 << 2 | $imm4_1,
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_getmant_ss(
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantss(
a.as_f32x4(),
b.as_f32x4(),
$imm2 << 2 | $imm4_1,
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_getmant_sd(
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantsd(
a.as_f64x2(),
b.as_f64x2(),
$imm2 << 2 | $imm4_1,
_mm_setzero_pd().as_f64x2(),
0b1,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_getmant_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantsd(
a.as_f64x2(),
b.as_f64x2(),
$imm2 << 2 | $imm4_1,
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_getmant_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantsd(
a.as_f64x2(),
b.as_f64x2(),
$imm2 << 2 | $imm4_1,
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
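// roundscale rounds to a multiple of 2^-M, where M is the high nibble of
// `imm8` (bits 7:4); the low bits select the rounding behavior and exception
// suppression, so `imm8 = 0` rounds to the nearest integer.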
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 255))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_roundscale_ss(a: __m128, b: __m128, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaless(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_roundscale_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaless(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_roundscale_ss(k: __mmask8, a: __m128, b: __m128, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaless(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 255))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_roundscale_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalesd(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_roundscale_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalesd(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_roundscale_sd(k: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalesd(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
transmute(vscalefss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vscalefss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vscalefss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
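// Masked scalar fused multiply-add: a0 * b0 + c0 computed with a single
// rounding when bit 0 of `k` is set. The `_mask_`, `_maskz_`, and `_mask3_`
// forms fall back to `a`, zero, or `c` respectively on a clear mask bit;
// `fmsub` is obtained by negating the addend before the fused operation.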
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fmadd = vfmadd132ss(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
fmadd = vfmadd132ss(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fmadd = vfmadd132sd(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
fmadd = vfmadd132sd(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
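/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract
/// the lower element in c from the intermediate result (computing (a * b) - c). Store the result
/// in the lower element of the return value using writemask k (the element is copied from a when
/// mask bit 0 is not set), and copy the upper 3 packed elements from a.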
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132ss(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc = -fmsub;
fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132sd(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc = -fmsub;
fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
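/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, negate the
/// intermediate result, and add it to the lower element in c (computing -(a * b) + c). Store the
/// result in the lower element of the return value using writemask k (the element is copied from
/// a when mask bit 0 is not set), and copy the upper 3 packed elements from a.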
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fnmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fnmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fnmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
fnmadd = vfmadd132ss(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fnmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fnmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fnmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
fnmadd = vfmadd132sd(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
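/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, negate the
/// intermediate result, and subtract the lower element in c from it (computing -(a * b) - c).
/// Store the result in the lower element of the return value using writemask k (the element is
/// copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a.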
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fnmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fnmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fnmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc = -fnmsub;
fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fnmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fnmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fnmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc = -fnmsub;
fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
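/// Add the lower single-precision (32-bit) floating-point element in a to the lower element in
/// b, store the result in the lower element of the return value, and copy the upper 3 packed
/// elements from a.
///
/// Rounding is done according to the rounding parameter, which must be a compile-time constant
/// such as (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) or _MM_FROUND_CUR_DIRECTION; the same
/// convention applies to the other *_round_ss/*_round_sd arithmetic intrinsics in this section.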
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vaddss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_add_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vaddss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vaddss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
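/// Add the lower double-precision (64-bit) floating-point element in a to the lower element in
/// b, store the result in the lower element of the return value, and copy the upper element from
/// a. Rounding is controlled by the rounding parameter as for _mm_add_round_ss.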
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vaddsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_add_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vaddsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_add_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vaddsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
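/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower
/// element in a, store the result in the lower element of the return value, and copy the upper 3
/// packed elements from a. Rounding is controlled by the rounding parameter.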
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vsubss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sub_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vsubss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vsubss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vsubsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sub_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vsubsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sub_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vsubsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
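/// Multiply the lower single-precision (32-bit) floating-point element in a by the lower element
/// in b, store the result in the lower element of the return value, and copy the upper 3 packed
/// elements from a. Rounding is controlled by the rounding parameter.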
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vmulss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_mul_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vmulss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vmulss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vmulsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_mul_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vmulsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_mul_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vmulsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
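/// Divide the lower single-precision (32-bit) floating-point element in a by the lower element
/// in b, store the result in the lower element of the return value, and copy the upper 3 packed
/// elements from a. Rounding is controlled by the rounding parameter.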
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vdivss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_div_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vdivss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vdivss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vdivsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_div_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vdivsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_div_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vdivsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
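/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the
/// maximum value in the lower element of the return value, and copy the upper 3 packed elements
/// from a. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.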
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vmaxss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_max_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
sae: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vmaxss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vmaxss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vmaxsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_max_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
sae: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vmaxsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vmaxsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
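/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the
/// minimum value in the lower element of the return value, and copy the upper 3 packed elements
/// from a. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.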
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vminss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_min_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
sae: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vminss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vminss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vminsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_min_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
sae: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vminsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vminsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
transmute(constify_imm4_sae!(sae, call))
}
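/// Compute the square root of the lower single-precision (32-bit) floating-point element in b,
/// store the result in the lower element of the return value, and copy the upper 3 packed
/// elements from a. Rounding is controlled by the rounding parameter.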
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vsqrtss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sqrt_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vsqrtss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vsqrtss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vsqrtsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sqrt_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vsqrtsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sqrt_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vsqrtsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
transmute(constify_imm4_round!(rounding, call))
}
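/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a
/// single-precision floating-point number representing the integer exponent, store the result in
/// the lower element of the return value, and copy the upper 3 packed elements from a. This
/// intrinsic essentially calculates floor(log2(x)) for the lower element. Exceptions can be
/// suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.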
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_getexp_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
sae: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vgetexpss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_getexp_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
sae: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vgetexpsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
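/// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b,
/// store the result in the lower element of the return value, and copy the upper 3 packed
/// elements from a. The norm parameter selects the normalization interval and the sign parameter
/// selects the sign control; exceptions can be suppressed by passing _MM_FROUND_NO_EXC in sae.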
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm_getmant_round_ss(
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128 {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantss(
a.as_f32x4(),
b.as_f32x4(),
$imm2 << 2 | $imm4_1,
_mm_setzero_ps().as_f32x4(),
0b1,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(4, 5, 6)]
pub unsafe fn _mm_mask_getmant_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128 {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantss(
a.as_f32x4(),
b.as_f32x4(),
$imm2 << 2 | $imm4_1,
src.as_f32x4(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm_maskz_getmant_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128 {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantss(
a.as_f32x4(),
b.as_f32x4(),
$imm2 << 2 | $imm4_1,
_mm_setzero_ps().as_f32x4(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm_getmant_round_sd(
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128d {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantsd(
a.as_f64x2(),
b.as_f64x2(),
$imm2 << 2 | $imm4_1,
_mm_setzero_pd().as_f64x2(),
0b1,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(4, 5, 6)]
pub unsafe fn _mm_mask_getmant_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128d {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantsd(
a.as_f64x2(),
b.as_f64x2(),
$imm2 << 2 | $imm4_1,
src.as_f64x2(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm_maskz_getmant_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128d {
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantsd(
a.as_f64x2(),
b.as_f64x2(),
$imm2 << 2 | $imm4_1,
_mm_setzero_pd().as_f64x2(),
k,
$imm4_2,
)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
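/// Round the lower single-precision (32-bit) floating-point element in b to the number of
/// fraction bits specified by imm8, store the result in the lower element of the return value,
/// and copy the upper 3 packed elements from a. Exceptions can be suppressed by passing
/// _MM_FROUND_NO_EXC in the sae parameter.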
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_roundscale_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaless(a, b, zero, 0b11111111, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_roundscale_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaless(a, b, src, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_roundscale_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaless(a, b, zero, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_roundscale_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalesd(a, b, zero, 0b11111111, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_roundscale_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalesd(a, b, src, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_roundscale_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalesd(a, b, zero, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
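/// Scale the lower single-precision (32-bit) floating-point element in a using the lower element
/// of b (computing a * 2^floor(b)), store the result in the lower element of the return value,
/// and copy the upper 3 packed elements from a. Rounding is controlled by the rounding
/// parameter.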
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vscalefss(a, b, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_scalef_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vscalefss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_scalef_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vscalefss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_scalef_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vscalefsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_scalef_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
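/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the
/// intermediate result to the lower element in c. Store the result in the lower element of the
/// return value, and copy the upper 3 packed elements from a. Rounding is controlled by the
/// rounding parameter.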
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmadd_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(fmadd, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmadd_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmadd_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, fmadd, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmadd_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(fmadd, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmadd_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmadd_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, fmadd, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
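/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract
/// the lower element in c from the intermediate result (computing (a * b) - c). Store the result
/// in the lower element of the return value, and copy the upper 3 packed elements from a.
/// Rounding is controlled by the rounding parameter.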
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmsub_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(fmsub, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmsub_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmsub_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc = -fmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmsub_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(fmsub, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmsub_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmsub_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc = -fmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
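/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, negate the
/// intermediate result, and add it to the lower element in c (computing -(a * b) + c). Store the
/// result in the lower element of the return value, and copy the upper 3 packed elements from a.
/// Rounding is controlled by the rounding parameter.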
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmadd_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmadd_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmadd_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fnmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, fnmadd, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmadd_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmadd_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmadd_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fnmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, fnmadd, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
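/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, negate the
/// intermediate result, and subtract the lower element in c from it (computing -(a * b) - c).
/// Store the result in the lower element of the return value, and copy the upper 3 packed
/// elements from a. Rounding is controlled by the rounding parameter.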
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmsub_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmsub_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmsub_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fnmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc = -fnmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmsub_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmsub_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmsub_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fnmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc = -fnmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
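/// Fix up the lower single-precision (32-bit) floating-point elements in `a` and `b` using
/// the lower 32-bit integer in `c`, write the result to the lower element of the return
/// value, and copy the upper 3 packed elements from `a`. `imm8` is used to set the required
/// flags reporting.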
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmss(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128i,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmss(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128i,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmssz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
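/// Fix up the lower double-precision (64-bit) floating-point elements in `a` and `b` using
/// the lower 64-bit integer in `c`, write the result to the lower element of the return
/// value, and copy the upper element from `a`. `imm8` is used to set the required flags
/// reporting.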
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128i, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmsd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128i,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmsd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128i,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmsdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
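/// Fix up the lower single-precision (32-bit) floating-point elements in `a` and `b` using
/// the lower 32-bit integer in `c`, with exceptions suppressible by passing
/// `_MM_FROUND_NO_EXC` in the `sae` parameter. `imm8` is used to set the required flags
/// reporting.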
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_fixupimm_round_ss(
a: __m128,
b: __m128,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmss(a, b, c, $imm8, 0b11111111, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmss(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmssz(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
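/// Fix up the lower double-precision (64-bit) floating-point elements in `a` and `b` using
/// the lower 64-bit integer in `c`, with exceptions suppressible by passing
/// `_MM_FROUND_NO_EXC` in the `sae` parameter. `imm8` is used to set the required flags
/// reporting.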
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_fixupimm_round_sd(
a: __m128d,
b: __m128d,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmsd(a, b, c, $imm8, 0b11111111, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmsd(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmsdz(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
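/// Convert the lower single-precision (32-bit) floating-point element in `b` to a
/// double-precision (64-bit) floating-point element, write the result to the lower element
/// of the return value using writemask `k` (the element is copied from `src` when mask bit 0
/// is not set), and copy the upper element from `a`.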
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
transmute(vcvtss2sd(
a.as_f64x2(),
b.as_f32x4(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
transmute(vcvtss2sd(
a.as_f64x2(),
b.as_f32x4(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
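/// Convert the lower double-precision (64-bit) floating-point element in `b` to a
/// single-precision (32-bit) floating-point element, write the result to the lower element
/// of the return value using writemask `k` (the element is copied from `src` when mask bit 0
/// is not set), and copy the upper 3 packed elements from `a`.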
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
transmute(vcvtsd2ss(
a.as_f32x4(),
b.as_f64x2(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
transmute(vcvtsd2ss(
a.as_f32x4(),
b.as_f64x2(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
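/// Convert the lower single-precision (32-bit) floating-point element in `b` to a
/// double-precision (64-bit) floating-point element, write the result to the lower element
/// of the return value, and copy the upper element from `a`. Exceptions can be suppressed by
/// passing `_MM_FROUND_NO_EXC` in the `sae` parameter.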
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vcvtss2sd(
a.as_f64x2(),
b.as_f32x4(),
_mm_setzero_pd().as_f64x2(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_cvt_roundss_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128,
sae: i32,
) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vcvtss2sd(a.as_f64x2(), b.as_f32x4(), src.as_f64x2(), k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vcvtss2sd(
a.as_f64x2(),
b.as_f32x4(),
_mm_setzero_pd().as_f64x2(),
k,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
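/// Convert the lower double-precision (64-bit) floating-point element in `b` to a
/// single-precision (32-bit) floating-point element, write the result to the lower element
/// of the return value, and copy the upper 3 packed elements from `a`. Rounding is done
/// according to the `rounding` parameter.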
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vcvtsd2ss(
a.as_f32x4(),
b.as_f64x2(),
_mm_setzero_ps().as_f32x4(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_cvt_roundsd_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128d,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vcvtsd2ss(a.as_f32x4(), b.as_f64x2(), src.as_f32x4(), k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_cvt_roundsd_ss(
k: __mmask8,
a: __m128,
b: __m128d,
rounding: i32,
) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vcvtsd2ss(
a.as_f32x4(),
b.as_f64x2(),
_mm_setzero_ps().as_f32x4(),
k,
$imm4,
)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
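/// Convert the lower single-precision (32-bit) floating-point element in `a` to a 32-bit
/// integer, using the rounding mode given by `rounding`. `_mm_cvt_roundss_i32` below
/// performs the identical operation.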
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvtss2si(a.as_f32x4(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvtss2si(a.as_f32x4(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_u32(a: __m128, rounding: i32) -> u32 {
macro_rules! call {
($imm4:expr) => {
vcvtss2usi(a.as_f32x4(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
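/// Convert the lower single-precision (32-bit) floating-point element in `a` to a 32-bit
/// integer according to the current rounding direction.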
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 {
transmute(vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 {
transmute(vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvtsd2si(a.as_f64x2(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvtsd2si(a.as_f64x2(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d, rounding: i32) -> u32 {
macro_rules! call {
($imm4:expr) => {
vcvtsd2usi(a.as_f64x2(), $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 {
transmute(vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 {
transmute(vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
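/// Convert the signed 32-bit integer `b` to a single-precision (32-bit) floating-point
/// element, write the result to the lower element of the return value, and copy the upper 3
/// packed elements from `a`. Rounding is done according to the `rounding` parameter;
/// `_mm_cvt_roundsi32_ss` below is identical.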
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vcvtsi2ss(a.as_f32x4(), b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vcvtsi2ss(a.as_f32x4(), b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
vcvtusi2ss(a.as_f32x4(), b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
}
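/// Convert the lower single-precision (32-bit) floating-point element in `a` to a 32-bit
/// integer with truncation. Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in
/// the `sae` parameter.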
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvttss2si(a.as_f32x4(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvttss2si(a.as_f32x4(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2usi, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 {
macro_rules! call {
($imm4:expr) => {
vcvttss2usi(a.as_f32x4(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2si))]
pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 {
transmute(vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2usi))]
pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 {
transmute(vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
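/// Convert the lower double-precision (64-bit) floating-point element in `a` to a 32-bit
/// integer with truncation. Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in
/// the `sae` parameter.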
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvttsd2si(a.as_f64x2(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 {
macro_rules! call {
($imm4:expr) => {
vcvttsd2si(a.as_f64x2(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2usi, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 {
macro_rules! call {
($imm4:expr) => {
vcvttsd2usi(a.as_f64x2(), $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2si))]
pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 {
transmute(vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2usi))]
pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 {
transmute(vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
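/// Convert the unsigned 32-bit integer `b` to a single-precision (32-bit) floating-point
/// element, write the result to the lower element of the return value, and copy the upper 3
/// packed elements from `a`.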
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
}
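/// Compare the lower single-precision (32-bit) floating-point elements in `a` and `b` using
/// the comparison predicate `imm8`, and return the boolean result (0 or 1). Exceptions can
/// be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.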
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_comi_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> i32 {
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vcomiss(a.as_f32x4(), b.as_f32x4(), $imm8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_comi_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> i32 {
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vcomisd(a.as_f64x2(), b.as_f64x2(), $imm8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
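// Comparison predicates accepted by the `_MM_CMPINT_ENUM`-based integer compare intrinsics
// such as `_mm512_cmp_epi32_mask`.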
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
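// Normalization intervals and sign controls for the getmant intrinsics.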
pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
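// Shuffle immediates for `_mm512_shuffle_epi32`: read the suffix left to right to get the
// source element (A = 0 ... D = 3) for result positions 3 down to 0, two bits each, e.g.
// `_MM_PERM_ABCD` = 0b00_01_10_11 = 0x1B.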
pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512.pmul.dq.512"]
fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
#[link_name = "llvm.x86.avx512.pmulu.dq.512"]
fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
#[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
fn vpmaxuq(a: u64x8, b: u64x8) -> u64x8;
#[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
fn vpminud(a: u32x16, b: u32x16) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
fn vpminuq(a: u64x8, b: u64x8) -> u64x8;
#[link_name = "llvm.x86.avx512.sqrt.ps.512"]
fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.sqrt.pd.512"]
fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
fn vfmadd132ps(a: f32x16, b: f32x16, c: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
fn vfmadd132pd(a: f64x8, b: f64x8, c: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
fn vfmaddsub213ps(a: f32x16, b: f32x16, c: f32x16, d: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
fn vfmaddsub213pd(a: f64x8, b: f64x8, c: f64x8, d: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.add.ps.512"]
fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.add.pd.512"]
fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.sub.ps.512"]
fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.sub.pd.512"]
fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mul.ps.512"]
fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mul.pd.512"]
fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.div.ps.512"]
fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.div.pd.512"]
fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.max.ps.512"]
fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.max.pd.512"]
fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.min.ps.512"]
fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.min.pd.512"]
fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.pternlog.d.512"]
fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, sae: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.pternlog.q.512"]
fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, sae: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.rcp14.ps.512"]
fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.rcp14.pd.512"]
fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
#[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
#[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
fn vcvttps2udq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
#[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
#[link_name = "llvm.x86.avx512.gather.dpd.512"]
fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.gather.dps.512"]
fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.gather.qpd.512"]
fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.gather.qps.512"]
fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
#[link_name = "llvm.x86.avx512.gather.dpq.512"]
fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.gather.dpi.512"]
fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.gather.qpq.512"]
fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.gather.qpi.512"]
fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.scatter.dpd.512"]
fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dps.512"]
fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qpd.512"]
fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qps.512"]
fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dpq.512"]
fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dpi.512"]
fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qpq.512"]
fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qpi.512"]
fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
#[link_name = "llvm.x86.avx512.mask.cmp.ss"]
fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.sd"]
fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
#[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
#[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
#[link_name = "llvm.x86.avx512.mask.prol.d.512"]
fn vprold(a: i32x16, i8: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.pror.d.512"]
fn vprord(a: i32x16, i8: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.prol.q.512"]
fn vprolq(a: i64x8, i8: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.pror.q.512"]
fn vprorq(a: i64x8, i8: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.psllv.d.512"]
fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psrlv.d.512"]
fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psllv.q.512"]
fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.psrlv.q.512"]
fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.pslli.d.512"]
fn vpsllid(a: i32x16, imm8: u32) -> i32x16;
#[link_name = "llvm.x86.avx512.psrli.d.512"]
fn vpsrlid(a: i32x16, imm8: u32) -> i32x16;
#[link_name = "llvm.x86.avx512.pslli.q.512"]
fn vpslliq(a: i64x8, imm8: u32) -> i64x8;
#[link_name = "llvm.x86.avx512.psrli.q.512"]
fn vpsrliq(a: i64x8, imm8: u32) -> i64x8;
#[link_name = "llvm.x86.avx512.psll.d.512"]
fn vpslld(a: i32x16, count: i32x4) -> i32x16;
#[link_name = "llvm.x86.avx512.psrl.d.512"]
fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
#[link_name = "llvm.x86.avx512.psll.q.512"]
fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
#[link_name = "llvm.x86.avx512.psrl.q.512"]
fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
#[link_name = "llvm.x86.avx512.psra.d.512"]
fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
#[link_name = "llvm.x86.avx512.psra.q.512"]
fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
#[link_name = "llvm.x86.avx512.psrai.d.512"]
fn vpsraid(a: i32x16, imm8: u32) -> i32x16;
#[link_name = "llvm.x86.avx512.psrai.q.512"]
fn vpsraiq(a: i64x8, imm8: u32) -> i64x8;
#[link_name = "llvm.x86.avx512.psrav.d.512"]
fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psrav.q.512"]
fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
#[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
#[link_name = "llvm.x86.avx512.permvar.si.512"]
fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.permvar.di.512"]
fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.permvar.sf.512"]
fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
#[link_name = "llvm.x86.avx512.permvar.df.512"]
fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
#[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
#[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.compress.d.512"]
fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.compress.q.512"]
fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.expand.d.512"]
fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.expand.q.512"]
fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.add.ss.round"]
fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.add.sd.round"]
fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.div.ss.round"]
fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.div.sd.round"]
fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.max.ss.round"]
fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.max.sd.round"]
fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.min.ss.round"]
fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.min.sd.round"]
fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
fn vsqrtss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
fn vsqrtsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.getexp.ss"]
fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.getexp.sd"]
fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.getmant.ss"]
fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.getmant.sd"]
fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.rsqrt14.ss"]
fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.rsqrt14.sd"]
fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.rcp14.ss"]
fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.rcp14.sd"]
fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.scalef.ss"]
fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.scalef.sd"]
fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.vfmadd.f32"]
fn vfmadd132ss(a: f32, b: f32, c: f32, rounding: i32) -> f32;
#[link_name = "llvm.x86.avx512.vfmadd.f64"]
fn vfmadd132sd(a: f64, b: f64, c: f64, rounding: i32) -> f64;
#[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.vcvtss2si32"]
fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
#[link_name = "llvm.x86.avx512.vcvtss2si64"]
fn vcvtss2si64(a: f32x4, rounding: i32) -> i64;
#[link_name = "llvm.x86.avx512.vcvtss2usi32"]
fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
#[link_name = "llvm.x86.avx512.vcvtss2usi64"]
fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64;
#[link_name = "llvm.x86.avx512.vcvtsd2si32"]
fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
#[link_name = "llvm.x86.avx512.vcvtsd2si64"]
fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64;
#[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
#[link_name = "llvm.x86.avx512.vcvtsd2usi64"]
fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64;
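// Truncating scalar conversions backing the `_mm_cvtt*` intrinsics above.
#[link_name = "llvm.x86.avx512.cvttss2si"]
fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
#[link_name = "llvm.x86.avx512.cvttss2usi"]
fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
#[link_name = "llvm.x86.avx512.cvttsd2si"]
fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
#[link_name = "llvm.x86.avx512.cvttsd2usi"]
fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;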
#[link_name = "llvm.x86.avx512.cvtsi2ss32"]
fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtsi2ss64"]
fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtsi2sd64"]
fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.cvtusi2ss"]
fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtusi642ss"]
fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtusi642sd"]
fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.vcomi.ss"]
fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
#[link_name = "llvm.x86.avx512.vcomi.sd"]
fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
}
#[cfg(test)]
mod tests {
use stdarch_test::simd_test;
use crate::core_arch::x86::*;
use crate::hint::black_box;
use crate::mem;
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm512_abs_epi32(a);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm512_mask_abs_epi32(a, 0, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm512_maskz_abs_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
0, 0, 0, 0,
0, 0, 0, 0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let r = _mm512_abs_ps(a);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 1., 1., f32::MAX,
f32::MAX, 100., 100., 32.,
0., 1., 1., f32::MAX,
f32::MAX, 100., 100., 32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_abs_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let r = _mm512_mask_abs_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 1., 1., f32::MAX,
f32::MAX, 100., 100., 32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mov_epi32() {
let src = _mm512_set1_epi32(1);
let a = _mm512_set1_epi32(2);
let r = _mm512_mask_mov_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mov_epi32() {
let a = _mm512_set1_epi32(2);
let r = _mm512_maskz_mov_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mov_ps() {
let src = _mm512_set1_ps(1.);
let a = _mm512_set1_ps(2.);
let r = _mm512_mask_mov_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mov_ps() {
let a = _mm512_set1_ps(2.);
let r = _mm512_maskz_mov_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_add_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_add_epi32(a, b);
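// Integer lanes add with two's-complement wrapping: i32::MAX + 1 wraps
// to i32::MIN.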
let e = _mm512_setr_epi32(
1,
2,
0,
i32::MIN,
i32::MIN + 1,
101,
-99,
-31,
1,
2,
0,
i32::MIN,
i32::MIN + 1,
101,
-99,
-31,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_add_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_mask_add_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(
1,
2,
0,
i32::MIN,
i32::MIN + 1,
101,
-99,
-31,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_add_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_maskz_add_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(
1,
2,
0,
i32::MIN,
i32::MIN + 1,
101,
-99,
-31,
0,
0,
0,
0,
0,
0,
0,
0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_add_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_add_ps(a, b);
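// At f32::MAX's magnitude one ULP is about 2^104, so adding 1.0 changes
// nothing; the literal f32::MIN + 1. below likewise evaluates to f32::MIN.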
let e = _mm512_setr_ps(
1.,
2.,
0.,
f32::MAX,
f32::MIN + 1.,
101.,
-99.,
-31.,
1.,
2.,
0.,
f32::MAX,
f32::MIN + 1.,
101.,
-99.,
-31.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_add_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_mask_add_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
1.,
2.,
0.,
f32::MAX,
f32::MIN + 1.,
101.,
-99.,
-31.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_add_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_maskz_add_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
1.,
2.,
0.,
f32::MAX,
f32::MIN + 1.,
101.,
-99.,
-31.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sub_epi32() {
let a = _mm512_setr_epi32(
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_sub_epi32(a, b);
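// Wrapping subtract: i32::MIN - 1 wraps to i32::MAX.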
let e = _mm512_setr_epi32(
-1,
0,
-2,
i32::MAX - 1,
i32::MAX,
99,
-101,
-33,
-1,
0,
-2,
i32::MAX - 1,
i32::MAX,
99,
-101,
-33,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sub_epi32() {
let a = _mm512_setr_epi32(
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_mask_sub_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(
-1,
0,
-2,
i32::MAX - 1,
i32::MAX,
99,
-101,
-33,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sub_epi32() {
let a = _mm512_setr_epi32(
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_maskz_sub_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(
-1,
0,
-2,
i32::MAX - 1,
i32::MAX,
99,
-101,
-33,
0,
0,
0,
0,
0,
0,
0,
0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sub_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_sub_ps(a, b);
let e = _mm512_setr_ps(
-1.,
0.,
-2.,
f32::MAX - 1.,
f32::MIN,
99.,
-101.,
-33.,
-1.,
0.,
-2.,
f32::MAX - 1.,
f32::MIN,
99.,
-101.,
-33.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sub_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_mask_sub_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
-1.,
0.,
-2.,
f32::MAX - 1.,
f32::MIN,
99.,
-101.,
-33.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sub_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_maskz_sub_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
-1.,
0.,
-2.,
f32::MAX - 1.,
f32::MIN,
99.,
-101.,
-33.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mullo_epi32() {
let a = _mm512_setr_epi32(
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
let b = _mm512_set1_epi32(2);
let r = _mm512_mullo_epi32(a, b);
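// VPMULLD keeps only the low 32 bits of each product: i32::MAX * 2
// wraps to -2 and i32::MIN * 2 wraps to 0.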
let e = _mm512_setr_epi32(
0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mullo_epi32() {
let a = _mm512_setr_epi32(
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
let b = _mm512_set1_epi32(2);
let r = _mm512_mask_mullo_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(
0,
2,
-2,
-2,
0,
200,
-200,
-64,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mullo_epi32() {
let a = _mm512_setr_epi32(
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
0,
1,
-1,
i32::MAX,
i32::MIN,
100,
-100,
-32,
);
let b = _mm512_set1_epi32(2);
let r = _mm512_maskz_mullo_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mul_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(2.);
let r = _mm512_mul_ps(a, b);
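// Doubling f32::MAX overflows the finite f32 range to +infinity;
// doubling f32::MIN (= -f32::MAX) gives -infinity.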
let e = _mm512_setr_ps(
0.,
2.,
-2.,
f32::INFINITY,
f32::NEG_INFINITY,
200.,
-200.,
-64.,
0.,
2.,
-2.,
f32::INFINITY,
f32::NEG_INFINITY,
200.,
-200.,
-64.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mul_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(2.);
let r = _mm512_mask_mul_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0.,
2.,
-2.,
f32::INFINITY,
f32::NEG_INFINITY,
200.,
-200.,
-64.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mul_ps() {
let a = _mm512_setr_ps(
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
0.,
1.,
-1.,
f32::MAX,
f32::MIN,
100.,
-100.,
-32.,
);
let b = _mm512_set1_ps(2.);
let r = _mm512_maskz_mul_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0.,
2.,
-2.,
f32::INFINITY,
f32::NEG_INFINITY,
200.,
-200.,
-64.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_div_ps() {
let a = _mm512_setr_ps(
0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
);
let b = _mm512_setr_ps(
2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
);
let r = _mm512_div_ps(a, b);
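// IEEE 754: dividing a finite nonzero value by zero yields an infinity
// with the sign of the numerator (100/0 -> +inf, -131/0 -> -inf).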
let e = _mm512_setr_ps(
0.,
0.5,
-0.5,
-1.,
50.,
f32::INFINITY,
-50.,
-16.,
0.,
0.5,
-0.5,
500.,
f32::NEG_INFINITY,
50.,
-50.,
-16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_div_ps() {
let a = _mm512_setr_ps(
0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
);
let b = _mm512_setr_ps(
2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
);
let r = _mm512_mask_div_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0.,
0.5,
-0.5,
-1.,
50.,
f32::INFINITY,
-50.,
-16.,
0.,
1.,
-1.,
1000.,
-131.,
100.,
-100.,
-32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_div_ps() {
let a = _mm512_setr_ps(
0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
);
let b = _mm512_setr_ps(
2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
);
let r = _mm512_maskz_div_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0.,
0.5,
-0.5,
-1.,
50.,
f32::INFINITY,
-50.,
-16.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_max_epi32(a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_max_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_max_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_max_ps(a, b);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_max_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_max_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_max_epu32(a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_max_epu32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_max_epu32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_min_epi32(a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_min_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_min_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_min_ps(a, b);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_min_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_min_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_min_epu32(a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_min_epu32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_min_epu32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sqrt_ps() {
let a = _mm512_setr_ps(
0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
);
let r = _mm512_sqrt_ps(a);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sqrt_ps() {
let a = _mm512_setr_ps(
0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
);
let r = _mm512_mask_sqrt_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sqrt_ps() {
let a = _mm512_setr_ps(
0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
);
let r = _mm512_maskz_sqrt_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_fmadd_ps(a, b, c);
let e = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_mask_fmadd_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_maskz_fmadd_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_fmsub_ps(a, b, c);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_mask_fmsub_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_maskz_fmsub_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmaddsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_fmaddsub_ps(a, b, c);
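// FMADDSUB subtracts c in even-indexed lanes and adds c in odd-indexed
// lanes.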
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmaddsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmaddsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmaddsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsubadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_fmsubadd_ps(a, b, c);
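// FMSUBADD is the mirror of FMADDSUB: even-indexed lanes add c,
// odd-indexed lanes subtract it.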
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsubadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsubadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsubadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_fnmadd_ps(a, b, c);
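// FNMADD computes -(a * b) + c in every lane.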
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmadd_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_fnmsub_ps(a, b, c);
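// FNMSUB computes -(a * b) - c in every lane.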
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmsub_ps() {
let a = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rcp14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_rcp14_ps(a);
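// VRCP14PS is an approximate reciprocal with relative error at most
// 2^-14, hence 0.33333206 rather than the correctly rounded 0.33333334.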
let e = _mm512_set1_ps(0.33333206);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rcp14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_rcp14_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
0.33333206, 0.33333206, 0.33333206, 0.33333206,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rcp14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_rcp14_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
0.33333206, 0.33333206, 0.33333206, 0.33333206,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rsqrt14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_rsqrt14_ps(a);
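// Likewise VRSQRT14PS approximates 1/sqrt(3) ~ 0.57735027 to within
// 2^-14 relative error.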
let e = _mm512_set1_ps(0.5773392);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rsqrt14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_rsqrt14_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
0.5773392, 0.5773392, 0.5773392,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rsqrt14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_rsqrt14_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
0.5773392, 0.5773392, 0.5773392,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getexp_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_getexp_ps(a);
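// GETEXP returns floor(log2(|x|)) as a float: 3.0 = 1.5 * 2^1, so the
// extracted exponent is 1.0.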
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getexp_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_getexp_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getexp_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_getexp_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_roundscale_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_roundscale_ps(a, 0);
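// With imm8 = 0 the result keeps zero fraction bits and rounds to
// nearest, so 1.1 becomes 1.0.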
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_roundscale_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_mask_roundscale_ps(a, 0, a, 0);
let e = _mm512_set1_ps(1.1);
assert_eq_m512(r, e);
let r = _mm512_mask_roundscale_ps(a, 0b11111111_11111111, a, 0);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_roundscale_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_maskz_roundscale_ps(0, a, 0);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_roundscale_ps(0b11111111_11111111, a, 0);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_scalef_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_scalef_ps(a, b);
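// SCALEF computes a * 2^floor(b): 1.0 * 2^3 = 8.0.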
let e = _mm512_set1_ps(8.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_scalef_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_mask_scalef_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_scalef_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_maskz_scalef_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fixupimm_ps() {
let a = _mm512_set1_ps(f32::NAN);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
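// VFIXUPIMMPS classifies each element of b and uses that token to pick
// a 4-bit response from c; with this table the selected lanes come back
// as zero, replacing the NaNs in a (see the Intel SDM token tables).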
let r = _mm512_fixupimm_ps(a, b, c, 5);
let e = _mm512_set1_ps(0.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fixupimm_ps() {
let a = _mm512_set_ps(
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_mask_fixupimm_ps(a, 0b11111111_00000000, b, c, 5);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fixupimm_ps() {
let a = _mm512_set_ps(
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_maskz_fixupimm_ps(0b11111111_00000000, a, b, c, 5);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_ternarylogic_epi32() {
let a = _mm512_set1_epi32(1 << 2);
let b = _mm512_set1_epi32(1 << 1);
let c = _mm512_set1_epi32(1 << 0);
let r = _mm512_ternarylogic_epi32(a, b, c, 8);
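// imm8 is a 3-input truth table indexed by the (a, b, c) bits; 8 sets
// only entry 0b011, and since b and c never share a set bit position
// here, every lane comes out 0.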
let e = _mm512_set1_epi32(0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_ternarylogic_epi32() {
let src = _mm512_set1_epi32(1 << 2);
let a = _mm512_set1_epi32(1 << 1);
let b = _mm512_set1_epi32(1 << 0);
let r = _mm512_mask_ternarylogic_epi32(src, 0, a, b, 8);
assert_eq_m512i(r, src);
let r = _mm512_mask_ternarylogic_epi32(src, 0b11111111_11111111, a, b, 8);
let e = _mm512_set1_epi32(0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_ternarylogic_epi32() {
let a = _mm512_set1_epi32(1 << 2);
let b = _mm512_set1_epi32(1 << 1);
let c = _mm512_set1_epi32(1 << 0);
let r = _mm512_maskz_ternarylogic_epi32(0, a, b, c, 8);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_ternarylogic_epi32(0b11111111_11111111, a, b, c, 8);
let e = _mm512_set1_epi32(0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getmant_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_getmant_ps(a, _MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN);
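// 10.0 = 1.25 * 2^3; normalizing the mantissa into [0.75, 1.5) here
// (or [1, 2) in the masked tests below) leaves 1.25.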
let e = _mm512_set1_ps(1.25);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getmant_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_mask_getmant_ps(a, 0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m512(r, a);
let r = _mm512_mask_getmant_ps(
a,
0b11111111_00000000,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
);
let e = _mm512_setr_ps(
10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getmant_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_maskz_getmant_ps(0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m512(r, _mm512_setzero_ps());
let r =
_mm512_maskz_getmant_ps(0b11111111_00000000, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_add_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(-1.);
let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
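// The exact lane-15 sum 0.00000007 - 1.0 is not representable in f32:
// round-to-nearest gives -0.99999994, while round-toward-zero (second
// call below) truncates the magnitude to -0.9999999.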
let e = _mm512_setr_ps(
-1.,
0.5,
1.,
2.5,
3.,
4.5,
5.,
6.5,
7.,
8.5,
9.,
10.5,
11.,
12.5,
13.,
-0.99999994,
);
assert_eq_m512(r, e);
let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_add_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(-1.);
let r = _mm512_mask_add_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_add_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
1.5,
2.,
3.5,
4.,
5.5,
6.,
7.5,
7.,
8.5,
9.,
10.5,
11.,
12.5,
13.,
-0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_add_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(-1.);
let r = _mm512_maskz_add_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_add_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
7.,
8.5,
9.,
10.5,
11.,
12.5,
13.,
-0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sub_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-1.,
0.5,
1.,
2.5,
3.,
4.5,
5.,
6.5,
7.,
8.5,
9.,
10.5,
11.,
12.5,
13.,
-0.99999994,
);
assert_eq_m512(r, e);
let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sub_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_mask_sub_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_sub_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
1.5,
2.,
3.5,
4.,
5.5,
6.,
7.5,
7.,
8.5,
9.,
10.5,
11.,
12.5,
13.,
-0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sub_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_maskz_sub_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sub_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
7.,
8.5,
9.,
10.5,
11.,
12.5,
13.,
-0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mul_round_ps() {
let a = _mm512_setr_ps(
0.,
1.5,
2.,
3.5,
4.,
5.5,
6.,
7.5,
8.,
9.5,
10.,
11.5,
12.,
13.5,
14.,
0.00000000000000000000007,
);
let b = _mm512_set1_ps(0.1);
let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
0.,
0.15,
0.2,
0.35,
0.4,
0.55,
0.6,
0.75,
0.8,
0.95,
1.0,
1.15,
1.2,
1.35,
1.4,
0.000000000000000000000007000001,
);
assert_eq_m512(r, e);
let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
0.,
0.14999999,
0.2,
0.35,
0.4,
0.54999995,
0.59999996,
0.75,
0.8,
0.95,
1.0,
1.15,
1.1999999,
1.3499999,
1.4,
0.000000000000000000000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mul_round_ps() {
let a = _mm512_setr_ps(
0.,
1.5,
2.,
3.5,
4.,
5.5,
6.,
7.5,
8.,
9.5,
10.,
11.5,
12.,
13.5,
14.,
0.00000000000000000000007,
);
let b = _mm512_set1_ps(0.1);
let r = _mm512_mask_mul_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_mul_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
1.5,
2.,
3.5,
4.,
5.5,
6.,
7.5,
0.8,
0.95,
1.0,
1.15,
1.2,
1.35,
1.4,
0.000000000000000000000007000001,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mul_round_ps() {
let a = _mm512_setr_ps(
0.,
1.5,
2.,
3.5,
4.,
5.5,
6.,
7.5,
8.,
9.5,
10.,
11.5,
12.,
13.5,
14.,
0.00000000000000000000007,
);
let b = _mm512_set1_ps(0.1);
let r = _mm512_maskz_mul_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_mul_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.8,
0.95,
1.0,
1.15,
1.2,
1.35,
1.4,
0.000000000000000000000007000001,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_div_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.33333334);
assert_eq_m512(r, e);
let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.3333333);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_div_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_mask_div_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_div_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
0.33333334, 0.33333334, 0.33333334, 0.33333334,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_div_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_maskz_div_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_div_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
0.33333334, 0.33333334, 0.33333334, 0.33333334,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sqrt_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.7320508);
assert_eq_m512(r, e);
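// Rounding toward +infinity selects the next representable f32 above
// sqrt(3).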
let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.7320509);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sqrt_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_sqrt_round_ps(a, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_sqrt_round_ps(
a,
0b11111111_00000000,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
1.7320508, 1.7320508, 1.7320508,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sqrt_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_sqrt_round_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sqrt_round_ps(
0b11111111_00000000,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
1.7320508, 1.7320508, 1.7320508,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask_fmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fmadd_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_maskz_fmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmadd_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask3_fmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmadd_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask_fmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsub_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_maskz_fmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsub_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask3_fmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsub_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
-0.99999994,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
);
assert_eq_m512(r, e);
let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
-0.9999999, 1., -0.9999999, 1., -0.9999999,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask_fmaddsub_round_ps(
a,
0,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, a);
let r = _mm512_mask_fmaddsub_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_maskz_fmaddsub_round_ps(
0,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmaddsub_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask3_fmaddsub_round_ps(
a,
b,
c,
0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmaddsub_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
);
assert_eq_m512(r, e);
}
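// fmsubadd flips the alternation: even-indexed lanes compute a*b + c and
// odd-indexed lanes a*b - c.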
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
);
assert_eq_m512(r, e);
let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
-0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask_fmsubadd_round_ps(
a,
0,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsubadd_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_maskz_fmsubadd_round_ps(
0,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsubadd_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask3_fmsubadd_round_ps(
a,
b,
c,
0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsubadd_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-0.99999994,
1.0000001,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
-1.,
);
assert_eq_m512(r, e);
}
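// fnmadd computes -(a*b) + c: 1 - 0.00000007 rounds to 0.99999994 (nearest)
// or 0.9999999 (toward zero).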
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask_fnmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmadd_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_maskz_fnmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmadd_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask3_fnmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmadd_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
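// fnmsub computes -(a*b) - c; with c = -1 this is again 1 - a*b.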
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask_fnmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmsub_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_maskz_fnmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmsub_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask3_fnmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmsub_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
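// For max/min the rounding argument should only affect exception behavior
// (no rounding takes place), hence _MM_FROUND_CUR_DIRECTION throughout.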
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_max_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_max_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, a);
let r = _mm512_mask_max_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_max_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_max_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_min_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_min_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, a);
let r = _mm512_mask_min_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_min_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_min_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
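// getexp extracts the exponent as a float, i.e. floor(log2(|x|)):
// 3.0 = 1.5 * 2^1, so the result is 1.0.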
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getexp_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_getexp_round_ps(a, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
let r = _mm512_getexp_round_ps(a, _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getexp_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_getexp_round_ps(a, 0, a, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, a);
let r = _mm512_mask_getexp_round_ps(a, 0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getexp_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_getexp_round_ps(0, a, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_getexp_round_ps(0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
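// roundscale with imm8 = 0 keeps no fraction bits, i.e. rounds to the
// nearest integer, so 1.1 becomes 1.0.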
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_roundscale_round_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_roundscale_round_ps(a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_roundscale_round_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_mask_roundscale_round_ps(a, 0, a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.1);
assert_eq_m512(r, e);
let r =
_mm512_mask_roundscale_round_ps(a, 0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_roundscale_round_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_maskz_roundscale_round_ps(0, a, 0, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r =
_mm512_maskz_roundscale_round_ps(0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
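// scalef computes a * 2^floor(b): 1.0 * 2^3 = 8.0.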
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_scalef_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_scalef_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(8.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_scalef_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r =
_mm512_mask_scalef_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_scalef_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_scalef_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r =
_mm512_maskz_scalef_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_scalef_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
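// fixupimm rewrites special values via the per-class token table in c
// (here i32::MAX) and imm8; with these arguments NaN inputs come out as
// +0.0, while unselected lanes keep their original values.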
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fixupimm_round_ps() {
let a = _mm512_set1_ps(f32::NAN);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_fixupimm_round_ps(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(0.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fixupimm_round_ps() {
let a = _mm512_set_ps(
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_mask_fixupimm_round_ps(
a,
0b11111111_00000000,
b,
c,
5,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fixupimm_round_ps() {
let a = _mm512_set_ps(
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
f32::NAN,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_maskz_fixupimm_round_ps(
0b11111111_00000000,
a,
b,
c,
5,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
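// getmant normalizes the mantissa to the requested interval:
// 10.0 = 1.25 * 2^3, so _MM_MANT_NORM_1_2 yields 1.25.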
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getmant_round_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_getmant_round_ps(
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_set1_ps(1.25);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getmant_round_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_mask_getmant_round_ps(
a,
0,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
assert_eq_m512(r, a);
let r = _mm512_mask_getmant_round_ps(
a,
0b11111111_00000000,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_setr_ps(
10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getmant_round_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_maskz_getmant_round_ps(
0,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_getmant_round_ps(
0b11111111_00000000,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
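// Conversions without an explicit rounding argument use the current mode,
// round-to-nearest-even by default: -3.5 -> -4, 9.5 -> 10.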
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtps_epi32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtps_epi32(a);
let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtps_epi32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtps_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtps_epi32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtps_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
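// Negative inputs are out of range for the unsigned conversion and yield
// 0xFFFFFFFF, which prints as -1 through the signed epi32 helpers.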
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtps_epu32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtps_epu32(a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtps_epu32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtps_epu32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtps_epu32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtps_epu32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi8_epi32(a);
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi8_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepu8_epi32(a);
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepu8_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi16_epi32(a);
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi16_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepu16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepu16_epi32(a);
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepu16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepu16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepu16_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi32_ps(a);
let e = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_ps(-1.);
let r = _mm512_mask_cvtepi32_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
let e = _mm512_set_ps(
-1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi32_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepu32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepu32_ps(a);
let e = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepu32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_ps(-1.);
let r = _mm512_mask_cvtepu32_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
let e = _mm512_set_ps(
-1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepu32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepu32_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_epi16() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi32_epi16(a);
let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_epi16() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi32_epi16() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_epi8() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi32_epi8(a);
let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_epi8() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi32_epi8() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m128i(r, e);
}
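// The saturating (`s`) down-conversions clamp out-of-range values:
// i32::MIN/i32::MAX become i16::MIN/i16::MAX here and i8::MIN/i8::MAX below.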
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi32_epi16() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MAX,
);
let r = _mm512_cvtsepi32_epi16(a);
let e = _mm256_set_epi16(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i16::MIN,
i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_epi16() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MAX,
);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
let e = _mm256_set_epi16(
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
8,
9,
10,
11,
12,
13,
i16::MIN,
i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MAX,
);
let r = _mm512_maskz_cvtsepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
let e = _mm256_set_epi16(
0,
0,
0,
0,
0,
0,
0,
0,
8,
9,
10,
11,
12,
13,
i16::MIN,
i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi32_epi8() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MAX,
);
let r = _mm512_cvtsepi32_epi8(a);
let e = _mm_set_epi8(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i8::MIN,
i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_epi8() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MAX,
);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
let e = _mm_set_epi8(
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
8,
9,
10,
11,
12,
13,
i8::MIN,
i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MAX,
);
let r = _mm512_maskz_cvtsepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
let e = _mm_set_epi8(
0,
0,
0,
0,
0,
0,
0,
0,
8,
9,
10,
11,
12,
13,
i8::MIN,
i8::MAX,
);
assert_eq_m128i(r, e);
}
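// The unsigned-saturating (`us`) variants treat the source as unsigned, so
// i32::MIN (0x8000_0000) clamps to the unsigned maximum, printed as -1 by
// the signed helpers.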
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi32_epi16() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MIN,
);
let r = _mm512_cvtusepi32_epi16(a);
let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_epi16() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MIN,
);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MIN,
);
let r = _mm512_maskz_cvtusepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi32_epi8() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MIN,
);
let r = _mm512_cvtusepi32_epi8(a);
let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_epi8() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MIN,
);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
let a = _mm512_set_epi32(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
i32::MIN,
i32::MIN,
);
let r = _mm512_maskz_cvtusepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m128i(r, e);
}
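// The `_round` conversions take the rounding mode explicitly:
// TO_NEAREST_INT rounds ties to even (-1.5 -> -2, 9.5 -> 10) while
// TO_NEG_INF floors (9.5 -> 9).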
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r =
_mm512_mask_cvt_roundps_epi32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvt_roundps_epi32(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvt_roundps_epi32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvt_roundps_epi32(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r =
_mm512_mask_cvt_roundps_epu32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvt_roundps_epu32(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvt_roundps_epu32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvt_roundps_epu32(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundepi32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let src = _mm512_set1_ps(0.);
let r =
_mm512_mask_cvt_roundepi32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, src);
let r = _mm512_mask_cvt_roundepi32_ps(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_maskz_cvt_roundepi32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvt_roundepi32_ps(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
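// As u32, -2 is 4294967294, which rounds to 2^32 in f32; the literal
// 4294967300. denotes that same f32 value (4294967296.0).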
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundepu32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
0.,
4294967300.,
2.,
4294967300.,
4.,
4294967300.,
6.,
4294967300.,
8.,
10.,
10.,
12.,
12.,
14.,
14.,
16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let src = _mm512_set1_ps(0.);
let r =
_mm512_mask_cvt_roundepu32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, src);
let r = _mm512_mask_cvt_roundepu32_ps(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
4294967300.,
2.,
4294967300.,
4.,
4294967300.,
6.,
4294967300.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_maskz_cvt_roundepu32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvt_roundepu32_ps(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.,
4294967300.,
2.,
4294967300.,
4.,
4294967300.,
6.,
4294967300.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
0.,
);
assert_eq_m512(r, e);
}
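// 1.0 in IEEE binary16 is 0x3C00; four copies packed into each 64-bit lane
// give 0x3C00_3C00_3C00_3C00 = 4323521613979991040, the constant used in
// the ps<->ph tests below.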
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_cvt_roundps_ph(a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundps_ph() {
let a = _mm512_set1_ps(1.);
let src = _mm256_set1_epi16(0);
let r = _mm512_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvt_roundps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvt_roundps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_cvtps_ph(a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtps_ph() {
let a = _mm512_set1_ps(1.);
let src = _mm256_set1_epi16(0);
let r = _mm512_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_cvt_roundph_ps(a, _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let src = _mm512_set1_ps(0.);
let r = _mm512_mask_cvt_roundph_ps(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m512(r, src);
let r = _mm512_mask_cvt_roundph_ps(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_maskz_cvt_roundph_ps(0, a, _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvt_roundph_ps(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_cvtph_ps(a);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let src = _mm512_set1_ps(0.);
let r = _mm512_mask_cvtph_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_maskz_cvtph_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
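// The cvtt ("truncate") variants always round toward zero, regardless of
// the rounding argument: -1.5 -> -1, 9.5 -> 9.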
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtt_roundps_epi32(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtt_roundps_epi32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtt_roundps_epi32(0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtt_roundps_epi32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtt_roundps_epu32(a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtt_roundps_epu32(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtt_roundps_epu32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtt_roundps_epu32(0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtt_roundps_epu32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvttps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvttps_epi32(a);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvttps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvttps_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvttps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvttps_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvttps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvttps_epu32(a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvttps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvttps_epu32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvttps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvttps_epu32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
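// Gathers read lane i from base + index[i] * scale bytes; scale 4 turns
// the i32 indices into element offsets within `arr`.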
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32gather_ps() {
let mut arr = [0f32; 256];
for i in 0..256 {
arr[i] = i as f32;
}
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176);
let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4);
#[rustfmt::skip]
assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
120., 128., 136., 144., 152., 160., 168., 176.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32gather_ps() {
let mut arr = [0f32; 256];
for i in 0..256 {
arr[i] = i as f32;
}
let src = _mm512_set1_ps(2.);
let mask = 0b10101010_10101010;
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176);
let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
#[rustfmt::skip]
assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
2., 128., 2., 144., 2., 160., 2., 176.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32gather_epi32() {
let mut arr = [0i32; 256];
for i in 0..256 {
arr[i] = i as i32;
}
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176);
let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4);
#[rustfmt::skip]
assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32gather_epi32() {
let mut arr = [0i32; 256];
for i in 0..256 {
arr[i] = i as i32;
}
let src = _mm512_set1_epi32(2);
let mask = 0b10101010_10101010;
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4);
#[rustfmt::skip]
assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112,
2, 144, 2, 176, 2, 208, 2, 240));
}
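// Scatters mirror the gathers: lane i of src is written to
// base + index[i] * scale bytes; the masked variants store only the lanes
// whose mask bit is set (here the odd-indexed lanes).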
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32scatter_ps() {
let mut arr = [0f32; 256];
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
_mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
let mut expected = [0f32; 256];
for i in 0..16 {
expected[i * 16] = (i + 1) as f32;
}
assert_eq!(&arr[..], &expected[..]);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32scatter_ps() {
let mut arr = [0f32; 256];
let mask = 0b10101010_10101010;
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
_mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
let mut expected = [0f32; 256];
for i in 0..8 {
expected[i * 32 + 16] = 2. * (i + 1) as f32;
}
assert_eq!(&arr[..], &expected[..]);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32scatter_epi32() {
let mut arr = [0i32; 256];
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
_mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
let mut expected = [0i32; 256];
for i in 0..16 {
expected[i * 16] = (i + 1) as i32;
}
assert_eq!(&arr[..], &expected[..]);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32scatter_epi32() {
let mut arr = [0i32; 256];
let mask = 0b10101010_10101010;
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
_mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
let mut expected = [0i32; 256];
for i in 0..8 {
expected[i * 32 + 16] = 2 * (i + 1) as i32;
}
assert_eq!(&arr[..], &expected[..]);
}
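// Compare intrinsics return a __mmask16 with bit i set for lane i (lane 0
// is the LSB; note `_mm512_set_ps` lists lane 15 first). Ordered predicates
// such as lt are false for NaN operands, and the mask variants AND the
// result with the supplied mask.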
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmplt_ps_mask(a, b);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnlt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01111010_01111010;
assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnle_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmpnle_ps_mask(b, a);
assert_eq!(m, 0b00001101_00001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnle_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01111010_01111010;
assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let m = _mm512_cmpeq_ps_mask(b, a);
assert_eq!(m, 0b11001101_11001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
assert_eq!(r, 0b01001000_01001000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let m = _mm512_cmpneq_ps_mask(b, a);
assert_eq!(m, 0b00110010_00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
assert_eq!(r, 0b00110010_00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_round_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_round_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
assert_eq!(r, 0b00000100_00000100);
}
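// cmpord sets a lane's bit only when both operands are non-NaN, while
// cmpunord sets it when at least one operand is NaN; for the same inputs the
// two masks are complementary.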
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let m = _mm512_cmpord_ps_mask(a, b);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let mask = 0b11000011_11000011;
let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
assert_eq!(m, 0b00000001_00000001);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpunord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let m = _mm512_cmpunord_ps_mask(a, b);
assert_eq!(m, 0b11111010_11111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpunord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let mask = 0b00001111_00001111;
let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
assert_eq!(m, 0b00001010_00001010);
}
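// The scalar _ss/_sd comparisons compare only element 0, so at most bit 0 of
// the result mask is set, and the masked variants gate the comparison on
// bit 0 of the input mask.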
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS);
assert_eq!(m, 0);
let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_round_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_round_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 0);
let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS);
assert_eq!(m, 0);
let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_round_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_round_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 0);
let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
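// The epu32 comparisons reinterpret lanes as unsigned: -1 becomes u32::MAX,
// so cmplt against set1(-1) is true for every lane except those that hold
// u32::MAX themselves.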
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmplt_epu32_mask(a, b);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmpgt_epu32_mask(b, a);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmple_epu32_mask(a, b),
!_mm512_cmpgt_epu32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
_mm512_mask_cmple_epu32_mask(mask, a, b),
0b01111010_01111010
);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpge_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmpge_epu32_mask(a, b),
!_mm512_cmplt_epu32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpge_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
_mm512_mask_cmpge_epu32_mask(mask, a, b),
0b00110000_00110000
);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpeq_epu32_mask(b, a);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpneq_epu32_mask(b, a);
assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
assert_eq!(r, 0b00110010_00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b01001010_01001010);
}
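// The epi32 comparisons are signed: against b = -1, only lanes strictly
// below -1 (i32::MIN and -100 here) set their less-than bits.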
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmplt_epi32_mask(a, b);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmpgt_epi32_mask(b, a);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmple_epi32_mask(a, b),
!_mm512_cmpgt_epi32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
_mm512_mask_cmple_epi32_mask(mask, a, b),
0b00110000_00110000
);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpge_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmpge_epi32_mask(a, b),
!_mm512_cmplt_epi32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpge_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
_mm512_mask_cmpge_epi32_mask(mask, a, b),
0b01111010_01111010
);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpeq_epi32_mask(b, a);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpneq_epi32_mask(b, a);
assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
assert_eq!(r, 0b00110010_00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b00000100_00000100);
}
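// _mm512_set_* takes elements from the highest index down, while
// _mm512_setr_* takes them in memory order; the paired tests below check
// that each constructor is the reverse of the other.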
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi8() {
let r = _mm512_set1_epi8(2);
assert_eq_m512i(
r,
_mm512_set_epi8(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi16() {
let r = _mm512_set1_epi16(2);
assert_eq_m512i(
r,
_mm512_set_epi16(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi32() {
let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(
r,
_mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_epi32() {
let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(
r,
_mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi8() {
let r = _mm512_set_epi8(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2,
);
assert_eq_m512i(r, _mm512_set1_epi8(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi16() {
let r = _mm512_set_epi16(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2,
);
assert_eq_m512i(r, _mm512_set1_epi16(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi32() {
let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, _mm512_set1_epi32(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_si512() {
assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_epi32() {
assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_ps() {
let r = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(
r,
_mm512_set_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_ps() {
let r = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(
r,
_mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_ps() {
#[rustfmt::skip]
let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
2., 2., 2., 2., 2., 2., 2., 2.);
assert_eq_m512(expected, _mm512_set1_ps(2.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set4_epi32() {
let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set4_ps() {
let r = _mm512_set_ps(
4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
);
assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr4_epi32() {
let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr4_ps() {
let r = _mm512_set_ps(
4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
);
assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_ps() {
assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero() {
assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
}
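// loadu/storeu perform unaligned accesses; black_box keeps the compiler from
// constant-folding the load away so the intrinsic itself is exercised.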
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_pd() {
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
let p = a.as_ptr();
let r = _mm512_loadu_pd(black_box(p));
let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_pd() {
let a = _mm512_set1_pd(9.);
let mut r = _mm512_undefined_pd();
_mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
assert_eq_m512d(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_ps() {
let a = &[
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
];
let p = a.as_ptr();
let r = _mm512_loadu_ps(black_box(p));
let e = _mm512_setr_ps(
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_ps() {
let a = _mm512_set1_ps(9.);
let mut r = _mm512_undefined_ps();
_mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_pd() {
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_pd() {
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
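// Rotates wrap the bits that fall off the end (1 << 31 rotated left by one
// becomes 1 << 0), whereas the shifts tested further down discard them.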
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rol_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_rol_epi32(a, 1);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rol_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_rol_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rol_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let r = _mm512_maskz_rol_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_ror_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_ror_epi32(a, 1);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_ror_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_mask_ror_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_ror_epi32() {
let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
let r = _mm512_maskz_ror_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_slli_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_slli_epi32(a, 1);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_slli_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_slli_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_slli_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let r = _mm512_maskz_slli_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srli_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_srli_epi32(a, 1);
let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srli_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_mask_srli_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srli_epi32() {
let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
let r = _mm512_maskz_srli_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rolv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_rolv_epi32(a, b);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rolv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_rolv_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rolv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_maskz_rolv_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rorv_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_rorv_epi32(a, b);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rorv_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_rorv_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rorv_epi32() {
let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_maskz_rorv_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sllv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_sllv_epi32(a, count);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sllv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_sllv_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sllv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_maskz_sllv_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srlv_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_srlv_epi32(a, count);
let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srlv_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_srlv_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srlv_epi32() {
let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_maskz_srlv_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
assert_eq_m512i(r, e);
}
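// Unlike sllv/srlv, which take a per-lane count vector, sll/srl apply a
// single shift count to all lanes, read from the low 64 bits of the
// __m128i count argument.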
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sll_epi32() {
let a = _mm512_set_epi32(
1 << 31,
1 << 0,
1 << 1,
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_sll_epi32(a, count);
let e = _mm512_set_epi32(
0,
1 << 2,
1 << 3,
1 << 4,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sll_epi32() {
let a = _mm512_set_epi32(
1 << 31,
1 << 0,
1 << 1,
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_mask_sll_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(
0,
1 << 2,
1 << 3,
1 << 4,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sll_epi32() {
let a = _mm512_set_epi32(
1 << 31,
1 << 0,
1 << 1,
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 31,
);
let count = _mm_set_epi32(2, 0, 0, 2);
let r = _mm512_maskz_sll_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srl_epi32() {
let a = _mm512_set_epi32(
1 << 31,
1 << 0,
1 << 1,
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_srl_epi32(a, count);
let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srl_epi32() {
let a = _mm512_set_epi32(
1 << 31,
1 << 0,
1 << 1,
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_mask_srl_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srl_epi32() {
let a = _mm512_set_epi32(
1 << 31,
1 << 0,
1 << 1,
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 31,
);
let count = _mm_set_epi32(2, 0, 0, 2);
let r = _mm512_maskz_srl_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
assert_eq_m512i(r, e);
}
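// Arithmetic right shifts replicate the sign bit, rounding toward negative
// infinity: -15 >> 2 yields -4, not -3.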
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sra_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
let count = _mm_set_epi32(1, 0, 0, 2);
let r = _mm512_sra_epi32(a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sra_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_mask_sra_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sra_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
let count = _mm_set_epi32(2, 0, 0, 2);
let r = _mm512_maskz_sra_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srav_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
let r = _mm512_srav_epi32(a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srav_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
let r = _mm512_mask_srav_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srav_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
let r = _mm512_maskz_srav_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srai_epi32() {
let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
let r = _mm512_srai_epi32(a, 2);
let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srai_epi32() {
let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
let r = _mm512_mask_srai_epi32(a, 0, a, 2);
assert_eq_m512i(r, a);
let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2);
let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srai_epi32() {
let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
let r = _mm512_maskz_srai_epi32(0, a, 2);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permute_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_permute_ps(a, 1);
let e = _mm512_set_ps(
2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permute_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_permute_ps(a, 0b00000000_00000000, a, 1);
assert_eq_m512(r, a);
let r = _mm512_mask_permute_ps(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_ps(
2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permute_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_permute_ps(0, a, 1);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permute_ps(0b00000000_11111111, a, 1);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
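// permutevar_epi32 permutes across the full register: lane j receives
// a[idx[j] & 15]. With idx all ones every lane gets element 1, which is 14
// here because _mm512_set_epi32 lists element 15 first.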
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutevar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_permutevar_epi32(idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutevar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutevar_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_permutevar_ps(a, b);
let e = _mm512_set_ps(
2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutevar_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_mask_permutevar_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
let e = _mm512_set_ps(
2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutevar_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_maskz_permutevar_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutexvar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_permutexvar_epi32(idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutexvar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutexvar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutexvar_ps() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_permutexvar_ps(idx, a);
let e = _mm512_set1_ps(14.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutexvar_ps() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
assert_eq_m512(r, a);
let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
let e = _mm512_set1_ps(14.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutexvar_ps() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_permutexvar_ps(0, idx, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
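// For permutex2var, bit 4 of each index selects the source register: clear
// picks from a, set picks from b. The 1 << 4 entries below therefore fetch
// element 0 of b (the value 100).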
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let idx = _mm512_set_epi32(
1,
1 << 4,
2,
1 << 4,
3,
1 << 4,
4,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_permutex2var_epi32(a, idx, b);
let e = _mm512_set_epi32(
14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let idx = _mm512_set_epi32(
1,
1 << 4,
2,
1 << 4,
3,
1 << 4,
4,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
let e = _mm512_set_epi32(
14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let idx = _mm512_set_epi32(
1,
1 << 4,
2,
1 << 4,
3,
1 << 4,
4,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
assert_eq_m512i(r, e);
}
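// The mask2 variant copies idx (rather than a) into lanes whose mask bit is
// clear, which is why r equals idx when the mask is all zeros.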
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let idx = _mm512_set_epi32(
1000,
1 << 4,
2000,
1 << 4,
3000,
1 << 4,
4000,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
assert_eq_m512i(r, idx);
let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
let e = _mm512_set_epi32(
1000,
1 << 4,
2000,
1 << 4,
3000,
1 << 4,
4000,
1 << 4,
10,
100,
9,
100,
8,
100,
7,
100,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let idx = _mm512_set_epi32(
1,
1 << 4,
2,
1 << 4,
3,
1 << 4,
4,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_permutex2var_ps(a, idx, b);
let e = _mm512_set_ps(
14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let idx = _mm512_set_epi32(
1,
1 << 4,
2,
1 << 4,
3,
1 << 4,
4,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
assert_eq_m512(r, a);
let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
let e = _mm512_set_ps(
14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let idx = _mm512_set_epi32(
1,
1 << 4,
2,
1 << 4,
3,
1 << 4,
4,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let idx = _mm512_set_epi32(
1,
1 << 4,
2,
1 << 4,
3,
1 << 4,
4,
1 << 4,
5,
1 << 4,
6,
1 << 4,
7,
1 << 4,
8,
1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_mask2_permutex2var_ps(a, idx, 0b00000000_11111111, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
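// _MM_PERM_AADD encodes the per-lane selectors with A = 0 through D = 3,
// listed from element 3 down to element 0: elements 3..=2 take source
// element 0 and elements 1..=0 take source element 3.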
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_epi32() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm512_shuffle_epi32(a, _MM_PERM_AADD);
let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_epi32() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm512_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD);
assert_eq_m512i(r, a);
let r = _mm512_mask_shuffle_epi32(a, 0b11111111_11111111, a, _MM_PERM_AADD);
let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_epi32() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm512_maskz_shuffle_epi32(0, a, _MM_PERM_AADD);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_shuffle_epi32(0b00000000_11111111, a, _MM_PERM_AADD);
let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_ps() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_shuffle_ps(a, b, 0x0F);
let e = _mm512_setr_ps(
8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_ps() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_mask_shuffle_ps(a, 0, a, b, 0x0F);
assert_eq_m512(r, a);
let r = _mm512_mask_shuffle_ps(a, 0b11111111_11111111, a, b, 0x0F);
let e = _mm512_setr_ps(
8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_ps() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_maskz_shuffle_ps(0, a, b, 0x0F);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_shuffle_ps(0b00000000_11111111, a, b, 0x0F);
let e = _mm512_setr_ps(
8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
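// shuffle_i32x4/f32x4 move whole 128-bit lanes: the two low immediate fields
// select lanes of a for the result's lower half and the two high fields
// select lanes of b for the upper half, so 0b00000000 broadcasts lane 0 of
// each source.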
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_i32x4() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm512_shuffle_i32x4(a, b, 0b00000000);
let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_i32x4() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm512_mask_shuffle_i32x4(a, 0, a, b, 0b00000000);
assert_eq_m512i(r, a);
let r = _mm512_mask_shuffle_i32x4(a, 0b11111111_11111111, a, b, 0b00000000);
let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_i32x4() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm512_maskz_shuffle_i32x4(0, a, b, 0b00000000);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_shuffle_i32x4(0b00000000_11111111, a, b, 0b00000000);
let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_f32x4() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_shuffle_f32x4(a, b, 0b00000000);
let e = _mm512_setr_ps(
1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_f32x4() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_mask_shuffle_f32x4(a, 0, a, b, 0b00000000);
assert_eq_m512(r, a);
let r = _mm512_mask_shuffle_f32x4(a, 0b11111111_11111111, a, b, 0b00000000);
let e = _mm512_setr_ps(
1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_f32x4() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_maskz_shuffle_f32x4(0, a, b, 0b00000000);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_shuffle_f32x4(0b00000000_11111111, a, b, 0b00000000);
let e = _mm512_setr_ps(
1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
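// The immediate selects which 128-bit lane to extract; 0x1 yields elements
// 4..=7. The masked forms then blend the extracted lane with src, or with
// zero, per mask bit.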
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_extractf32x4_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_extractf32x4_ps(a, 0x1);
let e = _mm_setr_ps(5., 6., 7., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_extractf32x4_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let src = _mm_set1_ps(100.);
let r = _mm512_mask_extractf32x4_ps(src, 0, a, 0x1);
assert_eq_m128(r, src);
let r = _mm512_mask_extractf32x4_ps(src, 0b11111111, a, 0x1);
let e = _mm_setr_ps(5., 6., 7., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_extractf32x4_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_maskz_extractf32x4_ps(0, a, 0x1);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm512_maskz_extractf32x4_ps(0b00000001, a, 0x1);
let e = _mm_setr_ps(5., 0., 0., 0.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_extracti32x4_epi32() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_extracti32x4_epi32(a, 0x1);
let e = _mm_setr_epi32(5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_extracti32x4_epi32() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let src = _mm_set1_epi32(100);
let r = _mm512_mask_extracti32x4_epi32(src, 0, a, 0x1);
assert_eq_m128i(r, src);
let r = _mm512_mask_extracti32x4_epi32(src, 0b11111111, a, 0x1);
let e = _mm_setr_epi32(5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_extracti32x4_epi32() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_maskz_extracti32x4_epi32(0, a, 0x1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_extracti32x4_epi32(0b00000001, a, 0x1);
let e = _mm_setr_epi32(5, 0, 0, 0);
assert_eq_m128i(r, e);
}
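// moveldup duplicates each even-indexed element into the odd slot above it.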
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_moveldup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_moveldup_ps(a);
let e = _mm512_setr_ps(
1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_moveldup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_mask_moveldup_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
let e = _mm512_setr_ps(
1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_moveldup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_maskz_moveldup_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
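// movehdup duplicates each odd-indexed element into the even slot below it.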
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_movehdup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_movehdup_ps(a);
let e = _mm512_setr_ps(
2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_movehdup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_mask_movehdup_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
let e = _mm512_setr_ps(
2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_movehdup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_maskz_movehdup_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
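// For insert, the imm8 argument selects which 128-bit lane of a is replaced by b.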
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_inserti32x4() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm_setr_epi32(17, 18, 19, 20);
let r = _mm512_inserti32x4(a, b, 0);
let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_inserti32x4() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm_setr_epi32(17, 18, 19, 20);
let r = _mm512_mask_inserti32x4(a, 0, a, b, 0);
assert_eq_m512i(r, a);
let r = _mm512_mask_inserti32x4(a, 0b11111111_11111111, a, b, 0);
let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_inserti32x4() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm_setr_epi32(17, 18, 19, 20);
let r = _mm512_maskz_inserti32x4(0, a, b, 0);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_inserti32x4(0b00000000_11111111, a, b, 0);
let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_insertf32x4() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_insertf32x4(a, b, 0);
let e = _mm512_setr_ps(
17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_insertf32x4() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_mask_insertf32x4(a, 0, a, b, 0);
assert_eq_m512(r, a);
let r = _mm512_mask_insertf32x4(a, 0b11111111_11111111, a, b, 0);
let e = _mm512_setr_ps(
17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_insertf32x4() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_maskz_insertf32x4(0, a, b, 0);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_insertf32x4(0b00000000_11111111, a, b, 0);
let e = _mm512_setr_ps(
17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps128_ps512() {
let a = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_castps128_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps256_ps512() {
let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm512_castps256_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_zextps128_ps512() {
let a = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_zextps128_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_zextps256_ps512() {
let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm512_zextps256_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps512_ps128() {
let a = _mm512_setr_ps(
17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
);
let r = _mm512_castps512_ps128(a);
let e = _mm_setr_ps(17., 18., 19., 20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps512_ps256() {
let a = _mm512_setr_ps(
17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
);
let r = _mm512_castps512_ps256(a);
let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps_pd() {
let a = _mm512_set1_ps(1.);
let r = _mm512_castps_pd(a);
let e = _mm512_set1_pd(0.007812501848093234);
assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps_si512() {
let a = _mm512_set1_ps(1.);
let r = _mm512_castps_si512(a);
let e = _mm512_set1_epi32(1065353216);
assert_eq_m512i(r, e);
}
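// Note: _mm_set_epi32 takes arguments from the highest element down, so 20 lands in element 0,
// and broadcastd/broadcastss replicate element 0 across the whole vector.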
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcastd_epi32() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_broadcastd_epi32(a);
let e = _mm512_set1_epi32(20);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcastd_epi32() {
let src = _mm512_set1_epi32(20);
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_mask_broadcastd_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
let e = _mm512_set1_epi32(20);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcastd_epi32() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_maskz_broadcastd_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcastss_ps() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_broadcastss_ps(a);
let e = _mm512_set1_ps(20.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcastss_ps() {
let src = _mm512_set1_ps(20.);
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_mask_broadcastss_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
let e = _mm512_set1_ps(20.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcastss_ps() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_maskz_broadcastss_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcast_i32x4() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_broadcast_i32x4(a);
let e = _mm512_set_epi32(
17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcast_i32x4() {
let src = _mm512_set1_epi32(20);
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_mask_broadcast_i32x4(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
let e = _mm512_set_epi32(
17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcast_i32x4() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_maskz_broadcast_i32x4(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcast_f32x4() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_broadcast_f32x4(a);
let e = _mm512_set_ps(
17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcast_f32x4() {
let src = _mm512_set1_ps(20.);
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_mask_broadcast_f32x4(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
let e = _mm512_set_ps(
17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcast_f32x4() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_maskz_broadcast_f32x4(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
);
assert_eq_m512(r, e);
}
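// For each lane, a mask bit of 1 selects the element from b and 0 keeps the element from a.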
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_blend_epi32() {
let a = _mm512_set1_epi32(1);
let b = _mm512_set1_epi32(2);
let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_blend_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(2.);
let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
let e = _mm512_set_ps(
2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
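// unpackhi/unpacklo interleave the high/low 64-bit halves of each 128-bit lane of a and b.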
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpackhi_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_unpackhi_epi32(a, b);
let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpackhi_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpackhi_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_maskz_unpackhi_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpackhi_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_unpackhi_ps(a, b);
let e = _mm512_set_ps(
17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpackhi_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
let e = _mm512_set_ps(
17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpackhi_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_maskz_unpackhi_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpacklo_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_unpacklo_epi32(a, b);
let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpacklo_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpacklo_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_maskz_unpacklo_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpacklo_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_unpacklo_ps(a, b);
let e = _mm512_set_ps(
19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpacklo_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
let e = _mm512_set_ps(
19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpacklo_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_maskz_unpacklo_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
);
assert_eq_m512(r, e);
}
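// alignr treats a:b as a single 32-element vector (a in the upper half), shifts right by imm8
// elements, and keeps the low 16; the count wraps modulo 16, so an imm8 of 16 behaves like 0.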
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_alignr_epi32() {
let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm512_set_epi32(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
);
let r = _mm512_alignr_epi32(a, b, 0);
assert_eq_m512i(r, b);
let r = _mm512_alignr_epi32(a, b, 16);
assert_eq_m512i(r, b);
let r = _mm512_alignr_epi32(a, b, 1);
let e = _mm512_set_epi32(
1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_alignr_epi32() {
let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm512_set_epi32(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
);
let r = _mm512_mask_alignr_epi32(a, 0, a, b, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_alignr_epi32(a, 0b11111111_11111111, a, b, 1);
let e = _mm512_set_epi32(
1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_alignr_epi32() {
let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm512_set_epi32(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
);
let r = _mm512_maskz_alignr_epi32(0, a, b, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_alignr_epi32(0b00000000_11111111, a, b, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_and_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_and_epi32(a, b);
let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_and_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_mask_and_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
let e = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_and_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_maskz_and_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_and_si512() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_and_epi32(a, b);
let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_or_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_or_epi32(a, b);
let e = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_or_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_mask_or_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_or_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_maskz_or_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_or_si512() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_or_epi32(a, b);
let e = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_xor_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_xor_epi32(a, b);
let e = _mm512_set_epi32(
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_xor_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_mask_xor_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
let e = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_xor_epi32() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_maskz_xor_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_xor_si512() {
let a = _mm512_set_epi32(
1 << 1 | 1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 3,
);
let b = _mm512_set_epi32(
1 << 1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
);
let r = _mm512_xor_epi32(a, b);
let e = _mm512_set_epi32(
1 << 2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1 << 1 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_andnot_epi32() {
let a = _mm512_set1_epi32(0);
let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
let r = _mm512_andnot_epi32(a, b);
let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_andnot_epi32() {
let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
let r = _mm512_mask_andnot_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_andnot_epi32() {
let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
let r = _mm512_maskz_andnot_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(
0,
0,
0,
0,
0,
0,
0,
0,
1 << 3 | 1 << 4,
1 << 3 | 1 << 4,
1 << 3 | 1 << 4,
1 << 3 | 1 << 4,
1 << 3 | 1 << 4,
1 << 3 | 1 << 4,
1 << 3 | 1 << 4,
1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
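// The mask intrinsics below operate directly on 16-bit mask values rather than on vectors.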
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kand() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b11001100_00110011;
let r = _mm512_kand(a, b);
let e: u16 = 0b11001100_00110011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kand_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b11001100_00110011;
let r = _kand_mask16(a, b);
let e: u16 = 0b11001100_00110011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kor() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kor(a, b);
let e: u16 = 0b11101110_00111011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kor_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kor_mask16(a, b);
let e: u16 = 0b11101110_00111011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kxor() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kxor(a, b);
let e: u16 = 0b11100010_00111000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kxor_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kxor_mask16(a, b);
let e: u16 = 0b11100010_00111000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_knot() {
let a: u16 = 0b11001100_00110011;
let r = _mm512_knot(a);
let e: u16 = 0b00110011_11001100;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_knot_mask16() {
let a: u16 = 0b11001100_00110011;
let r = _knot_mask16(a);
let e: u16 = 0b00110011_11001100;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kandn() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kandn(a, b);
let e: u16 = 0b00100010_00001000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kandn_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kandn_mask16(a, b);
let e: u16 = 0b00100010_00001000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kxnor() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kxnor(a, b);
let e: u16 = 0b00011101_11000111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kxnor_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kxnor_mask16(a, b);
let e: u16 = 0b00011101_11000111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kmov() {
let a: u16 = 0b11001100_00110011;
let r = _mm512_kmov(a);
let e: u16 = 0b11001100_00110011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_int2mask() {
let a: i32 = 0b11001100_00110011;
let r = _mm512_int2mask(a);
let e: u16 = 0b11001100_00110011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2int() {
let k1: __mmask16 = 0b11001100_00110011;
let r = _mm512_mask2int(k1);
let e: i32 = 0b11001100_00110011;
assert_eq!(r, e);
}
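// kunpackb here combines the low byte of a with the high byte of b, matching the expected value below.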
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kunpackb() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kunpackb(a, b);
let e: u16 = 0b00101110_00110011;
assert_eq!(r, e);
}
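// kortestc returns 1 only when the OR of the two masks is all ones.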
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kortestc() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kortestc(a, b);
assert_eq!(r, 0);
let b: u16 = 0b11111111_11111111;
let r = _mm512_kortestc(a, b);
assert_eq!(r, 1);
}
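// test sets result bit i when lane i of (a AND b) is nonzero; testn sets it when that lane is zero.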
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_test_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
let r = _mm512_test_epi32_mask(a, b);
let e: __mmask16 = 0b11111111_11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_test_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
let r = _mm512_mask_test_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
let e: __mmask16 = 0b11111111_11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_testn_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
let r = _mm512_testn_epi32_mask(a, b);
let e: __mmask16 = 0b00000000_00000000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_testn_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 1);
let r = _mm512_mask_testn_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
let e: __mmask16 = 0b11111111_11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_stream_ps() {
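// _mm512_stream_ps is a non-temporal store and requires a 64-byte-aligned destination.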
#[repr(align(64))]
struct Memory {
pub data: [f32; 16],
}
let a = _mm512_set1_ps(7.0);
let mut mem = Memory { data: [-1.0; 16] };
_mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
for i in 0..16 {
assert_eq!(mem.data[i], get_m512(a, i));
}
}
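// The reduce intrinsics fold a vector horizontally; the masked variants only include lanes whose mask bit is set.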
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_add_epi32() {
let a = _mm512_set1_epi32(1);
let e: i32 = _mm512_reduce_add_epi32(a);
assert_eq!(16, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_add_epi32() {
let a = _mm512_set1_epi32(1);
let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
assert_eq!(8, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_add_ps() {
let a = _mm512_set1_ps(1.);
let e: f32 = _mm512_reduce_add_ps(a);
assert_eq!(16., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_add_ps() {
let a = _mm512_set1_ps(1.);
let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
assert_eq!(8., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_mul_epi32() {
let a = _mm512_set1_epi32(2);
let e: i32 = _mm512_reduce_mul_epi32(a);
assert_eq!(65536, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_mul_epi32() {
let a = _mm512_set1_epi32(2);
let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
assert_eq!(256, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_mul_ps() {
let a = _mm512_set1_ps(2.);
let e: f32 = _mm512_reduce_mul_ps(a);
assert_eq!(65536., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_mul_ps() {
let a = _mm512_set1_ps(2.);
let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
assert_eq!(256., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_max_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_reduce_max_epi32(a);
assert_eq!(15, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_max_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
assert_eq!(7, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_max_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_reduce_max_epu32(a);
assert_eq!(15, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_max_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
assert_eq!(7, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_max_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_reduce_max_ps(a);
assert_eq!(15., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_max_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
assert_eq!(7., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_min_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_reduce_min_epi32(a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_min_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_min_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_reduce_min_epu32(a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_min_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_min_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_reduce_min_ps(a);
assert_eq!(0., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_min_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
assert_eq!(0., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_and_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_reduce_and_epi32(a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_and_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
assert_eq!(1, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_or_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_reduce_or_epi32(a);
assert_eq!(3, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_or_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
assert_eq!(1, e);
}
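// compress packs the mask-selected elements contiguously into the low lanes (the right-hand end
// of a _mm512_set_epi32 listing); the remaining lanes take src (mask_) or zero (maskz_).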
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_compress_epi32() {
let src = _mm512_set1_epi32(200);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
let e = _mm512_set_epi32(
200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_compress_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_compress_ps() {
let src = _mm512_set1_ps(200.);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
let e = _mm512_set_ps(
200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_compress_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
);
assert_eq_m512(r, e);
}
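// expand is the inverse of compress: consecutive low elements of a are scattered to the lanes
// whose mask bit is set.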
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_expand_epi32() {
let src = _mm512_set1_epi32(200);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
let e = _mm512_set_epi32(
200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_expand_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_expand_ps() {
let src = _mm512_set1_ps(200.);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
let e = _mm512_set_ps(
200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_expand_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
let e = _mm512_set_ps(
0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
);
assert_eq_m512(r, e);
}
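// loadu/storeu accept unaligned pointers; the load/store variants further below require 64-byte
// alignment, hence the #[repr(align(64))] wrappers.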
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_epi32() {
let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
let p = a.as_ptr();
let r = _mm512_loadu_epi32(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_epi32() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_si512() {
let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
let p = a.as_ptr();
let r = _mm512_loadu_si512(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_si512() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_storeu_si512(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_load_si512() {
#[repr(align(64))]
struct Align {
data: [i32; 16],
}
let a = Align {
data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
};
let p = (a.data).as_ptr();
let r = _mm512_load_si512(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_store_si512() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_store_si512(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_load_epi32() {
#[repr(align(64))]
struct Align {
data: [i32; 16],
}
let a = Align {
data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
};
let p = (a.data).as_ptr();
let r = _mm512_load_epi32(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_store_epi32() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_load_ps() {
#[repr(align(64))]
struct Align {
data: [f32; 16],
}
let a = Align {
data: [
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
],
};
let p = (a.data).as_ptr();
let r = _mm512_load_ps(black_box(p));
let e = _mm512_setr_ps(
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_store_ps() {
let a = _mm512_set1_ps(9.);
let mut r = _mm512_undefined_ps();
_mm512_store_ps(&mut r as *mut _ as *mut f32, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_set1_epi32() {
let src = _mm512_set1_epi32(2);
let a: i32 = 11;
let r = _mm512_mask_set1_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
let e = _mm512_set1_epi32(11);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_set1_epi32() {
let a: i32 = 11;
let r = _mm512_maskz_set1_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
let e = _mm512_set1_epi32(11);
assert_eq_m512i(r, e);
}
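// The scalar _ss/_sd variants operate on the lowest element only: mask bit 0 chooses between the
// operation result and src (or zero for maskz), and the upper elements are copied from a.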
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_move_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_move_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_move_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 40.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_move_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_move_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_move_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 40.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_move_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_move_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_move_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 4.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_move_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_move_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_move_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 4.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_add_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_add_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_add_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_add_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_add_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_add_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_add_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_add_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_sub_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_sub_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sub_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sub_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sub_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sub_sd(0b11111111, a, b);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_mul_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_mul_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_mul_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_mul_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_mul_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_mul_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_div_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_div_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_div_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_div_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_div_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_div_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_div_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_div_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_max_ss(a, 0, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_max_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_max_ss(0, a, b);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_max_ss(0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_max_sd(a, 0, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_max_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_max_sd(0, a, b);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_max_sd(0b11111111, a, b);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_min_ss(a, 0, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_min_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_min_ss(0, a, b);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_min_ss(0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_min_sd(a, 0, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_min_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_min_sd(0, a, b);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_min_sd(0b11111111, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_sqrt_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_sqrt_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sqrt_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sqrt_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
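// rsqrt14/rcp14 are approximations with a relative error of at most 2^-14; the inputs here are
// powers of two, for which the results compare exactly.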
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rsqrt14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_rsqrt14_ss(a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rsqrt14_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rsqrt14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_rsqrt14_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rsqrt14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_rsqrt14_sd(a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rsqrt14_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rsqrt14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_rsqrt14_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rcp14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_rcp14_ss(a, b);
let e = _mm_set_ps(1., 2., 10., 0.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rcp14_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_rcp14_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rcp14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_rcp14_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rcp14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_rcp14_sd(a, b);
let e = _mm_set_pd(1., 0.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rcp14_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_rcp14_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 0.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rcp14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_rcp14_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 0.25);
assert_eq_m128d(r, e);
}
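// _mm_getexp_ss/_sd store floor(log2(|x|)) of the lowest element of `b` as a
// float: getexp(3.0) = 1.0. The upper elements are copied from `a`.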
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_getexp_ss(a, b);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_getexp_ss(a, 0, a, b);
let e = _mm_set_ps(2., 2., 2., 2.);
assert_eq_m128(r, e);
let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_getexp_ss(0, a, b);
let e = _mm_set_ps(2., 2., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getexp_ss(0b11111111, a, b);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_getexp_sd(a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_getexp_sd(a, 0, a, b);
let e = _mm_set_pd(2., 2.);
assert_eq_m128d(r, e);
let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_getexp_sd(0, a, b);
let e = _mm_set_pd(2., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getexp_sd(0b11111111, a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
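// _mm_getmant_ss/_sd extract the mantissa of the lowest element of `b`,
// normalized to the interval picked by the immediate (_MM_MANT_NORM_1_2 gives
// [1, 2)) with the sign of the source: 10.0 = 1.25 * 2^3, so the mantissa is 1.25.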
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_getmant_ss(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_mask_getmant_ss(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 20.);
assert_eq_m128(r, e);
let r = _mm_mask_getmant_ss(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_maskz_getmant_ss(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getmant_ss(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_mask_getmant_sd(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 20.);
assert_eq_m128d(r, e);
let r = _mm_mask_getmant_sd(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_maskz_getmant_sd(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getmant_sd(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
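// _mm_roundscale_ss/_sd round the lowest element of `b` keeping imm8[7:4]
// fraction bits; an immediate of 0 means round to a whole number with the
// default nearest-even mode, so 1.1 becomes 1.0.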
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_roundscale_ss(a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_mask_roundscale_ss(a, 0, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
assert_eq_m128(r, e);
let r = _mm_mask_roundscale_ss(a, 0b11111111, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_maskz_roundscale_ss(0, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_roundscale_ss(0b11111111, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_roundscale_sd(a, b, 0);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_mask_roundscale_sd(a, 0, a, b, 0);
let e = _mm_set_pd(2.2, 2.2);
assert_eq_m128d(r, e);
let r = _mm_mask_roundscale_sd(a, 0b11111111, a, b, 0);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_maskz_roundscale_sd(0, a, b, 0);
let e = _mm_set_pd(2.2, 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_roundscale_sd(0b11111111, a, b, 0);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
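// _mm_scalef_ss/_sd compute a * 2^floor(b) on the lowest elements:
// 1.0 * 2^3 = 8.0.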
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_scalef_ss(a, b);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_scalef_ss(a, 0, a, b);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_scalef_ss(0, a, b);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_scalef_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_scalef_sd(a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_scalef_sd(a, 0, a, b);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_scalef_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_scalef_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
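// Masked scalar FMA tests. The prefix picks the fallback when the low mask bit
// is clear: _mask_ keeps the lowest element of `a` (the first multiplicand),
// _maskz_ zeroes it, and _mask3_ keeps the lowest element of `c` (the addend).
// With set1 inputs: fmadd = 1*2+3 = 5, fmsub = 1*2-3 = -1,
// fnmadd = -(1*2)+3 = 1, fnmsub = -(1*2)-3 = -5.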
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmadd_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmadd_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmadd_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmadd_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmadd_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmadd_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmsub_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmsub_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmsub_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmsub_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmsub_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmsub_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmadd_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fnmadd_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmadd_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fnmadd_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmsub_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fnmsub_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmsub_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fnmsub_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., -5.);
assert_eq_m128d(r, e);
}
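// The *_round arithmetic variants take an explicit rounding-control immediate.
// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC requests truncation with floating-point
// exceptions suppressed; all operands below are exactly representable, so the
// chosen mode never changes the expected values.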
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_add_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_add_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_add_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_add_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_add_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_add_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_add_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_add_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_add_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_add_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_add_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sub_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_sub_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sub_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_sub_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sub_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sub_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_sub_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sub_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sub_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sub_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sub_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mul_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mul_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_mul_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_mul_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_mul_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_mul_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mul_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mul_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_mul_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_mul_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_mul_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_mul_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_div_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_div_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_div_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_div_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_div_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_div_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_div_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_div_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_div_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_div_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_div_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
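// max/min never round, so the immediate only carries the exception-suppression
// (SAE) choice; _MM_FROUND_CUR_DIRECTION leaves the MXCSR behavior as-is.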
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_max_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_max_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_max_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_max_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_max_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_max_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_max_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_max_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_max_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_max_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_max_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_min_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_min_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_min_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_min_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_min_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_min_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_min_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_min_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_min_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_min_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_min_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_min_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
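// sqrt is the one operation in this group where the mode could matter, but
// sqrt(4) = 2 is exact under every rounding direction.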
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sqrt_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_sqrt_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sqrt_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_sqrt_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sqrt_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sqrt_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_sqrt_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sqrt_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sqrt_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sqrt_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sqrt_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
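// The getexp/getmant/roundscale/scalef *_round tests repeat the checks above
// with the SAE/rounding immediate spelled out explicitly.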
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_round_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_round_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_getexp_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 2.);
assert_eq_m128(r, e);
let r = _mm_mask_getexp_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_round_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_getexp_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getexp_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_round_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_getexp_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_round_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_getexp_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 2.);
assert_eq_m128d(r, e);
let r = _mm_mask_getexp_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_round_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_getexp_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getexp_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_round_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_getmant_round_ss(
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_round_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_mask_getmant_round_ss(
a,
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 20.);
assert_eq_m128(r, e);
let r = _mm_mask_getmant_round_ss(
a,
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_round_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_maskz_getmant_round_ss(
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getmant_round_ss(
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_round_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_getmant_round_sd(
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_round_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_mask_getmant_round_sd(
a,
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 20.);
assert_eq_m128d(r, e);
let r = _mm_mask_getmant_round_sd(
a,
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_round_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_maskz_getmant_round_sd(
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getmant_round_sd(
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_roundscale_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_mask_roundscale_round_ss(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
assert_eq_m128(r, e);
let r = _mm_mask_roundscale_round_ss(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_maskz_roundscale_round_ss(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_roundscale_round_ss(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_roundscale_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_mask_roundscale_round_sd(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 2.2);
assert_eq_m128d(r, e);
let r = _mm_mask_roundscale_round_sd(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_maskz_roundscale_round_sd(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_roundscale_round_sd(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_scalef_round_ss(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
let r = _mm_mask_scalef_round_ss(
a,
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_scalef_round_ss(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_scalef_round_ss(
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_scalef_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_scalef_round_sd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_scalef_round_sd(
a,
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_scalef_round_sd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_scalef_round_sd(
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
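// Masked FMA *_round variants. _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC is
// the default nearest-even mode with exceptions suppressed, so the expected
// values match the plain masked FMA tests above.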
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fmadd_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmadd_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fmadd_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fmadd_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmadd_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmadd_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fmsub_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmsub_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fmsub_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fmsub_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmsub_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmsub_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fnmadd_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_maskz_fnmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmadd_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_mask3_fnmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmadd_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fnmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmadd_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_maskz_fnmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmadd_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_mask3_fnmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmadd_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fnmsub_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_maskz_fnmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmsub_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_mask3_fnmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmsub_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fnmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmsub_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_maskz_fnmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmsub_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_mask3_fnmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmsub_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., -5.);
assert_eq_m128d(r, e);
}
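// _mm_fixupimm_ss/_sd classify the lowest element of `b` and replace the
// destination element with a value selected by the 4-bit tokens in `c` (imm8
// only tweaks exception reporting). Here `b` is a positive value, and the
// corresponding token nibble of i32::MAX selects -0.0.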
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_fixupimm_ss(a, b, c, 5);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_mask_fixupimm_ss(a, 0b11111111, b, c, 5);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_maskz_fixupimm_ss(0b00000000, a, b, c, 5);
let e = _mm_set_ps(0., 0., 0., 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_fixupimm_ss(0b11111111, a, b, c, 5);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_fixupimm_sd(a, b, c, 5);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_mask_fixupimm_sd(a, 0b11111111, b, c, 5);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_maskz_fixupimm_sd(0b00000000, a, b, c, 5);
let e = _mm_set_pd(0., 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_fixupimm_sd(0b11111111, a, b, c, 5);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_round_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_fixupimm_round_ss(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_round_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_mask_fixupimm_round_ss(a, 0b11111111, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_round_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_maskz_fixupimm_round_ss(0b00000000, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_fixupimm_round_ss(0b11111111, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_round_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_fixupimm_round_sd(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_round_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_mask_fixupimm_round_sd(a, 0b11111111, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_round_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_maskz_fixupimm_round_sd(0b00000000, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_fixupimm_round_sd(0b11111111, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvtss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
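// cvtss_sd widens the low f32 of b to f64; with k = 0 the low lane comes from
// src (here a) instead, so the result equals a.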
let r = _mm_mask_cvtss_sd(a, 0, a, b);
assert_eq_m128d(r, a);
let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvtss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_maskz_cvtss_sd(0, a, b);
let e = _mm_set_pd(6., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvtsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_mask_cvtsd_ss(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvtsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_maskz_cvtsd_ss(0, a, b);
let e = _mm_set_ps(0., -0.5, 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvt_roundss_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvt_roundss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_mask_cvt_roundss_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m128d(r, a);
let r = _mm_mask_cvt_roundss_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvt_roundss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_maskz_cvt_roundss_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_cvt_roundss_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
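// -7.5 is exactly representable as f32, so the narrowing conversion is exact
// and the _MM_FROUND_TO_ZERO rounding override cannot change the result.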
let r = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvt_roundsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_mask_cvt_roundsd_ss(a, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r =
_mm_mask_cvt_roundsd_ss(a, 0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvt_roundsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_maskz_cvt_roundsd_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_cvt_roundsd_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_si32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
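// _MM_FROUND_TO_ZERO overrides MXCSR and rounds toward zero: -1.5 -> -1.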
let r = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvt_roundss_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
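// A negative source cannot be represented as u32; the conversion raises the
// invalid exception and returns the integer indefinite value 0xFFFF_FFFF.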
let r = _mm_cvt_roundss_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
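// Without a rounding override, MXCSR's default round-to-nearest-even applies: -1.5 -> -2.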
let r = _mm_cvtss_i32(a);
let e: i32 = -2;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtss_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_si32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvt_roundsd_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvt_roundsd_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvt_roundsd_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtsd_i32(a);
let e: i32 = -2;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtsd_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundi32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: i32 = 9;
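// 9 converts to 9.0 exactly, so the rounding override is irrelevant here.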
let r = _mm_cvt_roundi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsi32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: i32 = 9;
let r = _mm_cvt_roundsi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundu32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: u32 = 9;
let r = _mm_cvt_roundu32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvti32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: i32 = 9;
let r = _mm_cvti32_ss(a, b);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvti32_sd() {
let a = _mm_set_pd(1., -1.5);
let b: i32 = 9;
let r = _mm_cvti32_sd(a, b);
let e = _mm_set_pd(1., 9.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_si32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
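// The cvtt family always truncates; the sae argument only controls exception
// suppression. Truncating -1.5 yields -1 (this holds for all cvtt tests below).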
let r = _mm_cvtt_roundss_si32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtt_roundss_i32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtt_roundss_u32(a, _MM_FROUND_CUR_DIRECTION);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvttss_i32(a);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvttss_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_si32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtt_roundsd_si32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtt_roundsd_i32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtt_roundsd_u32(a, _MM_FROUND_CUR_DIRECTION);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvttsd_i32(a);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvttsd_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: u32 = 9;
let r = _mm_cvtu32_ss(a, b);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu32_sd() {
let a = _mm_set_pd(1., -1.5);
let b: u32 = 9;
let r = _mm_cvtu32_sd(a, b);
let e = _mm_set_pd(1., 9.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu64_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: u64 = 9;
let r = _mm_cvtu64_ss(a, b);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu64_sd() {
let a = _mm_set_pd(1., -1.5);
let b: u64 = 9;
let r = _mm_cvtu64_sd(a, b);
let e = _mm_set_pd(1., 9.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_comi_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
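// Predicate 0 is _CMP_EQ_OQ: 2.2 == 1.1 is false, so the comparison returns 0
// (the _sd test below mirrors this).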
let r = _mm_comi_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e: i32 = 0;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_comi_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_comi_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e: i32 = 0;
assert_eq!(r, e);
}
}