Rollup merge of #126417 - beetrees:f16-f128-inline-asm-x86, r=Amanieu

Add `f16` and `f128` inline ASM support for `x86` and `x86-64`

This PR adds `f16` and `f128` input and output support to inline ASM on `x86` and `x86-64`. `f16` vector sizes are taken from [here](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html).

Relevant issue: #125398
Tracking issue: #116909

``@rustbot`` label +F-f16_and_f128
This commit is contained in:
Matthias Krüger 2024-06-15 14:40:48 +02:00 committed by GitHub
commit 0f2cc21547
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 350 additions and 42 deletions

View File

@ -959,6 +959,43 @@ fn llvm_fixup_input<'ll, 'tcx>(
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
Abi::Vector { .. },
) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
&& s.primitive() == Primitive::Float(Float::F128) =>
{
bx.bitcast(value, bx.type_vector(bx.type_i32(), 4))
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F16) => {
let value = bx.insert_element(
bx.const_undef(bx.type_vector(bx.type_f16(), 8)),
value,
bx.const_usize(0),
);
bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Vector { element, count: count @ (8 | 16) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
}
(
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
Abi::Scalar(s),
@ -1036,6 +1073,39 @@ fn llvm_fixup_output<'ll, 'tcx>(
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
Abi::Vector { .. },
) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
&& s.primitive() == Primitive::Float(Float::F128) =>
{
bx.bitcast(value, bx.type_f128())
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F16) => {
let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8));
bx.extract_element(value, bx.const_usize(0))
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Vector { element, count: count @ (8 | 16) },
) if element.primitive() == Primitive::Float(Float::F16) => {
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
}
(
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
Abi::Scalar(s),
@ -1109,6 +1179,36 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
Abi::Vector { .. },
) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if cx.sess().asm_arch == Some(InlineAsmArch::X86)
&& s.primitive() == Primitive::Float(Float::F128) =>
{
cx.type_vector(cx.type_i32(), 4)
}
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8),
(
InlineAsmRegClass::X86(
X86InlineAsmRegClass::xmm_reg
| X86InlineAsmRegClass::ymm_reg
| X86InlineAsmRegClass::zmm_reg,
),
Abi::Vector { element, count: count @ (8 | 16) },
) if element.primitive() == Primitive::Float(Float::F16) => {
cx.type_vector(cx.type_i16(), count)
}
(
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
Abi::Scalar(s),

View File

@ -62,8 +62,10 @@ impl<'a, 'tcx> InlineAsmCtxt<'a, 'tcx> {
ty::Int(IntTy::I64) | ty::Uint(UintTy::U64) => Some(InlineAsmType::I64),
ty::Int(IntTy::I128) | ty::Uint(UintTy::U128) => Some(InlineAsmType::I128),
ty::Int(IntTy::Isize) | ty::Uint(UintTy::Usize) => Some(asm_ty_isize),
ty::Float(FloatTy::F16) => Some(InlineAsmType::F16),
ty::Float(FloatTy::F32) => Some(InlineAsmType::F32),
ty::Float(FloatTy::F64) => Some(InlineAsmType::F64),
ty::Float(FloatTy::F128) => Some(InlineAsmType::F128),
ty::FnPtr(_) => Some(asm_ty_isize),
ty::RawPtr(ty, _) if self.is_thin_ptr_ty(ty) => Some(asm_ty_isize),
ty::Adt(adt, args) if adt.repr().simd() => {
@ -105,8 +107,10 @@ impl<'a, 'tcx> InlineAsmCtxt<'a, 'tcx> {
width => bug!("unsupported pointer width: {width}"),
})
}
ty::Float(FloatTy::F16) => Some(InlineAsmType::VecF16(size)),
ty::Float(FloatTy::F32) => Some(InlineAsmType::VecF32(size)),
ty::Float(FloatTy::F64) => Some(InlineAsmType::VecF64(size)),
ty::Float(FloatTy::F128) => Some(InlineAsmType::VecF128(size)),
_ => None,
}
}

View File

@ -707,15 +707,19 @@ pub enum InlineAsmType {
I32,
I64,
I128,
F16,
F32,
F64,
F128,
VecI8(u64),
VecI16(u64),
VecI32(u64),
VecI64(u64),
VecI128(u64),
VecF16(u64),
VecF32(u64),
VecF64(u64),
VecF128(u64),
}
impl InlineAsmType {
@ -730,15 +734,19 @@ impl InlineAsmType {
Self::I32 => 4,
Self::I64 => 8,
Self::I128 => 16,
Self::F16 => 2,
Self::F32 => 4,
Self::F64 => 8,
Self::F128 => 16,
Self::VecI8(n) => n * 1,
Self::VecI16(n) => n * 2,
Self::VecI32(n) => n * 4,
Self::VecI64(n) => n * 8,
Self::VecI128(n) => n * 16,
Self::VecF16(n) => n * 2,
Self::VecF32(n) => n * 4,
Self::VecF64(n) => n * 8,
Self::VecF128(n) => n * 16,
})
}
}
@ -751,15 +759,19 @@ impl fmt::Display for InlineAsmType {
Self::I32 => f.write_str("i32"),
Self::I64 => f.write_str("i64"),
Self::I128 => f.write_str("i128"),
Self::F16 => f.write_str("f16"),
Self::F32 => f.write_str("f32"),
Self::F64 => f.write_str("f64"),
Self::F128 => f.write_str("f128"),
Self::VecI8(n) => write!(f, "i8x{n}"),
Self::VecI16(n) => write!(f, "i16x{n}"),
Self::VecI32(n) => write!(f, "i32x{n}"),
Self::VecI64(n) => write!(f, "i64x{n}"),
Self::VecI128(n) => write!(f, "i128x{n}"),
Self::VecF16(n) => write!(f, "f16x{n}"),
Self::VecF32(n) => write!(f, "f32x{n}"),
Self::VecF64(n) => write!(f, "f64x{n}"),
Self::VecF128(n) => write!(f, "f128x{n}"),
}
}
}

View File

@ -107,26 +107,26 @@ impl X86InlineAsmRegClass {
match self {
Self::reg | Self::reg_abcd => {
if arch == InlineAsmArch::X86_64 {
types! { _: I16, I32, I64, F32, F64; }
types! { _: I16, I32, I64, F16, F32, F64; }
} else {
types! { _: I16, I32, F32; }
types! { _: I16, I32, F16, F32; }
}
}
Self::reg_byte => types! { _: I8; },
Self::xmm_reg => types! {
sse: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
sse: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
},
Self::ymm_reg => types! {
avx: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4);
avx: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4);
},
Self::zmm_reg => types! {
avx512f: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4),
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF32(16), VecF64(8);
avx512f: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4),
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF16(32), VecF32(16), VecF64(8);
},
Self::kreg => types! {
avx512f: I8, I16;

View File

@ -7,7 +7,7 @@
//@ compile-flags: -C llvm-args=--x86-asm-syntax=intel
//@ compile-flags: -C target-feature=+avx512bw
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
#![feature(no_core, lang_items, rustc_attrs, repr_simd, f16, f128)]
#![crate_type = "rlib"]
#![no_core]
#![allow(asm_sub_register, non_camel_case_types)]
@ -41,6 +41,8 @@ pub struct i32x4(i32, i32, i32, i32);
#[repr(simd)]
pub struct i64x2(i64, i64);
#[repr(simd)]
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x4(f32, f32, f32, f32);
#[repr(simd)]
pub struct f64x2(f64, f64);
@ -87,6 +89,8 @@ pub struct i32x8(i32, i32, i32, i32, i32, i32, i32, i32);
#[repr(simd)]
pub struct i64x4(i64, i64, i64, i64);
#[repr(simd)]
pub struct f16x16(f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x8(f32, f32, f32, f32, f32, f32, f32, f32);
#[repr(simd)]
pub struct f64x4(f64, f64, f64, f64);
@ -198,35 +202,59 @@ pub struct i32x16(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i3
#[repr(simd)]
pub struct i64x8(i64, i64, i64, i64, i64, i64, i64, i64);
#[repr(simd)]
pub struct f16x32(
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
f16,
);
#[repr(simd)]
pub struct f32x16(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32);
#[repr(simd)]
pub struct f64x8(f64, f64, f64, f64, f64, f64, f64, f64);
impl Copy for i8 {}
impl Copy for i16 {}
impl Copy for i32 {}
impl Copy for f32 {}
impl Copy for i64 {}
impl Copy for f64 {}
impl Copy for ptr {}
impl Copy for i8x16 {}
impl Copy for i16x8 {}
impl Copy for i32x4 {}
impl Copy for i64x2 {}
impl Copy for f32x4 {}
impl Copy for f64x2 {}
impl Copy for i8x32 {}
impl Copy for i16x16 {}
impl Copy for i32x8 {}
impl Copy for i64x4 {}
impl Copy for f32x8 {}
impl Copy for f64x4 {}
impl Copy for i8x64 {}
impl Copy for i16x32 {}
impl Copy for i32x16 {}
impl Copy for i64x8 {}
impl Copy for f32x16 {}
impl Copy for f64x8 {}
macro_rules! impl_copy {
($($ty:ident)*) => {
$(
impl Copy for $ty {}
)*
};
}
impl_copy!(
i8 i16 f16 i32 f32 i64 f64 f128 ptr
i8x16 i16x8 i32x4 i64x2 f16x8 f32x4 f64x2
i8x32 i16x16 i32x8 i64x4 f16x16 f32x8 f64x4
i8x64 i16x32 i32x16 i64x8 f16x32 f32x16 f64x8
);
extern "C" {
fn extern_func();
@ -292,6 +320,13 @@ macro_rules! check_reg {
// CHECK: #NO_APP
check!(reg_i16 i16 reg "mov");
// CHECK-LABEL: reg_f16:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
// i686: mov e{{[a-z0-9]+}}, e{{[a-z0-9]+}}
// CHECK: #NO_APP
check!(reg_f16 f16 reg "mov");
// CHECK-LABEL: reg_i32:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
@ -334,6 +369,13 @@ check!(reg_ptr ptr reg "mov");
// CHECK: #NO_APP
check!(reg_abcd_i16 i16 reg_abcd "mov");
// CHECK-LABEL: reg_abcd_f16:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
// i686: mov e{{[a-z0-9]+}}, e{{[a-z0-9]+}}
// CHECK: #NO_APP
check!(reg_abcd_f16 f16 reg_abcd "mov");
// CHECK-LABEL: reg_abcd_i32:
// CHECK: #APP
// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}}
@ -375,6 +417,12 @@ check!(reg_abcd_ptr ptr reg_abcd "mov");
// CHECK: #NO_APP
check!(reg_byte i8 reg_byte "mov");
// CHECK-LABEL: xmm_reg_f16:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
// CHECK: #NO_APP
check!(xmm_reg_f16 f16 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_i32:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
@ -399,6 +447,12 @@ check!(xmm_reg_i64 i64 xmm_reg "movaps");
// CHECK: #NO_APP
check!(xmm_reg_f64 f64 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_f128:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
// CHECK: #NO_APP
check!(xmm_reg_f128 f128 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_ptr:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
@ -429,6 +483,12 @@ check!(xmm_reg_i32x4 i32x4 xmm_reg "movaps");
// CHECK: #NO_APP
check!(xmm_reg_i64x2 i64x2 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_f16x8:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
// CHECK: #NO_APP
check!(xmm_reg_f16x8 f16x8 xmm_reg "movaps");
// CHECK-LABEL: xmm_reg_f32x4:
// CHECK: #APP
// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}}
@ -441,6 +501,12 @@ check!(xmm_reg_f32x4 f32x4 xmm_reg "movaps");
// CHECK: #NO_APP
check!(xmm_reg_f64x2 f64x2 xmm_reg "movaps");
// CHECK-LABEL: ymm_reg_f16:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f16 f16 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_i32:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -465,6 +531,12 @@ check!(ymm_reg_i64 i64 ymm_reg "vmovaps");
// CHECK: #NO_APP
check!(ymm_reg_f64 f64 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f128:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f128 f128 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_ptr:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -495,6 +567,12 @@ check!(ymm_reg_i32x4 i32x4 ymm_reg "vmovaps");
// CHECK: #NO_APP
check!(ymm_reg_i64x2 i64x2 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f16x8:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f16x8 f16x8 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f32x4:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -531,6 +609,12 @@ check!(ymm_reg_i32x8 i32x8 ymm_reg "vmovaps");
// CHECK: #NO_APP
check!(ymm_reg_i64x4 i64x4 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f16x16:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
// CHECK: #NO_APP
check!(ymm_reg_f16x16 f16x16 ymm_reg "vmovaps");
// CHECK-LABEL: ymm_reg_f32x8:
// CHECK: #APP
// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}}
@ -543,6 +627,12 @@ check!(ymm_reg_f32x8 f32x8 ymm_reg "vmovaps");
// CHECK: #NO_APP
check!(ymm_reg_f64x4 f64x4 ymm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16 f16 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_i32:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -567,6 +657,12 @@ check!(zmm_reg_i64 i64 zmm_reg "vmovaps");
// CHECK: #NO_APP
check!(zmm_reg_f64 f64 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f128:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f128 f128 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_ptr:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -597,6 +693,12 @@ check!(zmm_reg_i32x4 i32x4 zmm_reg "vmovaps");
// CHECK: #NO_APP
check!(zmm_reg_i64x2 i64x2 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16x8:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16x8 f16x8 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f32x4:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -633,6 +735,12 @@ check!(zmm_reg_i32x8 i32x8 zmm_reg "vmovaps");
// CHECK: #NO_APP
check!(zmm_reg_i64x4 i64x4 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16x16:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16x16 f16x16 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f32x8:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -669,6 +777,12 @@ check!(zmm_reg_i32x16 i32x16 zmm_reg "vmovaps");
// CHECK: #NO_APP
check!(zmm_reg_i64x8 i64x8 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f16x32:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
// CHECK: #NO_APP
check!(zmm_reg_f16x32 f16x32 zmm_reg "vmovaps");
// CHECK-LABEL: zmm_reg_f32x16:
// CHECK: #APP
// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}}
@ -717,6 +831,12 @@ check!(kreg_ptr ptr kreg "kmovq");
// CHECK: #NO_APP
check_reg!(eax_i16 i16 "eax" "mov");
// CHECK-LABEL: eax_f16:
// CHECK: #APP
// CHECK: mov eax, eax
// CHECK: #NO_APP
check_reg!(eax_f16 f16 "eax" "mov");
// CHECK-LABEL: eax_i32:
// CHECK: #APP
// CHECK: mov eax, eax
@ -756,6 +876,12 @@ check_reg!(eax_ptr ptr "eax" "mov");
#[cfg(i686)]
check_reg!(ah_byte i8 "ah" "mov");
// CHECK-LABEL: xmm0_f16:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
// CHECK: #NO_APP
check_reg!(xmm0_f16 f16 "xmm0" "movaps");
// CHECK-LABEL: xmm0_i32:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
@ -780,6 +906,12 @@ check_reg!(xmm0_i64 i64 "xmm0" "movaps");
// CHECK: #NO_APP
check_reg!(xmm0_f64 f64 "xmm0" "movaps");
// CHECK-LABEL: xmm0_f128:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
// CHECK: #NO_APP
check_reg!(xmm0_f128 f128 "xmm0" "movaps");
// CHECK-LABEL: xmm0_ptr:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
@ -810,6 +942,12 @@ check_reg!(xmm0_i32x4 i32x4 "xmm0" "movaps");
// CHECK: #NO_APP
check_reg!(xmm0_i64x2 i64x2 "xmm0" "movaps");
// CHECK-LABEL: xmm0_f16x8:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
// CHECK: #NO_APP
check_reg!(xmm0_f16x8 f16x8 "xmm0" "movaps");
// CHECK-LABEL: xmm0_f32x4:
// CHECK: #APP
// CHECK: movaps xmm0, xmm0
@ -822,6 +960,12 @@ check_reg!(xmm0_f32x4 f32x4 "xmm0" "movaps");
// CHECK: #NO_APP
check_reg!(xmm0_f64x2 f64x2 "xmm0" "movaps");
// CHECK-LABEL: ymm0_f16:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f16 f16 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_i32:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -846,6 +990,12 @@ check_reg!(ymm0_i64 i64 "ymm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(ymm0_f64 f64 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f128:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f128 f128 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_ptr:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -876,6 +1026,12 @@ check_reg!(ymm0_i32x4 i32x4 "ymm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(ymm0_i64x2 i64x2 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f16x8:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f16x8 f16x8 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f32x4:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -912,6 +1068,12 @@ check_reg!(ymm0_i32x8 i32x8 "ymm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(ymm0_i64x4 i64x4 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f16x16:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
// CHECK: #NO_APP
check_reg!(ymm0_f16x16 f16x16 "ymm0" "vmovaps");
// CHECK-LABEL: ymm0_f32x8:
// CHECK: #APP
// CHECK: vmovaps ymm0, ymm0
@ -924,6 +1086,12 @@ check_reg!(ymm0_f32x8 f32x8 "ymm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(ymm0_f64x4 f64x4 "ymm0" "vmovaps");
// CHECK-LABEL: zmm0_f16:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16 f16 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_i32:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -948,6 +1116,12 @@ check_reg!(zmm0_i64 i64 "zmm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(zmm0_f64 f64 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f128:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f128 f128 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_ptr:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -978,6 +1152,12 @@ check_reg!(zmm0_i32x4 i32x4 "zmm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(zmm0_i64x2 i64x2 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f16x8:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16x8 f16x8 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f32x4:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -1014,6 +1194,12 @@ check_reg!(zmm0_i32x8 i32x8 "zmm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(zmm0_i64x4 i64x4 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f16x16:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16x16 f16x16 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f32x8:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
@ -1050,6 +1236,12 @@ check_reg!(zmm0_i32x16 i32x16 "zmm0" "vmovaps");
// CHECK: #NO_APP
check_reg!(zmm0_i64x8 i64x8 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f16x32:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0
// CHECK: #NO_APP
check_reg!(zmm0_f16x32 f16x32 "zmm0" "vmovaps");
// CHECK-LABEL: zmm0_f32x16:
// CHECK: #APP
// CHECK: vmovaps zmm0, zmm0

View File

@ -4,7 +4,7 @@ error: type `i128` cannot be used with this register class
LL | asm!("{}", in(reg) 0i128);
| ^^^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
error: type `__m128` cannot be used with this register class
--> $DIR/type-check-3.rs:16:28
@ -12,7 +12,7 @@ error: type `__m128` cannot be used with this register class
LL | asm!("{}", in(reg) _mm_setzero_ps());
| ^^^^^^^^^^^^^^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
error: type `__m256` cannot be used with this register class
--> $DIR/type-check-3.rs:18:28
@ -20,7 +20,7 @@ error: type `__m256` cannot be used with this register class
LL | asm!("{}", in(reg) _mm256_setzero_ps());
| ^^^^^^^^^^^^^^^^^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
error: type `u8` cannot be used with this register class
--> $DIR/type-check-3.rs:20:32
@ -28,7 +28,7 @@ error: type `u8` cannot be used with this register class
LL | asm!("{}", in(xmm_reg) 0u8);
| ^^^
|
= note: register class `xmm_reg` supports these types: i32, i64, f32, f64, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2
= note: register class `xmm_reg` supports these types: i32, i64, f16, f32, f64, f128, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2
error: `avx512bw` target feature is not enabled
--> $DIR/type-check-3.rs:29:29
@ -81,7 +81,7 @@ error: type `i8` cannot be used with this register class
LL | asm!("{}", in(reg) 0i8);
| ^^^
|
= note: register class `reg` supports these types: i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64
= help: consider using the `reg_byte` register class instead
error: incompatible types for asm inout argument