From dfc55145270c5ec8de1bcd19ae05f056100108a5 Mon Sep 17 00:00:00 2001 From: beetrees Date: Thu, 13 Jun 2024 16:09:45 +0100 Subject: [PATCH] Add `f16` and `f128` inline ASM support for `x86` and `x86-64` --- compiler/rustc_codegen_llvm/src/asm.rs | 100 +++++++ .../src/check/intrinsicck.rs | 4 + compiler/rustc_target/src/asm/mod.rs | 12 + compiler/rustc_target/src/asm/x86.rs | 22 +- tests/assembly/asm/x86-types.rs | 244 ++++++++++++++++-- tests/ui/asm/x86_64/type-check-3.stderr | 10 +- 6 files changed, 350 insertions(+), 42 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index db28c6857b7..60e63b956db 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -959,6 +959,43 @@ fn llvm_fixup_input<'ll, 'tcx>( InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg), Abi::Vector { .. }, ) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)), + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if bx.sess().asm_arch == Some(InlineAsmArch::X86) + && s.primitive() == Primitive::Float(Float::F128) => + { + bx.bitcast(value, bx.type_vector(bx.type_i32(), 4)) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if s.primitive() == Primitive::Float(Float::F16) => { + let value = bx.insert_element( + bx.const_undef(bx.type_vector(bx.type_f16(), 8)), + value, + bx.const_usize(0), + ); + bx.bitcast(value, bx.type_vector(bx.type_i16(), 8)) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Vector { element, count: count @ (8 | 16) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + bx.bitcast(value, bx.type_vector(bx.type_i16(), count)) + } ( InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16), Abi::Scalar(s), @@ -1036,6 +1073,39 @@ fn llvm_fixup_output<'ll, 'tcx>( InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg), Abi::Vector { .. }, ) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)), + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if bx.sess().asm_arch == Some(InlineAsmArch::X86) + && s.primitive() == Primitive::Float(Float::F128) => + { + bx.bitcast(value, bx.type_f128()) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if s.primitive() == Primitive::Float(Float::F16) => { + let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8)); + bx.extract_element(value, bx.const_usize(0)) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Vector { element, count: count @ (8 | 16) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + bx.bitcast(value, bx.type_vector(bx.type_f16(), count)) + } ( InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16), Abi::Scalar(s), @@ -1109,6 +1179,36 @@ fn llvm_fixup_output_type<'ll, 'tcx>( InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg), Abi::Vector { .. }, ) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8), + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if cx.sess().asm_arch == Some(InlineAsmArch::X86) + && s.primitive() == Primitive::Float(Float::F128) => + { + cx.type_vector(cx.type_i32(), 4) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8), + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Vector { element, count: count @ (8 | 16) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + cx.type_vector(cx.type_i16(), count) + } ( InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16), Abi::Scalar(s), diff --git a/compiler/rustc_hir_analysis/src/check/intrinsicck.rs b/compiler/rustc_hir_analysis/src/check/intrinsicck.rs index 2672614a895..88cbd0217f2 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsicck.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsicck.rs @@ -62,8 +62,10 @@ impl<'a, 'tcx> InlineAsmCtxt<'a, 'tcx> { ty::Int(IntTy::I64) | ty::Uint(UintTy::U64) => Some(InlineAsmType::I64), ty::Int(IntTy::I128) | ty::Uint(UintTy::U128) => Some(InlineAsmType::I128), ty::Int(IntTy::Isize) | ty::Uint(UintTy::Usize) => Some(asm_ty_isize), + ty::Float(FloatTy::F16) => Some(InlineAsmType::F16), ty::Float(FloatTy::F32) => Some(InlineAsmType::F32), ty::Float(FloatTy::F64) => Some(InlineAsmType::F64), + ty::Float(FloatTy::F128) => Some(InlineAsmType::F128), ty::FnPtr(_) => Some(asm_ty_isize), ty::RawPtr(ty, _) if self.is_thin_ptr_ty(ty) => Some(asm_ty_isize), ty::Adt(adt, args) if adt.repr().simd() => { @@ -105,8 +107,10 @@ impl<'a, 'tcx> InlineAsmCtxt<'a, 'tcx> { width => bug!("unsupported pointer width: {width}"), }) } + ty::Float(FloatTy::F16) => Some(InlineAsmType::VecF16(size)), ty::Float(FloatTy::F32) => Some(InlineAsmType::VecF32(size)), ty::Float(FloatTy::F64) => Some(InlineAsmType::VecF64(size)), + ty::Float(FloatTy::F128) => Some(InlineAsmType::VecF128(size)), _ => None, } } diff --git a/compiler/rustc_target/src/asm/mod.rs b/compiler/rustc_target/src/asm/mod.rs index 5f4ce5ed599..b057bf94a08 100644 --- a/compiler/rustc_target/src/asm/mod.rs +++ b/compiler/rustc_target/src/asm/mod.rs @@ -707,15 +707,19 @@ pub enum InlineAsmType { I32, I64, I128, + F16, F32, F64, + F128, VecI8(u64), VecI16(u64), VecI32(u64), VecI64(u64), VecI128(u64), + VecF16(u64), VecF32(u64), VecF64(u64), + VecF128(u64), } impl InlineAsmType { @@ -730,15 +734,19 @@ impl InlineAsmType { Self::I32 => 4, Self::I64 => 8, Self::I128 => 16, + Self::F16 => 2, Self::F32 => 4, Self::F64 => 8, + Self::F128 => 16, Self::VecI8(n) => n * 1, Self::VecI16(n) => n * 2, Self::VecI32(n) => n * 4, Self::VecI64(n) => n * 8, Self::VecI128(n) => n * 16, + Self::VecF16(n) => n * 2, Self::VecF32(n) => n * 4, Self::VecF64(n) => n * 8, + Self::VecF128(n) => n * 16, }) } } @@ -751,15 +759,19 @@ impl fmt::Display for InlineAsmType { Self::I32 => f.write_str("i32"), Self::I64 => f.write_str("i64"), Self::I128 => f.write_str("i128"), + Self::F16 => f.write_str("f16"), Self::F32 => f.write_str("f32"), Self::F64 => f.write_str("f64"), + Self::F128 => f.write_str("f128"), Self::VecI8(n) => write!(f, "i8x{n}"), Self::VecI16(n) => write!(f, "i16x{n}"), Self::VecI32(n) => write!(f, "i32x{n}"), Self::VecI64(n) => write!(f, "i64x{n}"), Self::VecI128(n) => write!(f, "i128x{n}"), + Self::VecF16(n) => write!(f, "f16x{n}"), Self::VecF32(n) => write!(f, "f32x{n}"), Self::VecF64(n) => write!(f, "f64x{n}"), + Self::VecF128(n) => write!(f, "f128x{n}"), } } } diff --git a/compiler/rustc_target/src/asm/x86.rs b/compiler/rustc_target/src/asm/x86.rs index 28413a5bcbd..8452961c17c 100644 --- a/compiler/rustc_target/src/asm/x86.rs +++ b/compiler/rustc_target/src/asm/x86.rs @@ -107,26 +107,26 @@ impl X86InlineAsmRegClass { match self { Self::reg | Self::reg_abcd => { if arch == InlineAsmArch::X86_64 { - types! { _: I16, I32, I64, F32, F64; } + types! { _: I16, I32, I64, F16, F32, F64; } } else { - types! { _: I16, I32, F32; } + types! { _: I16, I32, F16, F32; } } } Self::reg_byte => types! { _: I8; }, Self::xmm_reg => types! { - sse: I32, I64, F32, F64, - VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2); + sse: I32, I64, F16, F32, F64, F128, + VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2); }, Self::ymm_reg => types! { - avx: I32, I64, F32, F64, - VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2), - VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4); + avx: I32, I64, F16, F32, F64, F128, + VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2), + VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4); }, Self::zmm_reg => types! { - avx512f: I32, I64, F32, F64, - VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2), - VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4), - VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF32(16), VecF64(8); + avx512f: I32, I64, F16, F32, F64, F128, + VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2), + VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4), + VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF16(32), VecF32(16), VecF64(8); }, Self::kreg => types! { avx512f: I8, I16; diff --git a/tests/assembly/asm/x86-types.rs b/tests/assembly/asm/x86-types.rs index 2b4ebb05349..8e229614420 100644 --- a/tests/assembly/asm/x86-types.rs +++ b/tests/assembly/asm/x86-types.rs @@ -7,7 +7,7 @@ //@ compile-flags: -C llvm-args=--x86-asm-syntax=intel //@ compile-flags: -C target-feature=+avx512bw -#![feature(no_core, lang_items, rustc_attrs, repr_simd)] +#![feature(no_core, lang_items, rustc_attrs, repr_simd, f16, f128)] #![crate_type = "rlib"] #![no_core] #![allow(asm_sub_register, non_camel_case_types)] @@ -41,6 +41,8 @@ pub struct i32x4(i32, i32, i32, i32); #[repr(simd)] pub struct i64x2(i64, i64); #[repr(simd)] +pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16); +#[repr(simd)] pub struct f32x4(f32, f32, f32, f32); #[repr(simd)] pub struct f64x2(f64, f64); @@ -87,6 +89,8 @@ pub struct i32x8(i32, i32, i32, i32, i32, i32, i32, i32); #[repr(simd)] pub struct i64x4(i64, i64, i64, i64); #[repr(simd)] +pub struct f16x16(f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16, f16); +#[repr(simd)] pub struct f32x8(f32, f32, f32, f32, f32, f32, f32, f32); #[repr(simd)] pub struct f64x4(f64, f64, f64, f64); @@ -198,35 +202,59 @@ pub struct i32x16(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i3 #[repr(simd)] pub struct i64x8(i64, i64, i64, i64, i64, i64, i64, i64); #[repr(simd)] +pub struct f16x32( + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, + f16, +); +#[repr(simd)] pub struct f32x16(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32); #[repr(simd)] pub struct f64x8(f64, f64, f64, f64, f64, f64, f64, f64); -impl Copy for i8 {} -impl Copy for i16 {} -impl Copy for i32 {} -impl Copy for f32 {} -impl Copy for i64 {} -impl Copy for f64 {} -impl Copy for ptr {} -impl Copy for i8x16 {} -impl Copy for i16x8 {} -impl Copy for i32x4 {} -impl Copy for i64x2 {} -impl Copy for f32x4 {} -impl Copy for f64x2 {} -impl Copy for i8x32 {} -impl Copy for i16x16 {} -impl Copy for i32x8 {} -impl Copy for i64x4 {} -impl Copy for f32x8 {} -impl Copy for f64x4 {} -impl Copy for i8x64 {} -impl Copy for i16x32 {} -impl Copy for i32x16 {} -impl Copy for i64x8 {} -impl Copy for f32x16 {} -impl Copy for f64x8 {} +macro_rules! impl_copy { + ($($ty:ident)*) => { + $( + impl Copy for $ty {} + )* + }; +} + +impl_copy!( + i8 i16 f16 i32 f32 i64 f64 f128 ptr + i8x16 i16x8 i32x4 i64x2 f16x8 f32x4 f64x2 + i8x32 i16x16 i32x8 i64x4 f16x16 f32x8 f64x4 + i8x64 i16x32 i32x16 i64x8 f16x32 f32x16 f64x8 +); extern "C" { fn extern_func(); @@ -292,6 +320,13 @@ macro_rules! check_reg { // CHECK: #NO_APP check!(reg_i16 i16 reg "mov"); +// CHECK-LABEL: reg_f16: +// CHECK: #APP +// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}} +// i686: mov e{{[a-z0-9]+}}, e{{[a-z0-9]+}} +// CHECK: #NO_APP +check!(reg_f16 f16 reg "mov"); + // CHECK-LABEL: reg_i32: // CHECK: #APP // x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}} @@ -334,6 +369,13 @@ check!(reg_ptr ptr reg "mov"); // CHECK: #NO_APP check!(reg_abcd_i16 i16 reg_abcd "mov"); +// CHECK-LABEL: reg_abcd_f16: +// CHECK: #APP +// x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}} +// i686: mov e{{[a-z0-9]+}}, e{{[a-z0-9]+}} +// CHECK: #NO_APP +check!(reg_abcd_f16 f16 reg_abcd "mov"); + // CHECK-LABEL: reg_abcd_i32: // CHECK: #APP // x86_64: mov r{{[a-z0-9]+}}, r{{[a-z0-9]+}} @@ -375,6 +417,12 @@ check!(reg_abcd_ptr ptr reg_abcd "mov"); // CHECK: #NO_APP check!(reg_byte i8 reg_byte "mov"); +// CHECK-LABEL: xmm_reg_f16: +// CHECK: #APP +// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}} +// CHECK: #NO_APP +check!(xmm_reg_f16 f16 xmm_reg "movaps"); + // CHECK-LABEL: xmm_reg_i32: // CHECK: #APP // CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}} @@ -399,6 +447,12 @@ check!(xmm_reg_i64 i64 xmm_reg "movaps"); // CHECK: #NO_APP check!(xmm_reg_f64 f64 xmm_reg "movaps"); +// CHECK-LABEL: xmm_reg_f128: +// CHECK: #APP +// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}} +// CHECK: #NO_APP +check!(xmm_reg_f128 f128 xmm_reg "movaps"); + // CHECK-LABEL: xmm_reg_ptr: // CHECK: #APP // CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}} @@ -429,6 +483,12 @@ check!(xmm_reg_i32x4 i32x4 xmm_reg "movaps"); // CHECK: #NO_APP check!(xmm_reg_i64x2 i64x2 xmm_reg "movaps"); +// CHECK-LABEL: xmm_reg_f16x8: +// CHECK: #APP +// CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}} +// CHECK: #NO_APP +check!(xmm_reg_f16x8 f16x8 xmm_reg "movaps"); + // CHECK-LABEL: xmm_reg_f32x4: // CHECK: #APP // CHECK: movaps xmm{{[0-9]+}}, xmm{{[0-9]+}} @@ -441,6 +501,12 @@ check!(xmm_reg_f32x4 f32x4 xmm_reg "movaps"); // CHECK: #NO_APP check!(xmm_reg_f64x2 f64x2 xmm_reg "movaps"); +// CHECK-LABEL: ymm_reg_f16: +// CHECK: #APP +// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} +// CHECK: #NO_APP +check!(ymm_reg_f16 f16 ymm_reg "vmovaps"); + // CHECK-LABEL: ymm_reg_i32: // CHECK: #APP // CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} @@ -465,6 +531,12 @@ check!(ymm_reg_i64 i64 ymm_reg "vmovaps"); // CHECK: #NO_APP check!(ymm_reg_f64 f64 ymm_reg "vmovaps"); +// CHECK-LABEL: ymm_reg_f128: +// CHECK: #APP +// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} +// CHECK: #NO_APP +check!(ymm_reg_f128 f128 ymm_reg "vmovaps"); + // CHECK-LABEL: ymm_reg_ptr: // CHECK: #APP // CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} @@ -495,6 +567,12 @@ check!(ymm_reg_i32x4 i32x4 ymm_reg "vmovaps"); // CHECK: #NO_APP check!(ymm_reg_i64x2 i64x2 ymm_reg "vmovaps"); +// CHECK-LABEL: ymm_reg_f16x8: +// CHECK: #APP +// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} +// CHECK: #NO_APP +check!(ymm_reg_f16x8 f16x8 ymm_reg "vmovaps"); + // CHECK-LABEL: ymm_reg_f32x4: // CHECK: #APP // CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} @@ -531,6 +609,12 @@ check!(ymm_reg_i32x8 i32x8 ymm_reg "vmovaps"); // CHECK: #NO_APP check!(ymm_reg_i64x4 i64x4 ymm_reg "vmovaps"); +// CHECK-LABEL: ymm_reg_f16x16: +// CHECK: #APP +// CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} +// CHECK: #NO_APP +check!(ymm_reg_f16x16 f16x16 ymm_reg "vmovaps"); + // CHECK-LABEL: ymm_reg_f32x8: // CHECK: #APP // CHECK: vmovaps ymm{{[0-9]+}}, ymm{{[0-9]+}} @@ -543,6 +627,12 @@ check!(ymm_reg_f32x8 f32x8 ymm_reg "vmovaps"); // CHECK: #NO_APP check!(ymm_reg_f64x4 f64x4 ymm_reg "vmovaps"); +// CHECK-LABEL: zmm_reg_f16: +// CHECK: #APP +// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} +// CHECK: #NO_APP +check!(zmm_reg_f16 f16 zmm_reg "vmovaps"); + // CHECK-LABEL: zmm_reg_i32: // CHECK: #APP // CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} @@ -567,6 +657,12 @@ check!(zmm_reg_i64 i64 zmm_reg "vmovaps"); // CHECK: #NO_APP check!(zmm_reg_f64 f64 zmm_reg "vmovaps"); +// CHECK-LABEL: zmm_reg_f128: +// CHECK: #APP +// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} +// CHECK: #NO_APP +check!(zmm_reg_f128 f128 zmm_reg "vmovaps"); + // CHECK-LABEL: zmm_reg_ptr: // CHECK: #APP // CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} @@ -597,6 +693,12 @@ check!(zmm_reg_i32x4 i32x4 zmm_reg "vmovaps"); // CHECK: #NO_APP check!(zmm_reg_i64x2 i64x2 zmm_reg "vmovaps"); +// CHECK-LABEL: zmm_reg_f16x8: +// CHECK: #APP +// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} +// CHECK: #NO_APP +check!(zmm_reg_f16x8 f16x8 zmm_reg "vmovaps"); + // CHECK-LABEL: zmm_reg_f32x4: // CHECK: #APP // CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} @@ -633,6 +735,12 @@ check!(zmm_reg_i32x8 i32x8 zmm_reg "vmovaps"); // CHECK: #NO_APP check!(zmm_reg_i64x4 i64x4 zmm_reg "vmovaps"); +// CHECK-LABEL: zmm_reg_f16x16: +// CHECK: #APP +// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} +// CHECK: #NO_APP +check!(zmm_reg_f16x16 f16x16 zmm_reg "vmovaps"); + // CHECK-LABEL: zmm_reg_f32x8: // CHECK: #APP // CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} @@ -669,6 +777,12 @@ check!(zmm_reg_i32x16 i32x16 zmm_reg "vmovaps"); // CHECK: #NO_APP check!(zmm_reg_i64x8 i64x8 zmm_reg "vmovaps"); +// CHECK-LABEL: zmm_reg_f16x32: +// CHECK: #APP +// CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} +// CHECK: #NO_APP +check!(zmm_reg_f16x32 f16x32 zmm_reg "vmovaps"); + // CHECK-LABEL: zmm_reg_f32x16: // CHECK: #APP // CHECK: vmovaps zmm{{[0-9]+}}, zmm{{[0-9]+}} @@ -717,6 +831,12 @@ check!(kreg_ptr ptr kreg "kmovq"); // CHECK: #NO_APP check_reg!(eax_i16 i16 "eax" "mov"); +// CHECK-LABEL: eax_f16: +// CHECK: #APP +// CHECK: mov eax, eax +// CHECK: #NO_APP +check_reg!(eax_f16 f16 "eax" "mov"); + // CHECK-LABEL: eax_i32: // CHECK: #APP // CHECK: mov eax, eax @@ -756,6 +876,12 @@ check_reg!(eax_ptr ptr "eax" "mov"); #[cfg(i686)] check_reg!(ah_byte i8 "ah" "mov"); +// CHECK-LABEL: xmm0_f16: +// CHECK: #APP +// CHECK: movaps xmm0, xmm0 +// CHECK: #NO_APP +check_reg!(xmm0_f16 f16 "xmm0" "movaps"); + // CHECK-LABEL: xmm0_i32: // CHECK: #APP // CHECK: movaps xmm0, xmm0 @@ -780,6 +906,12 @@ check_reg!(xmm0_i64 i64 "xmm0" "movaps"); // CHECK: #NO_APP check_reg!(xmm0_f64 f64 "xmm0" "movaps"); +// CHECK-LABEL: xmm0_f128: +// CHECK: #APP +// CHECK: movaps xmm0, xmm0 +// CHECK: #NO_APP +check_reg!(xmm0_f128 f128 "xmm0" "movaps"); + // CHECK-LABEL: xmm0_ptr: // CHECK: #APP // CHECK: movaps xmm0, xmm0 @@ -810,6 +942,12 @@ check_reg!(xmm0_i32x4 i32x4 "xmm0" "movaps"); // CHECK: #NO_APP check_reg!(xmm0_i64x2 i64x2 "xmm0" "movaps"); +// CHECK-LABEL: xmm0_f16x8: +// CHECK: #APP +// CHECK: movaps xmm0, xmm0 +// CHECK: #NO_APP +check_reg!(xmm0_f16x8 f16x8 "xmm0" "movaps"); + // CHECK-LABEL: xmm0_f32x4: // CHECK: #APP // CHECK: movaps xmm0, xmm0 @@ -822,6 +960,12 @@ check_reg!(xmm0_f32x4 f32x4 "xmm0" "movaps"); // CHECK: #NO_APP check_reg!(xmm0_f64x2 f64x2 "xmm0" "movaps"); +// CHECK-LABEL: ymm0_f16: +// CHECK: #APP +// CHECK: vmovaps ymm0, ymm0 +// CHECK: #NO_APP +check_reg!(ymm0_f16 f16 "ymm0" "vmovaps"); + // CHECK-LABEL: ymm0_i32: // CHECK: #APP // CHECK: vmovaps ymm0, ymm0 @@ -846,6 +990,12 @@ check_reg!(ymm0_i64 i64 "ymm0" "vmovaps"); // CHECK: #NO_APP check_reg!(ymm0_f64 f64 "ymm0" "vmovaps"); +// CHECK-LABEL: ymm0_f128: +// CHECK: #APP +// CHECK: vmovaps ymm0, ymm0 +// CHECK: #NO_APP +check_reg!(ymm0_f128 f128 "ymm0" "vmovaps"); + // CHECK-LABEL: ymm0_ptr: // CHECK: #APP // CHECK: vmovaps ymm0, ymm0 @@ -876,6 +1026,12 @@ check_reg!(ymm0_i32x4 i32x4 "ymm0" "vmovaps"); // CHECK: #NO_APP check_reg!(ymm0_i64x2 i64x2 "ymm0" "vmovaps"); +// CHECK-LABEL: ymm0_f16x8: +// CHECK: #APP +// CHECK: vmovaps ymm0, ymm0 +// CHECK: #NO_APP +check_reg!(ymm0_f16x8 f16x8 "ymm0" "vmovaps"); + // CHECK-LABEL: ymm0_f32x4: // CHECK: #APP // CHECK: vmovaps ymm0, ymm0 @@ -912,6 +1068,12 @@ check_reg!(ymm0_i32x8 i32x8 "ymm0" "vmovaps"); // CHECK: #NO_APP check_reg!(ymm0_i64x4 i64x4 "ymm0" "vmovaps"); +// CHECK-LABEL: ymm0_f16x16: +// CHECK: #APP +// CHECK: vmovaps ymm0, ymm0 +// CHECK: #NO_APP +check_reg!(ymm0_f16x16 f16x16 "ymm0" "vmovaps"); + // CHECK-LABEL: ymm0_f32x8: // CHECK: #APP // CHECK: vmovaps ymm0, ymm0 @@ -924,6 +1086,12 @@ check_reg!(ymm0_f32x8 f32x8 "ymm0" "vmovaps"); // CHECK: #NO_APP check_reg!(ymm0_f64x4 f64x4 "ymm0" "vmovaps"); +// CHECK-LABEL: zmm0_f16: +// CHECK: #APP +// CHECK: vmovaps zmm0, zmm0 +// CHECK: #NO_APP +check_reg!(zmm0_f16 f16 "zmm0" "vmovaps"); + // CHECK-LABEL: zmm0_i32: // CHECK: #APP // CHECK: vmovaps zmm0, zmm0 @@ -948,6 +1116,12 @@ check_reg!(zmm0_i64 i64 "zmm0" "vmovaps"); // CHECK: #NO_APP check_reg!(zmm0_f64 f64 "zmm0" "vmovaps"); +// CHECK-LABEL: zmm0_f128: +// CHECK: #APP +// CHECK: vmovaps zmm0, zmm0 +// CHECK: #NO_APP +check_reg!(zmm0_f128 f128 "zmm0" "vmovaps"); + // CHECK-LABEL: zmm0_ptr: // CHECK: #APP // CHECK: vmovaps zmm0, zmm0 @@ -978,6 +1152,12 @@ check_reg!(zmm0_i32x4 i32x4 "zmm0" "vmovaps"); // CHECK: #NO_APP check_reg!(zmm0_i64x2 i64x2 "zmm0" "vmovaps"); +// CHECK-LABEL: zmm0_f16x8: +// CHECK: #APP +// CHECK: vmovaps zmm0, zmm0 +// CHECK: #NO_APP +check_reg!(zmm0_f16x8 f16x8 "zmm0" "vmovaps"); + // CHECK-LABEL: zmm0_f32x4: // CHECK: #APP // CHECK: vmovaps zmm0, zmm0 @@ -1014,6 +1194,12 @@ check_reg!(zmm0_i32x8 i32x8 "zmm0" "vmovaps"); // CHECK: #NO_APP check_reg!(zmm0_i64x4 i64x4 "zmm0" "vmovaps"); +// CHECK-LABEL: zmm0_f16x16: +// CHECK: #APP +// CHECK: vmovaps zmm0, zmm0 +// CHECK: #NO_APP +check_reg!(zmm0_f16x16 f16x16 "zmm0" "vmovaps"); + // CHECK-LABEL: zmm0_f32x8: // CHECK: #APP // CHECK: vmovaps zmm0, zmm0 @@ -1050,6 +1236,12 @@ check_reg!(zmm0_i32x16 i32x16 "zmm0" "vmovaps"); // CHECK: #NO_APP check_reg!(zmm0_i64x8 i64x8 "zmm0" "vmovaps"); +// CHECK-LABEL: zmm0_f16x32: +// CHECK: #APP +// CHECK: vmovaps zmm0, zmm0 +// CHECK: #NO_APP +check_reg!(zmm0_f16x32 f16x32 "zmm0" "vmovaps"); + // CHECK-LABEL: zmm0_f32x16: // CHECK: #APP // CHECK: vmovaps zmm0, zmm0 diff --git a/tests/ui/asm/x86_64/type-check-3.stderr b/tests/ui/asm/x86_64/type-check-3.stderr index 34bfcd71cac..202b97ca5c0 100644 --- a/tests/ui/asm/x86_64/type-check-3.stderr +++ b/tests/ui/asm/x86_64/type-check-3.stderr @@ -4,7 +4,7 @@ error: type `i128` cannot be used with this register class LL | asm!("{}", in(reg) 0i128); | ^^^^^ | - = note: register class `reg` supports these types: i16, i32, i64, f32, f64 + = note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64 error: type `__m128` cannot be used with this register class --> $DIR/type-check-3.rs:16:28 @@ -12,7 +12,7 @@ error: type `__m128` cannot be used with this register class LL | asm!("{}", in(reg) _mm_setzero_ps()); | ^^^^^^^^^^^^^^^^ | - = note: register class `reg` supports these types: i16, i32, i64, f32, f64 + = note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64 error: type `__m256` cannot be used with this register class --> $DIR/type-check-3.rs:18:28 @@ -20,7 +20,7 @@ error: type `__m256` cannot be used with this register class LL | asm!("{}", in(reg) _mm256_setzero_ps()); | ^^^^^^^^^^^^^^^^^^^ | - = note: register class `reg` supports these types: i16, i32, i64, f32, f64 + = note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64 error: type `u8` cannot be used with this register class --> $DIR/type-check-3.rs:20:32 @@ -28,7 +28,7 @@ error: type `u8` cannot be used with this register class LL | asm!("{}", in(xmm_reg) 0u8); | ^^^ | - = note: register class `xmm_reg` supports these types: i32, i64, f32, f64, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2 + = note: register class `xmm_reg` supports these types: i32, i64, f16, f32, f64, f128, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2 error: `avx512bw` target feature is not enabled --> $DIR/type-check-3.rs:29:29 @@ -81,7 +81,7 @@ error: type `i8` cannot be used with this register class LL | asm!("{}", in(reg) 0i8); | ^^^ | - = note: register class `reg` supports these types: i16, i32, i64, f32, f64 + = note: register class `reg` supports these types: i16, i32, i64, f16, f32, f64 = help: consider using the `reg_byte` register class instead error: incompatible types for asm inout argument