diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3cbe5ccf5566..34312e228444 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -132,6 +132,17 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
         .addImm(16);
       return;
     }
+  } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
+    // Copy an FPR16 by moving the FPR32 super-register that contains it
+    // (FMOVss). Dst must derive from DestReg and Src from SrcReg.
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
+                                            &AArch64::FPR32RegClass);
+    unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
+                                            &AArch64::FPR32RegClass);
+    BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
+      .addReg(Src);
+    return;
   } else {
     CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
     return;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td
index 251fee2af2fd..41c6c86f36b8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrNEON.td
@@ -8852,6 +8852,12 @@ def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
 // Patterns for handling half-precision values
 //
 
+// Convert between f16 value and f32 value
+def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))),
+          (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>;
+def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))),
+          (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>;
+
 // Convert f16 value coming in as i16 value to f32
 def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
           (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
diff --git a/llvm/test/CodeGen/AArch64/neon-load-store-v1i32.ll b/llvm/test/CodeGen/AArch64/neon-load-store-v1i32.ll
new file mode 100644
index 000000000000..92f704d5d16e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-load-store-v1i32.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+; Test load/store of v1i8, v1i16, v1i32 types can be selected correctly
+define void @load.store.v1i8(<1 x i8>* %ptr, <1 x i8>* %ptr2) {
+; CHECK-LABEL: load.store.v1i8:
+; CHECK: ldr b{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str b{{[0-9]+}}, [x{{[0-9]+|sp}}]
+  %a = load <1 x i8>* %ptr
+  store <1 x i8> %a, <1 x i8>* %ptr2
+  ret void
+}
+
+define void @load.store.v1i16(<1 x i16>* %ptr, <1 x i16>* %ptr2) {
+; CHECK-LABEL: load.store.v1i16:
+; CHECK: ldr h{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str h{{[0-9]+}}, [x{{[0-9]+|sp}}]
+  %a = load <1 x i16>* %ptr
+  store <1 x i16> %a, <1 x i16>* %ptr2
+  ret void
+}
+
+define void @load.store.v1i32(<1 x i32>* %ptr, <1 x i32>* %ptr2) {
+; CHECK-LABEL: load.store.v1i32:
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+|sp}}]
+  %a = load <1 x i32>* %ptr
+  store <1 x i32> %a, <1 x i32>* %ptr2
+  ret void
+}