diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 40fec6c26694..51501c1a9c78 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -103,6 +103,7 @@ private:
                                       MachineRegisterInfo &MRI) const;
   bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
   unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
   MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                          MachineIRBuilder &MIRBuilder) const;
@@ -1849,6 +1850,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   }
   case TargetOpcode::G_INTRINSIC_TRUNC:
     return selectIntrinsicTrunc(I, MRI);
+  case TargetOpcode::G_INTRINSIC_ROUND:
+    return selectIntrinsicRound(I, MRI);
   case TargetOpcode::G_BUILD_VECTOR:
     return selectBuildVector(I, MRI);
   case TargetOpcode::G_MERGE_VALUES:
@@ -1923,6 +1926,74 @@ bool AArch64InstructionSelector::selectIntrinsicTrunc(
   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 }
 
+/// Select G_INTRINSIC_ROUND by rewriting \p I in place to the FRINTA opcode
+/// that matches its type.
+///
+/// Scalars of 16, 32 and 64 bits and the vector shapes <4 x s16>, <8 x s16>,
+/// <2 x s32>, <4 x s32> and <2 x s64> are handled.
+///
+/// \returns false, without modifying \p I, when no FRINTA opcode matches the
+/// type; otherwise the result of constraining the rewritten instruction's
+/// register operands.
+bool AArch64InstructionSelector::selectIntrinsicRound(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  // Operand 0 is the value the instruction defines; its type picks the opcode.
+  const LLT Ty = MRI.getType(I.getOperand(0).getReg());
+
+  // Select the correct opcode.
+  unsigned Opc = 0;
+  if (!Ty.isVector()) {
+    switch (Ty.getSizeInBits()) {
+    default:
+      // Leave Opc at 0 so an unexpected scalar width is rejected below rather
+      // than being silently selected as the half-precision instruction.
+      break;
+    case 16:
+      Opc = AArch64::FRINTAHr;
+      break;
+    case 32:
+      Opc = AArch64::FRINTASr;
+      break;
+    case 64:
+      Opc = AArch64::FRINTADr;
+      break;
+    }
+  } else {
+    unsigned NumElts = Ty.getNumElements();
+    switch (Ty.getElementType().getSizeInBits()) {
+    default:
+      break;
+    case 16:
+      if (NumElts == 4)
+        Opc = AArch64::FRINTAv4f16;
+      else if (NumElts == 8)
+        Opc = AArch64::FRINTAv8f16;
+      break;
+    case 32:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTAv2f32;
+      else if (NumElts == 4)
+        Opc = AArch64::FRINTAv4f32;
+      break;
+    case 64:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTAv2f64;
+      break;
+    }
+  }
+
+  if (!Opc) {
+    // Didn't get an opcode above, bail.
+    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
+    return false;
+  }
+
+  // Legalization would have set us up perfectly for this; we just need to
+  // set the opcode and move on.
+  I.setDesc(TII.get(Opc));
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
 bool AArch64InstructionSelector::selectVectorICmp(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
   unsigned DstReg = I.getOperand(0).getReg();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-round.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-round.mir
new file mode 100644
index 000000000000..4efec65d9151
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-round.mir
@@ -0,0 +1,198 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -mattr=+fullfp16 -o - | FileCheck %s
+
+...
+--- +name: test_f64.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_f64.intrinsic_round + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FRINTADr:%[0-9]+]]:fpr64 = FRINTADr [[COPY]] + ; CHECK: $d0 = COPY [[FRINTADr]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(s64) = COPY $d0 + %1:fpr(s64) = G_INTRINSIC_ROUND %0 + $d0 = COPY %1(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: test_f32.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $s0 + + ; CHECK-LABEL: name: test_f32.intrinsic_round + ; CHECK: liveins: $s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[FRINTASr:%[0-9]+]]:fpr32 = FRINTASr [[COPY]] + ; CHECK: $s0 = COPY [[FRINTASr]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(s32) = COPY $s0 + %1:fpr(s32) = G_INTRINSIC_ROUND %0 + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: test_f16.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $h0 + + ; CHECK-LABEL: name: test_f16.intrinsic_round + ; CHECK: liveins: $h0 + ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK: [[FRINTAHr:%[0-9]+]]:fpr16 = FRINTAHr [[COPY]] + ; CHECK: $h0 = COPY [[FRINTAHr]] + ; CHECK: RET_ReallyLR implicit $h0 + %0:fpr(s16) = COPY $h0 + %1:fpr(s16) = G_INTRINSIC_ROUND %0 + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... 
+--- +name: test_v4f16.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_v4f16.intrinsic_round + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FRINTAv4f16_:%[0-9]+]]:fpr64 = FRINTAv4f16 [[COPY]] + ; CHECK: $d0 = COPY [[FRINTAv4f16_]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = G_INTRINSIC_ROUND %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v8f16.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v8f16.intrinsic_round + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FRINTAv8f16_:%[0-9]+]]:fpr128 = FRINTAv8f16 [[COPY]] + ; CHECK: $q0 = COPY [[FRINTAv8f16_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = G_INTRINSIC_ROUND %0 + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f32.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: test_v2f32.intrinsic_round + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FRINTAv2f32_:%[0-9]+]]:fpr64 = FRINTAv2f32 [[COPY]] + ; CHECK: $d0 = COPY [[FRINTAv2f32_]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = G_INTRINSIC_ROUND %0 + $d0 = COPY %1(<2 x s32>) + RET_ReallyLR implicit $d0 + +... 
+--- +name: test_v4f32.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v4f32.intrinsic_round + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FRINTAv4f32_:%[0-9]+]]:fpr128 = FRINTAv4f32 [[COPY]] + ; CHECK: $q0 = COPY [[FRINTAv4f32_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = G_INTRINSIC_ROUND %0 + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f64.intrinsic_round +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: test_v2f64.intrinsic_round + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FRINTAv2f64_:%[0-9]+]]:fpr128 = FRINTAv2f64 [[COPY]] + ; CHECK: $q0 = COPY [[FRINTAv2f64_]] + ; CHECK: RET_ReallyLR implicit $q0 + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = G_INTRINSIC_ROUND %0 + $q0 = COPY %1(<2 x s64>) + RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll index 16344184b27b..1718ed4d9a96 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -230,6 +230,11 @@ define %v4f16 @test_v4f16.round(%v4f16 %a) { ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frinta.4h ; CHECK-FP16-NEXT: ret + ; GISEL-LABEL: test_v4f16.round: + ; GISEL-NOFP16-COUNT-4: frinta s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: frinta.4h + ; GISEL-FP16-NEXT: ret %1 = call %v4f16 @llvm.round.v4f16(%v4f16 %a) ret %v4f16 %1 } @@ -475,6 +480,11 @@ define %v8f16 @test_v8f16.round(%v8f16 %a) { ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: frinta.8h ; CHECK-FP16-NEXT: ret + ; 
GISEL-LABEL: test_v8f16.round: + ; GISEL-NOFP16-COUNT-8: frinta s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: frinta.8h + ; GISEL-FP16-NEXT: ret %1 = call %v8f16 @llvm.round.v8f16(%v8f16 %a) ret %v8f16 %1 } diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index b677feecf672..1ed4bdec9fb9 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -1273,10 +1273,21 @@ define half @test_nearbyint(half %a) #0 { ; CHECK-CVT-NEXT: fcvt h0, [[INT32]] ; CHECK-CVT-NEXT: ret +; GISEL-CVT-LABEL: test_round: +; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; GISEL-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]] +; GISEL-CVT-NEXT: fcvt h0, [[INT32]] +; GISEL-CVT-NEXT: ret + + ; CHECK-FP16-LABEL: test_round: ; CHECK-FP16-NEXT: frinta h0, h0 ; CHECK-FP16-NEXT: ret +; GISEL-FP16-LABEL: test_round: +; GISEL-FP16-NEXT: frinta h0, h0 +; GISEL-FP16-NEXT: ret + define half @test_round(half %a) #0 { %r = call half @llvm.round.f16(half %a) ret half %r