From 81b106584f2baf33e09be2362c35c1bf2f6bfe94 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 14 Aug 2021 23:35:27 +0200 Subject: [PATCH] [AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476) This is a non-intrusive fix for https://bugs.llvm.org/show_bug.cgi?id=51476 intended for backport to the 13.x release branch. It expands on the current hack by distinguishing between CmpValue of 0, 1 and 2, where 0 and 1 have the obvious meaning and 2 means "anything else". The new optimization from D98564 should only be performed for CmpValue of 0 or 1. For main, I think we should switch the analyzeCompare() and optimizeCompare() APIs to use int64_t instead of int, which is in line with MachineOperand's notion of an immediate, and avoids this problem altogether. Differential Revision: https://reviews.llvm.org/D108076 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 34 +++++++++------- .../CodeGen/AArch64/csinc-cmp-removal.mir | 39 +++++++++++++++++++ llvm/test/CodeGen/AArch64/pr51476.ll | 35 +++++++++++++++++ 3 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/pr51476.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 3a0cbbb275b5..0ec4b5753ee1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, if (!MI.getOperand(1).isReg()) return false; + auto NormalizeCmpValue = [](int64_t Value) -> int { + // Comparison immediates may be 64-bit, but CmpValue is only an int. + // Normalize to 0/1/2 return value, where 2 indicates any value apart from + // 0 or 1. + // TODO: Switch CmpValue to int64_t in the API to avoid this. + if (Value == 0 || Value == 1) + return Value; + return 2; + }; + switch (MI.getOpcode()) { default: break; @@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME: In order to convert CmpValue to 0 or 1 - CmpValue = MI.getOperand(2).getImm() != 0; + CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm()); return true; case AArch64::ANDSWri: case AArch64::ANDSXri: @@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME:The return val type of decodeLogicalImmediate is uint64_t, - // while the type of CmpValue is int. When converting uint64_t to int, - // the high 32 bits of uint64_t will be lost. - // In fact it causes a bug in spec2006-483.xalancbmk - // CmpValue is only used to compare with zero in OptimizeCompareInstr - CmpValue = AArch64_AM::decodeLogicalImmediate( + CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate( MI.getOperand(2).getImm(), - MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; + MI.getOpcode() == AArch64::ANDSWri ? 32 : 64)); return true; } @@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (CmpInstr.getOpcode() == AArch64::PTEST_PP) return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); - // Continue only if we have a "ri" where immediate is zero. - // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare - // function. - assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); + // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1. + assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) && + "CmpValue must be 0, 1, or 2!"); if (SrcReg2 != 0) return false; @@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) return false; - if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) + if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) return true; - return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); + return (CmpValue == 0 || CmpValue == 1) && + removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); } /// Get opcode of S version of Instr. diff --git a/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir b/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir index 4222e84b113c..2098218d23f3 100644 --- a/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir +++ b/llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir @@ -307,3 +307,42 @@ body: | RET_ReallyLR ... +--- +name: subswr_wrong_cmp_value +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: subswr_wrong_cmp_value + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv + ; CHECK: Bcc 1, %bb.2, implicit $nzcv + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: B %bb.2 + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.0: + liveins: $x1 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + %1:gpr64common = COPY $x1 + %2:gpr64 = IMPLICIT_DEF + %3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv + %4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv + %5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + B %bb.2 + + bb.2: + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/pr51476.ll b/llvm/test/CodeGen/AArch64/pr51476.ll new file mode 100644 index 000000000000..6abd41a12154 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr51476.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define void @test(i8 %arg) nounwind { +; CHECK-LABEL: test: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: cmp w0, #3 +; CHECK-NEXT: strb w0, [sp, #12] +; CHECK-NEXT: b.eq .LBB0_2 +; CHECK-NEXT: // %bb.1: // %do_call +; CHECK-NEXT: bl unknown +; CHECK-NEXT: .LBB0_2: // %common.ret +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %tmp = alloca i8 + %cmp1 = icmp ne i8 %arg, 1 + %zext = zext i1 %cmp1 to i8 + store i8 %zext, i8* %tmp + %zext2 = load i8, i8* %tmp + %cmp2 = icmp eq i8 %zext2, 3 + br i1 %cmp2, label %exit, label %do_call + +do_call: + call void @unknown(i8 %zext2) + ret void + +exit: + ret void +} + +declare void @unknown(i8)