forked from OSchip/llvm-project
[AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476)
This is a non-intrusive fix for https://bugs.llvm.org/show_bug.cgi?id=51476 intended for backport to the 13.x release branch. It expands on the current hack by distinguishing between CmpValue of 0, 1 and 2, where 0 and 1 have the obvious meaning and 2 means "anything else". The new optimization from D98564 should only be performed for CmpValue of 0 or 1. For main, I think we should switch the analyzeCompare() and optimizeCompare() APIs to use int64_t instead of int, which is in line with MachineOperand's notion of an immediate, and avoids this problem altogether. Differential Revision: https://reviews.llvm.org/D108076
This commit is contained in:
parent
49de6070a2
commit
81b106584f
|
@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
|
|||
if (!MI.getOperand(1).isReg())
|
||||
return false;
|
||||
|
||||
auto NormalizeCmpValue = [](int64_t Value) -> int {
|
||||
// Comparison immediates may be 64-bit, but CmpValue is only an int.
|
||||
// Normalize to 0/1/2 return value, where 2 indicates any value apart from
|
||||
// 0 or 1.
|
||||
// TODO: Switch CmpValue to int64_t in the API to avoid this.
|
||||
if (Value == 0 || Value == 1)
|
||||
return Value;
|
||||
return 2;
|
||||
};
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
|
@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
|
|||
SrcReg = MI.getOperand(1).getReg();
|
||||
SrcReg2 = 0;
|
||||
CmpMask = ~0;
|
||||
// FIXME: In order to convert CmpValue to 0 or 1
|
||||
CmpValue = MI.getOperand(2).getImm() != 0;
|
||||
CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
|
||||
return true;
|
||||
case AArch64::ANDSWri:
|
||||
case AArch64::ANDSXri:
|
||||
|
@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
|
|||
SrcReg = MI.getOperand(1).getReg();
|
||||
SrcReg2 = 0;
|
||||
CmpMask = ~0;
|
||||
// FIXME:The return val type of decodeLogicalImmediate is uint64_t,
|
||||
// while the type of CmpValue is int. When converting uint64_t to int,
|
||||
// the high 32 bits of uint64_t will be lost.
|
||||
// In fact it causes a bug in spec2006-483.xalancbmk
|
||||
// CmpValue is only used to compare with zero in OptimizeCompareInstr
|
||||
CmpValue = AArch64_AM::decodeLogicalImmediate(
|
||||
CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
|
||||
MI.getOperand(2).getImm(),
|
||||
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
|
||||
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
|
|||
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
|
||||
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
|
||||
|
||||
// Continue only if we have a "ri" where immediate is zero.
|
||||
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
|
||||
// function.
|
||||
assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
|
||||
// Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
|
||||
assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
|
||||
"CmpValue must be 0, 1, or 2!");
|
||||
if (SrcReg2 != 0)
|
||||
return false;
|
||||
|
||||
|
@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
|
|||
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
|
||||
return false;
|
||||
|
||||
if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
|
||||
if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
|
||||
return true;
|
||||
return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
|
||||
return (CmpValue == 0 || CmpValue == 1) &&
|
||||
removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
|
||||
}
|
||||
|
||||
/// Get opcode of S version of Instr.
|
||||
|
|
|
@ -307,3 +307,42 @@ body: |
|
|||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: subswr_wrong_cmp_value
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: subswr_wrong_cmp_value
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: liveins: $x1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
|
||||
; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
|
||||
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv
|
||||
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv
|
||||
; CHECK: Bcc 1, %bb.2, implicit $nzcv
|
||||
; CHECK: B %bb.1
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: B %bb.2
|
||||
; CHECK: bb.2:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
liveins: $x1
|
||||
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
%1:gpr64common = COPY $x1
|
||||
%2:gpr64 = IMPLICIT_DEF
|
||||
%3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
|
||||
%4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
%5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv
|
||||
Bcc 1, %bb.2, implicit $nzcv
|
||||
B %bb.1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2(0x80000000)
|
||||
B %bb.2
|
||||
|
||||
bb.2:
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
define void @test(i8 %arg) nounwind {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: and w8, w0, #0xff
|
||||
; CHECK-NEXT: cmp w8, #1
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: cmp w0, #3
|
||||
; CHECK-NEXT: strb w0, [sp, #12]
|
||||
; CHECK-NEXT: b.eq .LBB0_2
|
||||
; CHECK-NEXT: // %bb.1: // %do_call
|
||||
; CHECK-NEXT: bl unknown
|
||||
; CHECK-NEXT: .LBB0_2: // %common.ret
|
||||
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
%tmp = alloca i8
|
||||
%cmp1 = icmp ne i8 %arg, 1
|
||||
%zext = zext i1 %cmp1 to i8
|
||||
store i8 %zext, i8* %tmp
|
||||
%zext2 = load i8, i8* %tmp
|
||||
%cmp2 = icmp eq i8 %zext2, 3
|
||||
br i1 %cmp2, label %exit, label %do_call
|
||||
|
||||
do_call:
|
||||
call void @unknown(i8 %zext2)
|
||||
ret void
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @unknown(i8)
|
Loading…
Reference in New Issue