[TargetLower] Update shouldFormOverflowOp check if math is used.
On some targets, like SPARC, forming overflow ops is only profitable if the math result is used: https://godbolt.org/z/DxSmdB

This patch adds a new MathUsed parameter so targets can make that decision, and defaults to forming the overflow op only when the math result is used, which is the conservative choice.

It also updates AArch64ISelLowering, X86ISelLowering, ARMISelLowering.h, and SystemZISelLowering.h to allow forming overflow ops even when the math result is unused; on those targets, using the overflow intrinsic purely for the overflow check generates better code.

Reviewers: nikic, RKSimon, lebedev.ri, spatel

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D74722
This commit is contained in:
parent 63236078d2
commit 216afd3301

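For orientation, here is a minimal LLVM IR sketch (written for this summary, not taken from the patch or its tests; the function names are invented) of the CodeGenPrepare rewrite that the hook gates: a plain unsigned add whose wrap is checked by a compare becomes a single call to llvm.uadd.with.overflow.

; Before CodeGenPrepare: separate add and overflow check.
define i64 @before(i64 %a, i64 %b) {
  %add = add i64 %b, %a
  %ov = icmp ult i64 %add, %a          ; true iff the add wrapped
  %q = select i1 %ov, i64 %b, i64 42
  ret i64 %q
}

; After the transform (on a target whose shouldFormOverflowOp returns true):
; one intrinsic yields both the sum and the overflow flag. The math result
; may end up unused; that is exactly the case this patch makes
; target-controllable.
define i64 @after(i64 %a, i64 %b) {
  %t = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %b, i64 %a)
  %math = extractvalue { i64, i1 } %t, 0
  %ov = extractvalue { i64, i1 } %t, 1
  %q = select i1 %ov, i64 %b, i64 42
  ret i64 %q
}

declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
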
@@ -2676,17 +2676,21 @@ public:
   /// node operation. Targets may want to override this independently of whether
   /// the operation is legal/custom for the given type because it may obscure
   /// matching of other patterns.
-  virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+  virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+                                    bool MathUsed) const {
     // TODO: The default logic is inherited from code in CodeGenPrepare.
     // The opcode should not make a difference by default?
     if (Opcode != ISD::UADDO)
       return false;
 
     // Allow the transform as long as we have an integer type that is not
-    // obviously illegal and unsupported.
+    // obviously illegal and unsupported and if the math result is used
+    // besides the overflow check. On some targets (e.g. SPARC), it is
+    // not profitable to form an overflow op if the math result has no
+    // concrete users.
     if (VT.isVector())
       return false;
-    return VT.isSimple() || !isOperationExpand(Opcode, VT);
+    return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
   }
 
   // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
 
@@ -1272,7 +1272,8 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
     return false;
 
   if (!TLI->shouldFormOverflowOp(ISD::UADDO,
-                                 TLI->getValueType(*DL, Add->getType())))
+                                 TLI->getValueType(*DL, Add->getType()),
+                                 Add->hasNUsesOrMore(2)))
     return false;
 
   // We don't want to move around uses of condition values this late, so we
 
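The compare being folded is itself one use of the add, so Add->hasNUsesOrMore(2) is in effect asking whether the math result has at least one user besides the overflow check. A hedged IR illustration (invented functions, not from the test suite):

; %add has two uses (the store and the compare), so the hook is called with
; MathUsed = true and the default implementation still allows the transform.
define i1 @math_used(i64 %a, i64 %b, i64* %res) {
  %add = add i64 %b, %a
  store i64 %add, i64* %res
  %ov = icmp ult i64 %add, %a
  ret i1 %ov
}

; Here the compare is %add's only use, so MathUsed = false and the
; conservative default (kept by targets such as SPARC) leaves the IR alone.
define i1 @overflow_only(i64 %a, i64 %b) {
  %add = add i64 %b, %a
  %ov = icmp ult i64 %add, %a
  ret i1 %ov
}
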
@@ -1339,7 +1340,8 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
     return false;
 
   if (!TLI->shouldFormOverflowOp(ISD::USUBO,
-                                 TLI->getValueType(*DL, Sub->getType())))
+                                 TLI->getValueType(*DL, Sub->getType()),
+                                 Sub->hasNUsesOrMore(2)))
     return false;
 
   if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
 
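For reference, the borrow check for an unsigned subtract a - b is icmp ult a, b, which combineToUSubWithOverflow pairs with the subtract. A small invented IR example (not part of the patch) of a case where the difference clearly has users beyond the overflow test:

; The difference is stored and returned as well as guarded by the borrow
; check, so the math result is genuinely used and forming
; llvm.usub.with.overflow can pay off on targets that allow it.
define i64 @usub_math_used(i64 %a, i64 %b, i64* %res, i1* %ovp) {
  %sub = sub i64 %a, %b
  store i64 %sub, i64* %res
  %ov = icmp ult i64 %a, %b            ; true iff a - b wraps below zero
  store i1 %ov, i1* %ovp
  ret i64 %sub
}
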
@@ -471,6 +471,13 @@ public:
   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                unsigned Index) const override;
 
+  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+                            bool MathUsed) const override {
+    // Using overflow ops for overflow checks only should be beneficial on
+    // AArch64.
+    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+  }
+
   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
 
@@ -524,6 +524,12 @@ class VectorType;
     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                  unsigned Index) const override;
 
+    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+                              bool MathUsed) const override {
+      // Using overflow ops for overflow checks only should be beneficial on ARM.
+      return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+    }
+
     /// Returns true if an argument of type Ty needs to be passed in a
     /// contiguous block of registers in calling convention CallConv.
     bool functionArgumentNeedsConsecutiveRegisters(
 
@@ -439,6 +439,14 @@ public:
                                       bool *Fast) const override;
   bool isTruncateFree(Type *, Type *) const override;
   bool isTruncateFree(EVT, EVT) const override;
+
+  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+                            bool MathUsed) const override {
+    // Using overflow ops for overflow checks only should be beneficial on
+    // SystemZ.
+    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+  }
+
   const char *getTargetNodeName(unsigned Opcode) const override;
   std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 
@@ -5168,7 +5168,8 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
   return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
 }
 
-bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
+                                             bool) const {
   // TODO: Allow vectors?
   if (VT.isVector())
     return false;
 
@@ -1150,7 +1150,8 @@ namespace llvm {
     /// Overflow nodes should get combined/lowered to optimal instructions
     /// (they should allow eliminating explicit compares by getting flags from
     /// math ops).
-    bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
+    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+                              bool MathUsed) const override;
 
     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                       unsigned AddrSpace) const override {
 
@@ -10,10 +10,9 @@ target triple = "sparc64-unknown-linux"
 
 define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
 ; CHECK-LABEL: @uaddo1_overflow_used(
-; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[ADD]], [[A]]
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %add = add i64 %b, %a
 
@@ -40,10 +39,9 @@ define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
 
 define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
 ; CHECK-LABEL: @uaddo2_overflow_used(
-; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[ADD]], [[B]]
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %add = add i64 %b, %a
 
@@ -70,10 +68,9 @@ define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
 
 define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
 ; CHECK-LABEL: @uaddo3_overflow_used(
-; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[B]], [[ADD]]
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %add = add i64 %b, %a