forked from OSchip/llvm-project
[x86] improve CMOV codegen by pushing add into operands, part 3
In this episode, we are trying to avoid an x86 micro-arch quirk where a complex (3-operand) LEA potentially costs significantly more than a simple LEA. So we simultaneously push and pull the math around the CMOV to balance the operations.

I looked at the debug spew during instruction selection and decided against trying a later DAGToDAG transform -- it seems very difficult to match if the trailing memops are already selected, and managing the creation of extra instructions at that level is always tricky.

Differential Revision: https://reviews.llvm.org/D106918
This commit is contained in:
parent
960cb490dd
commit
4c41caa287
|
@ -49961,11 +49961,34 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
|
|||
if (!isSuitableCmov(Cmov))
|
||||
return SDValue();
|
||||
|
||||
// add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc DL(N);
|
||||
SDValue FalseOp = Cmov.getOperand(0);
|
||||
SDValue TrueOp = Cmov.getOperand(1);
|
||||
|
||||
// We will push the add through the select, but we can potentially do better
|
||||
// if we know there is another add in the sequence and this is pointer math.
|
||||
// In that case, we can absorb an add into the trailing memory op and avoid
|
||||
// a 3-operand LEA which is likely slower than a 2-operand LEA.
|
||||
// TODO: If target has "slow3OpsLEA", do this even without the trailing memop?
|
||||
if (OtherOp.getOpcode() == ISD::ADD && OtherOp.hasOneUse() &&
|
||||
!isa<ConstantSDNode>(OtherOp.getOperand(0)) &&
|
||||
all_of(N->uses(), [&](SDNode *Use) {
|
||||
auto *MemNode = dyn_cast<MemSDNode>(Use);
|
||||
return MemNode && MemNode->getBasePtr().getNode() == N;
|
||||
})) {
|
||||
// add (cmov C1, C2), add (X, Y) --> add (cmov (add X, C1), (add X, C2)), Y
|
||||
// TODO: We are arbitrarily choosing op0 as the 1st piece of the sum, but
|
||||
// it is possible that choosing op1 might be better.
|
||||
SDValue X = OtherOp.getOperand(0), Y = OtherOp.getOperand(1);
|
||||
FalseOp = DAG.getNode(ISD::ADD, DL, VT, X, FalseOp);
|
||||
TrueOp = DAG.getNode(ISD::ADD, DL, VT, X, TrueOp);
|
||||
Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp,
|
||||
Cmov.getOperand(2), Cmov.getOperand(3));
|
||||
return DAG.getNode(ISD::ADD, DL, VT, Cmov, Y);
|
||||
}
|
||||
|
||||
// add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
|
||||
FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
|
||||
TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
|
||||
return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
|
||||
|
|
|
@ -279,11 +279,11 @@ define void @bullet_load_store(i32 %x, i64 %y, %class.btAxis* %p) {
|
|||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: leaq (%rsi,%rsi,4), %rax
|
||||
; CHECK-NEXT: shlq $4, %rax
|
||||
; CHECK-NEXT: leaq 66(%rdx), %rcx
|
||||
; CHECK-NEXT: addq $60, %rdx
|
||||
; CHECK-NEXT: testb $1, %dil
|
||||
; CHECK-NEXT: leaq 60(%rdx,%rax), %rcx
|
||||
; CHECK-NEXT: leaq 66(%rdx,%rax), %rax
|
||||
; CHECK-NEXT: cmoveq %rcx, %rax
|
||||
; CHECK-NEXT: decw (%rax)
|
||||
; CHECK-NEXT: cmovneq %rcx, %rdx
|
||||
; CHECK-NEXT: decw (%rdx,%rax)
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %x, 1
|
||||
%b = icmp eq i32 %and, 0
|
||||
|
@ -299,11 +299,11 @@ define void @bullet_load_store(i32 %x, i64 %y, %class.btAxis* %p) {
|
|||
define void @complex_lea_alt1(i1 %b, i16* readnone %ptr, i64 %idx) {
|
||||
; CHECK-LABEL: complex_lea_alt1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: leaq 60(%rdx,%rsi), %rax
|
||||
; CHECK-NEXT: leaq 66(%rdx,%rsi), %rcx
|
||||
; CHECK-NEXT: leaq 60(%rdx), %rax
|
||||
; CHECK-NEXT: addq $66, %rdx
|
||||
; CHECK-NEXT: testb $1, %dil
|
||||
; CHECK-NEXT: cmovneq %rax, %rcx
|
||||
; CHECK-NEXT: decw (%rcx)
|
||||
; CHECK-NEXT: cmovneq %rax, %rdx
|
||||
; CHECK-NEXT: decw (%rdx,%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%i = ptrtoint i16* %ptr to i64
|
||||
%sum = add i64 %idx, %i
|
||||
|
@ -320,11 +320,11 @@ define void @complex_lea_alt1(i1 %b, i16* readnone %ptr, i64 %idx) {
|
|||
define void @complex_lea_alt2(i1 %b, i16* readnone %ptr, i64 %idx) {
|
||||
; CHECK-LABEL: complex_lea_alt2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: leaq 60(%rsi,%rdx), %rax
|
||||
; CHECK-NEXT: leaq 66(%rsi,%rdx), %rcx
|
||||
; CHECK-NEXT: leaq 60(%rsi), %rax
|
||||
; CHECK-NEXT: addq $66, %rsi
|
||||
; CHECK-NEXT: testb $1, %dil
|
||||
; CHECK-NEXT: cmovneq %rax, %rcx
|
||||
; CHECK-NEXT: decw (%rcx)
|
||||
; CHECK-NEXT: cmovneq %rax, %rsi
|
||||
; CHECK-NEXT: decw (%rsi,%rdx)
|
||||
; CHECK-NEXT: retq
|
||||
%i = ptrtoint i16* %ptr to i64
|
||||
%sum = add i64 %i, %idx
|
||||
|
@ -433,11 +433,11 @@ define void @complex_lea_alt6(i1 %b, i16* readnone %ptr, i64 %idx) {
|
|||
define void @complex_lea_alt7(i1 %b, i16* readnone %ptr, i64 %idx) {
|
||||
; CHECK-LABEL: complex_lea_alt7:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: leaq 60(%rdx,%rsi), %rax
|
||||
; CHECK-NEXT: leaq 66(%rdx,%rsi), %rcx
|
||||
; CHECK-NEXT: leaq 60(%rdx), %rax
|
||||
; CHECK-NEXT: addq $66, %rdx
|
||||
; CHECK-NEXT: testb $1, %dil
|
||||
; CHECK-NEXT: cmovneq %rax, %rcx
|
||||
; CHECK-NEXT: decw (%rcx)
|
||||
; CHECK-NEXT: cmovneq %rax, %rdx
|
||||
; CHECK-NEXT: decw (%rdx,%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%i = ptrtoint i16* %ptr to i64
|
||||
%o = add i64 %idx, %i
|
||||
|
@ -455,11 +455,11 @@ define void @complex_lea_alt7(i1 %b, i16* readnone %ptr, i64 %idx) {
|
|||
define void @complex_lea_alt8(i1 %b, i16* readnone %ptr, i64 %idx) {
|
||||
; CHECK-LABEL: complex_lea_alt8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: leaq 60(%rsi,%rdx), %rax
|
||||
; CHECK-NEXT: leaq 66(%rsi,%rdx), %rcx
|
||||
; CHECK-NEXT: leaq 60(%rsi), %rax
|
||||
; CHECK-NEXT: addq $66, %rsi
|
||||
; CHECK-NEXT: testb $1, %dil
|
||||
; CHECK-NEXT: cmovneq %rax, %rcx
|
||||
; CHECK-NEXT: decw (%rcx)
|
||||
; CHECK-NEXT: cmovneq %rax, %rsi
|
||||
; CHECK-NEXT: decw (%rsi,%rdx)
|
||||
; CHECK-NEXT: retq
|
||||
%i = ptrtoint i16* %ptr to i64
|
||||
%o = add i64 %i, %idx
|
||||
|
|
Loading…
Reference in New Issue