[x86] try to widen 'shl' as part of LEA formation

The test file has pairs of tests that are logically equivalent:
https://rise4fun.com/Alive/2zQ

%t4 = and i8 %t1, 8
%t5 = zext i8 %t4 to i16
%sh = shl i16 %t5, 2
%t6 = add i16 %sh, %t0
=>
%t4 = and i8 %t1, 8
%sh2 = shl i8 %t4, 2
%z5 = zext i8 %sh2 to i16
%t6 = add i16 %z5, %t0

...so if we can fold the shift op into an LEA in the 1st pattern, then we
should be able to do the same in the 2nd pattern (the unnecessary 'movzbl'
is a separate bug, I think).

We don't want to do this any sooner than address matching during instruction
selection, though, because that would conflict with generic transforms that
try to narrow the width of the shift.

Differential Revision: https://reviews.llvm.org/D60789

llvm-svn: 358622
This commit is contained in:
Sanjay Patel 2019-04-17 22:38:51 +00:00
parent 4c177038e0
commit fb363a778f
2 changed files with 44 additions and 8 deletions

View File

@ -1906,6 +1906,42 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
}
case ISD::ZERO_EXTEND: {
// Try to widen a zexted shift left to the same size as its use, so we can
// match the shift as a scale factor.
//
// Overview: when N is zext(shl X, C) with a small constant C and the shift
// provably loses no set bits, rebuild it as shl(zext X), C in the wide type
// and record zext X as the index register with scale 1 << C.

// Only attempt this while the addressing mode has no index register yet and
// the scale is still 1; both fields are overwritten below.
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
break;
// The zext's operand must be a SHL whose only user is this zext.
// NOTE(review): presumably the one-use restriction avoids keeping both the
// narrow and the widened shift alive -- confirm.
if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
break;
// Give up if the shift amount is not a constant that yields a valid scale
// factor: amounts [1,2,3] correspond to scales [2,4,8]. Note that the check
// below rejects only amounts greater than 3, so an amount of 0 (scale 1) is
// also let through.
SDValue Shl = N.getOperand(0);
auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
if (!ShAmtC || ShAmtC->getZExtValue() > 3)
break;
// The narrow shift must only shift out zero bits (it must be 'nuw').
// That makes it safe to widen to the destination type.
// HighZeros is a mask of the top <shift amount> bits at the narrow shift's
// width; those bits of the shifted value must be known zero.
APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
ShAmtC->getZExtValue());
if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
break;
// zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), C
// The original shift-amount operand is reused as-is for the wide shift; no
// new constant node is created for it here.
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
// Convert the shift to scale factor.
// The widened (unshifted) value becomes the index; the shift is expressed
// purely through the scale.
AM.Scale = 1 << ShAmtC->getZExtValue();
AM.IndexReg = Zext;
// Register the two new nodes relative to N, then redirect every user of the
// old zext to the widened shift.
// NOTE(review): insertDAGNode is defined elsewhere; presumably it keeps the
// DAG's node ordering valid for the newly created nodes -- confirm.
insertDAGNode(*CurDAG, N, Zext);
insertDAGNode(*CurDAG, N, NewShl);
CurDAG->ReplaceAllUsesWith(N, NewShl);
// NOTE(review): 'false' presumably reports a successful match to the caller,
// in contrast to falling through to matchAddressBase -- confirm against the
// function's return convention, which is not visible in this hunk.
return false;
}
}
return matchAddressBase(N, AM);

View File

@ -21,10 +21,10 @@ define i16 @and_i8_zext_shl_add_i16(i16 %t0, i8 %t1) {
define i16 @and_i8_shl_zext_add_i16(i16 %t0, i8 %t1) {
; CHECK-LABEL: and_i8_shl_zext_add_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: andb $8, %sil
; CHECK-NEXT: shlb $2, %sil
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: leal (%rdi,%rax,4), %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t4 = and i8 %t1, 8
@ -52,10 +52,10 @@ define i32 @and_i8_zext_shl_add_i32(i32 %t0, i8 %t1) {
define i32 @and_i8_shl_zext_add_i32(i32 %t0, i8 %t1) {
; CHECK-LABEL: and_i8_shl_zext_add_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: andb $8, %sil
; CHECK-NEXT: shlb $3, %sil
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: leal (%rdi,%rax,8), %eax
; CHECK-NEXT: retq
%t4 = and i8 %t1, 8
%sh = shl i8 %t4, 3
@ -112,9 +112,8 @@ define i64 @and_i8_shl_zext_add_i64(i64 %t0, i8 %t1) {
; CHECK-LABEL: and_i8_shl_zext_add_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: andb $8, %sil
; CHECK-NEXT: addb %sil, %sil
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: leaq (%rdi,%rax,2), %rax
; CHECK-NEXT: retq
%t4 = and i8 %t1, 8
%sh = shl i8 %t4, 1
@ -142,8 +141,7 @@ define i64 @and_i32_shl_zext_add_i64(i64 %t0, i32 %t1) {
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
; CHECK-NEXT: andl $8, %esi
; CHECK-NEXT: leal (,%rsi,8), %eax
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: leaq (%rdi,%rsi,8), %rax
; CHECK-NEXT: retq
%t4 = and i32 %t1, 8
%sh = shl i32 %t4, 3
@ -152,6 +150,8 @@ define i64 @and_i32_shl_zext_add_i64(i64 %t0, i32 %t1) {
ret i64 %t6
}
; Negative test - shift can't be converted to scale factor.
define i64 @and_i32_zext_shl_add_i64_overshift(i64 %t0, i32 %t1) {
; CHECK-LABEL: and_i32_zext_shl_add_i64_overshift:
; CHECK: # %bb.0: