forked from OSchip/llvm-project
[X86][Codegen] Shift amount mod: sh? i64 x, (32-y) --> sh? i64 x, -(y+32)
I've seen this in RawSpeed's BitPumpMSB*::push() hot path, after fixing the buffer abstraction to a more sane one, when looking into a +5% runtime regression. I was hoping that this would fix it, but it does not look like it does. This seems to be at least not worse than the original pattern. But I'm actually mainly interested in the case where we already compute `(y+32)` (see last test): https://alive2.llvm.org/ce/z/ZCzJio Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D101944
This commit is contained in:
parent
dc00cbb505
commit
5f78ba001c
|
@ -3854,14 +3854,29 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
|
|||
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X
|
||||
// to generate a NEG instead of a SUB of a constant.
|
||||
} else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
|
||||
Add0C->getAPIntValue() != 0 &&
|
||||
Add0C->getAPIntValue().urem(Size) == 0) {
|
||||
Add0C->getZExtValue() != 0) {
|
||||
EVT SubVT = ShiftAmt.getValueType();
|
||||
SDValue X;
|
||||
if (Add0C->getZExtValue() % Size == 0)
|
||||
X = Add1;
|
||||
else if (ShiftAmt.hasOneUse() && Size == 64 &&
|
||||
Add0C->getZExtValue() % 32 == 0) {
|
||||
// We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
|
||||
// This is mainly beneficial if we already compute (x+n*32).
|
||||
if (Add1.getOpcode() == ISD::TRUNCATE) {
|
||||
Add1 = Add1.getOperand(0);
|
||||
SubVT = Add1.getValueType();
|
||||
}
|
||||
X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1,
|
||||
CurDAG->getZExtOrTrunc(Add0, DL, SubVT));
|
||||
insertDAGNode(*CurDAG, OrigShiftAmt, X);
|
||||
} else
|
||||
return false;
|
||||
// Insert a negate op.
|
||||
// TODO: This isn't guaranteed to replace the sub if there is a logic cone
|
||||
// that uses it that's not a shift.
|
||||
EVT SubVT = ShiftAmt.getValueType();
|
||||
SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
|
||||
SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
|
||||
SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
|
||||
NewShiftAmt = Neg;
|
||||
|
||||
// Insert these operands into a valid topological order so they can
|
||||
|
|
|
@ -8,16 +8,17 @@ define i64 @t0(i64 %val, i64 %shamt) nounwind {
|
|||
; X64-NOBMI2-LABEL: t0:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI2-NEXT: movb $32, %cl
|
||||
; X64-NOBMI2-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx
|
||||
; X64-NOBMI2-NEXT: negq %rcx
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: t0:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movb $32, %al
|
||||
; X64-BMI2-NEXT: subb %sil, %al
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
|
||||
; X64-BMI2-NEXT: addq $32, %rsi
|
||||
; X64-BMI2-NEXT: negq %rsi
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
;
|
||||
; X32-NOBMI2-LABEL: t0:
|
||||
|
@ -228,16 +229,17 @@ define i64 @t4(i64 %val, i64 %shamt) nounwind {
|
|||
; X64-NOBMI2-LABEL: t4:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI2-NEXT: movb $96, %cl
|
||||
; X64-NOBMI2-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI2-NEXT: leaq 96(%rsi), %rcx
|
||||
; X64-NOBMI2-NEXT: negq %rcx
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: t4:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: movb $96, %al
|
||||
; X64-BMI2-NEXT: subb %sil, %al
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
|
||||
; X64-BMI2-NEXT: addq $96, %rsi
|
||||
; X64-BMI2-NEXT: negq %rsi
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
;
|
||||
; X32-NOBMI2-LABEL: t4:
|
||||
|
@ -282,21 +284,21 @@ define i64 @t4(i64 %val, i64 %shamt) nounwind {
|
|||
define i64 @t5_cse(i64 %val, i64 %shamt, i64*%dst) nounwind {
|
||||
; X64-NOBMI2-LABEL: t5_cse:
|
||||
; X64-NOBMI2: # %bb.0:
|
||||
; X64-NOBMI2-NEXT: movq %rsi, %rcx
|
||||
; X64-NOBMI2-NEXT: movq %rdi, %rax
|
||||
; X64-NOBMI2-NEXT: leaq 32(%rsi), %rcx
|
||||
; X64-NOBMI2-NEXT: addq $32, %rcx
|
||||
; X64-NOBMI2-NEXT: movq %rcx, (%rdx)
|
||||
; X64-NOBMI2-NEXT: movb $32, %cl
|
||||
; X64-NOBMI2-NEXT: subb %sil, %cl
|
||||
; X64-NOBMI2-NEXT: negq %rcx
|
||||
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-NOBMI2-NEXT: shlq %cl, %rax
|
||||
; X64-NOBMI2-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: t5_cse:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: leaq 32(%rsi), %rax
|
||||
; X64-BMI2-NEXT: movq %rax, (%rdx)
|
||||
; X64-BMI2-NEXT: movb $32, %al
|
||||
; X64-BMI2-NEXT: subb %sil, %al
|
||||
; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax
|
||||
; X64-BMI2-NEXT: addq $32, %rsi
|
||||
; X64-BMI2-NEXT: movq %rsi, (%rdx)
|
||||
; X64-BMI2-NEXT: negq %rsi
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: retq
|
||||
;
|
||||
; X32-NOBMI2-LABEL: t5_cse:
|
||||
|
|
Loading…
Reference in New Issue