forked from OSchip/llvm-project
X86ISelDAGToDAG: Transform TEST + MOV64ri to SHR + TEST
Optimize a pattern where a sequence of 8, 16, or 32 bits is tested for zero: LLVM normalizes this towards an `AND` with a mask, which is usually good, but does not work well on X86 when the mask does not fit into the 32-bit sign-extended immediate of `TEST` — it then has to be materialized into a scratch register with `movabsq`. This DAGToDAG peephole transforms sequences like: ``` movabsq $562941363486720, %rax # imm = 0x1FFFE00000000 testq %rax, %rdi ``` to ``` shrq $33, %rdi testw %di, %di ``` The result has a shorter encoding and saves a register if the tested value isn't used otherwise. Differential Revision: https://reviews.llvm.org/D121320
This commit is contained in:
parent
baae814377
commit
84ef62126a
|
@ -5621,12 +5621,52 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
onlyUsesZeroFlag(SDValue(Node, 0))) {
|
||||
unsigned ShiftOpcode = ISD::DELETED_NODE;
|
||||
unsigned ShiftAmt;
|
||||
if (isMask_64(~Mask)) {
|
||||
ShiftOpcode = X86::SHR64ri;
|
||||
ShiftAmt = countTrailingZeros(Mask);
|
||||
} else if (isMask_64(Mask)) {
|
||||
ShiftOpcode = X86::SHL64ri;
|
||||
ShiftAmt = countLeadingZeros(Mask);
|
||||
unsigned SubRegIdx;
|
||||
MVT SubRegVT;
|
||||
unsigned TestOpcode;
|
||||
if (isShiftedMask_64(Mask)) {
|
||||
unsigned LeadingZeros = countLeadingZeros(Mask);
|
||||
unsigned TrailingZeros = countTrailingZeros(Mask);
|
||||
// If the mask covers the most significant bit, then we can replace
|
||||
// TEST+AND with a SHR and check eflags.
|
||||
// This emits a redundant TEST which is subsequently eliminated.
|
||||
if (LeadingZeros == 0) {
|
||||
ShiftOpcode = X86::SHR64ri;
|
||||
ShiftAmt = TrailingZeros;
|
||||
SubRegIdx = 0;
|
||||
TestOpcode = X86::TEST64rr;
|
||||
// If the mask covers the least significant bit, then we can replace
|
||||
// TEST+AND with a SHL and check eflags.
|
||||
// This emits a redundant TEST which is subsequently eliminated.
|
||||
} else if (TrailingZeros == 0) {
|
||||
ShiftOpcode = X86::SHL64ri;
|
||||
ShiftAmt = LeadingZeros;
|
||||
SubRegIdx = 0;
|
||||
TestOpcode = X86::TEST64rr;
|
||||
} else if (MaskC->hasOneUse()) {
|
||||
// If the mask is 8, 16, or 32 bits wide, then we can replace it with
|
||||
// a SHR and a TEST8rr/TEST16rr/TEST32rr.
|
||||
unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
|
||||
if (PopCount == 8) {
|
||||
ShiftOpcode = X86::SHR64ri;
|
||||
ShiftAmt = TrailingZeros;
|
||||
SubRegIdx = X86::sub_8bit;
|
||||
SubRegVT = MVT::i8;
|
||||
TestOpcode = X86::TEST8rr;
|
||||
} else if (PopCount == 16) {
|
||||
ShiftOpcode = X86::SHR64ri;
|
||||
ShiftAmt = TrailingZeros;
|
||||
SubRegIdx = X86::sub_16bit;
|
||||
SubRegVT = MVT::i16;
|
||||
TestOpcode = X86::TEST16rr;
|
||||
} else if (PopCount == 32) {
|
||||
ShiftOpcode = X86::SHR64ri;
|
||||
ShiftAmt = TrailingZeros;
|
||||
SubRegIdx = X86::sub_32bit;
|
||||
SubRegVT = MVT::i32;
|
||||
TestOpcode = X86::TEST32rr;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ShiftOpcode != ISD::DELETED_NODE) {
|
||||
SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);
|
||||
|
@ -5634,8 +5674,12 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,
|
||||
N0.getOperand(0), ShiftC),
|
||||
0);
|
||||
if (SubRegIdx != 0) {
|
||||
Shift =
|
||||
CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift);
|
||||
}
|
||||
MachineSDNode *Test =
|
||||
CurDAG->getMachineNode(X86::TEST64rr, dl, MVT::i32, Shift, Shift);
|
||||
CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift);
|
||||
ReplaceNode(Node, Test);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -598,9 +598,8 @@ define i32 @lowmask_i32_mask8(i32 %val) {
|
|||
define i1 @shifted_mask64_testb(i64 %a) {
|
||||
; CHECK-LABEL: shifted_mask64_testb:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movabsq $287104476244869120, %rax # encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03]
|
||||
; CHECK-NEXT: # imm = 0x3FC000000000000
|
||||
; CHECK-NEXT: testq %rax, %rdi # encoding: [0x48,0x85,0xc7]
|
||||
; CHECK-NEXT: shrq $50, %rdi # encoding: [0x48,0xc1,0xef,0x32]
|
||||
; CHECK-NEXT: testb %dil, %dil # encoding: [0x40,0x84,0xff]
|
||||
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
|
||||
|
@ -611,9 +610,8 @@ define i1 @shifted_mask64_testb(i64 %a) {
|
|||
define i1 @shifted_mask64_testw(i64 %a) {
|
||||
; CHECK-LABEL: shifted_mask64_testw:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movabsq $562941363486720, %rax # encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0xfe,0xff,0x01,0x00]
|
||||
; CHECK-NEXT: # imm = 0x1FFFE00000000
|
||||
; CHECK-NEXT: testq %rax, %rdi # encoding: [0x48,0x85,0xc7]
|
||||
; CHECK-NEXT: shrq $33, %rdi # encoding: [0x48,0xc1,0xef,0x21]
|
||||
; CHECK-NEXT: testw %di, %di # encoding: [0x66,0x85,0xff]
|
||||
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%v0 = and i64 %a, 562941363486720 ; 0xffff << 33
|
||||
|
@ -624,9 +622,8 @@ define i1 @shifted_mask64_testw(i64 %a) {
|
|||
define i1 @shifted_mask64_testl(i64 %a) {
|
||||
; CHECK-LABEL: shifted_mask64_testl:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movabsq $549755813760, %rax # encoding: [0x48,0xb8,0x80,0xff,0xff,0xff,0x7f,0x00,0x00,0x00]
|
||||
; CHECK-NEXT: # imm = 0x7FFFFFFF80
|
||||
; CHECK-NEXT: testq %rax, %rdi # encoding: [0x48,0x85,0xc7]
|
||||
; CHECK-NEXT: shrq $7, %rdi # encoding: [0x48,0xc1,0xef,0x07]
|
||||
; CHECK-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
|
||||
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
|
||||
; CHECK-NEXT: retq # encoding: [0xc3]
|
||||
%v0 = and i64 %a, 549755813760 ; 0xffffffff << 7
|
||||
|
|
|
@ -472,16 +472,16 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
|
|||
; X64-BMI1-NEXT: movq %rsi, %rcx
|
||||
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-BMI1-NEXT: shlq %cl, %rdi
|
||||
; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
|
||||
; X64-BMI1-NEXT: testq %rax, %rdi
|
||||
; X64-BMI1-NEXT: shrq $16, %rdi
|
||||
; X64-BMI1-NEXT: testl %edi, %edi
|
||||
; X64-BMI1-NEXT: sete %al
|
||||
; X64-BMI1-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
|
||||
; X64-BMI2-NEXT: testq %rcx, %rax
|
||||
; X64-BMI2-NEXT: shrq $16, %rax
|
||||
; X64-BMI2-NEXT: testl %eax, %eax
|
||||
; X64-BMI2-NEXT: sete %al
|
||||
; X64-BMI2-NEXT: retq
|
||||
%t0 = lshr i64 281474976645120, %y
|
||||
|
|
|
@ -435,16 +435,16 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
|
|||
; X64-BMI1-NEXT: movq %rsi, %rcx
|
||||
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; X64-BMI1-NEXT: shrq %cl, %rdi
|
||||
; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
|
||||
; X64-BMI1-NEXT: testq %rax, %rdi
|
||||
; X64-BMI1-NEXT: shrq $16, %rdi
|
||||
; X64-BMI1-NEXT: testl %edi, %edi
|
||||
; X64-BMI1-NEXT: sete %al
|
||||
; X64-BMI1-NEXT: retq
|
||||
;
|
||||
; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
|
||||
; X64-BMI2: # %bb.0:
|
||||
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
|
||||
; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000
|
||||
; X64-BMI2-NEXT: testq %rcx, %rax
|
||||
; X64-BMI2-NEXT: shrq $16, %rax
|
||||
; X64-BMI2-NEXT: testl %eax, %eax
|
||||
; X64-BMI2-NEXT: sete %al
|
||||
; X64-BMI2-NEXT: retq
|
||||
%t0 = shl i64 281474976645120, %y
|
||||
|
|
Loading…
Reference in New Issue