forked from OSchip/llvm-project
[X86] Artificially lower the complexity of the scalar ANDN patterns so that AND with immediate will match first.
This allows the immediate to be folded into the and instead of being forced to move into a register. This can sometimes result in shorter encodings since the and can sign extend an immediate. This also allows us to match an and to a movzx after a not. This can cause an extra move if the input to the separate NOT has an additional user which requires a copy before the NOT. llvm-svn: 324260
This commit is contained in:
parent
c0f116e060
commit
9a06f24704
|
@ -1285,12 +1285,13 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
|
|||
Sched<[WriteALULd, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasBMI], Defs = [EFLAGS] in {
|
||||
// Complexity is reduced to give and with immediate a chance to match first.
|
||||
let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
|
||||
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8PS, VEX_4V;
|
||||
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8PS, VEX_4V, VEX_W;
|
||||
}
|
||||
|
||||
let Predicates = [HasBMI] in {
|
||||
let Predicates = [HasBMI], AddedComplexity = -6 in {
|
||||
def : Pat<(and (not GR32:$src1), GR32:$src2),
|
||||
(ANDN32rr GR32:$src1, GR32:$src2)>;
|
||||
def : Pat<(and (not GR64:$src1), GR64:$src2),
|
||||
|
|
|
@ -6687,9 +6687,8 @@ define i32 @mask16_zext(i16 %x) {
|
|||
;
|
||||
; SKX-LABEL: mask16_zext:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: movl $65535, %eax # imm = 0xFFFF
|
||||
; SKX-NEXT: # sched: [1:0.25]
|
||||
; SKX-NEXT: andnl %eax, %edi, %eax # sched: [1:0.50]
|
||||
; SKX-NEXT: notl %edi # sched: [1:0.25]
|
||||
; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
%m0 = bitcast i16 %x to <16 x i1>
|
||||
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
|
||||
|
|
|
@ -188,8 +188,8 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
|
|||
define i1 @and_cmp_const(i32 %x) {
|
||||
; CHECK-LABEL: and_cmp_const:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl $43, %eax
|
||||
; CHECK-NEXT: andnl %eax, %edi, %eax
|
||||
; CHECK-NEXT: notl %edi
|
||||
; CHECK-NEXT: andl $43, %edi
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: retq
|
||||
%and = and i32 %x, 43
|
||||
|
|
|
@ -12,24 +12,28 @@
|
|||
define void @foo() {
|
||||
; X86-LABEL: foo:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: movl d, %eax
|
||||
; X86-NEXT: movl d+4, %ecx
|
||||
; X86-NEXT: movl $701685459, %edx # imm = 0x29D2DED3
|
||||
; X86-NEXT: andnl %edx, %ecx, %ecx
|
||||
; X86-NEXT: movl $-564453154, %edx # imm = 0xDE5B20DE
|
||||
; X86-NEXT: andnl %edx, %eax, %edx
|
||||
; X86-NEXT: shrdl $21, %ecx, %edx
|
||||
; X86-NEXT: shrl $21, %ecx
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: .cfi_offset %esi, -8
|
||||
; X86-NEXT: movl d, %ecx
|
||||
; X86-NEXT: notl %ecx
|
||||
; X86-NEXT: movl d+4, %edx
|
||||
; X86-NEXT: notl %edx
|
||||
; X86-NEXT: andl $701685459, %edx # imm = 0x29D2DED3
|
||||
; X86-NEXT: andl $-564453154, %ecx # imm = 0xDE5B20DE
|
||||
; X86-NEXT: shrdl $21, %edx, %ecx
|
||||
; X86-NEXT: shrl $21, %edx
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: testb %al, %al
|
||||
; X86-NEXT: cmovnel %ecx, %edx
|
||||
; X86-NEXT: cmovnel %eax, %ecx
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: cmovnel %eax, %esi
|
||||
; X86-NEXT: cmovel %ecx, %edx
|
||||
; X86-NEXT: andl $-2, %edx
|
||||
; X86-NEXT: addl $7, %edx
|
||||
; X86-NEXT: adcxl %eax, %ecx
|
||||
; X86-NEXT: pushl %ecx
|
||||
; X86-NEXT: adcxl %eax, %esi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X86-NEXT: pushl %edx
|
||||
; X86-NEXT: .cfi_adjust_cfa_offset 4
|
||||
|
@ -37,12 +41,13 @@ define void @foo() {
|
|||
; X86-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X86-NEXT: pushl $0
|
||||
; X86-NEXT: .cfi_adjust_cfa_offset 4
|
||||
; X86-NEXT: calll __divdi3
|
||||
; X86-NEXT: calll __divdi3@PLT
|
||||
; X86-NEXT: addl $16, %esp
|
||||
; X86-NEXT: .cfi_adjust_cfa_offset -16
|
||||
; X86-NEXT: orl %eax, %edx
|
||||
; X86-NEXT: setne {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: popl %eax
|
||||
; X86-NEXT: addl $4, %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: foo:
|
||||
|
|
Loading…
Reference in New Issue