[X86] Promote i16 SRA instructions to i32

We already promote SRL and SHL to i32. This will sometimes introduce sign extends, which might be harder to deal with than the zero extends we use for promoting SRL. I ran this through some of our internal benchmark lists and didn't see any major regressions. I think there might be some DAG combine improvement opportunities in the test changes here. Differential Revision: https://reviews.llvm.org/D60278 llvm-svn: 357743
2019-04-05 06:32:50 +00:00 · 2019-04-05 06:32:50 +00:00 · 94f1772b1e
parent c39636cc2c
commit 94f1772b1e
6 changed files with 33 additions and 26 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -42796,6 +42796,7 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
    case ISD::SHL:
+    case ISD::SRA:
    case ISD::SRL:
    case ISD::SUB:
    case ISD::ADD:
@ -42871,6 +42872,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
  case ISD::ANY_EXTEND:
    break;
  case ISD::SHL:
+  case ISD::SRA:
  case ISD::SRL: {
    SDValue N0 = Op.getOperand(0);
    // Look out for (store (shl (load), x)).
--- a/llvm/test/CodeGen/X86/dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
@ -108,8 +108,9 @@ entry:
 define i64 @fun8(i16 zeroext %v) {
 ; CHECK-LABEL: fun8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sarw $4, %di
-; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    movswl %di, %eax
+; CHECK-NEXT:    shrl $4, %eax
+; CHECK-NEXT:    movzwl %ax, %eax
 ; CHECK-NEXT:    shlq $4, %rax
 ; CHECK-NEXT:    retq
 entry:
--- a/llvm/test/CodeGen/X86/iabs.ll
+++ b/llvm/test/CodeGen/X86/iabs.ll
@ -37,9 +37,9 @@ define i8 @test_i8(i8 %a) nounwind {
 define i16 @test_i16(i16 %a) nounwind {
 ; X86-NO-CMOV-LABEL: test_i16:
 ; X86-NO-CMOV:       # %bb.0:
-; X86-NO-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-CMOV-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 ; X86-NO-CMOV-NEXT:    movl %eax, %ecx
-; X86-NO-CMOV-NEXT:    sarw $15, %cx
+; X86-NO-CMOV-NEXT:    sarl $15, %ecx
 ; X86-NO-CMOV-NEXT:    addl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    # kill: def $ax killed $ax killed $eax
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@ -297,16 +297,16 @@ define <8 x i16> @ashr_op0_constant(i16* %p) nounwind {
 ; SSE-LABEL: ashr_op0_constant:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movb (%rdi), %cl
-; SSE-NEXT:    movw $-42, %ax
-; SSE-NEXT:    sarw %cl, %ax
+; SSE-NEXT:    movl $-42, %eax
+; SSE-NEXT:    sarl %cl, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ashr_op0_constant:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movb (%rdi), %cl
-; AVX-NEXT:    movw $-42, %ax
-; AVX-NEXT:    sarw %cl, %ax
+; AVX-NEXT:    movl $-42, %eax
+; AVX-NEXT:    sarl %cl, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
  %x = load i16, i16* %p
@ -318,15 +318,15 @@ define <8 x i16> @ashr_op0_constant(i16* %p) nounwind {
 define <8 x i16> @ashr_op1_constant(i16* %p) nounwind {
 ; SSE-LABEL: ashr_op1_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzwl (%rdi), %eax
-; SSE-NEXT:    sarw $7, %ax
+; SSE-NEXT:    movswl (%rdi), %eax
+; SSE-NEXT:    sarl $7, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ashr_op1_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzwl (%rdi), %eax
-; AVX-NEXT:    sarw $7, %ax
+; AVX-NEXT:    movswl (%rdi), %eax
+; AVX-NEXT:    sarl $7, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
  %x = load i16, i16* %p
@ -365,10 +365,11 @@ define <8 x i16> @sdiv_op1_constant(i16* %p) nounwind {
 ; SSE-NEXT:    shrl $16, %ecx
 ; SSE-NEXT:    addl %eax, %ecx
 ; SSE-NEXT:    movzwl %cx, %eax
-; SSE-NEXT:    sarw $5, %cx
+; SSE-NEXT:    movswl %ax, %ecx
 ; SSE-NEXT:    shrl $15, %eax
-; SSE-NEXT:    addl %ecx, %eax
-; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    sarl $5, %ecx
+; SSE-NEXT:    addl %eax, %ecx
+; SSE-NEXT:    movd %ecx, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sdiv_op1_constant:
@ -378,10 +379,11 @@ define <8 x i16> @sdiv_op1_constant(i16* %p) nounwind {
 ; AVX-NEXT:    shrl $16, %ecx
 ; AVX-NEXT:    addl %eax, %ecx
 ; AVX-NEXT:    movzwl %cx, %eax
-; AVX-NEXT:    sarw $5, %cx
+; AVX-NEXT:    movswl %ax, %ecx
 ; AVX-NEXT:    shrl $15, %eax
-; AVX-NEXT:    addl %ecx, %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    sarl $5, %ecx
+; AVX-NEXT:    addl %eax, %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm0
 ; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = sdiv i16 %x, 42
--- a/llvm/test/CodeGen/X86/pr32420.ll
+++ b/llvm/test/CodeGen/X86/pr32420.ll
@ -14,12 +14,14 @@ define i32 @PR32420() {
 ; CHECK-NEXT:    movzwl (%rcx), %eax
 ; CHECK-NEXT:    movl %eax, %edx
 ; CHECK-NEXT:    shll $12, %edx
-; CHECK-NEXT:    sarw $12, %dx
+; CHECK-NEXT:    movswl %dx, %edx
+; CHECK-NEXT:    shrl $12, %edx
 ; CHECK-NEXT:    movq _b@{{.*}}(%rip), %rsi
 ; CHECK-NEXT:    orw (%rsi), %dx
 ; CHECK-NEXT:    movl (%rcx), %ecx
 ; CHECK-NEXT:    shll $12, %ecx
-; CHECK-NEXT:    sarw $12, %cx
+; CHECK-NEXT:    movswl %cx, %ecx
+; CHECK-NEXT:    shrl $12, %ecx
 ; CHECK-NEXT:    andl %edx, %ecx
 ; CHECK-NEXT:    movw %cx, (%rsi)
 ; CHECK-NEXT:    retq
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@ -1045,10 +1045,10 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind sp
 ; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    cmpq $.Lslh_ret_addr23, %rcx
 ; X64-NEXT:    cmovneq %r15, %rax
-; X64-NEXT:    movzwl (%rbx), %ecx
-; X64-NEXT:    sarw $7, %cx
-; X64-NEXT:    movzwl %cx, %edi
+; X64-NEXT:    movswl (%rbx), %edi
+; X64-NEXT:    shrl $7, %edi
 ; X64-NEXT:    notl %edi
+; X64-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
 ; X64-NEXT:    orl %eax, %edi
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
@ -1098,10 +1098,10 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind sp
 ; X64-LFENCE-NEXT:    movl (%rbx), %edi
 ; X64-LFENCE-NEXT:    shll $7, %edi
 ; X64-LFENCE-NEXT:    callq sink
-; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
-; X64-LFENCE-NEXT:    sarw $7, %ax
-; X64-LFENCE-NEXT:    movzwl %ax, %edi
+; X64-LFENCE-NEXT:    movswl (%rbx), %edi
+; X64-LFENCE-NEXT:    shrl $7, %edi
 ; X64-LFENCE-NEXT:    notl %edi
+; X64-LFENCE-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
 ; X64-LFENCE-NEXT:    callq sink
 ; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
 ; X64-LFENCE-NEXT:    rolw $9, %ax