From 33225ef314503cfc0aa744a3199c86e9887e9eec Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 20 Jul 2017 06:19:55 +0000 Subject: [PATCH] [X86] Use SARX/SHRX/SHLX instructions for (shift x (and y, (BitWidth-1))) Fixes PR33841. llvm-svn: 308591 --- llvm/lib/Target/X86/X86InstrCompiler.td | 31 ++++++++++++++++++++ llvm/test/CodeGen/X86/pr32329.ll | 2 +- llvm/test/CodeGen/X86/shift-bmi2.ll | 39 ++++++++----------------- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index d003d027ddb9..9d188d3d5e54 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1680,6 +1680,37 @@ multiclass MaskedDoubleShiftAmountPats { defm : MaskedDoubleShiftAmountPats; defm : MaskedDoubleShiftAmountPats; +let Predicates = [HasBMI2] in { + let AddedComplexity = 1 in { + def : Pat<(sra GR32:$src1, (and GR8:$src2, immShift32)), + (SARX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(sra GR64:$src1, (and GR8:$src2, immShift64)), + (SARX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(srl GR32:$src1, (and GR8:$src2, immShift32)), + (SHRX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(srl GR64:$src1, (and GR8:$src2, immShift64)), + (SHRX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(shl GR32:$src1, (and GR8:$src2, immShift32)), + (SHLX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(shl GR64:$src1, (and GR8:$src2, immShift64)), + (SHLX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + } +} + // (anyext (setcc_carry)) -> (setcc_carry) def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; diff --git a/llvm/test/CodeGen/X86/pr32329.ll 
b/llvm/test/CodeGen/X86/pr32329.ll index f2b79b67877f..e07340bf301a 100644 --- a/llvm/test/CodeGen/X86/pr32329.ll +++ b/llvm/test/CodeGen/X86/pr32329.ll @@ -52,7 +52,7 @@ define void @foo() local_unnamed_addr { ; X86-NEXT: movl $9, %esi ; X86-NEXT: xorl %ebp, %ebp ; X86-NEXT: shldl %cl, %esi, %ebp -; X86-NEXT: shll %cl, %esi +; X86-NEXT: shlxl %ecx, %esi, %esi ; X86-NEXT: testb $32, %cl ; X86-NEXT: cmovnel %esi, %ebp ; X86-NEXT: movl $0, %ecx diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll index 98f9349274a8..f9bca503218e 100644 --- a/llvm/test/CodeGen/X86/shift-bmi2.ll +++ b/llvm/test/CodeGen/X86/shift-bmi2.ll @@ -213,16 +213,13 @@ define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone { define i32 @shl32and(i32 %t, i32 %val) nounwind { ; BMI2-LABEL: shl32and: ; BMI2: # BB#0: -; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; BMI2-NEXT: shll %cl, %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; ; BMI264-LABEL: shl32and: ; BMI264: # BB#0: -; BMI264-NEXT: movl %edi, %ecx -; BMI264-NEXT: shll %cl, %esi -; BMI264-NEXT: movl %esi, %eax +; BMI264-NEXT: shlxl %edi, %esi, %eax ; BMI264-NEXT: retq %shamt = and i32 %t, 31 %res = shl i32 %val, %shamt @@ -232,9 +229,7 @@ define i32 @shl32and(i32 %t, i32 %val) nounwind { define i64 @shl64and(i64 %t, i64 %val) nounwind { ; BMI264-LABEL: shl64and: ; BMI264: # BB#0: -; BMI264-NEXT: movl %edi, %ecx -; BMI264-NEXT: shlq %cl, %rsi -; BMI264-NEXT: movq %rsi, %rax +; BMI264-NEXT: shlxq %rdi, %rsi, %rax ; BMI264-NEXT: retq %shamt = and i64 %t, 63 %res = shl i64 %val, %shamt @@ -244,16 +239,13 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind { define i32 @lshr32and(i32 %t, i32 %val) nounwind { ; BMI2-LABEL: lshr32and: ; BMI2: # BB#0: -; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; BMI2-NEXT: shrl %cl, %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 
+; BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; ; BMI264-LABEL: lshr32and: ; BMI264: # BB#0: -; BMI264-NEXT: movl %edi, %ecx -; BMI264-NEXT: shrl %cl, %esi -; BMI264-NEXT: movl %esi, %eax +; BMI264-NEXT: shrxl %edi, %esi, %eax ; BMI264-NEXT: retq %shamt = and i32 %t, 31 %res = lshr i32 %val, %shamt @@ -263,9 +255,7 @@ define i32 @lshr32and(i32 %t, i32 %val) nounwind { define i64 @lshr64and(i64 %t, i64 %val) nounwind { ; BMI264-LABEL: lshr64and: ; BMI264: # BB#0: -; BMI264-NEXT: movl %edi, %ecx -; BMI264-NEXT: shrq %cl, %rsi -; BMI264-NEXT: movq %rsi, %rax +; BMI264-NEXT: shrxq %rdi, %rsi, %rax ; BMI264-NEXT: retq %shamt = and i64 %t, 63 %res = lshr i64 %val, %shamt @@ -275,16 +265,13 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind { define i32 @ashr32and(i32 %t, i32 %val) nounwind { ; BMI2-LABEL: ashr32and: ; BMI2: # BB#0: -; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; BMI2-NEXT: sarl %cl, %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; BMI2-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; ; BMI264-LABEL: ashr32and: ; BMI264: # BB#0: -; BMI264-NEXT: movl %edi, %ecx -; BMI264-NEXT: sarl %cl, %esi -; BMI264-NEXT: movl %esi, %eax +; BMI264-NEXT: sarxl %edi, %esi, %eax ; BMI264-NEXT: retq %shamt = and i32 %t, 31 %res = ashr i32 %val, %shamt @@ -294,9 +281,7 @@ define i32 @ashr32and(i32 %t, i32 %val) nounwind { define i64 @ashr64and(i64 %t, i64 %val) nounwind { ; BMI264-LABEL: ashr64and: ; BMI264: # BB#0: -; BMI264-NEXT: movl %edi, %ecx -; BMI264-NEXT: sarq %cl, %rsi -; BMI264-NEXT: movq %rsi, %rax +; BMI264-NEXT: sarxq %rdi, %rsi, %rax ; BMI264-NEXT: retq %shamt = and i64 %t, 63 %res = ashr i64 %val, %shamt