diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index b161763263a6..1a55f8462fec 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -178,6 +178,51 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
   ret i32 %tmp
 }
 
+define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
+; X86-FAST-LABEL: var_shift_i32_optsize:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: shldl %cl, %edx, %eax
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i32_optsize:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %edx
+; X86-SLOW-NEXT: shldl %cl, %esi, %edx
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: je .LBB3_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: movl %edx, %eax
+; X86-SLOW-NEXT: .LBB3_2:
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: var_shift_i32_optsize:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edx, %ecx
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-FAST-NEXT: shldl %cl, %esi, %eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i32_optsize:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: movl %edi, %eax
+; X64-SLOW-NEXT: shldl %cl, %esi, %eax
+; X64-SLOW-NEXT: andb $31, %cl
+; X64-SLOW-NEXT: cmovel %edi, %eax
+; X64-SLOW-NEXT: retq
+  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
+  ret i32 %tmp
+}
+
 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i64:
 ; X86-FAST: # %bb.0:
@@ -198,36 +243,36 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-NEXT: shll %cl, %edi
 ; X86-FAST-NEXT: shldl %cl, %eax, %ebp
 ; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: je .LBB3_2
+; X86-FAST-NEXT: je .LBB4_2
 ; X86-FAST-NEXT: # %bb.1:
 ; X86-FAST-NEXT: movl %edi, %ebp
 ; X86-FAST-NEXT: xorl %edi, %edi
-; X86-FAST-NEXT: .LBB3_2:
+; X86-FAST-NEXT: .LBB4_2:
 ; X86-FAST-NEXT: movb $64, %cl
 ; X86-FAST-NEXT: subb %bl, %cl
 ; X86-FAST-NEXT: movl %edx, %esi
 ; X86-FAST-NEXT: shrl %cl, %esi
 ; X86-FAST-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
 ; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: jne .LBB3_3
+; X86-FAST-NEXT: jne .LBB4_3
 ; X86-FAST-NEXT: # %bb.4:
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT: movl (%esp), %ecx # 4-byte Reload
 ; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: jne .LBB3_6
-; X86-FAST-NEXT: jmp .LBB3_7
-; X86-FAST-NEXT: .LBB3_3:
+; X86-FAST-NEXT: jne .LBB4_6
+; X86-FAST-NEXT: jmp .LBB4_7
+; X86-FAST-NEXT: .LBB4_3:
 ; X86-FAST-NEXT: movl %esi, %ecx
 ; X86-FAST-NEXT: xorl %esi, %esi
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: je .LBB3_7
-; X86-FAST-NEXT: .LBB3_6:
+; X86-FAST-NEXT: je .LBB4_7
+; X86-FAST-NEXT: .LBB4_6:
 ; X86-FAST-NEXT: orl %esi, %ebp
 ; X86-FAST-NEXT: orl %ecx, %edi
 ; X86-FAST-NEXT: movl %edi, %eax
 ; X86-FAST-NEXT: movl %ebp, %edx
-; X86-FAST-NEXT: .LBB3_7:
+; X86-FAST-NEXT: .LBB4_7:
 ; X86-FAST-NEXT: addl $4, %esp
 ; X86-FAST-NEXT: popl %esi
 ; X86-FAST-NEXT: popl %edi
@@ -261,11 +306,11 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT: testb %dl, %dl
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: je .LBB3_2
+; X86-SLOW-NEXT: je .LBB4_2
 ; X86-SLOW-NEXT: # %bb.1:
 ; X86-SLOW-NEXT: orl %eax, %ebp
 ; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB3_2:
+; X86-SLOW-NEXT: .LBB4_2:
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-SLOW-NEXT: movl %ebp, %eax
 ; X86-SLOW-NEXT: movl %ebx, %ecx
@@ -276,41 +321,41 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT: negb %cl
 ; X86-SLOW-NEXT: shrl %cl, %edi
 ; X86-SLOW-NEXT: testb %ch, %ch
-; X86-SLOW-NEXT: je .LBB3_4
+; X86-SLOW-NEXT: je .LBB4_4
 ; X86-SLOW-NEXT: # %bb.3:
 ; X86-SLOW-NEXT: orl %edi, %eax
 ; X86-SLOW-NEXT: movl %eax, %ebp
-; X86-SLOW-NEXT: .LBB3_4:
+; X86-SLOW-NEXT: .LBB4_4:
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT: movl %eax, %edi
 ; X86-SLOW-NEXT: movl %ebx, %ecx
 ; X86-SLOW-NEXT: shll %cl, %edi
 ; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: je .LBB3_6
+; X86-SLOW-NEXT: je .LBB4_6
 ; X86-SLOW-NEXT: # %bb.5:
 ; X86-SLOW-NEXT: movl %edi, %ebp
 ; X86-SLOW-NEXT: xorl %edi, %edi
-; X86-SLOW-NEXT: .LBB3_6:
+; X86-SLOW-NEXT: .LBB4_6:
 ; X86-SLOW-NEXT: movb %dh, %cl
 ; X86-SLOW-NEXT: shrl %cl, %esi
 ; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: jne .LBB3_7
+; X86-SLOW-NEXT: jne .LBB4_7
 ; X86-SLOW-NEXT: # %bb.8:
 ; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
 ; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: jne .LBB3_10
-; X86-SLOW-NEXT: jmp .LBB3_11
-; X86-SLOW-NEXT: .LBB3_7:
+; X86-SLOW-NEXT: jne .LBB4_10
+; X86-SLOW-NEXT: jmp .LBB4_11
+; X86-SLOW-NEXT: .LBB4_7:
 ; X86-SLOW-NEXT: movl %esi, %ecx
 ; X86-SLOW-NEXT: xorl %esi, %esi
 ; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB3_11
-; X86-SLOW-NEXT: .LBB3_10:
+; X86-SLOW-NEXT: je .LBB4_11
+; X86-SLOW-NEXT: .LBB4_10:
 ; X86-SLOW-NEXT: orl %esi, %ebp
 ; X86-SLOW-NEXT: orl %ecx, %edi
 ; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: .LBB3_11:
+; X86-SLOW-NEXT: .LBB4_11:
 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-SLOW-NEXT: addl $8, %esp
 ; X86-SLOW-NEXT: popl %esi
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 5bd31dd27ccc..09b8da577fba 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -177,6 +177,51 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
   ret i32 %tmp
 }
 
+define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
+; X86-FAST-LABEL: var_shift_i32_optsize:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: shrdl %cl, %edx, %eax
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i32_optsize:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %edx
+; X86-SLOW-NEXT: shrdl %cl, %esi, %edx
+; X86-SLOW-NEXT: andb $31, %cl
+; X86-SLOW-NEXT: je .LBB3_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: movl %edx, %eax
+; X86-SLOW-NEXT: .LBB3_2:
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: var_shift_i32_optsize:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edx, %ecx
+; X64-FAST-NEXT: movl %esi, %eax
+; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-FAST-NEXT: shrdl %cl, %edi, %eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i32_optsize:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: movl %esi, %eax
+; X64-SLOW-NEXT: shrdl %cl, %edi, %eax
+; X64-SLOW-NEXT: andb $31, %cl
+; X64-SLOW-NEXT: cmovel %esi, %eax
+; X64-SLOW-NEXT: retq
+  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
+  ret i32 %tmp
+}
+
 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i64:
 ; X86-FAST: # %bb.0:
@@ -198,30 +243,30 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-NEXT: shll %cl, %edi
 ; X86-FAST-NEXT: shldl %cl, %eax, %esi
 ; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: je .LBB3_2
+; X86-FAST-NEXT: je .LBB4_2
 ; X86-FAST-NEXT: # %bb.1:
 ; X86-FAST-NEXT: movl %edi, %esi
 ; X86-FAST-NEXT: xorl %edi, %edi
-; X86-FAST-NEXT: .LBB3_2:
+; X86-FAST-NEXT: .LBB4_2:
 ; X86-FAST-NEXT: movl %edx, %ebp
 ; X86-FAST-NEXT: movl %ebx, %ecx
 ; X86-FAST-NEXT: shrl %cl, %ebp
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT: shrdl %cl, %edx, %eax
 ; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: je .LBB3_4
+; X86-FAST-NEXT: je .LBB4_4
 ; X86-FAST-NEXT: # %bb.3:
 ; X86-FAST-NEXT: movl %ebp, %eax
 ; X86-FAST-NEXT: xorl %ebp, %ebp
-; X86-FAST-NEXT: .LBB3_4:
+; X86-FAST-NEXT: .LBB4_4:
 ; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: je .LBB3_6
+; X86-FAST-NEXT: je .LBB4_6
 ; X86-FAST-NEXT: # %bb.5:
 ; X86-FAST-NEXT: orl %ebp, %esi
 ; X86-FAST-NEXT: orl %eax, %edi
 ; X86-FAST-NEXT: movl %edi, (%esp) # 4-byte Spill
 ; X86-FAST-NEXT: movl %esi, %edx
-; X86-FAST-NEXT: .LBB3_6:
+; X86-FAST-NEXT: .LBB4_6:
 ; X86-FAST-NEXT: movl (%esp), %eax # 4-byte Reload
 ; X86-FAST-NEXT: addl $4, %esp
 ; X86-FAST-NEXT: popl %esi
@@ -256,11 +301,11 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT: shrl %cl, %edi
 ; X86-SLOW-NEXT: testb %ch, %ch
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: je .LBB3_2
+; X86-SLOW-NEXT: je .LBB4_2
 ; X86-SLOW-NEXT: # %bb.1:
 ; X86-SLOW-NEXT: orl %edi, %edx
 ; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB3_2:
+; X86-SLOW-NEXT: .LBB4_2:
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-SLOW-NEXT: movl %ecx, %edx
 ; X86-SLOW-NEXT: movl %ebx, %ecx
@@ -273,41 +318,41 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT: shll %cl, %edi
 ; X86-SLOW-NEXT: testb %ah, %ah
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: je .LBB3_4
+; X86-SLOW-NEXT: je .LBB4_4
 ; X86-SLOW-NEXT: # %bb.3:
 ; X86-SLOW-NEXT: orl %edx, %edi
 ; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: .LBB3_4:
+; X86-SLOW-NEXT: .LBB4_4:
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X86-SLOW-NEXT: movl %ebx, %ecx
 ; X86-SLOW-NEXT: shrl %cl, %edi
 ; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: je .LBB3_6
+; X86-SLOW-NEXT: je .LBB4_6
 ; X86-SLOW-NEXT: # %bb.5:
 ; X86-SLOW-NEXT: movl %edi, %ebp
 ; X86-SLOW-NEXT: xorl %edi, %edi
-; X86-SLOW-NEXT: .LBB3_6:
+; X86-SLOW-NEXT: .LBB4_6:
 ; X86-SLOW-NEXT: movl %eax, %ecx
 ; X86-SLOW-NEXT: shll %cl, %esi
 ; X86-SLOW-NEXT: testb $32, %al
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: jne .LBB3_7
+; X86-SLOW-NEXT: jne .LBB4_7
 ; X86-SLOW-NEXT: # %bb.8:
 ; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
 ; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: jne .LBB3_10
-; X86-SLOW-NEXT: jmp .LBB3_11
-; X86-SLOW-NEXT: .LBB3_7:
+; X86-SLOW-NEXT: jne .LBB4_10
+; X86-SLOW-NEXT: jmp .LBB4_11
+; X86-SLOW-NEXT: .LBB4_7:
 ; X86-SLOW-NEXT: movl %esi, %eax
 ; X86-SLOW-NEXT: xorl %esi, %esi
 ; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB3_11
-; X86-SLOW-NEXT: .LBB3_10:
+; X86-SLOW-NEXT: je .LBB4_11
+; X86-SLOW-NEXT: .LBB4_10:
 ; X86-SLOW-NEXT: orl %ebp, %esi
 ; X86-SLOW-NEXT: orl %edi, %eax
 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: .LBB3_11:
+; X86-SLOW-NEXT: .LBB4_11:
 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-SLOW-NEXT: addl $8, %esp
 ; X86-SLOW-NEXT: popl %esi