[X86] Add XOP vector shift by scalar amount tests
Helps improve test coverage of the XOP modes in X86TargetLowering::isVectorShiftByScalarCheap
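For context: the pattern of interest is a vector shift where every lane uses the same scalar amount (the shift count is splatted across the vector). X86TargetLowering::isVectorShiftByScalarCheap is the target hook consulted when deciding whether such uniform-amount vector shifts are cheap to keep in vector form, and the +xop RUN line added below exercises the XOP-specific paths (XOP provides per-lane variable shifts such as vpshld, visible in the new checks). A minimal, illustrative IR sketch of that shape (not taken from this test; the function name and types are made up):

define <4 x i32> @shl_v4i32_by_scalar(<4 x i32> %x, i32 %amt) {
  ; Splat the scalar shift amount into every lane of a vector.
  %ins = insertelement <4 x i32> undef, i32 %amt, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  ; Shift all four elements left by that same amount.
  %shl = shl <4 x i32> %x, %splat
  ret <4 x i32> %shl
}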
commit d7258c6a83 (parent c7be6a86f4)
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefixes=ALL,XOP
 
 ; PR37428 - https://bugs.llvm.org/show_bug.cgi?id=37428
 ; This is a larger-than-usual regression test to verify that several backend
@@ -339,6 +340,135 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture
 ; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1
 ; AVX2-NEXT: movl %r8d, %ecx
 ; AVX2-NEXT: jmp .LBB0_8
+;
+; XOP-LABEL: vector_variable_shift_left_loop:
+; XOP: # %bb.0: # %entry
+; XOP-NEXT: subq $24, %rsp
+; XOP-NEXT: testl %edx, %edx
+; XOP-NEXT: jle .LBB0_9
+; XOP-NEXT: # %bb.1: # %for.body.preheader
+; XOP-NEXT: movl %ecx, %r9d
+; XOP-NEXT: movl %edx, %eax
+; XOP-NEXT: cmpl $31, %edx
+; XOP-NEXT: ja .LBB0_3
+; XOP-NEXT: # %bb.2:
+; XOP-NEXT: xorl %edx, %edx
+; XOP-NEXT: jmp .LBB0_6
+; XOP-NEXT: .LBB0_3: # %vector.ph
+; XOP-NEXT: movl %eax, %edx
+; XOP-NEXT: andl $-32, %edx
+; XOP-NEXT: vmovd %r9d, %xmm0
+; XOP-NEXT: vmovd %r8d, %xmm1
+; XOP-NEXT: xorl %ecx, %ecx
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; XOP-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm13 = xmm0[0],zero,xmm0[1],zero
+; XOP-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm14 = xmm1[0],zero,xmm1[1],zero
+; XOP-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; XOP-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm0[0],zero,xmm0[1],zero
+; XOP-NEXT: vpxor %xmm11, %xmm11, %xmm11
+; XOP-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
+; XOP-NEXT: .p2align 4, 0x90
+; XOP-NEXT: .LBB0_4: # %vector.body
+; XOP-NEXT: # =>This Inner Loop Header: Depth=1
+; XOP-NEXT: vpmovzxdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; XOP-NEXT: # xmm1 = mem[0],zero,mem[1],zero
+; XOP-NEXT: vpmovzxdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; XOP-NEXT: # xmm2 = mem[0],zero,mem[1],zero
+; XOP-NEXT: vmovq {{.*#+}} xmm3 = mem[0],zero
+; XOP-NEXT: vmovq {{.*#+}} xmm4 = mem[0],zero
+; XOP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
+; XOP-NEXT: vmovq {{.*#+}} xmm6 = mem[0],zero
+; XOP-NEXT: vpcomeqb %xmm11, %xmm3, %xmm3
+; XOP-NEXT: vpmovsxbd %xmm3, %xmm7
+; XOP-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; XOP-NEXT: vpmovsxbd %xmm3, %xmm3
+; XOP-NEXT: vpcomeqb %xmm11, %xmm4, %xmm4
+; XOP-NEXT: vpmovsxbd %xmm4, %xmm8
+; XOP-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
+; XOP-NEXT: vpmovsxbd %xmm4, %xmm4
+; XOP-NEXT: vpcomeqb %xmm11, %xmm5, %xmm5
+; XOP-NEXT: vmovdqu (%rdi,%rcx,4), %xmm9
+; XOP-NEXT: vpslld %xmm2, %xmm9, %xmm10
+; XOP-NEXT: vpslld %xmm1, %xmm9, %xmm0
+; XOP-NEXT: vblendvps %xmm7, %xmm10, %xmm0, %xmm9
+; XOP-NEXT: vpmovsxbd %xmm5, %xmm7
+; XOP-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
+; XOP-NEXT: vpmovsxbd %xmm5, %xmm5
+; XOP-NEXT: vpcomeqb %xmm11, %xmm6, %xmm6
+; XOP-NEXT: vmovdqu 16(%rdi,%rcx,4), %xmm0
+; XOP-NEXT: vpslld %xmm2, %xmm0, %xmm2
+; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: vpmovsxbd %xmm6, %xmm1
+; XOP-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[1,1,2,3]
+; XOP-NEXT: vpmovsxbd %xmm6, %xmm6
+; XOP-NEXT: vblendvps %xmm3, %xmm2, %xmm0, %xmm10
+; XOP-NEXT: vmovdqu 32(%rdi,%rcx,4), %xmm2
+; XOP-NEXT: vpslld %xmm15, %xmm2, %xmm3
+; XOP-NEXT: vpslld %xmm14, %xmm2, %xmm2
+; XOP-NEXT: vblendvps %xmm8, %xmm3, %xmm2, %xmm8
+; XOP-NEXT: vmovdqu 48(%rdi,%rcx,4), %xmm3
+; XOP-NEXT: vpslld %xmm15, %xmm3, %xmm0
+; XOP-NEXT: vpslld %xmm14, %xmm3, %xmm3
+; XOP-NEXT: vblendvps %xmm4, %xmm0, %xmm3, %xmm0
+; XOP-NEXT: vmovdqu 64(%rdi,%rcx,4), %xmm3
+; XOP-NEXT: vpslld %xmm13, %xmm3, %xmm4
+; XOP-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; XOP-NEXT: vpslld %xmm2, %xmm3, %xmm3
+; XOP-NEXT: vblendvps %xmm7, %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vmovdqu 80(%rdi,%rcx,4), %xmm4
+; XOP-NEXT: vpslld %xmm13, %xmm4, %xmm7
+; XOP-NEXT: vpslld %xmm2, %xmm4, %xmm4
+; XOP-NEXT: vblendvps %xmm5, %xmm7, %xmm4, %xmm4
+; XOP-NEXT: vmovdqu 96(%rdi,%rcx,4), %xmm5
+; XOP-NEXT: vpslld %xmm12, %xmm5, %xmm7
+; XOP-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; XOP-NEXT: vpslld %xmm2, %xmm5, %xmm5
+; XOP-NEXT: vblendvps %xmm1, %xmm7, %xmm5, %xmm1
+; XOP-NEXT: vmovdqu 112(%rdi,%rcx,4), %xmm5
+; XOP-NEXT: vpslld %xmm12, %xmm5, %xmm7
+; XOP-NEXT: vpslld %xmm2, %xmm5, %xmm5
+; XOP-NEXT: vblendvps %xmm6, %xmm7, %xmm5, %xmm5
+; XOP-NEXT: vmovups %xmm9, (%rdi,%rcx,4)
+; XOP-NEXT: vmovups %xmm10, 16(%rdi,%rcx,4)
+; XOP-NEXT: vmovups %xmm8, 32(%rdi,%rcx,4)
+; XOP-NEXT: vmovups %xmm0, 48(%rdi,%rcx,4)
+; XOP-NEXT: vmovups %xmm3, 64(%rdi,%rcx,4)
+; XOP-NEXT: vmovups %xmm4, 80(%rdi,%rcx,4)
+; XOP-NEXT: vmovups %xmm1, 96(%rdi,%rcx,4)
+; XOP-NEXT: vmovups %xmm5, 112(%rdi,%rcx,4)
+; XOP-NEXT: addq $32, %rcx
+; XOP-NEXT: cmpq %rcx, %rdx
+; XOP-NEXT: jne .LBB0_4
+; XOP-NEXT: # %bb.5: # %middle.block
+; XOP-NEXT: cmpq %rax, %rdx
+; XOP-NEXT: jne .LBB0_6
+; XOP-NEXT: .LBB0_9: # %for.cond.cleanup
+; XOP-NEXT: addq $24, %rsp
+; XOP-NEXT: vzeroupper
+; XOP-NEXT: retq
+; XOP-NEXT: .p2align 4, 0x90
+; XOP-NEXT: .LBB0_8: # %for.body
+; XOP-NEXT: # in Loop: Header=BB0_6 Depth=1
+; XOP-NEXT: # kill: def $cl killed $cl killed $ecx
+; XOP-NEXT: shll %cl, (%rdi,%rdx,4)
+; XOP-NEXT: incq %rdx
+; XOP-NEXT: cmpq %rdx, %rax
+; XOP-NEXT: je .LBB0_9
+; XOP-NEXT: .LBB0_6: # %for.body
+; XOP-NEXT: # =>This Inner Loop Header: Depth=1
+; XOP-NEXT: cmpb $0, (%rsi,%rdx)
+; XOP-NEXT: movl %r9d, %ecx
+; XOP-NEXT: je .LBB0_8
+; XOP-NEXT: # %bb.7: # %for.body
+; XOP-NEXT: # in Loop: Header=BB0_6 Depth=1
+; XOP-NEXT: movl %r8d, %ecx
+; XOP-NEXT: jmp .LBB0_8
 entry:
 %cmp12 = icmp sgt i32 %count, 0
 br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
@@ -546,6 +676,35 @@ define void @vector_variable_shift_left_loop_simpler(i32* nocapture %arr, i8* no
 ; AVX2-NEXT: jne .LBB1_2
 ; AVX2-NEXT: .LBB1_3: # %exit
 ; AVX2-NEXT: retq
+;
+; XOP-LABEL: vector_variable_shift_left_loop_simpler:
+; XOP: # %bb.0: # %entry
+; XOP-NEXT: testl %edx, %edx
+; XOP-NEXT: jle .LBB1_3
+; XOP-NEXT: # %bb.1: # %vector.ph
+; XOP-NEXT: movl %edx, %eax
+; XOP-NEXT: andl $-4, %eax
+; XOP-NEXT: vmovd %ecx, %xmm0
+; XOP-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; XOP-NEXT: vmovd %r8d, %xmm1
+; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; XOP-NEXT: vmovd %r9d, %xmm2
+; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; XOP-NEXT: xorl %ecx, %ecx
+; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOP-NEXT: .p2align 4, 0x90
+; XOP-NEXT: .LBB1_2: # %vector.body
+; XOP-NEXT: # =>This Inner Loop Header: Depth=1
+; XOP-NEXT: vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; XOP-NEXT: vpcomeqd %xmm3, %xmm4, %xmm4
+; XOP-NEXT: vblendvps %xmm4, %xmm0, %xmm1, %xmm4
+; XOP-NEXT: vpshld %xmm4, %xmm2, %xmm4
+; XOP-NEXT: vmovdqu %xmm4, (%rdi,%rcx,4)
+; XOP-NEXT: addq $4, %rcx
+; XOP-NEXT: cmpq %rcx, %rax
+; XOP-NEXT: jne .LBB1_2
+; XOP-NEXT: .LBB1_3: # %exit
+; XOP-NEXT: retq
 entry:
 %cmp16 = icmp sgt i32 %count, 0
 %wide.trip.count = zext i32 %count to i64