forked from OSchip/llvm-project
[X86][SSE] Update PMULDQ schedule tests to survive more aggressive SimplifyDemandedBits
llvm-svn: 345136
This commit is contained in:
parent
4a7cd63795
commit
84cc110732
|
@ -4734,46 +4734,52 @@ define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
|
|||
ret <4 x i64> %6
|
||||
}
|
||||
|
||||
define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
|
||||
define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> *%a3) {
|
||||
; GENERIC-LABEL: test_pmuldq:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||
; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
|
||||
; GENERIC-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
|
||||
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: test_pmuldq:
|
||||
; HASWELL: # %bb.0:
|
||||
; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
|
||||
; HASWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
|
||||
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; HASWELL-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; BROADWELL-LABEL: test_pmuldq:
|
||||
; BROADWELL: # %bb.0:
|
||||
; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||
; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
|
||||
; BROADWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:1.00]
|
||||
; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; BROADWELL-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; SKYLAKE-LABEL: test_pmuldq:
|
||||
; SKYLAKE: # %bb.0:
|
||||
; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
|
||||
; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
|
||||
; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
|
||||
; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; SKYLAKE-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; SKX-LABEL: test_pmuldq:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
|
||||
; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
|
||||
; SKX-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
|
||||
; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; ZNVER1-LABEL: test_pmuldq:
|
||||
; ZNVER1: # %bb.0:
|
||||
; ZNVER1-NEXT: vpmuldq (%rdi), %ymm2, %ymm2 # sched: [11:1.00]
|
||||
; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
|
||||
; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
|
||||
; ZNVER1-NEXT: vpor %ymm2, %ymm0, %ymm0 # sched: [1:0.25]
|
||||
; ZNVER1-NEXT: retq # sched: [1:0.50]
|
||||
%1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
|
||||
%2 = bitcast <4 x i64> %1 to <8 x i32>
|
||||
%3 = load <8 x i32>, <8 x i32> *%a2, align 32
|
||||
%4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)
|
||||
%2 = load <8 x i32>, <8 x i32> *%a3, align 32
|
||||
%3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a2, <8 x i32> %2)
|
||||
%4 = or <4 x i64> %1, %3
|
||||
ret <4 x i64> %4
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
|
|
@ -4704,106 +4704,122 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
|
|||
ret <2 x i64> %5
|
||||
}
|
||||
|
||||
define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||
define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) {
|
||||
; GENERIC-LABEL: test_pmuldq:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; GENERIC-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
|
||||
; GENERIC-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
|
||||
; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SLM-LABEL: test_pmuldq:
|
||||
; SLM: # %bb.0:
|
||||
; SLM-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
|
||||
; SLM-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
|
||||
; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
|
||||
; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
|
||||
; SLM-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; SANDY-SSE-LABEL: test_pmuldq:
|
||||
; SANDY-SSE: # %bb.0:
|
||||
; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
|
||||
; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
|
||||
; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
|
||||
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SANDY-LABEL: test_pmuldq:
|
||||
; SANDY: # %bb.0:
|
||||
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
|
||||
; SANDY-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
|
||||
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; HASWELL-SSE-LABEL: test_pmuldq:
|
||||
; HASWELL-SSE: # %bb.0:
|
||||
; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
|
||||
; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
|
||||
; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
|
||||
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; HASWELL-LABEL: test_pmuldq:
|
||||
; HASWELL: # %bb.0:
|
||||
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
|
||||
; HASWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
|
||||
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||
; HASWELL-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; BROADWELL-SSE-LABEL: test_pmuldq:
|
||||
; BROADWELL-SSE: # %bb.0:
|
||||
; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:1.00]
|
||||
; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:1.00]
|
||||
; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
|
||||
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; BROADWELL-LABEL: test_pmuldq:
|
||||
; BROADWELL: # %bb.0:
|
||||
; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
|
||||
; BROADWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:1.00]
|
||||
; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||
; BROADWELL-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; SKYLAKE-SSE-LABEL: test_pmuldq:
|
||||
; SKYLAKE-SSE: # %bb.0:
|
||||
; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
|
||||
; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
|
||||
; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50]
|
||||
; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
|
||||
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; SKYLAKE-LABEL: test_pmuldq:
|
||||
; SKYLAKE: # %bb.0:
|
||||
; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
|
||||
; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
|
||||
; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50]
|
||||
; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||
; SKYLAKE-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; SKX-SSE-LABEL: test_pmuldq:
|
||||
; SKX-SSE: # %bb.0:
|
||||
; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
|
||||
; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
|
||||
; SKX-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50]
|
||||
; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
|
||||
; SKX-SSE-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; SKX-LABEL: test_pmuldq:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
|
||||
; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
|
||||
; SKX-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50]
|
||||
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
;
|
||||
; BTVER2-SSE-LABEL: test_pmuldq:
|
||||
; BTVER2-SSE: # %bb.0:
|
||||
; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
|
||||
; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
|
||||
; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
|
||||
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; BTVER2-LABEL: test_pmuldq:
|
||||
; BTVER2: # %bb.0:
|
||||
; BTVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [7:1.00]
|
||||
; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||
; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
||||
; BTVER2-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
|
||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||
;
|
||||
; ZNVER1-SSE-LABEL: test_pmuldq:
|
||||
; ZNVER1-SSE: # %bb.0:
|
||||
; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
|
||||
; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
|
||||
; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
|
||||
; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
|
||||
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
|
||||
;
|
||||
; ZNVER1-LABEL: test_pmuldq:
|
||||
; ZNVER1: # %bb.0:
|
||||
; ZNVER1-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [11:1.00]
|
||||
; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; ZNVER1-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
|
||||
; ZNVER1-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
|
||||
; ZNVER1-NEXT: retq # sched: [1:0.50]
|
||||
%1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
|
||||
%2 = bitcast <2 x i64> %1 to <4 x i32>
|
||||
%3 = load <4 x i32>, <4 x i32> *%a2, align 16
|
||||
%4 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %2, <4 x i32> %3)
|
||||
%2 = load <4 x i32>, <4 x i32> *%a3, align 16
|
||||
%3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a2, <4 x i32> %2)
|
||||
%4 = or <2 x i64> %1, %3
|
||||
ret <2 x i64> %4
|
||||
}
|
||||
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
|
Loading…
Reference in New Issue