forked from OSchip/llvm-project
[x86] Refactor the tests for popcnt.
Extracted from the D6531 patch by Bruno Cardoso Lopes, and re-generated to reflect the current state of the world. This should let Bruno's D6531 actually show the delta between the approaches by running the x86 test case update script after re-building. llvm-svn: 238391
This commit is contained in:
parent
9b0a561b03
commit
88862eaefa
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,159 +0,0 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck -check-prefix=AVX2 %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s
|
||||
|
||||
; Vector version of:
|
||||
; v = v - ((v >> 1) & 0x55555555)
|
||||
; v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
|
||||
; v = (v + (v >> 4)) & 0xF0F0F0F
|
||||
; v = v + (v >> 8)
|
||||
; v = v + (v >> 16)
|
||||
; v = v + (v >> 32) ; i64 only
|
||||
|
||||
; test0: population count of an <8 x i32> vector through @llvm.ctpop.v8i32.
; Only the AVX2 check prefix is used here (the RUN line using that prefix
; passes -mattr=avx2). The expected output begins with `vpsrld $1` on a ymm
; register and is followed, line by line, by a fixed run of
; vpbroadcastd / vpand / vpsubd / vpaddd / vpsrld instructions with shift
; amounts 1, 2, 4, 8 and 16.
; NOTE(review): this presumably mirrors the shift/mask/add steps listed in the
; "Vector version of" comment at the top of the file; the per-instruction
; mapping is not spelled out in the test itself.
define <8 x i32> @test0(<8 x i32> %x) {
|
||||
; AVX2-LABEL: @test0
|
||||
entry:
|
||||
; AVX2: vpsrld $1, %ymm
|
||||
; AVX2-NEXT: vpbroadcastd
|
||||
; AVX2-NEXT: vpand
|
||||
; AVX2-NEXT: vpsubd
|
||||
; AVX2-NEXT: vpbroadcastd
|
||||
; AVX2-NEXT: vpand
|
||||
; AVX2-NEXT: vpsrld $2
|
||||
; AVX2-NEXT: vpand
|
||||
; AVX2-NEXT: vpaddd
|
||||
; AVX2-NEXT: vpsrld $4
|
||||
; AVX2-NEXT: vpaddd
|
||||
; AVX2-NEXT: vpbroadcastd
|
||||
; AVX2-NEXT: vpand
|
||||
; AVX2-NEXT: vpsrld $8
|
||||
; AVX2-NEXT: vpaddd
|
||||
; AVX2-NEXT: vpsrld $16
|
||||
; AVX2-NEXT: vpaddd
|
||||
; AVX2-NEXT: vpbroadcastd
|
||||
; AVX2-NEXT: vpand
|
||||
%y = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %x)
|
||||
ret <8 x i32> %y
|
||||
}
|
||||
|
||||
; test1: population count of a <4 x i64> vector through @llvm.ctpop.v4i64.
; Checked only under the AVX2-NOPOPCNT prefix (its RUN line passes
; -mattr=avx2 -mattr=-popcnt). The expected output begins with `vpsrlq $1`
; on a ymm register, followed by a fixed run of
; vpbroadcastq / vpand / vpsubq / vpaddq / vpsrlq instructions.
; Compared with the 32-bit element tests, the sequence contains one more
; shift/add pair (`vpsrlq $32` + vpaddq), which lines up with the
; "v = v + (v >> 32) ; i64 only" step in the file's algorithm comment.
define <4 x i64> @test1(<4 x i64> %x) {
|
||||
; AVX2-NOPOPCNT-LABEL: @test1
|
||||
entry:
|
||||
; AVX2-NOPOPCNT: vpsrlq $1, %ymm
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsubq
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
%y = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
|
||||
ret <4 x i64> %y
|
||||
}
|
||||
|
||||
; test2: population count of a <4 x i32> vector through @llvm.ctpop.v4i32.
; Checked under two prefixes, AVX2-NOPOPCNT and AVX1-NOPOPCNT; both expect
; the sequence to start with `vpsrld $1` on an xmm register and to use the
; shift amounts 1, 2, 4, 8 and 16.
; The two expected sequences differ in how the masks show up: the AVX2 one
; lists a vpbroadcastd before four of its vpand lines, while the AVX1 one has
; no vpbroadcastd at all and instead expects a single vmovdqa.
; NOTE(review): presumably the AVX1 run takes its masks from memory operands
; or the vmovdqa load because vpbroadcastd is unavailable there - the checks
; do not pin operands, so confirm against actual llc output.
define <4 x i32> @test2(<4 x i32> %x) {
|
||||
; AVX2-NOPOPCNT-LABEL: @test2
|
||||
; AVX1-NOPOPCNT-LABEL: @test2
|
||||
entry:
|
||||
; AVX2-NOPOPCNT: vpsrld $1, %xmm
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsubd
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrld $2
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrld $4
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrld $8
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrld $16
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT: vpsrld $1, %xmm
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpsubd
|
||||
; AVX1-NOPOPCNT-NEXT: vmovdqa
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrld $2
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrld $4
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrld $8
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrld $16
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddd
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
%y = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
|
||||
ret <4 x i32> %y
|
||||
}
|
||||
|
||||
; test3: population count of a <2 x i64> vector through @llvm.ctpop.v2i64.
; Checked under the AVX2-NOPOPCNT and AVX1-NOPOPCNT prefixes. The two
; expected sequences are instruction-for-instruction the same: `vpsrlq $1`
; on an xmm register, then vpand / vpsubq / vmovdqa / vpand and a run of
; vpsrlq + vpaddq pairs with shift amounts 2, 4, 8, 16 and 32, ending in a
; final vpand. Neither sequence expects a vpbroadcastq.
; The `vpsrlq $32` + vpaddq pair lines up with the
; "v = v + (v >> 32) ; i64 only" step in the file's algorithm comment.
define <2 x i64> @test3(<2 x i64> %x) {
|
||||
; AVX2-NOPOPCNT-LABEL: @test3
|
||||
; AVX1-NOPOPCNT-LABEL: @test3
|
||||
entry:
|
||||
; AVX2-NOPOPCNT: vpsrlq $1, %xmm
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsubq
|
||||
; AVX2-NOPOPCNT-NEXT: vmovdqa
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
|
||||
; AVX2-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX2-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT: vpsrlq $1, %xmm
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpsubq
|
||||
; AVX1-NOPOPCNT-NEXT: vmovdqa
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrlq $2
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrlq $4
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrlq $8
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrlq $16
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX1-NOPOPCNT-NEXT: vpsrlq $32
|
||||
; AVX1-NOPOPCNT-NEXT: vpaddq
|
||||
; AVX1-NOPOPCNT-NEXT: vpand
|
||||
%y = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
|
||||
ret <2 x i64> %y
|
||||
}
|
||||
|
||||
; Declarations of the ctpop intrinsics called by test0..test3: the two
; 128-bit-wide vector types first (v4i32, v2i64), then the two 256-bit-wide
; ones (v8i32, v4i64).
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
|
||||
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
|
||||
|
||||
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
|
||||
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
|
||||
|
Loading…
Reference in New Issue