[x86] Refactor the tests for popcnt.

Extracted from the D6531 patch by Bruno Cardoso Lopes, and re-generated
to reflect the current state of the world. This should let Bruno's D6531
actually show the delta between the approaches by running the x86 test
case update script after re-building.

llvm-svn: 238391
This commit is contained in:
Chandler Carruth 2015-05-28 02:40:15 +00:00
parent 9b0a561b03
commit 88862eaefa
3 changed files with 2902 additions and 159 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,159 +0,0 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck -check-prefix=AVX2 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s
; Vector version of:
; v = v - ((v >> 1) & 0x55555555)
; v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
; v = (v + (v >> 4)) & 0xF0F0F0F
; v = v + (v >> 8)
; v = v + (v >> 16)
; v = v + (v >> 32) ; i64 only
; test0: population count of an <8 x i32> vector via @llvm.ctpop.v8i32.
; Only the AVX2 check prefix (the llc invocation with -mattr=avx2 and popcnt
; not explicitly disabled) verifies this function. The expected sequence
; starts on a ymm register and uses 32-bit lane operations (vpsrld, vpsubd,
; vpaddd) with shift amounts 1, 2, 4, 8 and 16. Each vpbroadcastd is
; immediately followed by a vpand (presumably a splatted mask constant;
; the operands are not pinned by these checks).
define <8 x i32> @test0(<8 x i32> %x) {
; AVX2-LABEL: @test0
entry:
; AVX2: vpsrld $1, %ymm
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsubd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsrld $2
; AVX2-NEXT: vpand
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpsrld $4
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
; AVX2-NEXT: vpsrld $8
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpsrld $16
; AVX2-NEXT: vpaddd
; AVX2-NEXT: vpbroadcastd
; AVX2-NEXT: vpand
%y = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %x)
ret <8 x i32> %y
}
; test1: population count of a <4 x i64> vector via @llvm.ctpop.v4i64.
; Only the AVX2 prefix with popcnt disabled verifies this function; the other
; two llc invocations run it through llc but have no checks for it.
; The expected sequence starts on a ymm register and uses 64-bit lane
; operations (vpsrlq, vpsubq, vpaddq). Compared with the 32-bit element tests
; it has one extra shift-and-add step with a shift amount of 32, matching the
; "i64 only" step of the algorithm sketch at the top of the file.
define <4 x i64> @test1(<4 x i64> %x) {
; AVX2-NOPOPCNT-LABEL: @test1
entry:
; AVX2-NOPOPCNT: vpsrlq $1, %ymm
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpbroadcastq
; AVX2-NOPOPCNT-NEXT: vpand
%y = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
ret <4 x i64> %y
}
; test2: population count of a <4 x i32> vector via @llvm.ctpop.v4i32.
; Verified under both popcnt-disabled configurations (the AVX1 one and the
; AVX2 one); each prefix is consumed by a separate FileCheck invocation, so the
; two check sequences below are independent of each other.
; Both sequences start on an xmm register and use 32-bit lane operations.
; The AVX2 expectations include vpbroadcastd before several vpand steps; the
; AVX1 expectations contain no broadcast and instead expect a single vmovdqa.
define <4 x i32> @test2(<4 x i32> %x) {
; AVX2-NOPOPCNT-LABEL: @test2
; AVX1-NOPOPCNT-LABEL: @test2
entry:
; AVX2-NOPOPCNT: vpsrld $1, %xmm
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrld $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpsrld $4
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrld $8
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpsrld $16
; AVX2-NOPOPCNT-NEXT: vpaddd
; AVX2-NOPOPCNT-NEXT: vpbroadcastd
; AVX2-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT: vpsrld $1, %xmm
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsubd
; AVX1-NOPOPCNT-NEXT: vmovdqa
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrld $2
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpsrld $4
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrld $8
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpsrld $16
; AVX1-NOPOPCNT-NEXT: vpaddd
; AVX1-NOPOPCNT-NEXT: vpand
%y = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
ret <4 x i32> %y
}
; test3: population count of a <2 x i64> vector via @llvm.ctpop.v2i64.
; Verified under both popcnt-disabled configurations (the AVX1 one and the
; AVX2 one); each prefix is consumed by a separate FileCheck invocation.
; Both sequences start on an xmm register, use 64-bit lane operations
; (vpsrlq, vpsubq, vpaddq) and include the extra shift-by-32 step used for
; 64-bit elements. The two prefixes expect the same mnemonic sequence here:
; neither expects a vpbroadcastq, and both expect a single vmovdqa.
define <2 x i64> @test3(<2 x i64> %x) {
; AVX2-NOPOPCNT-LABEL: @test3
; AVX1-NOPOPCNT-LABEL: @test3
entry:
; AVX2-NOPOPCNT: vpsrlq $1, %xmm
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsubq
; AVX2-NOPOPCNT-NEXT: vmovdqa
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $2
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $4
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX2-NOPOPCNT-NEXT: vpsrlq $8
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $16
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpsrlq $32
; AVX2-NOPOPCNT-NEXT: vpaddq
; AVX2-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT: vpsrlq $1, %xmm
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsubq
; AVX1-NOPOPCNT-NEXT: vmovdqa
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrlq $2
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $4
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpand
; AVX1-NOPOPCNT-NEXT: vpsrlq $8
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $16
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpsrlq $32
; AVX1-NOPOPCNT-NEXT: vpaddq
; AVX1-NOPOPCNT-NEXT: vpand
%y = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
ret <2 x i64> %y
}
; Declarations of the ctpop intrinsic overloads called by the tests above,
; one per vector type exercised (v4i32, v2i64, v8i32, v4i64).
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)