llvm-project/llvm/test/CodeGen/NVPTX/intrinsics.ll

; Checks the PTX instructions selected for a handful of LLVM/NVVM intrinsics
; (fabs, sqrt, bitreverse, ctpop). The same FileCheck expectations are applied
; to both the 32-bit (nvptx) and 64-bit (nvptx64) targets, each built for sm_20.
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

; Single-precision llvm.fabs is expected to be emitted as abs.f32.
; Note: the label directive needs its trailing colon; without it FileCheck
; treats the line as an ordinary comment and the check below is not anchored
; to this function.
; CHECK-LABEL: test_fabsf(
define float @test_fabsf(float %f) {
; CHECK: abs.f32
  %x = call float @llvm.fabs.f32(float %f)
  ret float %x
}

; Double-precision llvm.fabs is expected to be emitted as abs.f64.
; CHECK-LABEL: test_fabs(
; CHECK: abs.f64
define double @test_fabs(double %d) {
  %abs = call double @llvm.fabs.f64(double %d)
  ret double %abs
}

; The NVVM-specific sqrt intrinsic is expected to be emitted as sqrt.rn.f32.
; CHECK-LABEL: test_nvvm_sqrt(
; CHECK: sqrt.rn.f32
define float @test_nvvm_sqrt(float %a) {
  %root = call float @llvm.nvvm.sqrt.f(float %a)
  ret float %root
}

; The generic llvm.sqrt.f32 intrinsic is expected to be emitted as the same
; sqrt.rn.f32 instruction as the NVVM-specific variant.
; CHECK-LABEL: test_llvm_sqrt(
; CHECK: sqrt.rn.f32
define float @test_llvm_sqrt(float %a) {
  %root = call float @llvm.sqrt.f32(float %a)
  ret float %root
}

; 32-bit llvm.bitreverse is expected to be emitted as brev.b32.
; CHECK-LABEL: test_bitreverse32(
; CHECK: brev.b32
define i32 @test_bitreverse32(i32 %a) {
  %rev = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %rev
}

; 64-bit llvm.bitreverse is expected to be emitted as brev.b64.
; CHECK-LABEL: test_bitreverse64(
; CHECK: brev.b64
define i64 @test_bitreverse64(i64 %a) {
  %rev = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %rev
}

; 32-bit llvm.ctpop is expected to be emitted as popc.b32.
; CHECK-LABEL: test_popc32(
; CHECK: popc.b32
define i32 @test_popc32(i32 %a) {
  %count = call i32 @llvm.ctpop.i32(i32 %a)
  ret i32 %count
}

; 64-bit llvm.ctpop is expected to be emitted as popc.b64 followed by a
; widening conversion (cvt.u64.u32) of the result to the i64 return type.
; The label ends in "(" like the other labels in this file so that it cannot
; also match the longer name test_popc64_trunc.
; CHECK-LABEL: test_popc64(
define i64 @test_popc64(i64 %a) {
; CHECK: popc.b64
; CHECK: cvt.u64.u32
  %val = call i64 @llvm.ctpop.i64(i64 %a)
  ret i64 %val
}

; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
; if this function returns an i32, there's no need to do any type conversions
; in the ptx.
; The label ends in "(" for consistency with the other labels in this file.
; CHECK-LABEL: test_popc64_trunc(
define i32 @test_popc64_trunc(i64 %a) {
; CHECK: popc.b64
; CHECK-NOT: cvt.
  %val = call i64 @llvm.ctpop.i64(i64 %a)
  %trunc = trunc i64 %val to i32
  ret i32 %trunc
}

; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
; then converting back to i16.
; The label ends in "(" like the other labels in this file so that it cannot
; also match the longer name test_popc16_to_32.
; CHECK-LABEL: test_popc16(
define void @test_popc16(i16 %a, i16* %b) {
; CHECK: cvt.u32.u16
; CHECK: popc.b32
; CHECK: cvt.u16.u32
  %val = call i16 @llvm.ctpop.i16(i16 %a)
  ; The result is stored through %b rather than returned.
  store i16 %val, i16* %b
  ret void
}

; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need
; to do any conversions after calling popc.b32, because that returns an i32.
; The label ends in "(" for consistency with the other labels in this file.
; CHECK-LABEL: test_popc16_to_32(
define i32 @test_popc16_to_32(i16 %a) {
; CHECK: cvt.u32.u16
; CHECK: popc.b32
; CHECK-NOT: cvt.
  %val = call i16 @llvm.ctpop.i16(i16 %a)
  %zext = zext i16 %val to i32
  ret i32 %zext
}

; Declarations for every intrinsic called by the tests in this file.

; Absolute value.
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)

; Square root: generic LLVM form and NVVM-specific form.
declare float @llvm.sqrt.f32(float)
declare float @llvm.nvvm.sqrt.f(float)

; Bit reversal.
declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)

; Population count.
declare i16 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
Add llvm.fabs intrinsic. llvm-svn: 157594 2012-05-29 05:48:37 +08:00			`; RUN: llc < %s -march=nvptx -mcpu=sm_20 \| FileCheck %s`
			`; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 \| FileCheck %s`

[NVPTX] Modernize intrinics.ll test. llvm-svn: 292069 2017-01-16 00:54:57 +08:00			`; CHECK-LABEL test_fabsf(`
			`define float @test_fabsf(float %f) {`
			`; CHECK: abs.f32`
			`%x = call float @llvm.fabs.f32(float %f)`
			`ret float %x`
Add llvm.fabs intrinsic. llvm-svn: 157594 2012-05-29 05:48:37 +08:00			`}`

[NVPTX] Modernize intrinics.ll test. llvm-svn: 292069 2017-01-16 00:54:57 +08:00			`; CHECK-LABEL: test_fabs(`
			`define double @test_fabs(double %d) {`
			`; CHECK: abs.f64`
			`%x = call double @llvm.fabs.f64(double %d)`
			`ret double %x`
Add llvm.fabs intrinsic. llvm-svn: 157594 2012-05-29 05:48:37 +08:00			`}`

[NVPTX] Modernize intrinics.ll test. llvm-svn: 292069 2017-01-16 00:54:57 +08:00			`; CHECK-LABEL: test_nvvm_sqrt(`
[NVPTX] Add @llvm.nvvm.sqrt.f() intrinsic llvm-svn: 182394 2013-05-22 00:51:30 +08:00			`define float @test_nvvm_sqrt(float %a) {`
[NVPTX] Modernize intrinics.ll test. llvm-svn: 292069 2017-01-16 00:54:57 +08:00			`; CHECK: sqrt.rn.f32`
[NVPTX] Add @llvm.nvvm.sqrt.f() intrinsic llvm-svn: 182394 2013-05-22 00:51:30 +08:00			`%val = call float @llvm.nvvm.sqrt.f(float %a)`
			`ret float %val`
			`}`

[NVPTX] Add explicit check for llvm.sqrt.f32 to intrinsics.ll. Test-only change. llvm-svn: 292690 2017-01-21 08:59:23 +08:00			`; CHECK-LABEL: test_llvm_sqrt(`
			`define float @test_llvm_sqrt(float %a) {`
Fix some broken CHECK lines. The colon is important. llvm-svn: 292761 2017-01-23 04:28:56 +08:00			`; CHECK: sqrt.rn.f32`
[NVPTX] Add explicit check for llvm.sqrt.f32 to intrinsics.ll. Test-only change. llvm-svn: 292690 2017-01-21 08:59:23 +08:00			`%val = call float @llvm.sqrt.f32(float %a)`
			`ret float %val`
			`}`

[NVPTX] Add lowering for llvm.bitreverse. Reviewers: tra Subscribers: llvm-commits, jholewinski Differential Revision: https://reviews.llvm.org/D28720 llvm-svn: 292301 2017-01-18 08:08:10 +08:00			`; CHECK-LABEL: test_bitreverse32(`
			`define i32 @test_bitreverse32(i32 %a) {`
			`; CHECK: brev.b32`
			`%val = call i32 @llvm.bitreverse.i32(i32 %a)`
			`ret i32 %val`
			`}`

			`; CHECK-LABEL: test_bitreverse64(`
			`define i64 @test_bitreverse64(i64 %a) {`
			`; CHECK: brev.b64`
			`%val = call i64 @llvm.bitreverse.i64(i64 %a)`
			`ret i64 %val`
			`}`

[NVPTX] Improve lowering of llvm.ctpop. Summary: Avoid an unnecessary conversion operation when using the result of ctpop.i32 or ctpop.i16 as an i32, as in both cases the ptx instruction we run returns an i32. (Previously if we used the value as an i32, we'd do an unnecessary zext+trunc.) Reviewers: tra Subscribers: jholewinski, llvm-commits Differential Revision: https://reviews.llvm.org/D28721 llvm-svn: 292302 2017-01-18 08:08:27 +08:00			`; CHECK-LABEL: test_popc32(`
			`define i32 @test_popc32(i32 %a) {`
			`; CHECK: popc.b32`
			`%val = call i32 @llvm.ctpop.i32(i32 %a)`
			`ret i32 %val`
			`}`

			`; CHECK-LABEL: test_popc64`
			`define i64 @test_popc64(i64 %a) {`
			`; CHECK: popc.b64`
			`; CHECK: cvt.u64.u32`
			`%val = call i64 @llvm.ctpop.i64(i64 %a)`
			`ret i64 %val`
			`}`

			`; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so`
			`; if this function returns an i32, there's no need to do any type conversions`
			`; in the ptx.`
			`; CHECK-LABEL: test_popc64_trunc`
			`define i32 @test_popc64_trunc(i64 %a) {`
			`; CHECK: popc.b64`
			`; CHECK-NOT: cvt.`
			`%val = call i64 @llvm.ctpop.i64(i64 %a)`
			`%trunc = trunc i64 %val to i32`
			`ret i32 %trunc`
			`}`

			`; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and`
			`; then converting back to i16.`
			`; CHECK-LABEL: test_popc16`
			`define void @test_popc16(i16 %a, i16* %b) {`
			`; CHECK: cvt.u32.u16`
			`; CHECK: popc.b32`
			`; CHECK: cvt.u16.u32`
			`%val = call i16 @llvm.ctpop.i16(i16 %a)`
			`store i16 %val, i16* %b`
			`ret void`
			`}`

			`; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need`
			`; to do any conversions after calling popc.b32, because that returns an i32.`
			`; CHECK-LABEL: test_popc16_to_32`
			`define i32 @test_popc16_to_32(i16 %a) {`
			`; CHECK: cvt.u32.u16`
			`; CHECK: popc.b32`
			`; CHECK-NOT: cvt.`
			`%val = call i16 @llvm.ctpop.i16(i16 %a)`
			`%zext = zext i16 %val to i32`
			`ret i32 %zext`
			`}`

Add llvm.fabs intrinsic. llvm-svn: 157594 2012-05-29 05:48:37 +08:00			`declare float @llvm.fabs.f32(float)`
			`declare double @llvm.fabs.f64(double)`
[NVPTX] Add @llvm.nvvm.sqrt.f() intrinsic llvm-svn: 182394 2013-05-22 00:51:30 +08:00			`declare float @llvm.nvvm.sqrt.f(float)`
[NVPTX] Add explicit check for llvm.sqrt.f32 to intrinsics.ll. Test-only change. llvm-svn: 292690 2017-01-21 08:59:23 +08:00			`declare float @llvm.sqrt.f32(float)`
[NVPTX] Add lowering for llvm.bitreverse. Reviewers: tra Subscribers: llvm-commits, jholewinski Differential Revision: https://reviews.llvm.org/D28720 llvm-svn: 292301 2017-01-18 08:08:10 +08:00			`declare i32 @llvm.bitreverse.i32(i32)`
			`declare i64 @llvm.bitreverse.i64(i64)`
[NVPTX] Improve lowering of llvm.ctpop. Summary: Avoid an unnecessary conversion operation when using the result of ctpop.i32 or ctpop.i16 as an i32, as in both cases the ptx instruction we run returns an i32. (Previously if we used the value as an i32, we'd do an unnecessary zext+trunc.) Reviewers: tra Subscribers: jholewinski, llvm-commits Differential Revision: https://reviews.llvm.org/D28721 llvm-svn: 292302 2017-01-18 08:08:27 +08:00			`declare i16 @llvm.ctpop.i16(i16)`
			`declare i32 @llvm.ctpop.i32(i32)`
			`declare i64 @llvm.ctpop.i64(i64)`