forked from OSchip/llvm-project
[NVPTX] Integrate ptxas to LIT tests
ptxas is a proprietary compiler from Nvidia that can compile PTX to machine code (SASS). It has a lot of diagnostics to catch errors in PTX, which can be used to verify PTX output from llc. Set the -DPTXAS_EXECUTABLE=/path/to/ptxas CMake option to enable it. If this option is not set, ptxas is substituted with `true`, which effectively disables all ptxas RUN lines. The LLVM_PTXAS_EXECUTABLE environment variable takes precedence over the CMake option and allows overriding the ptxas executable used for LIT without a complete re-configuration. Differential Revision: https://reviews.llvm.org/D121727
This commit is contained in:
parent
b1f1688e90
commit
0f1b5f115a
|
@ -1,5 +1,8 @@
|
|||
; RUN: llc < %s | FileCheck -check-prefix=ENABLED %s
|
||||
; RUN: llc -disable-nvptx-load-store-vectorizer < %s | FileCheck -check-prefix=DISABLED %s
|
||||
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc -disable-nvptx-load-store-vectorizer < %s | %ptxas-verify %}
|
||||
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; Check that the load-store vectorizer is enabled by default for nvptx, and
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
|
||||
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
declare void @foo()
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
|
||||
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
declare void @llvm.nvvm.barrier0()
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | %ptxas-verify %}
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
declare void @foo()
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
|
||||
; RUN: opt -mtriple=nvptx-- < %s -S -infer-address-spaces | FileCheck %s --check-prefix IR
|
||||
; RUN: opt -mtriple=nvptx64-- < %s -S -infer-address-spaces | FileCheck %s --check-prefix IR
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
|
||||
@scalar = internal addrspace(3) global float 0.000000e+00, align 4
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK: .visible .global .align 4 .u32 g = 42;
|
||||
; CHECK: .visible .global .align 4 .u32 g2 = generic(g);
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32,G32
|
||||
; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64,G64
|
||||
; RUN: llc -O0 < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr| FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64,G64
|
||||
; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
|
||||
|
||||
; ALL-LABEL: conv1
|
||||
define i32 @conv1(i32 addrspace(1)* %ptr) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; Make sure aggregate param types get emitted properly.
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
declare <2 x float> @barv(<2 x float> %input)
|
||||
declare <3 x float> @barv3(<3 x float> %input)
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
@texture = internal addrspace(1) global i64 0, align 8
|
||||
; CHECK: .global .texref texture
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) foo0(
|
||||
; CHECK: .param .align 4 .b8 foo0_param_0[8]
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
|
||||
|
||||
;; These tests should run for all targets
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
;; These tests should run for all targets
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=ALL,CHECK_PTX32 %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=ALL,CHECK_PTX64 %s
|
||||
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
|
||||
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
|
||||
|
||||
declare void @llvm.nvvm.cp.async.wait.group(i32)
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
|
||||
|
||||
; CHECK-LABEL: .func test(
|
||||
define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s -check-prefixes=CHECK,CHECK32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
|
||||
|
||||
; CHECK-LABEL: .func test_atomics_scope(
|
||||
define void @test_atomics_scope(float* %fp, float %f,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_32%} %}
|
||||
|
||||
|
||||
; CHECK-LABEL: atom0
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
; https://bugs.llvm.org/show_bug.cgi?id=52037 for the gory details.
|
||||
;
|
||||
; RUN: llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -O3 -o - %s | %ptxas-verify -arch=sm_70 %}
|
||||
|
||||
; CHECK-LABEL: .visible .entry barney(
|
||||
; CHECK-NOT: .local{{.*}}__local_depot
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %}
|
||||
|
||||
declare void @llvm.nvvm.bar.warp.sync(i32)
|
||||
declare void @llvm.nvvm.barrier.sync(i32)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
|
||||
; CHECK: bfe0
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
; Disable CGP which also folds branches, so that only BranchFolding is under
|
||||
; the spotlight.
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
; ModuleID = '__kernelgen_main_module'
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; RUN: opt < %s -nvptx-lower-args -S | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx64-unknown-unknown"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
; Verify that we correctly emit code for extending ldg/ldu. We do not expose
|
||||
; extending variants in the backend, but the ldg/ldu selection code may pick
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit
|
||||
; registers in the backend, so these loads need special handling.
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc -filetype=asm -o - %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc -filetype=asm -o - %s | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
; 64-bit divides and rems should be split into a fast and slow path where
|
||||
; the fast path uses a 32-bit operation.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; Checks how NVPTX lowers alloca buffers and their passing to functions.
|
||||
;
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target triple = "nvptx"
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
|
||||
;; Kernel function using ptx_kernel calling conv
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %}
|
||||
|
||||
; Make sure the example doesn't crash with segfault
|
||||
|
||||
; CHECK: .visible .func ({{.*}}) loop
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O2 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O2 | %ptxas-verify %}
|
||||
|
||||
; *************************************
|
||||
; * Cases with no min/max
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
;; These tests should run for all targets
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target triple = "nvptx-nvidia-cuda"
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
define i16 @cvt_u16_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u16.f32 %rs{{[0-9]+}}, %f{{[0-9]+}};
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
|
||||
;; Integer conversions happen implicitly by loading/storing the proper types
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck %s
|
||||
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
|
||||
|
||||
|
||||
; CHECK-LABEL: cvt_rn_bf16x2_f32
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O0 | %ptxas-verify %}
|
||||
|
||||
define void @foo(i32* %output) {
|
||||
; CHECK-LABEL: .visible .func foo(
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 | %ptxas-verify %}
|
||||
|
||||
define float @foo(float %a) {
|
||||
; CHECK: div.approx.f32
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc -O2 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O2 --check-prefix=CHECK
|
||||
; RUN: llc -O0 < %s -march=nvptx -mcpu=sm_35 | FileCheck %s --check-prefix=O0 --check-prefix=CHECK
|
||||
; RUN: %if ptxas %{ llc -O2 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
; RUN: %if ptxas %{ llc -O0 < %s -march=nvptx -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
; The following IR
|
||||
;
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.envreg0()
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
define void @foo(float* nocapture readonly %x_value, double* nocapture %output) #0 {
|
||||
%1 = bitcast float* %x_value to <4 x float>*
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_75 -mattr=+ptx70 | FileCheck %s
|
||||
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
|
||||
|
||||
declare half @llvm.nvvm.ex2.approx.f16(half)
|
||||
declare <2 x half> @llvm.nvvm.ex2.approx.f16x2(<2 x half>)
|
||||
|
|
|
@ -2,20 +2,41 @@
|
|||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-F16-NOFTZ %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | %ptxas-verify -arch=sm_53 \
|
||||
; RUN: %}
|
||||
; ## Full FP16 with FTZ
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: -denormal-fp-math-f32=preserve-sign \
|
||||
; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16-FTZ %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: -denormal-fp-math-f32=preserve-sign \
|
||||
; RUN: | %ptxas-verify -arch=sm_53 \
|
||||
; RUN: %}
|
||||
; ## FP16 support explicitly disabled.
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
|
||||
; RUN: -verify-machineinstrs \
|
||||
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
|
||||
; RUN: | %ptxas-verify -arch=sm_53 \
|
||||
; RUN: %}
|
||||
; ## FP16 is not supported by hardware.
|
||||
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
|
||||
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
|
||||
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | %ptxas-verify -arch=sm_52 \
|
||||
; RUN: %}
|
||||
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
|
|
|
@ -2,15 +2,31 @@
|
|||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | %ptxas-verify -arch=sm_53 \
|
||||
; RUN: %}
|
||||
; ## FP16 support explicitly disabled.
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
|
||||
; RUN: -verify-machineinstrs \
|
||||
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
|
||||
; RUN: -verify-machineinstrs \
|
||||
; RUN: | %ptxas-verify -arch=sm_53 \
|
||||
; RUN: %}
|
||||
; ## FP16 is not supported by hardware.
|
||||
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
|
||||
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
|
||||
; RUN: -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | %ptxas-verify -arch=sm_52 \
|
||||
; RUN: %}
|
||||
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
declare float @llvm.sqrt.f32(float)
|
||||
declare double @llvm.sqrt.f64(double)
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | %ptxas-verify %}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y, float %z,
|
||||
float %u, float %v) {
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | FileCheck %s -check-prefix=FMA
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | FileCheck %s -check-prefix=MUL
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=1 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-fma-level=0 | %ptxas-verify %}
|
||||
|
||||
define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
|
||||
entry:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
declare float @dummy_f32(float, float) #0
|
||||
declare double @dummy_f64(double, double) #0
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx | FileCheck %s --check-prefixes=CHECK,CHECK-NONAN
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK-NAN
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %}
|
||||
; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
|
||||
|
||||
; ---- minimum ----
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %}
|
||||
|
||||
declare i32 @llvm.nvvm.fns(i32, i32, i32)
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_30%} %}
|
||||
|
||||
target triple = "nvptx64-unknown-cuda"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
|
||||
|
||||
target triple = "nvptx64-unknown-cuda"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc -march=nvptx -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc -march=nvptx -verify-machineinstrs < %s | %ptxas-verify %}
|
||||
|
||||
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
|
||||
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-NOT: .align 2
|
||||
define ptx_device void @foo() align 2 {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx-nvidia-cuda"
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; PTX32: .visible .global .align 4 .u32 i;
|
||||
; PTX32: .visible .const .align 4 .u32 j;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; Make sure we emit these globals in def-use order
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; PTX does not support .hidden or .protected.
|
||||
; Make sure we do not emit them.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; Make sure the globals constant initializers are not prone to host endianness
|
||||
; issues.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -relocation-model=static | FileCheck %s --check-prefix CHK
|
||||
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -relocation-model=static | %ptxas-verify %}
|
||||
|
||||
%MyStruct = type { i32, i32, float }
|
||||
@Gbl = internal addrspace(3) global [1024 x %MyStruct] zeroinitializer
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %}
|
||||
|
||||
; CHECK: .b8 half_array[8] = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
@"half_array" = addrspace(1) constant [4 x half]
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx-nvidia-cuda"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-LABEL: foo
|
||||
; CHECK: setp
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx-nvidia-cuda"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK: .visible .global .align 16 .b8 G1[16] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
@G1 = global i128 1
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-LABEL: .visible .func callee(
|
||||
; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16],
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[16]) callee(
|
||||
define i128 @callee(i128) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[32]) foo(
|
||||
define { i128, i128 } @foo(i64 %a, i32 %b) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-LABEL: abs_i16(
|
||||
define i16 @abs_i16(i16 %a) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK: imad
|
||||
define i32 @imad(i32 %a, i32 %b, i32 %c) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
define float @test(float %x) {
|
||||
entry:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc -march=nvptx < %s | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc -march=nvptx < %s | %ptxas-verify %}
|
||||
|
||||
; Test that %c works with immediates
|
||||
; CHECK-LABEL: test_inlineasm_c_output_template0
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
|
||||
; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \
|
||||
; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
define ptx_device i32 @test_tid_x() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-LABEL: test_fabsf(
|
||||
define float @test_fabsf(float %f) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
declare i1 @llvm.nvvm.isspacep.const(i8*) readnone noinline
|
||||
declare i1 @llvm.nvvm.isspacep.global(i8*) readnone noinline
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G32,LS32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=ALL,G64,LS64
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s --check-prefixes=G64,LS32
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
|
||||
|
||||
|
||||
;; i8
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
|
||||
;; i8
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
# RUN: %python %s > %t.ll
|
||||
# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
|
||||
# RUN: llc < %t.ll -march=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll
|
||||
# RUN: %if ptxas %{ llc < %t.ll -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
|
||||
# RUN: %if ptxas %{ llc < %t.ll -march=nvptx -mcpu=sm_30 | %ptxas-verify %}
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
; Check that invariant loads from the global addrspace are lowered to
|
||||
; ld.global.nc.
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
declare <4 x float> @bar()
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
|
||||
declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc < %s -march=nvptx 2>&1 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx | %ptxas-verify %}
|
||||
|
||||
; Allow to make libcalls that are defined in the current module
|
||||
|
||||
declare i8* @malloc(i64)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx-nvidia-cuda"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
; CHECK-LABEL: plain
|
||||
define void @plain(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefix=SM20 %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck -check-prefix=SM35 %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx64-unknown-unknown"
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX32
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX64
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
; Ensure we access the local stack properly
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 | FileCheck %s --check-prefix PTX
|
||||
; RUN: opt < %s -S -nvptx-lower-aggr-copies | FileCheck %s --check-prefix IR
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
; Verify that the NVPTXLowerAggrCopies pass works as expected - calls to
|
||||
; llvm.mem* intrinsics get lowered to loops.
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
target triple = "nvptx64-unknown-unknown"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; RUN: opt < %s -S -nvptx-lower-args | FileCheck %s --check-prefix IR
|
||||
; RUN: llc < %s -mcpu=sm_20 | FileCheck %s --check-prefix PTX
|
||||
; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 | %ptxas-verify %}
|
||||
|
||||
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue