AMDGPU: Move R600 test compatibility hack
Instead of handling the r600 intrinsics on amdgcn, handle the amdgcn intrinsics on r600.
parent f319074824
commit 7af7b96a9b
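As a rough sketch of the test-side effect (a hypothetical minimal kernel, not taken from this diff): a GCN test that previously leaned on the r600 compatibility handling now calls the native intrinsic directly, while the r600 target itself now also accepts the amdgcn form.

; hypothetical example: read the workitem id through the native GCN intrinsic
declare i32 @llvm.amdgcn.workitem.id.x() #0

define amdgpu_kernel void @example_tid(i32 addrspace(1)* %out) {
  ; before this change a GCN test would have called @llvm.r600.read.tidig.x() here
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  store i32 %tid, i32 addrspace(1)* %out
  ret void
}

attributes #0 = { readnone }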
@@ -615,21 +615,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
       return LowerImplicitParameter(DAG, VT, DL, 8);
 
     case Intrinsic::r600_read_tgid_x:
+    case Intrinsic::amdgcn_workgroup_id_x:
       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                      R600::T1_X, VT);
     case Intrinsic::r600_read_tgid_y:
+    case Intrinsic::amdgcn_workgroup_id_y:
       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                      R600::T1_Y, VT);
     case Intrinsic::r600_read_tgid_z:
+    case Intrinsic::amdgcn_workgroup_id_z:
       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                      R600::T1_Z, VT);
     case Intrinsic::r600_read_tidig_x:
+    case Intrinsic::amdgcn_workitem_id_x:
       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                      R600::T0_X, VT);
     case Intrinsic::r600_read_tidig_y:
+    case Intrinsic::amdgcn_workitem_id_y:
       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                      R600::T0_Y, VT);
     case Intrinsic::r600_read_tidig_z:
+    case Intrinsic::amdgcn_workitem_id_z:
       return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                      R600::T0_Z, VT);
 
@@ -5807,29 +5807,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return lowerImplicitZextParam(DAG, Op, MVT::i16,
                                   SI::KernelInputOffsets::LOCAL_SIZE_Z);
   case Intrinsic::amdgcn_workgroup_id_x:
-  case Intrinsic::r600_read_tgid_x:
     return getPreloadedValue(DAG, *MFI, VT,
                              AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
   case Intrinsic::amdgcn_workgroup_id_y:
-  case Intrinsic::r600_read_tgid_y:
     return getPreloadedValue(DAG, *MFI, VT,
                              AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
   case Intrinsic::amdgcn_workgroup_id_z:
-  case Intrinsic::r600_read_tgid_z:
     return getPreloadedValue(DAG, *MFI, VT,
                              AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
   case Intrinsic::amdgcn_workitem_id_x:
-  case Intrinsic::r600_read_tidig_x:
     return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                           SDLoc(DAG.getEntryNode()),
                           MFI->getArgInfo().WorkItemIDX);
   case Intrinsic::amdgcn_workitem_id_y:
-  case Intrinsic::r600_read_tidig_y:
     return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                           SDLoc(DAG.getEntryNode()),
                           MFI->getArgInfo().WorkItemIDY);
   case Intrinsic::amdgcn_workitem_id_z:
-  case Intrinsic::r600_read_tidig_z:
     return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                           SDLoc(DAG.getEntryNode()),
                           MFI->getArgInfo().WorkItemIDZ);
@@ -139,95 +139,6 @@ entry:
   ret void
 }
 
-; Legacy use of r600 intrinsics by GCN
-
-; The tgid values are stored in sgprs offset by the number of user
-; sgprs.
-
-; FUNC-LABEL: {{^}}tgid_x_legacy:
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-
-; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
-; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
-; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
-; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
-entry:
-  %0 = call i32 @llvm.r600.read.tgid.x() #0
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}tgid_y_legacy:
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-
-; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
-entry:
-  %0 = call i32 @llvm.r600.read.tgid.y() #0
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; FUNC-LABEL: {{^}}tgid_z_legacy:
-; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
-; GCN-NOHSA: buffer_store_dword [[VVAL]]
-
-; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
-; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
-; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
-; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
-; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
-entry:
-  %0 = call i32 @llvm.r600.read.tgid.z() #0
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; GCN-NOHSA: .section .AMDGPU.config
-; GCN-NOHSA: .long 47180
-; GCN-NOHSA-NEXT: .long 132{{$}}
-
-; FUNC-LABEL: {{^}}tidig_x_legacy:
-; GCN-NOHSA: buffer_store_dword v0
-define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
-entry:
-  %0 = call i32 @llvm.r600.read.tidig.x() #0
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; GCN-NOHSA: .section .AMDGPU.config
-; GCN-NOHSA: .long 47180
-; GCN-NOHSA-NEXT: .long 2180{{$}}
-
-; FUNC-LABEL: {{^}}tidig_y_legacy:
-
-; GCN-NOHSA: buffer_store_dword v1
-define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
-entry:
-  %0 = call i32 @llvm.r600.read.tidig.y() #0
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; GCN-NOHSA: .section .AMDGPU.config
-; GCN-NOHSA: .long 47180
-; GCN-NOHSA-NEXT: .long 4228{{$}}
-
-; FUNC-LABEL: {{^}}tidig_z_legacy:
-; GCN-NOHSA: buffer_store_dword v2
-define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
-entry:
-  %0 = call i32 @llvm.r600.read.tidig.z() #0
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
 declare i32 @llvm.r600.read.ngroups.x() #0
 declare i32 @llvm.r600.read.ngroups.y() #0
 declare i32 @llvm.r600.read.ngroups.z() #0
@@ -240,12 +151,4 @@ declare i32 @llvm.r600.read.local.size.x() #0
 declare i32 @llvm.r600.read.local.size.y() #0
 declare i32 @llvm.r600.read.local.size.z() #0
 
-declare i32 @llvm.r600.read.tgid.x() #0
-declare i32 @llvm.r600.read.tgid.y() #0
-declare i32 @llvm.r600.read.tgid.z() #0
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare i32 @llvm.r600.read.tidig.y() #0
-declare i32 @llvm.r600.read.tidig.z() #0
-
 attributes #0 = { readnone }
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 ; FUNC-LABEL: {{^}}test2:
 ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -96,7 +96,7 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out
 ; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
 ; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -112,7 +112,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrs
 ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
 define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %b = load i32, i32 addrspace(1)* %gep.b
@@ -126,7 +126,7 @@ define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i
 ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
 define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %a = load i32, i32 addrspace(1)* %gep.a
@@ -138,7 +138,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrs
 ; FUNC-LABEL: {{^}}v_and_constant_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
 define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %a = load i32, i32 addrspace(1)* %gep, align 4
   %and = and i32 %a, 1234567
@@ -149,7 +149,7 @@ define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrsp
 ; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
 define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %a = load i32, i32 addrspace(1)* %gep, align 4
   %and = and i32 %a, 64
@@ -160,7 +160,7 @@ define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 a
 ; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
 define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %a = load i32, i32 addrspace(1)* %gep, align 4
   %and = and i32 %a, -16
@@ -251,7 +251,7 @@ define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
 ; SI: v_and_b32
 ; SI: v_and_b32
 define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.a, align 8
   %gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
@@ -266,7 +266,7 @@ define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
 ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
 ; SI: buffer_store_dwordx2
 define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.a, align 8
   %and = and i64 %a, 1231231234567
@@ -322,7 +322,7 @@ define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
 ; SI-NOT: and
 ; SI: buffer_store_dwordx2
 define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.a, align 8
   %and = and i64 %a, 1234567
@@ -337,7 +337,7 @@ define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64
 ; SI-NOT: and
 ; SI: buffer_store_dwordx2
 define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.a, align 8
   %and = and i64 %a, 64
@@ -353,7 +353,7 @@ define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addr
 ; SI-NOT: and
 ; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
 define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.a, align 8
   %and = and i64 %a, -8
@@ -15,7 +15,7 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
 ; SI-LABEL: s_ctlz_i32:
@@ -120,7 +120,7 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
 ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
 ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@@ -195,7 +195,7 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
 ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
 ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
@@ -288,7 +288,7 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
 ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
 ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
   %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
@@ -576,7 +576,7 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
 ; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -663,7 +663,7 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
 ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z,
 ; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -729,7 +729,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
 ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
 ; EG-NEXT: -1(nan), 2(2.802597e-45)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@@ -795,7 +795,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
 ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
 ; EG-NEXT: -1(nan), 2(2.802597e-45)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@@ -872,7 +872,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
 ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, literal.x,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
 ; EG-NEXT: -1(nan), 2(2.802597e-45)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@@ -948,7 +948,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
 ; EG-NEXT: CNDE_INT T0.X, PV.W, literal.x, T0.W,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
 ; EG-NEXT: -1(nan), 2(2.802597e-45)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@@ -1017,7 +1017,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
 ; EG-NEXT: MOV * T0.Z, 0.0,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
   %val = load i8, i8 addrspace(1)* %valptr.gep
   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
@@ -1160,7 +1160,7 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
 ; EG-NEXT: MOV * T0.Z, 0.0,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
   %val = load i7, i7 addrspace(1)* %valptr.gep
   %ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
@@ -12,7 +12,7 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
 ; GCN: s_load_dword [[VAL:s[0-9]+]],
@@ -36,7 +36,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -54,7 +54,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
@@ -76,7 +76,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
   %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
@@ -89,7 +89,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
 ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_byte [[RESULT]],
 define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
   %val = load i8, i8 addrspace(1)* %in.gep
   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
@@ -131,7 +131,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
 ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}}
 define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -142,7 +142,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
 
 ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64_trunc:
 define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -157,7 +157,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
 ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[RESULT]],
 define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -172,7 +172,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* n
 ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[RESULT]],
 define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -187,7 +187,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* n
 ; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
 ; GCN: {{buffer|flat}}_store_byte [[FFBH]],
 define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
   %val = load i8, i8 addrspace(1)* %valptr.gep
   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
@@ -206,7 +206,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
 ; GCN-DAG: buffer_store_byte [[RESULT1]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -225,7 +225,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspa
 ; GCN: v_cndmask
 ; GCN: buffer_store_dword
 define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -243,7 +243,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noal
 ; GCN: v_cndmask
 ; GCN: buffer_store_dword
 define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -261,7 +261,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
 ; GCN: v_cndmask
 ; GCN: buffer_store_dword
 define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -279,7 +279,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1
 ; GCN: v_cndmask
 ; GCN: buffer_store_dword
 define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@@ -8,7 +8,7 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; FUNC-LABEL: {{^}}s_ctpop_i32:
 ; GCN: s_load_dword [[SVAL:s[0-9]+]],
@@ -33,7 +33,7 @@ define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val)
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -55,7 +55,7 @@ define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrs
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %tid
   %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid
   %val0 = load volatile i32, i32 addrspace(1)* %in0.gep, align 4
@@ -74,7 +74,7 @@ define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out,
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %sval) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -91,7 +91,7 @@ define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out,
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
   %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
   %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
@@ -111,7 +111,7 @@ define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
   %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
@@ -139,7 +139,7 @@ define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tid
   %val = load <8 x i32>, <8 x i32> addrspace(1)* %in.gep, align 32
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
@@ -183,7 +183,7 @@ define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tid
   %val = load <16 x i32>, <16 x i32> addrspace(1)* %in.gep, align 32
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
@@ -199,7 +199,7 @@ define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out,
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -216,7 +216,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noa
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -233,7 +233,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)*
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -251,7 +251,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %ou
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -269,7 +269,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -290,7 +290,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %ou
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -8,7 +8,7 @@ declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) nounwind readnone
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; FUNC-LABEL: {{^}}s_ctpop_i16:
 ; GCN: s_load_dword [[SVAL:s[0-9]+]],
@@ -33,7 +33,7 @@ define amdgpu_kernel void @s_ctpop_i16(i16 addrspace(1)* noalias %out, i16 %val)
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -55,7 +55,7 @@ define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrs
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in0, i16 addrspace(1)* noalias %in1) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in0.gep = getelementptr i16, i16 addrspace(1)* %in0, i32 %tid
   %in1.gep = getelementptr i16, i16 addrspace(1)* %in1, i32 %tid
   %val0 = load volatile i16, i16 addrspace(1)* %in0.gep, align 4
@@ -74,7 +74,7 @@ define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out,
 ; GCN: buffer_store_short [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %sval) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -91,7 +91,7 @@ define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out,
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <2 x i16> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
   %val = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep, align 8
   %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %val) nounwind readnone
@@ -111,7 +111,7 @@ define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <4 x i16> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
   %val = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep, align 16
   %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %val) nounwind readnone
@@ -139,7 +139,7 @@ define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <8 x i16> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %in, i32 %tid
   %val = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep, align 32
   %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) nounwind readnone
@@ -183,7 +183,7 @@ define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out, <16 x i16> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <16 x i16>, <16 x i16> addrspace(1)* %in, i32 %tid
   %val = load <16 x i16>, <16 x i16> addrspace(1)* %in.gep, align 32
   %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %val) nounwind readnone
@@ -199,7 +199,7 @@ define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out,
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -216,7 +216,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noa
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -234,7 +234,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)*
 ; GCN: buffer_store_short [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -252,7 +252,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %ou
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -270,7 +270,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -291,7 +291,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %ou
 
 ; EG: BCNT_INT
 define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 addrspace(1)* noalias %constptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %val = load i16, i16 addrspace(1)* %in.gep, align 4
   %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
@@ -34,7 +34,7 @@ define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, [8 x i32]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep, align 8
   %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@@ -53,7 +53,7 @@ define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrs
 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep, align 8
   %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@@ -93,7 +93,7 @@ define amdgpu_kernel void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <
 ; GCN: v_bcnt_u32_b32
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i32 %tid
   %val = load <2 x i64>, <2 x i64> addrspace(1)* %in.gep, align 16
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@@ -113,7 +113,7 @@ define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <
 ; GCN: v_bcnt_u32_b32
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
   %val = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep, align 32
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@@ -193,7 +193,7 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val)
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %tid
   %val = load i128, i128 addrspace(1)* %in.gep, align 8
   %ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone
@@ -9,7 +9,7 @@ declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
 declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
 ; SI: s_load_dword [[VAL:s[0-9]+]],
@@ -33,7 +33,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
   %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
@@ -51,7 +51,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
   %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
@@ -73,7 +73,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
@@ -13,7 +13,7 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; FUNC-LABEL: {{^}}fma_f32:
 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
@@ -86,7 +86,7 @@ define amdgpu_kernel void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float>
 ; FUNC-LABEL: @fma_commute_mul_inline_imm_f32
 ; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}
 define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
-  %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -101,7 +101,7 @@ define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(float addrspace(1)* no
 
 ; FUNC-LABEL: @fma_commute_mul_s_f32
 define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind {
-  %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@@ -125,7 +125,7 @@ define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %ou
 ; GFX906: v_fma_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
 define amdgpu_kernel void @fold_inline_imm_into_fmac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) nounwind {
 bb:
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext
   %gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext
@@ -6,7 +6,7 @@
 
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
@@ -21,7 +21,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
 
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, fl
 
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
@@ -76,7 +76,7 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)*
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
@@ -102,7 +102,7 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, fl
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
@@ -127,7 +127,7 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, fl
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
@@ -153,7 +153,7 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, fl
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
 
@@ -186,7 +186,7 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)*
 
 ; GCN-NOT: v_max
 define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
 
@@ -209,7 +209,7 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)*
 
 ; EG: MAX
 define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
@@ -6,7 +6,7 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
; The two inputs to the instruction are different SGPRs from the same
; super register, so we can't fold both SGPR operands even though they
@@ -87,7 +87,7 @@ define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(float addrspace(1
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -111,7 +111,7 @@ define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -135,7 +135,7 @@ define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -159,7 +159,7 @@ define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -183,7 +183,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
@@ -210,7 +210,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)*
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1
@@ -243,7 +243,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)*
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN-NOT: v_min_
define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
@@ -265,7 +265,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)*
; GCN-NOT: v_min
; GCN: s_endpgm
define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@@ -4,7 +4,7 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}u32_mad24:
; EG: MULADD_UINT24
@@ -7,7 +7,7 @@
; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %gep.in, align 4
@@ -29,7 +29,7 @@ define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
%a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4
%b = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
@@ -105,7 +105,7 @@ define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %ou
; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %gep.in, align 4
@@ -131,7 +131,7 @@ define amdgpu_kernel void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i
; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %gep.in, align 4
@@ -190,7 +190,7 @@ define amdgpu_kernel void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspac
; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep.in, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4
@@ -332,7 +332,7 @@ define amdgpu_kernel void @test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64
}
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
@@ -9,7 +9,7 @@
; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -164,7 +164,7 @@ define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <
; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -184,7 +184,7 @@ define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
@@ -248,7 +248,7 @@ define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -271,7 +271,7 @@ define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
@@ -305,7 +305,7 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid
@@ -334,7 +334,7 @@ define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -358,7 +358,7 @@ define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid
@@ -606,7 +606,7 @@ define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64
; EG: MIN_INT
; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -631,7 +631,7 @@ define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <
; EG: MIN_UINT
; EG: MIN_UINT
define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -643,7 +643,7 @@ define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <
ret void
}
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
@@ -278,7 +278,7 @@ define amdgpu_kernel void @s_mul_i128(i128 addrspace(1)* %out, [8 x i32], i128 %
; GCN: {{buffer|flat}}_store_dwordx4
define amdgpu_kernel void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.a = getelementptr inbounds i128, i128 addrspace(1)* %aptr, i32 %tid
%gep.b = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
%gep.out = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
@@ -289,7 +289,7 @@ define amdgpu_kernel void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)
ret void
}
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone}
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}setcc_v2i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
@@ -349,7 +349,7 @@ entry:
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
%gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
%gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
@@ -370,7 +370,7 @@ define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addr
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
%gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
%gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
@@ -161,7 +161,7 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -188,7 +188,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 a
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -215,7 +215,7 @@ define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 a
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -239,7 +239,7 @@ define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64
; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[SHR]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -473,7 +473,7 @@ define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocaptu
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -503,7 +503,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %o
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -562,7 +562,7 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addr
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
%out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
@@ -583,7 +583,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
%out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
@@ -715,7 +715,7 @@ define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(<3 x i16> addrspace(1)* %ou
ret void
}
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
@@ -3,9 +3,9 @@
; XUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=FUNC,GCN,VI
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s -allow-deprecated-dag-overlap -check-prefixes=FUNC,EG
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.amdgcn.workgroup.id.x() #0
define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; GCN-LABEL: shl_v2i32:
@@ -341,7 +341,7 @@ define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 a
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1
@@ -467,7 +467,7 @@ define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add
; EG-NEXT: OR_INT T0.X, PV.W, PS,
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
@@ -587,7 +587,7 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T7.X, PV.Y,
; EG-NEXT: MOV * T10.X, T6.X,
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
@@ -905,7 +905,7 @@ define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)
; EG-NEXT: LSHR T2.X, PV.W, literal.x,
; EG-NEXT: MOV * T1.Y, T0.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tgid.x() #0
%tid = call i32 @llvm.amdgcn.workgroup.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in
@@ -17,7 +17,7 @@ define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; R600: INT_TO_FLT
define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
@@ -67,7 +67,7 @@ define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)*
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
%value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
@@ -106,7 +106,7 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i1, i1 addrspace(1)* %in.gep
@@ -115,7 +115,7 @@ define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1
ret void
}
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
@@ -28,7 +28,7 @@ define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind
; EG: MAX_INT
define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
%val = load i32, i32 addrspace(1)* %gep.in, align 4
%neg = sub i32 0, %val
@@ -45,7 +45,7 @@ define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
%val = load i32, i32 addrspace(1)* %gep.in, align 4
%neg = sub i32 0, %val
@@ -100,7 +100,7 @@ define amdgpu_kernel void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> a
%z1 = insertelement <2 x i32> %z0, i32 0, i32 1
%t0 = insertelement <2 x i32> undef, i32 2, i32 0
%t1 = insertelement <2 x i32> %t0, i32 2, i32 1
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %src, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in, align 4
%neg = sub <2 x i32> %z1, %val
@@ -184,7 +184,7 @@ define amdgpu_kernel void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> a
%t1 = insertelement <4 x i32> %t0, i32 2, i32 1
%t2 = insertelement <4 x i32> %t1, i32 2, i32 2
%t3 = insertelement <4 x i32> %t2, i32 2, i32 3
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %src, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
%neg = sub <4 x i32> %z3, %val
@@ -268,7 +268,7 @@ define amdgpu_kernel void @v_min_max_i32_user(i32 addrspace(1)* %out0, i32 addrs
ret void
}
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
@@ -29,10 +29,8 @@
; GCN-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24
; GCN: s_endpgm
define amdgpu_kernel void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 {
define amdgpu_kernel void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset, i32 %tmp, i32 %tmp1, i32 %x.i.12.i) #0 {
entry:
%tmp = tail call i32 @llvm.r600.read.local.size.y()
%tmp1 = tail call i32 @llvm.r600.read.local.size.z()
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z()
@@ -41,7 +39,6 @@ entry:
%tmp11 = mul i32 %tmp10, %tmp1
%tmp9 = add i32 %tmp11, %tmp4
%x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
%x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
%mul.26.i = mul i32 %x.i.12.i, %x.i.i
%add.i = add i32 %tmp2, %mul.26.i
%arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i
@@ -78,25 +75,9 @@ entry:
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workgroup.id.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.z() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.z() #1
attributes #0 = { norecurse nounwind }
@@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
; FUNC-LABEL: {{^}}ashr_v2i32:
; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@@ -248,7 +248,7 @@ define amdgpu_kernel void @s_ashr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}}
define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in
@@ -276,7 +276,7 @@ define amdgpu_kernel void @s_ashr_63_i64(i64 addrspace(1)* %out, [8 x i32], i64
; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]]
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[SHIFT]]:[[COPY]]{{\]}}
define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in
@@ -2,7 +2,7 @@
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
; FUNC-LABEL: {{^}}lshr_i32:
; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@@ -205,7 +205,7 @@ define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], v[[VHI1]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[HI_A]]:[[VHI]]{{\]}}
define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in
@@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
define amdgpu_kernel void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, [8 x i32], i64 %in) {
; GCN-LABEL: {{^}}trunc_i64_to_i32_store:
@@ -113,7 +113,7 @@ define amdgpu_kernel void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, [8 x i32],
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[MASKED]]
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
define amdgpu_kernel void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%x = load i64, i64 addrspace(1)* %gep
@@ -25,7 +25,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]]
; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]]
define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep
@@ -55,7 +55,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]
define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep
@@ -73,7 +73,7 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f32:
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@@ -91,7 +91,7 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f16:
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@@ -100,7 +100,7 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
ret void
}
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
@@ -17,7 +17,7 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; R600: INT_TO_FLT
define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep
@@ -67,7 +67,7 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)*
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
%value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
@@ -106,7 +106,7 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out,
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i1, i1 addrspace(1)* %in.gep
@@ -133,7 +133,7 @@ entry:
ret void
}
declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }