AMDGPU: Move R600 test compatibility hack

Instead of handling the r600 intrinsics on amdgcn, handle the amdgcn
intrinsics on r600.
This commit is contained in:
Matt Arsenault 2020-02-09 16:38:56 -05:00
parent f319074824
commit 7af7b96a9b
28 changed files with 169 additions and 285 deletions

View File

@ -615,21 +615,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return LowerImplicitParameter(DAG, VT, DL, 8); return LowerImplicitParameter(DAG, VT, DL, 8);
case Intrinsic::r600_read_tgid_x: case Intrinsic::r600_read_tgid_x:
case Intrinsic::amdgcn_workgroup_id_x:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_X, VT); R600::T1_X, VT);
case Intrinsic::r600_read_tgid_y: case Intrinsic::r600_read_tgid_y:
case Intrinsic::amdgcn_workgroup_id_y:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Y, VT); R600::T1_Y, VT);
case Intrinsic::r600_read_tgid_z: case Intrinsic::r600_read_tgid_z:
case Intrinsic::amdgcn_workgroup_id_z:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Z, VT); R600::T1_Z, VT);
case Intrinsic::r600_read_tidig_x: case Intrinsic::r600_read_tidig_x:
case Intrinsic::amdgcn_workitem_id_x:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_X, VT); R600::T0_X, VT);
case Intrinsic::r600_read_tidig_y: case Intrinsic::r600_read_tidig_y:
case Intrinsic::amdgcn_workitem_id_y:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Y, VT); R600::T0_Y, VT);
case Intrinsic::r600_read_tidig_z: case Intrinsic::r600_read_tidig_z:
case Intrinsic::amdgcn_workitem_id_z:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Z, VT); R600::T0_Z, VT);

View File

@ -5807,29 +5807,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerImplicitZextParam(DAG, Op, MVT::i16, return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z); SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x: case Intrinsic::amdgcn_workgroup_id_x:
case Intrinsic::r600_read_tgid_x:
return getPreloadedValue(DAG, *MFI, VT, return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X); AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y: case Intrinsic::amdgcn_workgroup_id_y:
case Intrinsic::r600_read_tgid_y:
return getPreloadedValue(DAG, *MFI, VT, return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z: case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
return getPreloadedValue(DAG, *MFI, VT, return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()), SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX); MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()), SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDY); MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z: case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()), SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ); MFI->getArgInfo().WorkItemIDZ);

View File

@ -139,95 +139,6 @@ entry:
ret void ret void
} }
; Legacy use of r600 intrinsics by GCN
; The tgid values are stored in sgprs offset by the number of user
; sgprs.
; FUNC-LABEL: {{^}}tgid_x_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}tgid_y_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
; GCN-NOHSA: buffer_store_dword [[VVAL]]
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}tgid_z_legacy:
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; GCN-NOHSA: buffer_store_dword [[VVAL]]
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}
; FUNC-LABEL: {{^}}tidig_x_legacy:
; GCN-NOHSA: buffer_store_dword v0
define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}
; FUNC-LABEL: {{^}}tidig_y_legacy:
; GCN-NOHSA: buffer_store_dword v1
define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}
; FUNC-LABEL: {{^}}tidig_z_legacy:
; GCN-NOHSA: buffer_store_dword v2
define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
declare i32 @llvm.r600.read.ngroups.x() #0 declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0 declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0 declare i32 @llvm.r600.read.ngroups.z() #0
@ -240,12 +151,4 @@ declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0 declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0 declare i32 @llvm.r600.read.local.size.z() #0
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0
declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0
attributes #0 = { readnone } attributes #0 = { readnone }

View File

@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.amdgcn.workitem.id.x() #0
; FUNC-LABEL: {{^}}test2: ; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@ -96,7 +96,7 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr: ; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@ -112,7 +112,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrs
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) { define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%b = load i32, i32 addrspace(1)* %gep.b %b = load i32, i32 addrspace(1)* %gep.b
@ -126,7 +126,7 @@ define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) { define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%a = load i32, i32 addrspace(1)* %gep.a %a = load i32, i32 addrspace(1)* %gep.a
@ -138,7 +138,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrs
; FUNC-LABEL: {{^}}v_and_constant_i32 ; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}} ; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4 %a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, 1234567 %and = and i32 %a, 1234567
@ -149,7 +149,7 @@ define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrsp
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32 ; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}} ; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4 %a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, 64 %and = and i32 %a, 64
@ -160,7 +160,7 @@ define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 a
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32 ; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}} ; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep, align 4 %a = load i32, i32 addrspace(1)* %gep, align 4
%and = and i32 %a, -16 %and = and i32 %a, -16
@ -251,7 +251,7 @@ define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
; SI: v_and_b32 ; SI: v_and_b32
; SI: v_and_b32 ; SI: v_and_b32
define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) { define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8 %a = load i64, i64 addrspace(1)* %gep.a, align 8
%gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid %gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
@ -266,7 +266,7 @@ define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}} ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2 ; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8 %a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 1231231234567 %and = and i64 %a, 1231231234567
@ -322,7 +322,7 @@ define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
; SI-NOT: and ; SI-NOT: and
; SI: buffer_store_dwordx2 ; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8 %a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 1234567 %and = and i64 %a, 1234567
@ -337,7 +337,7 @@ define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64
; SI-NOT: and ; SI-NOT: and
; SI: buffer_store_dwordx2 ; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8 %a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, 64 %and = and i64 %a, 64
@ -353,7 +353,7 @@ define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addr
; SI-NOT: and ; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}} ; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.a, align 8 %a = load i64, i64 addrspace(1)* %gep.a, align 8
%and = and i64 %a, -8 %and = and i64 %a, -8

View File

@ -15,7 +15,7 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; SI-LABEL: s_ctlz_i32: ; SI-LABEL: s_ctlz_i32:
@ -120,7 +120,7 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -195,7 +195,7 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
@ -288,7 +288,7 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
@ -576,7 +576,7 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W, ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
; EG-NEXT: LSHR * T1.X, PV.W, literal.x, ; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep %val = load i64, i64 addrspace(1)* %in.gep
@ -663,7 +663,7 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z, ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z,
; EG-NEXT: LSHR * T1.X, PV.W, literal.x, ; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep %val = load i64, i64 addrspace(1)* %in.gep
@ -729,7 +729,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45) ; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -795,7 +795,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45) ; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -872,7 +872,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, literal.x, ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45) ; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -948,7 +948,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; EG-NEXT: CNDE_INT T0.X, PV.W, literal.x, T0.W, ; EG-NEXT: CNDE_INT T0.X, PV.W, literal.x, T0.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45) ; EG-NEXT: -1(nan), 2(2.802597e-45)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
@ -1017,7 +1017,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; EG-NEXT: MOV * T0.Z, 0.0, ; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
%val = load i8, i8 addrspace(1)* %valptr.gep %val = load i8, i8 addrspace(1)* %valptr.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
@ -1160,7 +1160,7 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
; EG-NEXT: MOV * T0.Z, 0.0, ; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
%val = load i7, i7 addrspace(1)* %valptr.gep %val = load i7, i7 addrspace(1)* %valptr.gep
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone %ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone

View File

@ -12,7 +12,7 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32: ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
; GCN: s_load_dword [[VAL:s[0-9]+]], ; GCN: s_load_dword [[VAL:s[0-9]+]],
@ -36,7 +36,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@ -54,7 +54,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; EG: FFBH_UINT {{\*? *}}[[RESULT]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
@ -76,7 +76,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; EG: FFBH_UINT {{\*? *}}[[RESULT]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
@ -89,7 +89,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_byte [[RESULT]], ; GCN: buffer_store_byte [[RESULT]],
define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
%val = load i8, i8 addrspace(1)* %in.gep %val = load i8, i8 addrspace(1)* %in.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
@ -131,7 +131,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]] ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}} ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}}
define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep %val = load i64, i64 addrspace(1)* %in.gep
@ -142,7 +142,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64_trunc: ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64_trunc:
define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep %val = load i64, i64 addrspace(1)* %in.gep
@ -157,7 +157,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]], ; GCN: buffer_store_dword [[RESULT]],
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@ -172,7 +172,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* n
; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]], ; GCN: buffer_store_dword [[RESULT]],
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@ -187,7 +187,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* n
; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] ; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
; GCN: {{buffer|flat}}_store_byte [[FFBH]], ; GCN: {{buffer|flat}}_store_byte [[FFBH]],
define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
%val = load i8, i8 addrspace(1)* %valptr.gep %val = load i8, i8 addrspace(1)* %valptr.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
@ -206,7 +206,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
; GCN-DAG: buffer_store_byte [[RESULT1]] ; GCN-DAG: buffer_store_byte [[RESULT1]]
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@ -225,7 +225,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspa
; GCN: v_cndmask ; GCN: v_cndmask
; GCN: buffer_store_dword ; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@ -243,7 +243,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noal
; GCN: v_cndmask ; GCN: v_cndmask
; GCN: buffer_store_dword ; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@ -261,7 +261,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
; GCN: v_cndmask ; GCN: v_cndmask
; GCN: buffer_store_dword ; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
@ -279,7 +279,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1
; GCN: v_cndmask ; GCN: v_cndmask
; GCN: buffer_store_dword ; GCN: buffer_store_dword
define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone

View File

@ -8,7 +8,7 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i32: ; FUNC-LABEL: {{^}}s_ctpop_i32:
; GCN: s_load_dword [[SVAL:s[0-9]+]], ; GCN: s_load_dword [[SVAL:s[0-9]+]],
@ -33,7 +33,7 @@ define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val)
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -55,7 +55,7 @@ define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrs
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind { define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %tid %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %tid
%in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid
%val0 = load volatile i32, i32 addrspace(1)* %in0.gep, align 4 %val0 = load volatile i32, i32 addrspace(1)* %in0.gep, align 4
@ -74,7 +74,7 @@ define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out,
; GCN: buffer_store_dword [[RESULT]], ; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %sval) nounwind { define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %sval) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -91,7 +91,7 @@ define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out,
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
@ -111,7 +111,7 @@ define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
@ -139,7 +139,7 @@ define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tid
%val = load <8 x i32>, <8 x i32> addrspace(1)* %in.gep, align 32 %val = load <8 x i32>, <8 x i32> addrspace(1)* %in.gep, align 32
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
@ -183,7 +183,7 @@ define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tid
%val = load <16 x i32>, <16 x i32> addrspace(1)* %in.gep, align 32 %val = load <16 x i32>, <16 x i32> addrspace(1)* %in.gep, align 32
%ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
@ -199,7 +199,7 @@ define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out,
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -216,7 +216,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noa
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -233,7 +233,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)*
; GCN: buffer_store_dword [[RESULT]], ; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -251,7 +251,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %ou
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -269,7 +269,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@ -290,7 +290,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %ou
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone

View File

@ -8,7 +8,7 @@ declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) nounwind readnone declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i16: ; FUNC-LABEL: {{^}}s_ctpop_i16:
; GCN: s_load_dword [[SVAL:s[0-9]+]], ; GCN: s_load_dword [[SVAL:s[0-9]+]],
@ -33,7 +33,7 @@ define amdgpu_kernel void @s_ctpop_i16(i16 addrspace(1)* noalias %out, i16 %val)
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@ -55,7 +55,7 @@ define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrs
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in0, i16 addrspace(1)* noalias %in1) nounwind { define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in0, i16 addrspace(1)* noalias %in1) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in0.gep = getelementptr i16, i16 addrspace(1)* %in0, i32 %tid %in0.gep = getelementptr i16, i16 addrspace(1)* %in0, i32 %tid
%in1.gep = getelementptr i16, i16 addrspace(1)* %in1, i32 %tid %in1.gep = getelementptr i16, i16 addrspace(1)* %in1, i32 %tid
%val0 = load volatile i16, i16 addrspace(1)* %in0.gep, align 4 %val0 = load volatile i16, i16 addrspace(1)* %in0.gep, align 4
@ -74,7 +74,7 @@ define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out,
; GCN: buffer_store_short [[RESULT]], ; GCN: buffer_store_short [[RESULT]],
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %sval) nounwind { define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %sval) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@ -91,7 +91,7 @@ define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out,
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <2 x i16> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <2 x i16> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
%val = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep, align 8 %val = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep, align 8
%ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %val) nounwind readnone %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %val) nounwind readnone
@ -111,7 +111,7 @@ define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <4 x i16> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <4 x i16> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
%val = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep, align 16 %val = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep, align 16
%ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %val) nounwind readnone %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %val) nounwind readnone
@ -139,7 +139,7 @@ define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <8 x i16> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <8 x i16> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %in, i32 %tid
%val = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep, align 32 %val = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep, align 32
%ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) nounwind readnone %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) nounwind readnone
@ -183,7 +183,7 @@ define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <
; EG: BCNT_INT ; EG: BCNT_INT
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out, <16 x i16> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out, <16 x i16> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <16 x i16>, <16 x i16> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <16 x i16>, <16 x i16> addrspace(1)* %in, i32 %tid
%val = load <16 x i16>, <16 x i16> addrspace(1)* %in.gep, align 32 %val = load <16 x i16>, <16 x i16> addrspace(1)* %in.gep, align 32
%ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %val) nounwind readnone %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %val) nounwind readnone
@ -199,7 +199,7 @@ define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out,
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@ -216,7 +216,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noa
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@ -234,7 +234,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)*
; GCN: buffer_store_short [[RESULT]], ; GCN: buffer_store_short [[RESULT]],
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@ -252,7 +252,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %ou
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@ -270,7 +270,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone
@ -291,7 +291,7 @@ define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %ou
; EG: BCNT_INT ; EG: BCNT_INT
define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 addrspace(1)* noalias %constptr) nounwind { define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 addrspace(1)* noalias %constptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%val = load i16, i16 addrspace(1)* %in.gep, align 4 %val = load i16, i16 addrspace(1)* %in.gep, align 4
%ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone

View File

@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone declare i64 @llvm.ctpop.i64(i64) nounwind readnone
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
@ -34,7 +34,7 @@ define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, [8 x i32]
; GCN: buffer_store_dword [[RESULT]], ; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep, align 8 %val = load i64, i64 addrspace(1)* %in.gep, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@ -53,7 +53,7 @@ define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrs
; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind { define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep, align 8 %val = load i64, i64 addrspace(1)* %in.gep, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@ -93,7 +93,7 @@ define amdgpu_kernel void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <
; GCN: v_bcnt_u32_b32 ; GCN: v_bcnt_u32_b32
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i32 %tid
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in.gep, align 16 %val = load <2 x i64>, <2 x i64> addrspace(1)* %in.gep, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@ -113,7 +113,7 @@ define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <
; GCN: v_bcnt_u32_b32 ; GCN: v_bcnt_u32_b32
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%val = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep, align 32 %val = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@ -193,7 +193,7 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val)
; GCN: buffer_store_dword [[RESULT]], ; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind { define amdgpu_kernel void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %tid
%val = load i128, i128 addrspace(1)* %in.gep, align 8 %val = load i128, i128 addrspace(1)* %in.gep, align 8
%ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone %ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone

View File

@ -9,7 +9,7 @@ declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32: ; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
; SI: s_load_dword [[VAL:s[0-9]+]], ; SI: s_load_dword [[VAL:s[0-9]+]],
@ -33,7 +33,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]] ; EG: FFBL_INT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep, align 4 %val = load i32, i32 addrspace(1)* %in.gep, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
@ -51,7 +51,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; EG: FFBL_INT {{\*? *}}[[RESULT]] ; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]] ; EG: FFBL_INT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
@ -73,7 +73,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; EG: FFBL_INT {{\*? *}}[[RESULT]] ; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]] ; EG: FFBL_INT {{\*? *}}[[RESULT]]
define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone

View File

@ -13,7 +13,7 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}fma_f32: ; FUNC-LABEL: {{^}}fma_f32:
; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
@ -86,7 +86,7 @@ define amdgpu_kernel void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float>
; FUNC-LABEL: @fma_commute_mul_inline_imm_f32 ; FUNC-LABEL: @fma_commute_mul_inline_imm_f32
; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}} ; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}
define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -101,7 +101,7 @@ define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(float addrspace(1)* no
; FUNC-LABEL: @fma_commute_mul_s_f32 ; FUNC-LABEL: @fma_commute_mul_s_f32
define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind { define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
@ -125,7 +125,7 @@ define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %ou
; GFX906: v_fma_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0 ; GFX906: v_fma_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
define amdgpu_kernel void @fold_inline_imm_into_fmac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) nounwind { define amdgpu_kernel void @fold_inline_imm_into_fmac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) nounwind {
bb: bb:
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64 %tid.ext = sext i32 %tid to i64
%gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext %gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext
%gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext %gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext

View File

@ -6,7 +6,7 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32: ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
@ -21,7 +21,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
; EG: MAX ; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -49,7 +49,7 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, fl
; EG: MAX ; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -76,7 +76,7 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)*
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX ; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -102,7 +102,7 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX ; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -127,7 +127,7 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX ; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -153,7 +153,7 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX ; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
@ -186,7 +186,7 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)*
; GCN-NOT: v_max ; GCN-NOT: v_max
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
@ -209,7 +209,7 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)*
; EG: MAX ; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

View File

@ -6,7 +6,7 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
; The two inputs to the instruction are different SGPRs from the same ; The two inputs to the instruction are different SGPRs from the same
; super register, so we can't fold both SGPR operands even though they ; super register, so we can't fold both SGPR operands even though they
@ -87,7 +87,7 @@ define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(float addrspace(1
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -111,7 +111,7 @@ define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -135,7 +135,7 @@ define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -159,7 +159,7 @@ define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
@ -183,7 +183,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, fl
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
@ -210,7 +210,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)*
; GCN-NONAN: v_min_f32_e32 ; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32 ; GCN-NONAN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1
@ -243,7 +243,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)*
; GCN-NONAN: v_min_f32_e32 ; GCN-NONAN: v_min_f32_e32
; GCN-NONAN-NOT: v_min_ ; GCN-NONAN-NOT: v_min_
define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
@ -265,7 +265,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)*
; GCN-NOT: v_min ; GCN-NOT: v_min
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 { define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
%gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

View File

@ -4,7 +4,7 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2 ; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}u32_mad24: ; FUNC-LABEL: {{^}}u32_mad24:
; EG: MULADD_UINT24 ; EG: MULADD_UINT24

View File

@ -7,7 +7,7 @@
; EG: MAX_INT ; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %aptr, align 4 %a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %gep.in, align 4 %b = load i32, i32 addrspace(1)* %gep.in, align 4
@ -29,7 +29,7 @@ define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MAX_INT ; EG: MAX_INT
; EG: MAX_INT ; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind { define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
%a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4 %a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4
%b = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4 %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
@ -105,7 +105,7 @@ define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %ou
; EG: MAX_INT ; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %aptr, align 4 %a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %gep.in, align 4 %b = load i32, i32 addrspace(1)* %gep.in, align 4
@ -131,7 +131,7 @@ define amdgpu_kernel void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i
; EG: MAX_UINT ; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %aptr, align 4 %a = load i32, i32 addrspace(1)* %aptr, align 4
%b = load i32, i32 addrspace(1)* %gep.in, align 4 %b = load i32, i32 addrspace(1)* %gep.in, align 4
@ -190,7 +190,7 @@ define amdgpu_kernel void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspac
; EG: MAX_UINT ; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%a = load i32, i32 addrspace(1)* %gep.in, align 4 %a = load i32, i32 addrspace(1)* %gep.in, align 4
%b = load i32, i32 addrspace(1)* %bptr, align 4 %b = load i32, i32 addrspace(1)* %bptr, align 4
@ -332,7 +332,7 @@ define amdgpu_kernel void @test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64
} }
declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone } attributes #0 = { nounwind readnone }
attributes #1 = { nounwind } attributes #1 = { nounwind }

View File

@ -9,7 +9,7 @@
; EG: MIN_INT ; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@ -164,7 +164,7 @@ define amdgpu_kernel void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <
; EG: MIN_INT ; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 { define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@ -184,7 +184,7 @@ define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MIN_INT ; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 { define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
%out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
@ -248,7 +248,7 @@ define amdgpu_kernel void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %
; EG: MIN_UINT ; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@ -271,7 +271,7 @@ define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MIN_UINT ; EG: MIN_UINT
; EG: MIN_UINT ; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
@ -305,7 +305,7 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <
; EG: MIN_UINT ; EG: MIN_UINT
; EG: MIN_UINT ; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid
@ -334,7 +334,7 @@ define amdgpu_kernel void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i
; EG: MIN_UINT ; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@ -358,7 +358,7 @@ define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrs
; EG: MIN_UINT ; EG: MIN_UINT
define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid
@ -606,7 +606,7 @@ define amdgpu_kernel void @test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64
; EG: MIN_INT ; EG: MIN_INT
; EG: MIN_INT ; EG: MIN_INT
define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@ -631,7 +631,7 @@ define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <
; EG: MIN_UINT ; EG: MIN_UINT
; EG: MIN_UINT ; EG: MIN_UINT
define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 { define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
%b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
%out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@ -643,7 +643,7 @@ define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <
ret void ret void
} }
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { nounwind readnone } attributes #1 = { nounwind readnone }

View File

@ -278,7 +278,7 @@ define amdgpu_kernel void @s_mul_i128(i128 addrspace(1)* %out, [8 x i32], i128 %
; GCN: {{buffer|flat}}_store_dwordx4 ; GCN: {{buffer|flat}}_store_dwordx4
define amdgpu_kernel void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 { define amdgpu_kernel void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.a = getelementptr inbounds i128, i128 addrspace(1)* %aptr, i32 %tid %gep.a = getelementptr inbounds i128, i128 addrspace(1)* %aptr, i32 %tid
%gep.b = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid %gep.b = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
%gep.out = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid %gep.out = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
@ -289,7 +289,7 @@ define amdgpu_kernel void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)
ret void ret void
} }
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { nounwind readnone} attributes #1 = { nounwind readnone}

View File

@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}setcc_v2i32: ; FUNC-LABEL: {{^}}setcc_v2i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
@ -349,7 +349,7 @@ entry:
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, ; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 { define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
%gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
%gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
@ -370,7 +370,7 @@ define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addr
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, ; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 { define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
%gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
%gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid

View File

@ -161,7 +161,7 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -188,7 +188,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 a
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -215,7 +215,7 @@ define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 a
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -239,7 +239,7 @@ define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64
; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]] ; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[SHR]]{{\]}} ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[SHR]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -473,7 +473,7 @@ define amdgpu_kernel void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocaptu
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} ; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -503,7 +503,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %o
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} ; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@ -562,7 +562,7 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addr
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]] ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 { define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid %gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
%out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
@ -583,7 +583,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}} ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]] ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind { define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid %a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
%b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid %b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
%out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
@ -715,7 +715,7 @@ define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(<3 x i16> addrspace(1)* %ou
ret void ret void
} }
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { nounwind readnone } attributes #1 = { nounwind readnone }

View File

@ -3,9 +3,9 @@
; XUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=FUNC,GCN,VI ; XUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=FUNC,GCN,VI
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s -allow-deprecated-dag-overlap -check-prefixes=FUNC,EG ; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s -allow-deprecated-dag-overlap -check-prefixes=FUNC,EG
declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.amdgcn.workgroup.id.x() #0
define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; GCN-LABEL: shl_v2i32: ; GCN-LABEL: shl_v2i32:
@ -341,7 +341,7 @@ define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 a
; EG-NEXT: MOV * T0.Z, 0.0, ; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
%b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1 %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1
@ -467,7 +467,7 @@ define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add
; EG-NEXT: OR_INT T0.X, PV.W, PS, ; EG-NEXT: OR_INT T0.X, PV.W, PS,
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, ; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
@ -587,7 +587,7 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T7.X, PV.Y, ; EG-NEXT: MOV T7.X, PV.Y,
; EG-NEXT: MOV * T10.X, T6.X, ; EG-NEXT: MOV * T10.X, T6.X,
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
%gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid %gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
@ -905,7 +905,7 @@ define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)
; EG-NEXT: LSHR T2.X, PV.W, literal.x, ; EG-NEXT: LSHR T2.X, PV.W, literal.x,
; EG-NEXT: MOV * T1.Y, T0.X, ; EG-NEXT: MOV * T1.Y, T0.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.r600.read.tgid.x() #0 %tid = call i32 @llvm.amdgcn.workgroup.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in %a = load i64, i64 addrspace(1)* %gep.in

View File

@ -17,7 +17,7 @@ define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; R600: INT_TO_FLT ; R600: INT_TO_FLT
define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 { define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
@ -67,7 +67,7 @@ define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)*
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
%value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
@ -106,7 +106,7 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm ; SI: s_endpgm
define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i1, i1 addrspace(1)* %in.gep %val = load i1, i1 addrspace(1)* %in.gep
@ -115,7 +115,7 @@ define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1
ret void ret void
} }
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { nounwind readnone } attributes #1 = { nounwind readnone }

View File

@ -28,7 +28,7 @@ define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind
; EG: MAX_INT ; EG: MAX_INT
define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
%val = load i32, i32 addrspace(1)* %gep.in, align 4 %val = load i32, i32 addrspace(1)* %gep.in, align 4
%neg = sub i32 0, %val %neg = sub i32 0, %val
@ -45,7 +45,7 @@ define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]] ; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]] ; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
%val = load i32, i32 addrspace(1)* %gep.in, align 4 %val = load i32, i32 addrspace(1)* %gep.in, align 4
%neg = sub i32 0, %val %neg = sub i32 0, %val
@ -100,7 +100,7 @@ define amdgpu_kernel void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> a
%z1 = insertelement <2 x i32> %z0, i32 0, i32 1 %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
%t0 = insertelement <2 x i32> undef, i32 2, i32 0 %t0 = insertelement <2 x i32> undef, i32 2, i32 0
%t1 = insertelement <2 x i32> %t0, i32 2, i32 1 %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %src, i32 %tid %gep.in = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %src, i32 %tid
%val = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in, align 4 %val = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in, align 4
%neg = sub <2 x i32> %z1, %val %neg = sub <2 x i32> %z1, %val
@ -184,7 +184,7 @@ define amdgpu_kernel void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> a
%t1 = insertelement <4 x i32> %t0, i32 2, i32 1 %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
%t2 = insertelement <4 x i32> %t1, i32 2, i32 2 %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
%t3 = insertelement <4 x i32> %t2, i32 2, i32 3 %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %src, i32 %tid %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %src, i32 %tid
%val = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4 %val = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
%neg = sub <4 x i32> %z3, %val %neg = sub <4 x i32> %z3, %val
@ -268,7 +268,7 @@ define amdgpu_kernel void @v_min_max_i32_user(i32 addrspace(1)* %out0, i32 addrs
ret void ret void
} }
declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone } attributes #0 = { nounwind readnone }
attributes #1 = { nounwind } attributes #1 = { nounwind }

View File

@ -29,10 +29,8 @@
; GCN-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24 ; GCN-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24
; GCN: s_endpgm ; GCN: s_endpgm
define amdgpu_kernel void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 { define amdgpu_kernel void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset, i32 %tmp, i32 %tmp1, i32 %x.i.12.i) #0 {
entry: entry:
%tmp = tail call i32 @llvm.r600.read.local.size.y()
%tmp1 = tail call i32 @llvm.r600.read.local.size.z()
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y() %tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z() %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z()
@ -41,7 +39,6 @@ entry:
%tmp11 = mul i32 %tmp10, %tmp1 %tmp11 = mul i32 %tmp10, %tmp1
%tmp9 = add i32 %tmp11, %tmp4 %tmp9 = add i32 %tmp11, %tmp4
%x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1 %x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
%x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
%mul.26.i = mul i32 %x.i.12.i, %x.i.i %mul.26.i = mul i32 %x.i.12.i, %x.i.i
%add.i = add i32 %tmp2, %mul.26.i %add.i = add i32 %tmp2, %mul.26.i
%arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i %arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i
@ -78,25 +75,9 @@ entry:
ret void ret void
} }
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workgroup.id.x() #1 declare i32 @llvm.amdgcn.workgroup.id.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.z() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() #1 declare i32 @llvm.amdgcn.workitem.id.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.z() #1 declare i32 @llvm.amdgcn.workitem.id.z() #1
attributes #0 = { norecurse nounwind } attributes #0 = { norecurse nounwind }

View File

@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.amdgcn.workitem.id.x() #0
; FUNC-LABEL: {{^}}ashr_v2i32: ; FUNC-LABEL: {{^}}ashr_v2i32:
; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@ -248,7 +248,7 @@ define amdgpu_kernel void @s_ashr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]] ; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}} ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}}
define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in %a = load i64, i64 addrspace(1)* %gep.in
@ -276,7 +276,7 @@ define amdgpu_kernel void @s_ashr_63_i64(i64 addrspace(1)* %out, [8 x i32], i64
; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]] ; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]]
; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[SHIFT]]:[[COPY]]{{\]}} ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[SHIFT]]:[[COPY]]{{\]}}
define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in %a = load i64, i64 addrspace(1)* %gep.in

View File

@ -2,7 +2,7 @@
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.amdgcn.workitem.id.x() #0
; FUNC-LABEL: {{^}}lshr_i32: ; FUNC-LABEL: {{^}}lshr_i32:
; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@ -205,7 +205,7 @@ define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], v[[VHI1]]{{$}} ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], v[[VHI1]]{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[HI_A]]:[[VHI]]{{\]}} ; GCN: buffer_store_dwordx2 v{{\[}}[[HI_A]]:[[VHI]]{{\]}}
define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() #0 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
%a = load i64, i64 addrspace(1)* %gep.in %a = load i64, i64 addrspace(1)* %gep.in

View File

@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
define amdgpu_kernel void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, [8 x i32], i64 %in) { define amdgpu_kernel void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, [8 x i32], i64 %in) {
; GCN-LABEL: {{^}}trunc_i64_to_i32_store: ; GCN-LABEL: {{^}}trunc_i64_to_i32_store:
@ -113,7 +113,7 @@ define amdgpu_kernel void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, [8 x i32],
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[MASKED]] ; GCN: v_cmp_eq_u32_e32 vcc, 1, [[MASKED]]
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc ; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
define amdgpu_kernel void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { define amdgpu_kernel void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
%x = load i64, i64 addrspace(1)* %gep %x = load i64, i64 addrspace(1)* %gep

View File

@ -25,7 +25,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64
; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]] ; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]]
; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]] ; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]]
define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 { define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep %val = load i64, i64 addrspace(1)* %in.gep
@ -55,7 +55,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64
; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]] ; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]] ; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]
define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i64, i64 addrspace(1)* %in.gep %val = load i64, i64 addrspace(1)* %in.gep
@ -73,7 +73,7 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(<2 x float> addrspace(1)*
; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f32: ; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f32:
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@ -91,7 +91,7 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(<2 x half> addrspace(1)*
; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f16: ; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f16:
define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
%value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@ -100,7 +100,7 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)*
ret void ret void
} }
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { nounwind readnone } attributes #1 = { nounwind readnone }

View File

@ -17,7 +17,7 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32
; R600: INT_TO_FLT ; R600: INT_TO_FLT
define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 { define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i32, i32 addrspace(1)* %in.gep %val = load i32, i32 addrspace(1)* %in.gep
@ -67,7 +67,7 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)*
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
%out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
%value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
@ -106,7 +106,7 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out,
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm ; SI: s_endpgm
define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
%val = load i1, i1 addrspace(1)* %in.gep %val = load i1, i1 addrspace(1)* %in.gep
@ -133,7 +133,7 @@ entry:
ret void ret void
} }
declare i32 @llvm.r600.read.tidig.x() #1 declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { nounwind readnone } attributes #1 = { nounwind readnone }