[AMDGPU][GlobalISel] Better legalization of 32-bit ctlz/cttz

Differential Revision: https://reviews.llvm.org/D107474
This commit is contained in:
Jay Foad 2021-08-03 17:11:08 +01:00
parent 24b67a9024
commit 83610d4eb0
13 changed files with 319 additions and 349 deletions

View File

@ -940,7 +940,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(1, S32, S64) .clampScalar(1, S32, S64)
.widenScalarToNextPow2(0, 32) .widenScalarToNextPow2(0, 32)
.widenScalarToNextPow2(1, 32) .widenScalarToNextPow2(1, 32)
.lower(); .custom();
// The 64-bit versions produce 32-bit results, but only on the SALU. // The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF}) getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF})
@ -1758,6 +1758,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeFFloor(MI, MRI, B); return legalizeFFloor(MI, MRI, B);
case TargetOpcode::G_BUILD_VECTOR: case TargetOpcode::G_BUILD_VECTOR:
return legalizeBuildVector(MI, MRI, B); return legalizeBuildVector(MI, MRI, B);
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ:
return legalizeCTLZ_CTTZ(MI, MRI, B);
default: default:
return false; return false;
} }
@ -2779,6 +2782,27 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
return true; return true;
} }
// Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to
// ctlz/cttz_zero_undef. This allows us to fix up the result for the zero input
// case with a single min instruction instead of a compare+select.
bool AMDGPULegalizerInfo::legalizeCTLZ_CTTZ(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);
unsigned NewOpc = MI.getOpcode() == AMDGPU::G_CTLZ
? AMDGPU::G_AMDGPU_FFBH_U32
: AMDGPU::G_AMDGPU_FFBL_B32;
auto Tmp = B.buildInstr(NewOpc, {DstTy}, {Src});
B.buildUMin(Dst, Tmp, B.buildConstant(DstTy, SrcTy.getSizeInBits()));
MI.eraseFromParent();
return true;
}
// Check that this is a G_XOR x, -1 // Check that this is a G_XOR x, -1
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) { static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_XOR) if (MI.getOpcode() != TargetOpcode::G_XOR)

View File

@ -89,6 +89,8 @@ public:
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const; MachineIRBuilder &B) const;
bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B, bool loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg, const ArgDescriptor *Arg,

View File

@ -2513,6 +2513,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
llvm_unreachable("narrowScalar should have succeeded"); llvm_unreachable("narrowScalar should have succeeded");
return; return;
} }
case AMDGPU::G_AMDGPU_FFBH_U32:
case AMDGPU::G_AMDGPU_FFBL_B32:
case AMDGPU::G_CTLZ_ZERO_UNDEF: case AMDGPU::G_CTLZ_ZERO_UNDEF:
case AMDGPU::G_CTTZ_ZERO_UNDEF: { case AMDGPU::G_CTTZ_ZERO_UNDEF: {
const RegisterBank *DstBank = const RegisterBank *DstBank =
@ -2528,18 +2530,26 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// We can narrow this more efficiently than Helper can by using ffbh/ffbl // We can narrow this more efficiently than Helper can by using ffbh/ffbl
// which return -1 when the input is zero: // which return -1 when the input is zero:
// (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), 32 + (ffbh lo)) // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
// (cttz_zero_undef hi:lo) -> (umin 32 + (ffbl hi), (ffbl lo)) // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
// (ffbh hi:lo) -> (umin (ffbh hi), (uaddsat (ffbh lo), 32))
// (ffbl hi:lo) -> (umin (uaddsat (ffbh hi), 32), (ffbh lo))
ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank); ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
MachineIRBuilder B(MI, ApplyVALU); MachineIRBuilder B(MI, ApplyVALU);
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1)); SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
? AMDGPU::G_AMDGPU_FFBH_U32 ? AMDGPU::G_AMDGPU_FFBH_U32
: AMDGPU::G_AMDGPU_FFBL_B32; : Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
unsigned Idx = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF; ? AMDGPU::G_AMDGPU_FFBL_B32
: Opc;
unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]}); auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]});
auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]}); auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]});
Y = B.buildAdd(S32, Y, B.buildConstant(S32, 32)); unsigned AddOpc =
Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
? AMDGPU::G_ADD
: AMDGPU::G_UADDSAT;
Y = B.buildInstr(AddOpc, {S32}, {Y, B.buildConstant(S32, 32)});
Register DstReg = MI.getOperand(0).getReg(); Register DstReg = MI.getOperand(0).getReg();
B.buildUMin(DstReg, X, Y); B.buildUMin(DstReg, X, Y);
MI.eraseFromParent(); MI.eraseFromParent();
@ -3651,8 +3661,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar? case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
case AMDGPU::G_FSHR: // TODO: Expand for scalar case AMDGPU::G_FSHR: // TODO: Expand for scalar
case AMDGPU::G_AMDGPU_FFBH_U32:
case AMDGPU::G_AMDGPU_FFBL_B32:
case AMDGPU::G_AMDGPU_FMIN_LEGACY: case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY: case AMDGPU::G_AMDGPU_FMAX_LEGACY:
case AMDGPU::G_AMDGPU_RCP_IFLAG: case AMDGPU::G_AMDGPU_RCP_IFLAG:
@ -3758,6 +3766,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size); OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
break; break;
} }
case AMDGPU::G_AMDGPU_FFBH_U32:
case AMDGPU::G_AMDGPU_FFBL_B32:
case AMDGPU::G_CTLZ_ZERO_UNDEF: case AMDGPU::G_CTLZ_ZERO_UNDEF:
case AMDGPU::G_CTTZ_ZERO_UNDEF: { case AMDGPU::G_CTTZ_ZERO_UNDEF: {
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

View File

@ -1123,16 +1123,33 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
; SI-LABEL: v_test_uitofp_i64_byte_to_f32: ; SI-LABEL: v_test_uitofp_i64_byte_to_f32:
; SI: ; %bb.0: ; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_ffbh_u32_e32 v2, 0
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 ; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_ldexp_f32_e64 v0, v0, 0 ; SI-NEXT: v_min_u32_e32 v2, 32, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v2
; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-NEXT: s_setpc_b64 s[30:31] ; SI-NEXT: s_setpc_b64 s[30:31]
; ;
; VI-LABEL: v_test_uitofp_i64_byte_to_f32: ; VI-LABEL: v_test_uitofp_i64_byte_to_f32:
; VI: ; %bb.0: ; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 ; VI-NEXT: v_ffbh_u32_e32 v2, 0
; VI-NEXT: v_ldexp_f32 v0, v0, 0 ; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
; VI-NEXT: v_ldexp_f32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31] ; VI-NEXT: s_setpc_b64 s[30:31]
%masked = and i64 %arg0, 255 %masked = and i64 %arg0, 255
%itofp = uitofp i64 %masked to float %itofp = uitofp i64 %masked to float

View File

@ -9,12 +9,10 @@ body: |
liveins: $vgpr0 liveins: $vgpr0
; CHECK-LABEL: name: ctlz_s32_s32 ; CHECK-LABEL: name: ctlz_s32_s32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: $vgpr0 = COPY [[UMIN]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
%0:_(s32) = COPY $vgpr0 %0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CTLZ %0 %1:_(s32) = G_CTLZ %0
$vgpr0 = COPY %1 $vgpr0 = COPY %1
@ -28,12 +26,10 @@ body: |
liveins: $vgpr0_vgpr1 liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: ctlz_s32_s64 ; CHECK-LABEL: name: ctlz_s32_s64
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: $vgpr0 = COPY [[UMIN]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1 %0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CTLZ %0 %1:_(s32) = G_CTLZ %0
$vgpr0 = COPY %1 $vgpr0 = COPY %1
@ -47,12 +43,10 @@ body: |
liveins: $vgpr0_vgpr1 liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: ctlz_s64_s64 ; CHECK-LABEL: name: ctlz_s64_s64
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UMIN]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]]
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1 %0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_CTLZ %0 %1:_(s64) = G_CTLZ %0
@ -67,14 +61,12 @@ body: |
liveins: $vgpr0 liveins: $vgpr0
; CHECK-LABEL: name: ctlz_s16_s32 ; CHECK-LABEL: name: ctlz_s16_s32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: $vgpr0 = COPY [[AND]](s32)
%0:_(s32) = COPY $vgpr0 %0:_(s32) = COPY $vgpr0
%1:_(s16) = G_CTLZ %0 %1:_(s16) = G_CTLZ %0
@ -93,13 +85,11 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[CTLZ_ZERO_UNDEF]] ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]]
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C3]]
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
@ -120,15 +110,12 @@ body: |
; CHECK-LABEL: name: ctlz_v2s32_v2s32 ; CHECK-LABEL: name: ctlz_v2s32_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] ; CHECK: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTLZ_ZERO_UNDEF1]]
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_CTLZ %0 %1:_(<2 x s32>) = G_CTLZ %0
@ -144,15 +131,12 @@ body: |
; CHECK-LABEL: name: ctlz_v2s32_v2s64 ; CHECK-LABEL: name: ctlz_v2s32_v2s64
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s64) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV]](s64)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s64), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s64)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] ; CHECK: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s64) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s64), [[C]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTLZ_ZERO_UNDEF1]]
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = G_CTLZ %0 %1:_(<2 x s32>) = G_CTLZ %0
@ -173,19 +157,16 @@ body: |
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C2]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]]
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]]
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTLZ_ZERO_UNDEF]]
; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C]]
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) ; CHECK: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND1]](s32)
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s32), [[C2]] ; CHECK: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[CTLZ_ZERO_UNDEF1]] ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]]
; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C]]
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32)
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]]
@ -212,13 +193,11 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[CTLZ_ZERO_UNDEF]] ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]]
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C3]]
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
@ -242,15 +221,13 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s64)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UMIN]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[CTLZ_ZERO_UNDEF]] ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32)
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64) ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32) ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32)

View File

@ -9,12 +9,10 @@ body: |
liveins: $vgpr0 liveins: $vgpr0
; CHECK-LABEL: name: cttz_s32_s32 ; CHECK-LABEL: name: cttz_s32_s32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) ; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: $vgpr0 = COPY [[UMIN]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
%0:_(s32) = COPY $vgpr0 %0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CTTZ %0 %1:_(s32) = G_CTTZ %0
$vgpr0 = COPY %1 $vgpr0 = COPY %1
@ -28,12 +26,10 @@ body: |
liveins: $vgpr0_vgpr1 liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: cttz_s32_s64 ; CHECK-LABEL: name: cttz_s32_s64
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) ; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: $vgpr0 = COPY [[UMIN]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]]
; CHECK: $vgpr0 = COPY [[SELECT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1 %0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CTTZ %0 %1:_(s32) = G_CTTZ %0
$vgpr0 = COPY %1 $vgpr0 = COPY %1
@ -47,12 +43,10 @@ body: |
liveins: $vgpr0_vgpr1 liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: cttz_s64_s64 ; CHECK-LABEL: name: cttz_s64_s64
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) ; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UMIN]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]]
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1 %0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_CTTZ %0 %1:_(s64) = G_CTTZ %0
@ -67,14 +61,12 @@ body: |
liveins: $vgpr0 liveins: $vgpr0
; CHECK-LABEL: name: cttz_s16_s32 ; CHECK-LABEL: name: cttz_s16_s32
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) ; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CHECK: $vgpr0 = COPY [[AND]](s32) ; CHECK: $vgpr0 = COPY [[AND]](s32)
%0:_(s32) = COPY $vgpr0 %0:_(s32) = COPY $vgpr0
%1:_(s16) = G_CTTZ %0 %1:_(s16) = G_CTTZ %0
@ -116,15 +108,12 @@ body: |
; CHECK-LABEL: name: cttz_v2s32_v2s32 ; CHECK-LABEL: name: cttz_v2s32_v2s32
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) ; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV1]](s32)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] ; CHECK: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_1]], [[C]]
; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTTZ_ZERO_UNDEF1]]
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_CTTZ %0 %1:_(<2 x s32>) = G_CTTZ %0
@ -140,15 +129,12 @@ body: |
; CHECK-LABEL: name: cttz_v2s32_v2s64 ; CHECK-LABEL: name: cttz_v2s32_v2s64
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s64) ; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV]](s64)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s64), [[C]] ; CHECK: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[C]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBL_B32 [[UV1]](s64)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] ; CHECK: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBL_B32_1]], [[C]]
; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s64) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s64), [[C]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTTZ_ZERO_UNDEF1]]
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s32>) = G_CTTZ %0 %1:_(<2 x s32>) = G_CTTZ %0

View File

@ -77,16 +77,15 @@ body: |
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C1]] ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[CTLZ_ZERO_UNDEF]] ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SELECT]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]] ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]] ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT1]] ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT]] ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6: $vgpr0 = COPY [[INT]](s32) ; GFX6: $vgpr0 = COPY [[INT]](s32)
; GFX8-LABEL: name: test_uitofp_s64_to_s32 ; GFX8-LABEL: name: test_uitofp_s64_to_s32
@ -95,16 +94,15 @@ body: |
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C1]] ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[CTLZ_ZERO_UNDEF]] ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SELECT]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]] ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]] ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT1]] ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT]] ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8: $vgpr0 = COPY [[INT]](s32) ; GFX8: $vgpr0 = COPY [[INT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1 %0:_(s64) = COPY $vgpr0_vgpr1
@ -397,16 +395,15 @@ body: |
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C2]] ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32)
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[SELECT]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]] ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]]
; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[C2]] ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C2]]
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT1]] ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SELECT]] ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6: $vgpr0 = COPY [[INT]](s32) ; GFX6: $vgpr0 = COPY [[INT]](s32)
; GFX8-LABEL: name: test_uitofp_s33_to_s32 ; GFX8-LABEL: name: test_uitofp_s33_to_s32
@ -418,16 +415,15 @@ body: |
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C2]] ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[UMIN]](s32)
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[SELECT]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]] ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C2]]
; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[C2]] ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C2]]
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT1]] ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SELECT]] ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8: $vgpr0 = COPY [[INT]](s32) ; GFX8: $vgpr0 = COPY [[INT]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1 %0:_(s64) = COPY $vgpr0_vgpr1
@ -448,16 +444,15 @@ body: |
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C1]] ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[CTLZ_ZERO_UNDEF]] ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SELECT]](s32)
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]] ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]] ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT1]] ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT]] ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@ -468,16 +463,15 @@ body: |
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV1]](s32)
; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C1]] ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[CTLZ_ZERO_UNDEF]] ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32)
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[SELECT]](s32)
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]] ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[C1]]
; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]] ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT1]] ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[SELECT]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT]] ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@ -501,29 +495,27 @@ body: |
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV3]](s32) ; GFX6: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32)
; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[CTLZ_ZERO_UNDEF]] ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SELECT]](s32)
; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]] ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]] ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT1]] ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT]]
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT]] ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX6: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV7]](s32) ; GFX6: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[C1]] ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[CTLZ_ZERO_UNDEF1]] ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN1]](s32)
; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SELECT2]](s32)
; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]] ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[C1]] ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]]
; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT3]] ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT1]]
; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32) ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT2]] ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN1]]
; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32) ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
@ -540,29 +532,27 @@ body: |
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV3]](s32) ; GFX8: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV3]](s32)
; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[C1]] ; GFX8: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C]]
; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[CTLZ_ZERO_UNDEF]] ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32)
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SELECT]](s32)
; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64) ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL]](s64)
; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]] ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[C1]]
; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]] ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C1]]
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT1]] ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV5]], [[SELECT]]
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32) ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[OR]](s32)
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT]] ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN]]
; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32) ; GFX8: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s32), [[SUB]](s32)
; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32) ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
; GFX8: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV7]](s32) ; GFX8: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[UV7]](s32)
; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[C1]] ; GFX8: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C]]
; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[CTLZ_ZERO_UNDEF1]] ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN1]](s32)
; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SELECT2]](s32)
; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64) ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHL1]](s64)
; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]] ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[C1]]
; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[C1]] ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[C1]]
; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT3]] ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV9]], [[SELECT1]]
; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32) ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[OR1]](s32)
; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT2]] ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UMIN1]]
; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32) ; GFX8: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP1]](s32), [[SUB1]](s32)
; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32) ; GFX8: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)

View File

@ -12,8 +12,7 @@ body: |
; CHECK-LABEL: name: ffbh_u32_s ; CHECK-LABEL: name: ffbh_u32_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:sgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY]](s32)
; CHECK: [[AMDGPU_FFBH_U32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBH_U32 [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0 %0:_(s32) = COPY $sgpr0
%1:_(s32) = G_AMDGPU_FFBH_U32 %0 %1:_(s32) = G_AMDGPU_FFBH_U32 %0
... ...

View File

@ -12,8 +12,7 @@ body: |
; CHECK-LABEL: name: ffbl_b32_s ; CHECK-LABEL: name: ffbl_b32_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:sgpr(s32) = G_AMDGPU_FFBL_B32 [[COPY]](s32)
; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBL_B32 [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0 %0:_(s32) = COPY $sgpr0
%1:_(s32) = G_AMDGPU_FFBL_B32 %0 %1:_(s32) = G_AMDGPU_FFBL_B32 %0
... ...

View File

@ -60,11 +60,11 @@ body: |
; CHECK-LABEL: name: cttz_zero_undef_s64_v ; CHECK-LABEL: name: cttz_zero_undef_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[AMDGPU_FFBL_B32_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBL_B32 [[UV]](s32) ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32)
; CHECK: [[AMDGPU_FFBL_B32_1:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FFBL_B32 [[UV1]](s32) ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32 ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[AMDGPU_FFBL_B32_1]], [[C]] ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTTZ_ZERO_UNDEF1]], [[C]]
; CHECK: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[AMDGPU_FFBL_B32_]], [[ADD]] ; CHECK: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[CTTZ_ZERO_UNDEF]], [[ADD]]
; CHECK: S_ENDPGM 0, implicit [[UMIN]](s32) ; CHECK: S_ENDPGM 0, implicit [[UMIN]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1 %0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CTTZ_ZERO_UNDEF %0 %1:_(s32) = G_CTTZ_ZERO_UNDEF %0

View File

@ -79,8 +79,7 @@ define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_flbit_i32_b32 s0, s4 ; GFX10-GISEL-NEXT: s_flbit_i32_b32 s0, s4
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 32
; GFX10-GISEL-NEXT: s_cselect_b32 s0, 32, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
@ -166,13 +165,12 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -269,16 +267,14 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v3, v1 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 32, vcc_lo
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -393,22 +389,18 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v4, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v5, v1 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v6, v2 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v7, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v3, 32, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 32, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 32, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, 32, vcc_lo
; GFX10-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX10-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -509,10 +501,8 @@ define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 32, vcc_lo
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1 ; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[0:1] ; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
@ -595,8 +585,7 @@ define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, [8 x i32],
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_flbit_i32_b64 s0, s[2:3] ; GFX10-GISEL-NEXT: s_flbit_i32_b64 s0, s[2:3]
; GFX10-GISEL-NEXT: s_cmp_eq_u64 s[2:3], 0 ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 64
; GFX10-GISEL-NEXT: s_cselect_b32 s0, 64, s0
; GFX10-GISEL-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000 ; GFX10-GISEL-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
@ -677,8 +666,7 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_flbit_i32_b64 s0, s[2:3] ; GFX10-GISEL-NEXT: s_flbit_i32_b64 s0, s[2:3]
; GFX10-GISEL-NEXT: s_cmp_eq_u64 s[2:3], 0 ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 64
; GFX10-GISEL-NEXT: s_cselect_b32 s0, 64, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
@ -783,13 +771,12 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v3, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v4, v1 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1] ; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v0, v0, 32 clamp
; GFX10-GISEL-NEXT: v_min_u32_e32 v0, v1, v0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 32, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 64, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v3, v4, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 64, vcc_lo
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -897,12 +884,11 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx2 v[1:2], v1, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx2 v[1:2], v1, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v3, v1 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v4, v2 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
; GFX10-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[1:2] ; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v1, v1, 32 clamp
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 32, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, v2, v1
; GFX10-GISEL-NEXT: v_min_u32_e32 v3, v4, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 64, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 64, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -996,9 +982,9 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -1091,9 +1077,8 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo ; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
@ -1196,13 +1181,12 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
@ -1304,13 +1288,12 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0 ; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo ; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
@ -1413,10 +1396,10 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v0, 24, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -1521,10 +1504,10 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 16, v1 ; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v2, 16, v2
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, s2, v1 ; GFX10-GISEL-NEXT: v_and_b32_e32 v2, s2, v2
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, s2, vcc_lo
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%val = load i16, i16 addrspace(1)* %valptr %val = load i16, i16 addrspace(1)* %valptr
@ -1628,10 +1611,10 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 25, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v0, 25, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0x7f, vcc_lo
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm

View File

@ -79,8 +79,7 @@ define amdgpu_kernel void @s_cttz_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_ff1_i32_b32 s0, s4 ; GFX10-GISEL-NEXT: s_ff1_i32_b32 s0, s4
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 32
; GFX10-GISEL-NEXT: s_cselect_b32 s0, 32, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
@ -166,13 +165,12 @@ define amdgpu_kernel void @v_cttz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -269,16 +267,14 @@ define amdgpu_kernel void @v_cttz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v3, v1 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 32, vcc_lo
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -393,22 +389,18 @@ define amdgpu_kernel void @v_cttz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v4, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v5, v1 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v6, v2 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v3, v3
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v7, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v3, 32, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 32, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 32, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v3, v7, 32, vcc_lo
; GFX10-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX10-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -587,8 +579,7 @@ define amdgpu_kernel void @s_cttz_i64(i64 addrspace(1)* noalias %out, [8 x i32],
; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_ff1_i32_b64 s0, s[2:3] ; GFX10-GISEL-NEXT: s_ff1_i32_b64 s0, s[2:3]
; GFX10-GISEL-NEXT: s_cmp_eq_u64 s[2:3], 0 ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 64
; GFX10-GISEL-NEXT: s_cselect_b32 s0, 64, s0
; GFX10-GISEL-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000 ; GFX10-GISEL-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x200000
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
@ -669,8 +660,7 @@ define amdgpu_kernel void @s_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_ff1_i32_b64 s0, s[2:3] ; GFX10-GISEL-NEXT: s_ff1_i32_b64 s0, s[2:3]
; GFX10-GISEL-NEXT: s_cmp_eq_u64 s[2:3], 0 ; GFX10-GISEL-NEXT: s_min_u32 s0, s0, 64
; GFX10-GISEL-NEXT: s_cselect_b32 s0, 64, s0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
@ -775,13 +765,12 @@ define amdgpu_kernel void @v_cttz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v3, v1 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v4, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[0:1] ; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v1, v1, 32 clamp
; GFX10-GISEL-NEXT: v_min_u32_e32 v0, v0, v1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 32, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 64, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v3, v4, v3
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 64, vcc_lo
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -889,12 +878,11 @@ define amdgpu_kernel void @v_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dwordx2 v[1:2], v1, s[2:3] ; GFX10-GISEL-NEXT: global_load_dwordx2 v[1:2], v1, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v3, v2 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v4, v1 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[1:2] ; GFX10-GISEL-NEXT: v_add_nc_u32_e64 v2, v2, 32 clamp
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 32, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, v1, v2
; GFX10-GISEL-NEXT: v_min_u32_e32 v3, v4, v3 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 64, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 64, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -988,9 +976,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm ; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() %tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -1083,9 +1071,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo ; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
@ -1188,13 +1175,12 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
@ -1296,13 +1282,12 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c ; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 32, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0 ; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo ; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]

View File

@ -1198,9 +1198,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v4 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v2, v3
; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1
; GFX9-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 32, vcc ; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, -1, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3]
; GFX9-GISEL-NEXT: s_endpgm ; GFX9-GISEL-NEXT: s_endpgm
%val = load i32, i32 addrspace(1)* %arrayidx, align 1 %val = load i32, i32 addrspace(1)* %arrayidx, align 1
@ -1324,8 +1324,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v4 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v2, v3
; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 32, vcc
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3]
@ -1458,9 +1457,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v4 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v2, v3
; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 32, vcc
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 32, v1 ; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 32, v1
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[2:3]