From bcf5184a68d1d851895692bae6eed16a74b519db Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 25 Jul 2020 17:22:22 -0400
Subject: [PATCH] AMDGPU/GlobalISel: Make sure <2 x s1> phis are scalarized

---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  3 +-
 .../AMDGPU/GlobalISel/legalize-phi.mir        | 91 +++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index f72db8a61aab..c21414d59ba0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -415,11 +415,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     .legalFor(AllS64Vectors)
     .legalFor(AddrSpaces64)
     .legalFor(AddrSpaces32)
+    .legalIf(isPointer(0))
     .clampScalar(0, S32, S256)
     .widenScalarToNextPow2(0, 32)
     .clampMaxNumElements(0, S32, 16)
     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
-    .legalIf(isPointer(0));
+    .scalarize(0);
 
   if (ST.hasVOP3PInsts()) {
     assert(ST.hasIntClamp() && "all targets with VOP3P should support clamp");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
index 81408b79b11f..9a91d908bb7b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -1547,3 +1547,94 @@ body: |
 
     S_SETPC_B64 undef $sgpr30_sgpr31
 ...
+---
+name: test_phi_v2s1
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: test_phi_v2s1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; CHECK:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+  ; CHECK:   [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+  ; CHECK:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+  ; CHECK:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+  ; CHECK:   [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+  ; CHECK:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]]
+  ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+  ; CHECK:   [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+  ; CHECK:   [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
+  ; CHECK:   [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]]
+  ; CHECK:   [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]]
+  ; CHECK:   G_BRCOND [[ICMP2]](s1), %bb.1
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; CHECK:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32)
+  ; CHECK:   [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
+  ; CHECK:   [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32)
+  ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+  ; CHECK:   [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]]
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+  ; CHECK:   [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C4]]
+  ; CHECK:   [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]]
+  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+  ; CHECK:   [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C4]]
+  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+  ; CHECK:   [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C4]]
+  ; CHECK:   [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]]
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1
+  ; CHECK:   [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1)
+  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1)
+  ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
+  ; CHECK:   [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C5]]
+  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32)
+  ; CHECK:   [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C5]]
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32)
+  ; CHECK:   $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK:   S_SETPC_B64 undef $sgpr30_sgpr31
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = COPY $vgpr2
+    %3:_(s32) = COPY $vgpr1
+    %4:_(s32) = G_CONSTANT i32 0
+    %5:_(<2 x s1>) = G_ICMP intpred(eq), %0, %1
+    %6:_(s1) = G_ICMP intpred(eq), %3, %4
+    G_BRCOND %6, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %7:_(<2 x s1>) = G_ICMP intpred(ne), %0, %2
+    G_BR %bb.2
+
+  bb.2:
+    %8:_(<2 x s1>) = G_PHI %5, %bb.0, %7, %bb.1
+    %9:_(<2 x s32>) = G_ZEXT %8
+    $vgpr0_vgpr1 = COPY %9
+    S_SETPC_B64 undef $sgpr30_sgpr31
+...
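
Why this works: LegalizeRuleSet rules are tried in declaration order, and the
first rule whose predicate matches decides the action. Before this change the
G_PHI rule set ended at legalIf(isPointer(0)), and a <2 x s1> phi matched no
rule at all: clampScalar and widenScalarToNextPow2 only fire for scalars,
clampMaxNumElements(0, S32, 16) only constrains vectors with s32 elements, and
moreElementsIf(isSmallOddVector(0), ...) skips the even-length <2 x s1>,
leaving the legalizer unable to handle the instruction. The patch hoists the
pointer rule ahead of the mutating rules and appends scalarize(0) as a
trailing vector catch-all, which is what splits the <2 x s1> G_PHI in the test
into the two s1 G_PHIs the CHECK lines expect.

The sketch below is a toy model of that first-match-wins ordering, not LLVM's
actual LegalizeRuleSet API; every name in it is invented for illustration, and
the rule list is drastically simplified.

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Toy stand-in for LLT: Elts == 1 means scalar (or pointer).
struct Ty {
  unsigned Elts;
  unsigned Bits; // element size in bits
  bool IsPointer;
};

struct Rule {
  std::function<bool(const Ty &)> Pred;
  std::string Action;
};

// Like LegalizeRuleSet: the first matching rule decides the action.
static std::string decide(const std::vector<Rule> &Rules, const Ty &T) {
  for (const Rule &R : Rules)
    if (R.Pred(T))
      return R.Action;
  return "unable to legalize"; // the pre-patch fate of <2 x s1>
}

int main() {
  auto isPointer = [](const Ty &T) { return T.IsPointer; };
  auto isScalar = [](const Ty &T) { return !T.IsPointer && T.Elts == 1; };
  auto isVector = [](const Ty &T) { return T.Elts > 1; };

  // Mirrors the patched ordering: pointers are declared legal before any
  // mutating rule runs, and scalarize is the trailing vector catch-all.
  std::vector<Rule> Rules = {
      {[&](const Ty &T) {
         return isScalar(T) && (T.Bits == 1 || T.Bits == 32);
       },
       "legal"},
      {isPointer, "legal"},
      {isScalar, "clamp/widen scalar"},
      {isVector, "scalarize"},
  };

  std::cout << decide(Rules, {2, 1, false}) << '\n'; // <2 x s1> -> scalarize
  std::cout << decide(Rules, {1, 64, true}) << '\n'; // p0       -> legal

  Rules.pop_back(); // drop the catch-all, as before the patch
  std::cout << decide(Rules, {2, 1, false}) << '\n'; // -> unable to legalize
}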