diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 32dc2a7afce5..8d0ab496938e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3154,22 +3154,29 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
         (Src.getOpcode() == ISD::SRL || Src.getOpcode() == ISD::SRA ||
          Src.getOpcode() == ISD::SHL)) {
-      if (auto ShiftAmount = isConstOrConstSplat(Src.getOperand(1))) {
-        if (ShiftAmount->getZExtValue() <= VT.getScalarSizeInBits()) {
-          EVT MidVT = VT.isVector() ?
-            EVT::getVectorVT(*DAG.getContext(), MVT::i32,
-                             VT.getVectorNumElements()) : MVT::i32;
+      SDValue Amt = Src.getOperand(1);
+      KnownBits Known;
+      DAG.computeKnownBits(Amt, Known);
+      unsigned Size = VT.getScalarSizeInBits();
+      if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
+          (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
+        EVT MidVT = VT.isVector() ?
+          EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+                           VT.getVectorNumElements()) : MVT::i32;
 
-          EVT ShiftTy = getShiftAmountTy(MidVT, DAG.getDataLayout());
-          SDValue NewShiftAmt = DAG.getConstant(ShiftAmount->getZExtValue(),
-                                                SL, ShiftTy);
-          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
-                                      Src.getOperand(0));
-          DCI.AddToWorklist(Trunc.getNode());
-          SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
-                                            Trunc, NewShiftAmt);
-          return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
+        EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout());
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
+                                    Src.getOperand(0));
+        DCI.AddToWorklist(Trunc.getNode());
+
+        if (Amt.getValueType() != NewShiftVT) {
+          Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT);
+          DCI.AddToWorklist(Amt.getNode());
         }
+
+        SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
+                                          Trunc, Amt);
+        return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
       }
     }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
index 65307ca6fa94..8bbac5832add 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
@@ -100,3 +100,39 @@ define amdgpu_kernel void @s_trunc_srl_i64_16_to_i16(i64 %x) {
   store i16 %add, i16 addrspace(1)* undef
   ret void
 }
+
+; GCN-LABEL: {{^}}trunc_srl_i64_var_mask15_to_i16:
+; GCN: s_waitcnt
+; GCN-NEXT: v_and_b32_e32 v1, 15, v2
+; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0
+; GCN-NEXT: s_setpc_b64
+define i16 @trunc_srl_i64_var_mask15_to_i16(i64 %x, i64 %amt) {
+  %amt.masked = and i64 %amt, 15
+  %shift = lshr i64 %x, %amt.masked
+  %trunc = trunc i64 %shift to i16
+  ret i16 %trunc
+}
+
+; GCN-LABEL: {{^}}trunc_srl_i64_var_mask16_to_i16:
+; GCN: s_waitcnt
+; GCN-NEXT: v_and_b32_e32 v2, 16, v2
+; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT: s_setpc_b64
+define i16 @trunc_srl_i64_var_mask16_to_i16(i64 %x, i64 %amt) {
+  %amt.masked = and i64 %amt, 16
+  %shift = lshr i64 %x, %amt.masked
+  %trunc = trunc i64 %shift to i16
+  ret i16 %trunc
+}
+
+; GCN-LABEL: {{^}}trunc_srl_i64_var_mask31_to_i16:
+; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v2, 31, v2
+; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT: s_setpc_b64 s[30:31]
+define i16 @trunc_srl_i64_var_mask31_to_i16(i64 %x, i64 %amt) {
+  %amt.masked = and i64 %amt, 31
+  %shift = lshr i64 %x, %amt.masked
+  %trunc = trunc i64 %shift to i16
+  ret i16 %trunc
+}