forked from OSchip/llvm-project
AMDGPU: Handle partial shift reduction for variable shifts
If the shift amount is variable but its known bits show it is small enough, we can still reduce the shift to 32 bits.

llvm-svn: 331917
This commit is contained in:
parent
b143d9a5ea
commit
74fd7600d2
|
@ -3154,22 +3154,29 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
|
||||||
(Src.getOpcode() == ISD::SRL ||
|
(Src.getOpcode() == ISD::SRL ||
|
||||||
Src.getOpcode() == ISD::SRA ||
|
Src.getOpcode() == ISD::SRA ||
|
||||||
Src.getOpcode() == ISD::SHL)) {
|
Src.getOpcode() == ISD::SHL)) {
|
||||||
if (auto ShiftAmount = isConstOrConstSplat(Src.getOperand(1))) {
|
SDValue Amt = Src.getOperand(1);
|
||||||
if (ShiftAmount->getZExtValue() <= VT.getScalarSizeInBits()) {
|
KnownBits Known;
|
||||||
EVT MidVT = VT.isVector() ?
|
DAG.computeKnownBits(Amt, Known);
|
||||||
EVT::getVectorVT(*DAG.getContext(), MVT::i32,
|
unsigned Size = VT.getScalarSizeInBits();
|
||||||
VT.getVectorNumElements()) : MVT::i32;
|
if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
|
||||||
|
(Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
|
||||||
|
EVT MidVT = VT.isVector() ?
|
||||||
|
EVT::getVectorVT(*DAG.getContext(), MVT::i32,
|
||||||
|
VT.getVectorNumElements()) : MVT::i32;
|
||||||
|
|
||||||
EVT ShiftTy = getShiftAmountTy(MidVT, DAG.getDataLayout());
|
EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout());
|
||||||
SDValue NewShiftAmt = DAG.getConstant(ShiftAmount->getZExtValue(),
|
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
|
||||||
SL, ShiftTy);
|
Src.getOperand(0));
|
||||||
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
|
DCI.AddToWorklist(Trunc.getNode());
|
||||||
Src.getOperand(0));
|
|
||||||
DCI.AddToWorklist(Trunc.getNode());
|
if (Amt.getValueType() != NewShiftVT) {
|
||||||
SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
|
Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT);
|
||||||
Trunc, NewShiftAmt);
|
DCI.AddToWorklist(Amt.getNode());
|
||||||
return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
|
||||||
|
Trunc, Amt);
|
||||||
|
return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -100,3 +100,39 @@ define amdgpu_kernel void @s_trunc_srl_i64_16_to_i16(i64 %x) {
|
||||||
store i16 %add, i16 addrspace(1)* undef
|
store i16 %add, i16 addrspace(1)* undef
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask15_to_i16:
|
||||||
|
; GCN: s_waitcnt
|
||||||
|
; GCN-NEXT: v_and_b32_e32 v1, 15, v2
|
||||||
|
; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0
|
||||||
|
; GCN-NEXT: s_setpc_b64
|
||||||
|
define i16 @trunc_srl_i64_var_mask15_to_i16(i64 %x, i64 %amt) {
|
||||||
|
%amt.masked = and i64 %amt, 15
|
||||||
|
%shift = lshr i64 %x, %amt.masked
|
||||||
|
%trunc = trunc i64 %shift to i16
|
||||||
|
ret i16 %trunc
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask16_to_i16:
|
||||||
|
; GCN: s_waitcnt
|
||||||
|
; GCN-NEXT: v_and_b32_e32 v2, 16, v2
|
||||||
|
; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
|
||||||
|
; GCN-NEXT: s_setpc_b64
|
||||||
|
define i16 @trunc_srl_i64_var_mask16_to_i16(i64 %x, i64 %amt) {
|
||||||
|
%amt.masked = and i64 %amt, 16
|
||||||
|
%shift = lshr i64 %x, %amt.masked
|
||||||
|
%trunc = trunc i64 %shift to i16
|
||||||
|
ret i16 %trunc
|
||||||
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask31_to_i16:
|
||||||
|
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
|
; GCN-NEXT: v_and_b32_e32 v2, 31, v2
|
||||||
|
; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
|
||||||
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
define i16 @trunc_srl_i64_var_mask31_to_i16(i64 %x, i64 %amt) {
|
||||||
|
%amt.masked = and i64 %amt, 31
|
||||||
|
%shift = lshr i64 %x, %amt.masked
|
||||||
|
%trunc = trunc i64 %shift to i16
|
||||||
|
ret i16 %trunc
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue