forked from OSchip/llvm-project
AMDGPU: Look through casted selects to constant fold bin ops
The promotion of the uniform select to i32 interfered with this fold.
This commit is contained in:
parent
bcd91778fe
commit
2fe500ab5b
|
@ -530,14 +530,32 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Find a select instruction, which may have been casted. This is mostly to deal
|
||||
// with cases where i16 selects were promoted here to i32.
|
||||
// Returns V if V is itself a select, or the select that is operand 0 of V when
// V is a cast instruction; returns nullptr otherwise. Only a single level of
// cast is looked through.
static SelectInst *findSelectThroughCast(Value *V, CastInst *&Cast) {
|
||||
// Cast stays null when V is directly the select (no cast involved).
Cast = nullptr;
|
||||
if (SelectInst *Sel = dyn_cast<SelectInst>(V))
|
||||
return Sel;
|
||||
|
||||
// When a select is found behind a cast, Cast is left pointing at that cast.
if ((Cast = dyn_cast<CastInst>(V))) {
|
||||
if (SelectInst *Sel = dyn_cast<SelectInst>(Cast->getOperand(0)))
|
||||
return Sel;
|
||||
}
|
||||
|
||||
// NOTE: Cast may still be non-null here (V was a cast of something that is not
// a select), so callers must check the returned select before relying on Cast.
return nullptr;
|
||||
}
|
||||
|
||||
bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
|
||||
// Don't do this unless the old select is going away. We want to eliminate the
|
||||
// binary operator, not replace a binop with a select.
|
||||
int SelOpNo = 0;
|
||||
SelectInst *Sel = dyn_cast<SelectInst>(BO.getOperand(0));
|
||||
|
||||
CastInst *CastOp;
|
||||
|
||||
SelectInst *Sel = findSelectThroughCast(BO.getOperand(0), CastOp);
|
||||
if (!Sel || !Sel->hasOneUse()) {
|
||||
SelOpNo = 1;
|
||||
Sel = dyn_cast<SelectInst>(BO.getOperand(1));
|
||||
Sel = findSelectThroughCast(BO.getOperand(1), CastOp);
|
||||
}
|
||||
|
||||
if (!Sel || !Sel->hasOneUse())
|
||||
|
@ -549,6 +567,11 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
|
|||
if (!CBO || !CT || !CF)
|
||||
return false;
|
||||
|
||||
if (CastOp) {
|
||||
CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
|
||||
CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
|
||||
}
|
||||
|
||||
// TODO: Handle special 0/-1 cases DAG combine does, although we only really
|
||||
// need to handle divisions here.
|
||||
Constant *FoldedT = SelOpNo ?
|
||||
|
@ -573,6 +596,8 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
|
|||
NewSelect->takeName(&BO);
|
||||
BO.replaceAllUsesWith(NewSelect);
|
||||
BO.eraseFromParent();
|
||||
if (CastOp)
|
||||
CastOp->eraseFromParent();
|
||||
Sel->eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -410,13 +410,18 @@ define i32 @select_mul_rhs_const_i32(i1 %cond) {
|
|||
; IR-LABEL: @select_mul_rhs_const_i32(
|
||||
; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 5000, i32 8000
|
||||
; IR-NEXT: ret i32 [[OP]]
|
||||
;
|
||||
%select = select i1 %cond, i32 5, i32 8
|
||||
%op = mul i32 %select, 1000
|
||||
ret i32 %op
|
||||
}
|
||||
|
||||
; FIXME: Truncate from promoted select blocks this.
|
||||
define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
|
||||
; IR-LABEL: @select_add_lhs_const_i16(
|
||||
; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i16 128, i16 131
|
||||
; IR-NEXT: store i16 [[OP]], i16 addrspace(1)* undef
|
||||
; IR-NEXT: ret void
|
||||
|
||||
; GCN-LABEL: select_add_lhs_const_i16:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_load_dword s0, s[4:5], 0x0
|
||||
|
@ -428,16 +433,62 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
|
|||
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
||||
; GCN-NEXT: flat_store_short v[0:1], v0
|
||||
; GCN-NEXT: s_endpgm
|
||||
; IR-LABEL: @select_add_lhs_const_i16(
|
||||
; IR-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], i32 5, i32 8
|
||||
; IR-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
||||
; IR-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
|
||||
; IR-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 123
|
||||
; IR-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
||||
; IR-NEXT: store i16 [[TMP5]], i16 addrspace(1)* undef
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
%select = select i1 %cond, i16 5, i16 8
|
||||
%op = add i16 %select, 123
|
||||
store i16 %op, i16 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define i16 @select_add_trunc_select(i1 %cond) {
|
||||
; GCN-LABEL: select_add_trunc_select:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v0, 50, 47, vcc
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
; IR-LABEL: @select_add_trunc_select(
|
||||
; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i16 47, i16 50
|
||||
; IR-NEXT: ret i16 [[OP]]
|
||||
;
|
||||
%select = select i1 %cond, i32 5, i32 8
|
||||
%trunc = trunc i32 %select to i16
|
||||
%op = add i16 %trunc, 42
|
||||
ret i16 %op
|
||||
}
|
||||
|
||||
define i32 @select_add_sext_select(i1 %cond) {
|
||||
; IR-LABEL: @select_add_sext_select(
|
||||
; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 29, i32 50
|
||||
; IR-NEXT: ret i32 [[OP]]
|
||||
; GCN-LABEL: select_add_sext_select:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v0, 50, 29, vcc
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%select = select i1 %cond, i16 -13, i16 8
|
||||
%trunc = sext i16 %select to i32
|
||||
%op = add i32 %trunc, 42
|
||||
ret i32 %op
|
||||
}
|
||||
|
||||
define i32 @select_add_zext_select(i1 %cond) {
|
||||
; IR-LABEL: @select_add_zext_select(
|
||||
; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], i32 47, i32 50
|
||||
; IR-NEXT: ret i32 [[OP]]
|
||||
|
||||
; GCN-LABEL: select_add_zext_select:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
|
||||
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; GCN-NEXT: v_cndmask_b32_e64 v0, 50, 47, vcc
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%select = select i1 %cond, i16 5, i16 8
|
||||
%trunc = zext i16 %select to i32
|
||||
%op = add i32 %trunc, 42
|
||||
ret i32 %op
|
||||
}
|
||||
|
|
|
@ -112,9 +112,7 @@ define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspac
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
|
||||
; TODO: shrink i16 constant. This is correct but suboptimal.
|
||||
; GCN: v_mov_b32_e32 [[T:v[0-9]+]], 0xffff0009
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[T]],
|
||||
; GCN: v_cndmask_b32_e64 v2, 2, 9,
|
||||
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
|
||||
%sel = select i1 %cond, i16 -4, i16 3
|
||||
%bo = sub i16 5, %sel
|
||||
|
|
Loading…
Reference in New Issue