forked from OSchip/llvm-project
AMDGPU/GlobalISel: Enable CSE in pre-legalizer combiner
This commit is contained in:
parent
64c2641c89
commit
6314a72730
|
@ -249,6 +249,9 @@ void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
}
|
||||
|
||||
AU.addRequired<GISelCSEAnalysisWrapperPass>();
|
||||
AU.addPreserved<GISelCSEAnalysisWrapperPass>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
|
@ -270,8 +273,13 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
|
|||
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
|
||||
AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
|
||||
F.hasMinSize(), KB, MDT);
|
||||
// Enable CSE.
|
||||
GISelCSEAnalysisWrapper &Wrapper =
|
||||
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
|
||||
auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
|
||||
|
||||
Combiner C(PCInfo, TPC);
|
||||
return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
|
||||
return C.combineMachineInstrs(MF, CSEInfo);
|
||||
}
|
||||
|
||||
char AMDGPUPreLegalizerCombiner::ID = 0;
|
||||
|
|
|
@ -29,10 +29,8 @@ body: |
|
|||
|
||||
; GCN-LABEL: name: urem_s32_var_const1
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: %const:_(s32) = G_CONSTANT i32 1
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
|
||||
; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD %const, [[C]]
|
||||
; GCN: $vgpr0 = COPY [[ADD]](s32)
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: $vgpr0 = COPY [[C]](s32)
|
||||
%var:_(s32) = COPY $vgpr0
|
||||
%const:_(s32) = G_CONSTANT i32 1
|
||||
%rem:_(s32) = G_UREM %var, %const
|
||||
|
@ -49,10 +47,8 @@ body: |
|
|||
; GCN-LABEL: name: urem_s32_var_const2
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: %var:_(s32) = COPY $vgpr0
|
||||
; GCN: %const:_(s32) = G_CONSTANT i32 2
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
|
||||
; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD %const, [[C]]
|
||||
; GCN: %rem:_(s32) = G_AND %var, [[ADD]]
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; GCN: %rem:_(s32) = G_AND %var, [[C]]
|
||||
; GCN: $vgpr0 = COPY %rem(s32)
|
||||
%var:_(s32) = COPY $vgpr0
|
||||
%const:_(s32) = G_CONSTANT i32 2
|
||||
|
|
|
@ -207,21 +207,28 @@ define i32 @v_urem_i32_pow2k_denom(i32 %num) {
|
|||
; CHECK-LABEL: v_urem_i32_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_add_i32 s4, 0x1000, -1
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = urem i32 %num, 4096
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
|
||||
; CHECK-LABEL: v_urem_v2i32_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_add_i32 s4, 0x1000, -1
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, s4, v1
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: s_add_i32 s4, 0x1000, -1
|
||||
; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; CGP-LABEL: v_urem_v2i32_pow2k_denom:
|
||||
; CGP: ; %bb.0:
|
||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CGP-NEXT: s_movk_i32 s4, 0xfff
|
||||
; CGP-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; CGP-NEXT: v_and_b32_e32 v1, s4, v1
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = urem <2 x i32> %num, <i32 4096, i32 4096>
|
||||
ret <2 x i32> %result
|
||||
}
|
||||
|
|
|
@ -949,38 +949,43 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) {
|
|||
; CHECK-LABEL: v_urem_i64_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_add_u32 s4, 0x1000, -1
|
||||
; CHECK-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; CHECK-NEXT: s_and_b32 s5, s5, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; CHECK-NEXT: s_addc_u32 s5, 0, -1
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, s5, v1
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = urem i64 %num, 4096
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
|
||||
; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_movk_i32 s4, 0x1000
|
||||
; CHECK-NEXT: s_add_u32 s5, s4, -1
|
||||
; CHECK-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; CHECK-NEXT: s_and_b32 s6, s6, 1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; CHECK-NEXT: s_addc_u32 s6, 0, -1
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, -1
|
||||
; CHECK-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; CHECK-NEXT: v_and_b32_e32 v0, s5, v0
|
||||
; CHECK-NEXT: s_and_b32 s5, s7, 1
|
||||
; CHECK-NEXT: v_and_b32_e32 v1, s6, v1
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; CHECK-NEXT: s_addc_u32 s5, 0, -1
|
||||
; CHECK-NEXT: v_and_b32_e32 v2, s4, v2
|
||||
; CHECK-NEXT: v_and_b32_e32 v3, s5, v3
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: s_movk_i32 s4, 0x1000
|
||||
; GISEL-NEXT: s_add_u32 s5, s4, -1
|
||||
; GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GISEL-NEXT: s_and_b32 s6, s6, 1
|
||||
; GISEL-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GISEL-NEXT: s_addc_u32 s6, 0, -1
|
||||
; GISEL-NEXT: s_add_u32 s4, s4, -1
|
||||
; GISEL-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; GISEL-NEXT: v_and_b32_e32 v0, s5, v0
|
||||
; GISEL-NEXT: s_and_b32 s5, s7, 1
|
||||
; GISEL-NEXT: v_and_b32_e32 v1, s6, v1
|
||||
; GISEL-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; GISEL-NEXT: s_addc_u32 s5, 0, -1
|
||||
; GISEL-NEXT: v_and_b32_e32 v2, s4, v2
|
||||
; GISEL-NEXT: v_and_b32_e32 v3, s5, v3
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; CGP-LABEL: v_urem_v2i64_pow2k_denom:
|
||||
; CGP: ; %bb.0:
|
||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CGP-NEXT: s_movk_i32 s4, 0xfff
|
||||
; CGP-NEXT: v_and_b32_e32 v0, s4, v0
|
||||
; CGP-NEXT: v_and_b32_e32 v2, s4, v2
|
||||
; CGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = urem <2 x i64> %num, <i64 4096, i64 4096>
|
||||
ret <2 x i64> %result
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue