[AMDGPU] Remove IR SpeculativeExecution pass from codegen pipeline

This pass seems to have very little effect because all it does is hoist
some instructions, but it is followed later in the codegen pipeline by
the IR CodeSinking pass which does the opposite.

Differential Revision: https://reviews.llvm.org/D130258
This commit is contained in:
Jay Foad 2022-07-21 13:14:06 +01:00
parent 71d1bd1457
commit e301e071ba
5 changed files with 17 additions and 15 deletions

View File

@@ -985,7 +985,6 @@ void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createLICMPass());
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());

View File

@@ -30,7 +30,6 @@ define amdgpu_ps void @main(i32 %0, float %1) {
; ISA-NEXT: .LBB0_1: ; %Flow1
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_or_b64 exec, exec, s[6:7]
; ISA-NEXT: s_add_i32 s8, s8, 1
; ISA-NEXT: s_mov_b64 s[6:7], 0
; ISA-NEXT: .LBB0_2: ; %Flow
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
@@ -54,6 +53,7 @@ define amdgpu_ps void @main(i32 %0, float %1) {
; ISA-NEXT: s_cbranch_execz .LBB0_1
; ISA-NEXT: ; %bb.5: ; %endif2
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_add_i32 s8, s8, 1
; ISA-NEXT: s_xor_b64 s[4:5], exec, -1
; ISA-NEXT: s_branch .LBB0_1
; ISA-NEXT: .LBB0_6: ; %Flow2

View File

@@ -451,7 +451,6 @@
; GCN-O1-OPTS-NEXT: Loop Pass Manager
; GCN-O1-OPTS-NEXT: Loop Invariant Code Motion
; GCN-O1-OPTS-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
; GCN-O1-OPTS-NEXT: Speculatively execute instructions
; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis
; GCN-O1-OPTS-NEXT: Straight line strength reduction
; GCN-O1-OPTS-NEXT: Early CSE
@@ -741,7 +740,6 @@
; GCN-O2-NEXT: Loop Pass Manager
; GCN-O2-NEXT: Loop Invariant Code Motion
; GCN-O2-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
; GCN-O2-NEXT: Speculatively execute instructions
; GCN-O2-NEXT: Scalar Evolution Analysis
; GCN-O2-NEXT: Straight line strength reduction
; GCN-O2-NEXT: Early CSE
@@ -1034,7 +1032,6 @@
; GCN-O3-NEXT: Loop Pass Manager
; GCN-O3-NEXT: Loop Invariant Code Motion
; GCN-O3-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
; GCN-O3-NEXT: Speculatively execute instructions
; GCN-O3-NEXT: Scalar Evolution Analysis
; GCN-O3-NEXT: Straight line strength reduction
; GCN-O3-NEXT: Phi Values Analysis

View File

@@ -767,7 +767,7 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
; EG: ; %bb.0: ; %bb
; EG-NEXT: ALU_PUSH_BEFORE 1, @6, KC0[CB0:0-32], KC1[]
; EG-NEXT: JUMP @5 POP:1
; EG-NEXT: ALU 10, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 14, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 0
; EG-NEXT: POP @5 POP:1
; EG-NEXT: CF_END
@@ -775,8 +775,10 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
; EG-NEXT: SETNE_INT * T0.W, KC0[2].Z, 0.0,
; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: LSHL T0.W, KC0[2].W, literal.x,
; EG-NEXT: LSHL * T1.W, KC0[3].Y, literal.x,
; EG-NEXT: MOV T0.X, KC0[3].Y,
; EG-NEXT: MOV * T1.X, KC0[2].W,
; EG-NEXT: LSHL T0.W, PS, literal.x,
; EG-NEXT: LSHL * T1.W, PV.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: ASHR T1.W, PS, literal.x,
; EG-NEXT: ASHR * T0.W, PV.W, literal.x,
@@ -784,14 +786,16 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
; EG-NEXT: MOV T2.W, KC0[2].Y,
; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
; EG-NEXT: LSHR T1.X, PV.W, literal.x,
; EG-NEXT: MOV * T0.Y, PS,
; EG-NEXT: MOV T0.Y, PS,
; EG-NEXT: MOV T0.W, KC0[3].X,
; EG-NEXT: MOV * T0.W, KC0[3].Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: simplify_i24_crash:
; CM: ; %bb.0: ; %bb
; CM-NEXT: ALU_PUSH_BEFORE 1, @6, KC0[CB0:0-32], KC1[]
; CM-NEXT: JUMP @5 POP:1
; CM-NEXT: ALU 13, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: ALU 17, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT: POP @5 POP:1
; CM-NEXT: CF_END
@@ -799,8 +803,10 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
; CM-NEXT: SETNE_INT * T0.W, KC0[2].Z, 0.0,
; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
; CM-NEXT: ALU clause starting at 8:
; CM-NEXT: LSHL T0.Z, KC0[2].W, literal.x,
; CM-NEXT: LSHL * T0.W, KC0[3].Y, literal.x,
; CM-NEXT: MOV * T0.X, KC0[3].Y,
; CM-NEXT: MOV * T1.X, KC0[2].W,
; CM-NEXT: LSHL T0.Z, PV.X, literal.x,
; CM-NEXT: LSHL * T0.W, T0.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT: MOV T0.Y, KC0[2].Y,
; CM-NEXT: ASHR T1.Z, PV.W, literal.x,
@@ -811,7 +817,9 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Z,
; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
; CM-NEXT: LSHR T1.X, T0.Y, literal.x,
; CM-NEXT: MOV * T0.Y, PV.X,
; CM-NEXT: MOV T0.Y, PV.X,
; CM-NEXT: MOV T0.Z, KC0[3].X,
; CM-NEXT: MOV * T0.W, KC0[3].Z,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
bb:
%cmp = icmp eq i32 %arg0, 0

View File

@@ -143,8 +143,6 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, flo
; GCN-LABEL: {{^}}regression:
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
entry: