diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 0dd6c09a958c..6ba751fcdf9b 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -179,9 +179,17 @@ static unsigned getSaveExecOp(unsigned Opc) {
 // register allocation, so turn them back into normal instructions.
 static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
   switch (MI.getOpcode()) {
-  case AMDGPU::S_MOV_B64_term:
   case AMDGPU::S_MOV_B32_term: {
-    MI.setDesc(TII.get(AMDGPU::COPY));
+    // A register source lowers to COPY; any other operand kind (e.g. an
+    // immediate or symbol) lowers to a real S_MOV_B32.
+    bool RegSrc = MI.getOperand(1).isReg();
+    MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
+    return true;
+  }
+  case AMDGPU::S_MOV_B64_term: {
+    // As above, but non-register sources lower to S_MOV_B64.
+    bool RegSrc = MI.getOperand(1).isReg();
+    MI.setDesc(TII.get(RegSrc ? AMDGPU::COPY : AMDGPU::S_MOV_B64));
     return true;
   }
   case AMDGPU::S_XOR_B64_term: {
diff --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
new file mode 100644
index 000000000000..0c45c7df30bc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
+
+# Verify that si-optimize-exec-masking lowers each *_term opcode to its
+# non-terminator form: S_MOV_B32_term/S_MOV_B64_term become COPY for a register
+# source and S_MOV_B32/S_MOV_B64 for an immediate or symbol source.
+
+---
+name: lower_term_opcodes
+tracksRegLiveness: false
+body: |
+  ; CHECK-LABEL: name: lower_term_opcodes
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   $sgpr0 = COPY $sgpr1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   $sgpr0 = S_MOV_B32 0
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   $sgpr0 = S_MOV_B32 &SYMBOL
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = COPY $sgpr2_sgpr3
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_MOV_B64 0
+  ; CHECK: bb.5:
+  ; CHECK:   successors: %bb.6(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_MOV_B64 &SYMBOL
+  ; CHECK: bb.6:
+  ; CHECK:   successors: %bb.7(0x80000000)
+  ; CHECK:   $sgpr0 = S_XOR_B32 $sgpr1, $sgpr2, implicit-def $scc
+  ; CHECK: bb.7:
+  ; CHECK:   successors: %bb.8(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: bb.8:
+  ; CHECK:   successors: %bb.9(0x80000000)
+  ; CHECK:   $sgpr0 = S_OR_B32 $sgpr1, $sgpr2, implicit-def $scc
+  ; CHECK: bb.9:
+  ; CHECK:   successors: %bb.10(0x80000000)
+  ; CHECK:   $sgpr0_sgpr1 = S_OR_B64 $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+  ; CHECK: bb.10:
+  ; CHECK:   successors: %bb.11(0x80000000)
+  ; CHECK:   $sgpr0 = S_ANDN2_B32 $sgpr1, $sgpr2, implicit-def $scc
+  ; CHECK: bb.11:
+  ; CHECK:   $sgpr0_sgpr1 = S_ANDN2_B64 $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+  bb.0:
+    $sgpr0 = S_MOV_B32_term $sgpr1
+
+  bb.1:
+    $sgpr0 = S_MOV_B32_term 0
+
+  bb.2:
+    $sgpr0 = S_MOV_B32_term &SYMBOL
+
+  bb.3:
+    $sgpr0_sgpr1 = S_MOV_B64_term $sgpr2_sgpr3
+
+  bb.4:
+    $sgpr0_sgpr1 = S_MOV_B64_term 0
+
+  bb.5:
+    $sgpr0_sgpr1 = S_MOV_B64_term &SYMBOL
+
+  bb.6:
+    $sgpr0 = S_XOR_B32_term $sgpr1, $sgpr2, implicit-def $scc
+
+  bb.7:
+    $sgpr0_sgpr1 = S_XOR_B64_term $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+
+  bb.8:
+    $sgpr0 = S_OR_B32_term $sgpr1, $sgpr2, implicit-def $scc
+
+  bb.9:
+    $sgpr0_sgpr1 = S_OR_B64_term $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+
+  bb.10:
+    $sgpr0 = S_ANDN2_B32_term $sgpr1, $sgpr2, implicit-def $scc
+
+  bb.11:
+    $sgpr0_sgpr1 = S_ANDN2_B64_term $sgpr2_sgpr3, $sgpr2_sgpr3, implicit-def $scc
+...