[NVPTX] Copy machine operand flags in TII::insertBranch

Before this patch, operand flags such as undef were dropped by TII::insertBranch
(used by the BranchFolding pass), resulting in the following error from
the machine verifier:

    *** Bad machine code: Reading virtual register without a def ***
    - function:    hoge
    - basic block: %bb.0 bb (0x562e9c240e68)
    - instruction: CBranch %2:int1regs, %bb.3
    - operand 0:   %2:int1regs

Differential Revision: https://reviews.llvm.org/D113001
This commit is contained in:
Andrew Savonichev 2021-11-03 12:38:06 +03:00
parent 803d4f8a35
commit 30a3a17df8
3 changed files with 89 additions and 4 deletions

View File

@ -195,13 +195,12 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
.addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | FileCheck %s
; Disable CGP which also folds branches, so that only BranchFolding is under
; the spotlight.

View File

@ -0,0 +1,86 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - %s -march=nvptx64 -mcpu=sm_35 -run-pass=branch-folder | FileCheck %s
# Runs only the branch-folder pass on the MIR below and checks that the
# conditional branch in bb.0 still carries the undef flag on its condition
# operand (%2) in the output.
--- |
; ModuleID = '/mnt/nas/asavonic/work/llvm/llvm/test/CodeGen/NVPTX/branch-fold.ll'
source_filename = "/mnt/nas/asavonic/work/llvm/llvm/test/CodeGen/NVPTX/branch-fold.ll"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
define ptx_kernel void @hoge() {
bb:
br i1 undef, label %bb1.preheader, label %bb4.preheader
bb1.preheader: ; preds = %bb
br label %bb1
bb1: ; preds = %bb1.preheader, %bb1
%lsr.iv = phi i64 [ undef, %bb1.preheader ], [ %lsr.iv.next, %bb1 ]
%lsr.iv.next = add i64 %lsr.iv, 1
%tmp3 = icmp sle i64 %lsr.iv.next, 0
br i1 %tmp3, label %bb1, label %bb4.preheader
bb4.preheader: ; preds = %bb1, %bb
br label %bb4
bb4: ; preds = %bb4.preheader, %bb4
br label %bb4
}
...
---
name: hoge
alignment: 1
tracksRegLiveness: true
registers:
- { id: 0, class: int64regs }
- { id: 1, class: int64regs }
- { id: 2, class: int1regs }
- { id: 3, class: int64regs }
- { id: 4, class: int1regs }
- { id: 5, class: int64regs }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: hoge
; CHECK: bb.0.bb:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: CBranch undef %2:int1regs, %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.bb1.preheader:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:int64regs = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.bb1:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:int64regs = ADDi64ri [[ADDi64ri]], 1
; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:int1regs = SETP_s64ri [[ADDi64ri]], 1, 2
; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.bb4:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: GOTO %bb.3
; Input MIR: bb.0 branches on %2, which has no def and is marked undef.
bb.0.bb:
successors: %bb.1, %bb.3
CBranch undef %2:int1regs, %bb.3
bb.1.bb1.preheader:
%5:int64regs = IMPLICIT_DEF
bb.2.bb1:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
%5:int64regs = ADDi64ri %5, 1
%4:int1regs = SETP_s64ri %5, 1, 2
CBranch %4, %bb.2
bb.3.bb4:
GOTO %bb.3
...