From 30a3a17df842de45fe44e1608888ef7b4a4d2c35 Mon Sep 17 00:00:00 2001
From: Andrew Savonichev
Date: Wed, 3 Nov 2021 12:38:06 +0300
Subject: [PATCH] [NVPTX] Copy machine operand flags in TII::insertBranch

Before this patch, flags such as undef were dropped by
TII::insertBranch (used by BranchFolding pass), resulting in the
following error from machine verifier:

*** Bad machine code: Reading virtual register without a def ***
- function: hoge
- basic block: %bb.0 bb (0x562e9c240e68)
- instruction: CBranch %2:int1regs, %bb.3
- operand 0: %2:int1regs

Differential Revision: https://reviews.llvm.org/D113001
---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 5 +-
 llvm/test/CodeGen/NVPTX/branch-fold.ll | 2 +-
 llvm/test/CodeGen/NVPTX/branch-fold.mir | 86 ++++++++++++++++++++++++
 3 files changed, 89 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/branch-fold.mir

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index ec0c92ccf5c5..953d95e55f65 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -195,13 +195,12 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB, if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); else // Conditional branch - BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()) - .addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB); return 1; } // Two-way Conditional Branch. 
- BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB); BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); return 2; } diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.ll b/llvm/test/CodeGen/NVPTX/branch-fold.ll index 2b9cd0a35d92..336147f1f992 100644 --- a/llvm/test/CodeGen/NVPTX/branch-fold.ll +++ b/llvm/test/CodeGen/NVPTX/branch-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | FileCheck %s ; Disable CGP which also folds branches, so that only BranchFolding is under ; the spotlight. diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir new file mode 100644 index 000000000000..8bdac44c4f23 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -march=nvptx64 -mcpu=sm_35 -run-pass=branch-folder | FileCheck %s + +--- | + ; ModuleID = '/mnt/nas/asavonic/work/llvm/llvm/test/CodeGen/NVPTX/branch-fold.ll' + source_filename = "/mnt/nas/asavonic/work/llvm/llvm/test/CodeGen/NVPTX/branch-fold.ll" + target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" + target triple = "nvptx64-nvidia-cuda" + + define ptx_kernel void @hoge() { + bb: + br i1 undef, label %bb1.preheader, label %bb4.preheader + + bb1.preheader: ; preds = %bb + br label %bb1 + + bb1: ; preds = %bb1.preheader, %bb1 + %lsr.iv = phi i64 [ undef, %bb1.preheader ], [ %lsr.iv.next, %bb1 ] + %lsr.iv.next = add i64 %lsr.iv, 1 + %tmp3 = icmp sle i64 %lsr.iv.next, 0 + br i1 %tmp3, label %bb1, label %bb4.preheader + + bb4.preheader: ; preds = %bb1, %bb + br label %bb4 + + bb4: ; preds = %bb4.preheader, %bb4 + br label %bb4 + } + +... 
+--- +name: hoge +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: int64regs } + - { id: 1, class: int64regs } + - { id: 2, class: int1regs } + - { id: 3, class: int64regs } + - { id: 4, class: int1regs } + - { id: 5, class: int64regs } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: hoge + ; CHECK: bb.0.bb: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBranch undef %2:int1regs, %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.bb1.preheader: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:int64regs = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb1: + ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:int64regs = ADDi64ri [[ADDi64ri]], 1 + ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:int1regs = SETP_s64ri [[ADDi64ri]], 1, 2 + ; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.bb4: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: GOTO %bb.3 + bb.0.bb: + successors: %bb.1, %bb.3 + + CBranch undef %2:int1regs, %bb.3 + + bb.1.bb1.preheader: + %5:int64regs = IMPLICIT_DEF + + bb.2.bb1: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + + %5:int64regs = ADDi64ri %5, 1 + %4:int1regs = SETP_s64ri %5, 1, 2 + CBranch %4, %bb.2 + + bb.3.bb4: + GOTO %bb.3 + +...