[VE] Implement JumpTable

Implement JumpTable to make BRIND work on VE.  Update an existing
br_jt regression test also.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D91582
This commit is contained in:
Kazushi (Jam) Marukawa 2020-11-17 22:38:49 +09:00
parent c4472f8b4c
commit f4517bbd73
6 changed files with 164 additions and 27 deletions

View File

@ -92,8 +92,8 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
// //
// or %sp, 0, %fp // or %sp, 0, %fp
// ld %s17, 40(,%sp) iff this function is using s17 as BP // ld %s17, 40(,%sp) iff this function is using s17 as BP
// ld %got, 32(,%sp) // ld %plt, 32(,%sp)
// ld %plt, 24(,%sp) // ld %got, 24(,%sp)
// ld %lr, 8(,%sp) // ld %lr, 8(,%sp)
// ld %fp, 0(,%sp) // ld %fp, 0(,%sp)
BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11).addReg(VE::SX9).addImm(0); BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11).addReg(VE::SX9).addImm(0);

View File

@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAG.h"
@ -153,6 +154,7 @@ void VETargetLowering::initSPUActions() {
setOperationAction(ISD::GlobalAddress, PtrVT, Custom); setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
setOperationAction(ISD::ConstantPool, PtrVT, Custom); setOperationAction(ISD::ConstantPool, PtrVT, Custom);
setOperationAction(ISD::JumpTable, PtrVT, Custom);
/// VAARG handling { /// VAARG handling {
setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom);
@ -173,9 +175,7 @@ void VETargetLowering::initSPUActions() {
// VE doesn't have BRCOND // VE doesn't have BRCOND
setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand);
// BRIND and BR_JT are not implemented yet. // BR_JT is not implemented yet.
// FIXME: Implement both for the scalar performance.
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand);
/// } Branch /// } Branch
@ -929,6 +929,9 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
TF); TF);
if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
llvm_unreachable("Unhandled address SDNode"); llvm_unreachable("Unhandled address SDNode");
} }
@ -957,7 +960,7 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
MFI.setHasCalls(true); MFI.setHasCalls(true);
auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op); auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
if (isa<ConstantPoolSDNode>(Op) || if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) { (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
// Create following instructions for local linkage PIC code. // Create following instructions for local linkage PIC code.
// lea %reg, label@gotoff_lo // lea %reg, label@gotoff_lo
@ -1147,6 +1150,10 @@ SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
return lowerToTLSGeneralDynamicModel(Op, DAG); return lowerToTLSGeneralDynamicModel(Op, DAG);
} }
/// Custom lowering for ISD::JumpTable (dispatched from LowerOperation).
/// Forwards to makeAddress, which groups JumpTableSDNode with
/// ConstantPoolSDNode and local-linkage globals on the path it documents as
/// "local linkage PIC code" (label@gotoff_lo / label@gotoff_hi).
SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
// Lower a f128 load into two f64 loads. // Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) { static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op); SDLoc DL(Op);
@ -1412,6 +1419,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerGlobalAddress(Op, DAG); return lowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: case ISD::GlobalTLSAddress:
return lowerGlobalTLSAddress(Op, DAG); return lowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable:
return lowerJumpTable(Op, DAG);
case ISD::LOAD: case ISD::LOAD:
return lowerLOAD(Op, DAG); return lowerLOAD(Op, DAG);
case ISD::STORE: case ISD::STORE:
@ -1424,6 +1433,63 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
} }
/// } Custom Lower /// } Custom Lower
/// Jump table encoding for VE.
///
/// VE cannot emit a relocatable symbol in a jump table, nor an expression
/// that mixes a text-segment symbol with a data-segment symbol such as
///     .4byte .LBB0_2-.LJTI0_0
/// Under PIC, each entry is therefore emitted through a custom label as the
/// offset of the block from the top of its function:
///     .4byte .LBB0_2-<function name>
///
/// \returns MachineJumpTableInfo::EK_Custom32 for position-independent code,
///          otherwise whatever the generic TargetLowering heuristic picks.
unsigned VETargetLowering::getJumpTableEncoding() const {
  // Non-PIC code has no special requirement; defer to the default choice.
  if (!isPositionIndependent())
    return TargetLowering::getJumpTableEncoding();

  // PIC needs the custom "block - function" label.
  return MachineJumpTableInfo::EK_Custom32;
}
/// Build the expression for one custom (EK_Custom32) jump table entry.
///
/// Only expected for position-independent code (asserted). The entry is the
/// distance of the destination block from the start of its function:
///     .4byte .LBB0_2-<function name>
///
/// \param MJTI unused.
/// \param MBB  destination basic block of this entry.
/// \param Uid  unused.
/// \param Ctx  context used to create the symbol and the expression nodes.
/// \returns the expression `<MBB symbol> - <function symbol>`.
const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned Uid, MCContext &Ctx) const {
  assert(isPositionIndependent());

  // Minuend: the label of the destination block.
  const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);

  // Subtrahend: the symbol named after the enclosing function. Pass the
  // StringRef itself (it converts to Twine) instead of its data() pointer:
  // a StringRef is not guaranteed to be NUL-terminated, so going through a
  // C string could read past the name or truncate it at an embedded NUL.
  MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName());
  const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);

  return MCBinaryExpr::createSub(Value, Base, Ctx);
}
/// Return the base value that is added to a PIC jump table entry.
///
/// PIC entries hold `.LBB0_2-<function name>`, so the base is the address of
/// the current function, formed here as GLOBAL_BASE_REG plus the function's
/// gotoff hi/lo pair.
///
/// \param Table jump table node; only used for its debug location.
/// \param DAG   selection DAG in which the new nodes are created.
SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                   SelectionDAG &DAG) const {
  assert(isPositionIndependent());
  SDLoc Loc(Table);
  Function *Fn = &DAG.getMachineFunction().getFunction();
  assert(Fn != nullptr);
  auto PtrVT = getPointerTy(DAG.getDataLayout(), Fn->getAddressSpace());

  // Every table entry in PIC mode is an offset from the function start:
  //     .4bytes .LBB0_2-<function name>
  // Adding the address of this function back yields the .LBB0_2 label.
  // The function address is wanted as the following sequence:
  //     lea %reg, fun@gotoff_lo
  //     and %reg, %reg, (32)0
  //     lea.sl %reg, fun@gotoff_hi(%reg, %got)
  // which requires DAG nodes carrying the gotoff markers; makeHiLoPair
  // generates them.
  SDValue FnAddr = DAG.getGlobalAddress(Fn, Loc, PtrVT);
  SDValue GotOffset = makeHiLoPair(FnAddr, VEMCExpr::VK_VE_GOTOFF_HI32,
                                   VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
  SDValue GotBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, Loc, PtrVT);
  return DAG.getNode(ISD::ADD, Loc, PtrVT, GotBase, GotOffset);
}
static bool isI32Insn(const SDNode *User, const SDNode *N) { static bool isI32Insn(const SDNode *User, const SDNode *N) {
switch (User->getOpcode()) { switch (User->getOpcode()) {
default: default:

View File

@ -92,6 +92,15 @@ public:
/// Custom Lower { /// Custom Lower {
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
// Jump table hooks.
// Chooses EK_Custom32 entries for position-independent code and defers to
// TargetLowering::getJumpTableEncoding() otherwise.
unsigned getJumpTableEncoding() const override;
// Builds one custom entry as "<block symbol> - <function symbol>".
const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned Uid,
MCContext &Ctx) const override;
// Returns the value added to a PIC table entry: GLOBAL_BASE_REG plus the
// gotoff hi/lo pair of the current function.
SDValue getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const override;
// VE doesn't need getPICJumpTableRelocBaseExpr since it is used for only
// EK_LabelDifference32.
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@ -99,6 +108,7 @@ public:
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;

View File

@ -1603,7 +1603,7 @@ def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo),
def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo), def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo),
(LEASLrri $base, $lo, $hi)>; (LEASLrri $base, $lo, $hi)>;
foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr", foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr",
"tglobaltlsaddr" ] in { "tglobaltlsaddr", "tjumptable" ] in {
def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>; def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>;
def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>; def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>;
def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>; def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>;

View File

@ -63,7 +63,8 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP); return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
case MachineOperand::MO_Immediate: case MachineOperand::MO_Immediate:
return MCOperand::createImm(MO.getImm()); return MCOperand::createImm(MO.getImm());
case MachineOperand::MO_JumpTableIndex:
return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP);
case MachineOperand::MO_MachineBasicBlock: case MachineOperand::MO_MachineBasicBlock:
return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP); return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP);

View File

@ -1,36 +1,96 @@
; RUN: llc < %s -mtriple=ve | FileCheck %s ; RUN: llc < %s -mtriple=ve | FileCheck %s
; RUN: llc < %s -mtriple=ve -relocation-model=pic \
; RUN: | FileCheck %s -check-prefix=PIC
; Function Attrs: norecurse nounwind readnone ; Function Attrs: norecurse nounwind readnone
define signext i32 @br_jt(i32 signext %0) { define signext i32 @br_jt(i32 signext %0) {
; CHECK-LABEL: br_jt: ; CHECK-LABEL: br_jt:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: brlt.w 2, %s0, .LBB{{[0-9]+}}_4 ; CHECK-NEXT: adds.w.sx %s1, -1, %s0
; CHECK-NEXT: cmpu.w %s2, 3, %s1
; CHECK-NEXT: brgt.w 0, %s2, .LBB{{[0-9]+}}_5
; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: breq.w 1, %s0, .LBB{{[0-9]+}}_8 ; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: sll %s0, %s0, 3
; CHECK-NEXT: brne.w 2, %s0, .LBB{{[0-9]+}}_7 ; CHECK-NEXT: lea %s1, .LJTI0_0@lo
; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s1, .LJTI0_0@hi(, %s1)
; CHECK-NEXT: ld %s1, (%s1, %s0)
; CHECK-NEXT: or %s0, 3, (0)1
; CHECK-NEXT: b.l.t (, %s1)
; CHECK-NEXT: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK-NEXT: or %s0, 0, (0)1
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10) ; CHECK-NEXT: b.l.t (, %s10)
; CHECK-NEXT: .LBB{{[0-9]+}}_4: ; CHECK-NEXT: .LBB{{[0-9]+}}_3:
; CHECK-NEXT: breq.w 3, %s0, .LBB{{[0-9]+}}_9
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: brne.w 4, %s0, .LBB{{[0-9]+}}_7
; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: or %s0, 7, (0)1
; CHECK-NEXT: .LBB{{[0-9]+}}_7:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10)
; CHECK-NEXT: .LBB{{[0-9]+}}_8:
; CHECK-NEXT: or %s0, 3, (0)1
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10)
; CHECK-NEXT: .LBB{{[0-9]+}}_9:
; CHECK-NEXT: or %s0, 4, (0)1 ; CHECK-NEXT: or %s0, 4, (0)1
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10) ; CHECK-NEXT: b.l.t (, %s10)
; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NEXT: or %s0, 7, (0)1
; CHECK-NEXT: .LBB{{[0-9]+}}_5:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: b.l.t (, %s10)
;
; PIC-LABEL: br_jt:
; PIC: # %bb.0:
; PIC-NEXT: st %s9, (, %s11)
; PIC-NEXT: st %s10, 8(, %s11)
; PIC-NEXT: st %s15, 24(, %s11)
; PIC-NEXT: st %s16, 32(, %s11)
; PIC-NEXT: or %s9, 0, %s11
; PIC-NEXT: lea %s13, -176
; PIC-NEXT: and %s13, %s13, (32)0
; PIC-NEXT: lea.sl %s11, -1(%s13, %s11)
; PIC-NEXT: brge.l %s11, %s8, .LBB0_7
; PIC-NEXT: # %bb.6:
; PIC-NEXT: ld %s61, 24(, %s14)
; PIC-NEXT: or %s62, 0, %s0
; PIC-NEXT: lea %s63, 315
; PIC-NEXT: shm.l %s63, (%s61)
; PIC-NEXT: shm.l %s8, 8(%s61)
; PIC-NEXT: shm.l %s11, 16(%s61)
; PIC-NEXT: monc
; PIC-NEXT: or %s0, 0, %s62
; PIC-NEXT: .LBB0_7:
; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
; PIC-NEXT: adds.w.sx %s1, -1, %s0
; PIC-NEXT: cmpu.w %s2, 3, %s1
; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24)
; PIC-NEXT: and %s15, %s15, (32)0
; PIC-NEXT: sic %s16
; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
; PIC-NEXT: brgt.w 0, %s2, .LBB0_5
; PIC-NEXT: # %bb.1:
; PIC-NEXT: adds.w.zx %s0, %s1, (0)1
; PIC-NEXT: sll %s0, %s0, 2
; PIC-NEXT: lea %s1, .LJTI0_0@gotoff_lo
; PIC-NEXT: and %s1, %s1, (32)0
; PIC-NEXT: lea.sl %s1, .LJTI0_0@gotoff_hi(%s1, %s15)
; PIC-NEXT: ldl.sx %s0, (%s1, %s0)
; PIC-NEXT: lea %s1, br_jt@gotoff_lo
; PIC-NEXT: and %s1, %s1, (32)0
; PIC-NEXT: lea.sl %s1, br_jt@gotoff_hi(%s1, %s15)
; PIC-NEXT: adds.l %s1, %s0, %s1
; PIC-NEXT: or %s0, 3, (0)1
; PIC-NEXT: b.l.t (, %s1)
; PIC-NEXT: .LBB0_2:
; PIC-NEXT: or %s0, 0, (0)1
; PIC-NEXT: br.l.t .LBB0_5
; PIC-NEXT: .LBB0_3:
; PIC-NEXT: or %s0, 4, (0)1
; PIC-NEXT: br.l.t .LBB0_5
; PIC-NEXT: .LBB0_4:
; PIC-NEXT: or %s0, 7, (0)1
; PIC-NEXT: .LBB0_5:
; PIC-NEXT: adds.w.sx %s0, %s0, (0)1
; PIC-NEXT: or %s11, 0, %s9
; PIC-NEXT: ld %s16, 32(, %s11)
; PIC-NEXT: ld %s15, 24(, %s11)
; PIC-NEXT: ld %s10, 8(, %s11)
; PIC-NEXT: ld %s9, (, %s11)
; PIC-NEXT: b.l.t (, %s10)
switch i32 %0, label %5 [ switch i32 %0, label %5 [
i32 1, label %6 i32 1, label %6
i32 2, label %2 i32 2, label %2