From f4517bbd73401160294d2a17b1d68e5807709710 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Tue, 17 Nov 2020 22:38:49 +0900 Subject: [PATCH] [VE] Implement JumpTable Implement JumpTable to make BRIND work on VE. Update an existing br_jt regression test also. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D91582 --- llvm/lib/Target/VE/VEFrameLowering.cpp | 4 +- llvm/lib/Target/VE/VEISelLowering.cpp | 74 +++++++++++++++++-- llvm/lib/Target/VE/VEISelLowering.h | 10 +++ llvm/lib/Target/VE/VEInstrInfo.td | 2 +- llvm/lib/Target/VE/VEMCInstLower.cpp | 3 +- llvm/test/CodeGen/VE/Scalar/br_jt.ll | 98 +++++++++++++++++++++----- 6 files changed, 164 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp index f27a2d08fd6c..ec18bee74329 100644 --- a/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -92,8 +92,8 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, // // or %sp, 0, %fp // ld %s17, 40(,%sp) iff this function is using s17 as BP - // ld %got, 32(,%sp) - // ld %plt, 24(,%sp) + // ld %plt, 32(,%sp) + // ld %got, 24(,%sp) // ld %lr, 8(,%sp) // ld %fp, 0(,%sp) BuildMI(MBB, MBBI, DL, TII.get(VE::ORri), VE::SX11).addReg(VE::SX9).addImm(0); diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index b319b6e6645b..864f097315a2 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -153,6 +154,7 @@ void VETargetLowering::initSPUActions() { setOperationAction(ISD::GlobalAddress, PtrVT, Custom); 
setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::JumpTable, PtrVT, Custom); /// VAARG handling { setOperationAction(ISD::VASTART, MVT::Other, Custom); @@ -173,9 +175,7 @@ void VETargetLowering::initSPUActions() { // VE doesn't have BRCOND setOperationAction(ISD::BRCOND, MVT::Other, Expand); - // BRIND and BR_JT are not implemented yet. - // FIXME: Implement both for the scalar perforamnce. - setOperationAction(ISD::BRIND, MVT::Other, Expand); + // BR_JT is not implemented yet. setOperationAction(ISD::BR_JT, MVT::Other, Expand); /// } Branch @@ -929,6 +929,9 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF, return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), TF); + if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) + return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF); + llvm_unreachable("Unhandled address SDNode"); } @@ -957,7 +960,7 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { MFI.setHasCalls(true); auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op); - if (isa<ConstantPoolSDNode>(Op) || + if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) || (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) { // Create following instructions for local linkage PIC code. // lea %reg, label@gotoff_lo @@ -1147,6 +1150,10 @@ SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op, return lowerToTLSGeneralDynamicModel(Op, DAG); } +SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + // Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); @@ -1412,6 +1419,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: + return lowerJumpTable(Op, DAG); case ISD::LOAD: return lowerLOAD(Op, DAG); case ISD::STORE: @@ -1424,6 +1433,63 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } /// } Custom Lower +/// JumpTable for VE. +/// +/// VE cannot generate relocatable symbol in jump table. VE cannot +/// generate expressions using symbols in both text segment and data +/// segment like below. +/// .4byte .LBB0_2-.LJTI0_0 +/// So, we generate offset from the top of function like below as +/// a custom label. +/// .4byte .LBB0_2-<function name> + +unsigned VETargetLowering::getJumpTableEncoding() const { + // Use custom label for PIC. + if (isPositionIndependent()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. + return TargetLowering::getJumpTableEncoding(); +} + +const MCExpr *VETargetLowering::LowerCustomJumpTableEntry( + const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, + unsigned Uid, MCContext &Ctx) const { + assert(isPositionIndependent()); + + // Generate custom label for PIC like below.
+ // .4byte .LBB0_2-<function name> + const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data()); + const auto *Base = MCSymbolRefExpr::create(Sym, Ctx); + return MCBinaryExpr::createSub(Value, Base, Ctx); +} + +SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + assert(isPositionIndependent()); + SDLoc DL(Table); + Function *Function = &DAG.getMachineFunction().getFunction(); + assert(Function != nullptr); + auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace()); + + // In the jump table, we have following values in PIC mode. + // .4byte .LBB0_2-<function name> + // We need to add this value and the address of this function to generate + // .LBB0_2 label correctly under PIC mode. So, we want to generate following + // instructions: + // lea %reg, fun@gotoff_lo + // and %reg, %reg, (32)0 + // lea.sl %reg, fun@gotoff_hi(%reg, %got) + // In order to do so, we need to generate correctly marked DAG node using + // makeHiLoPair.
+ SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy); + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy); + return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo); +} + static bool isI32Insn(const SDNode *User, const SDNode *N) { switch (User->getOpcode()) { default: diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index 4ac609b73830..050496b9133b 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -92,6 +92,15 @@ public: /// Custom Lower { SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + unsigned getJumpTableEncoding() const override; + const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned Uid, + MCContext &Ctx) const override; + SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const override; + // VE doesn't need getPICJumpTableRelocBaseExpr since it is used for only + // EK_LabelDifference32. 
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -99,6 +108,7 @@ public: SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 1a15058cf6c4..0d5bbf282ddf 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -1603,7 +1603,7 @@ def vehi_lo : OutPatFrag<(ops node:$hi, node:$lo), def vehi_baselo : OutPatFrag<(ops node:$base, node:$hi, node:$lo), (LEASLrri $base, $lo, $hi)>; foreach type = [ "tblockaddress", "tconstpool", "texternalsym", "tglobaladdr", - "tglobaltlsaddr" ] in { + "tglobaltlsaddr", "tjumptable" ] in { def : Pat<(VElo !cast<SDNode>(type):$lo), (velo_only $lo)>; def : Pat<(VEhi !cast<SDNode>(type):$hi), (vehi_only $hi)>; def : Pat<(add (VEhi !cast<SDNode>(type):$hi), I64:$lo), (vehi_lo $hi, $lo)>; diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp index c14121d9e18a..bc5577ce4f97 100644 --- a/llvm/lib/Target/VE/VEMCInstLower.cpp +++ b/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -63,7 +63,8 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP); case MachineOperand::MO_Immediate: return MCOperand::createImm(MO.getImm()); - + case MachineOperand::MO_JumpTableIndex: + return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP); case MachineOperand::MO_MachineBasicBlock: return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP); diff --git
a/llvm/test/CodeGen/VE/Scalar/br_jt.ll b/llvm/test/CodeGen/VE/Scalar/br_jt.ll index 86c089474c9d..a7218965c467 100644 --- a/llvm/test/CodeGen/VE/Scalar/br_jt.ll +++ b/llvm/test/CodeGen/VE/Scalar/br_jt.ll @@ -1,36 +1,96 @@ ; RUN: llc < %s -mtriple=ve | FileCheck %s +; RUN: llc < %s -mtriple=ve -relocation-model=pic \ +; RUN: | FileCheck %s -check-prefix=PIC ; Function Attrs: norecurse nounwind readnone define signext i32 @br_jt(i32 signext %0) { ; CHECK-LABEL: br_jt: ; CHECK: # %bb.0: ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 -; CHECK-NEXT: brlt.w 2, %s0, .LBB{{[0-9]+}}_4 +; CHECK-NEXT: adds.w.sx %s1, -1, %s0 +; CHECK-NEXT: cmpu.w %s2, 3, %s1 +; CHECK-NEXT: brgt.w 0, %s2, .LBB{{[0-9]+}}_5 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: breq.w 1, %s0, .LBB{{[0-9]+}}_8 -; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: brne.w 2, %s0, .LBB{{[0-9]+}}_7 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: sll %s0, %s0, 3 +; CHECK-NEXT: lea %s1, .LJTI0_0@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, .LJTI0_0@hi(, %s1) +; CHECK-NEXT: ld %s1, (%s1, %s0) +; CHECK-NEXT: or %s0, 3, (0)1 +; CHECK-NEXT: b.l.t (, %s1) +; CHECK-NEXT: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s0, 0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: b.l.t (, %s10) -; CHECK-NEXT: .LBB{{[0-9]+}}_4: -; CHECK-NEXT: breq.w 3, %s0, .LBB{{[0-9]+}}_9 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: brne.w 4, %s0, .LBB{{[0-9]+}}_7 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: or %s0, 7, (0)1 -; CHECK-NEXT: .LBB{{[0-9]+}}_7: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 -; CHECK-NEXT: b.l.t (, %s10) -; CHECK-NEXT: .LBB{{[0-9]+}}_8: -; CHECK-NEXT: or %s0, 3, (0)1 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 -; CHECK-NEXT: b.l.t (, %s10) -; CHECK-NEXT: .LBB{{[0-9]+}}_9: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: ; CHECK-NEXT: or %s0, 4, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: b.l.t (, %s10) +; CHECK-NEXT: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: or %s0, 7, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: 
adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) +; +; PIC-LABEL: br_jt: +; PIC: # %bb.0: +; PIC-NEXT: st %s9, (, %s11) +; PIC-NEXT: st %s10, 8(, %s11) +; PIC-NEXT: st %s15, 24(, %s11) +; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: or %s9, 0, %s11 +; PIC-NEXT: lea %s13, -176 +; PIC-NEXT: and %s13, %s13, (32)0 +; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) +; PIC-NEXT: brge.l %s11, %s8, .LBB0_7 +; PIC-NEXT: # %bb.6: +; PIC-NEXT: ld %s61, 24(, %s14) +; PIC-NEXT: or %s62, 0, %s0 +; PIC-NEXT: lea %s63, 315 +; PIC-NEXT: shm.l %s63, (%s61) +; PIC-NEXT: shm.l %s8, 8(%s61) +; PIC-NEXT: shm.l %s11, 16(%s61) +; PIC-NEXT: monc +; PIC-NEXT: or %s0, 0, %s62 +; PIC-NEXT: .LBB0_7: +; PIC-NEXT: adds.w.sx %s0, %s0, (0)1 +; PIC-NEXT: adds.w.sx %s1, -1, %s0 +; PIC-NEXT: cmpu.w %s2, 3, %s1 +; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; PIC-NEXT: and %s15, %s15, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: brgt.w 0, %s2, .LBB0_5 +; PIC-NEXT: # %bb.1: +; PIC-NEXT: adds.w.zx %s0, %s1, (0)1 +; PIC-NEXT: sll %s0, %s0, 2 +; PIC-NEXT: lea %s1, .LJTI0_0@gotoff_lo +; PIC-NEXT: and %s1, %s1, (32)0 +; PIC-NEXT: lea.sl %s1, .LJTI0_0@gotoff_hi(%s1, %s15) +; PIC-NEXT: ldl.sx %s0, (%s1, %s0) +; PIC-NEXT: lea %s1, br_jt@gotoff_lo +; PIC-NEXT: and %s1, %s1, (32)0 +; PIC-NEXT: lea.sl %s1, br_jt@gotoff_hi(%s1, %s15) +; PIC-NEXT: adds.l %s1, %s0, %s1 +; PIC-NEXT: or %s0, 3, (0)1 +; PIC-NEXT: b.l.t (, %s1) +; PIC-NEXT: .LBB0_2: +; PIC-NEXT: or %s0, 0, (0)1 +; PIC-NEXT: br.l.t .LBB0_5 +; PIC-NEXT: .LBB0_3: +; PIC-NEXT: or %s0, 4, (0)1 +; PIC-NEXT: br.l.t .LBB0_5 +; PIC-NEXT: .LBB0_4: +; PIC-NEXT: or %s0, 7, (0)1 +; PIC-NEXT: .LBB0_5: +; PIC-NEXT: adds.w.sx %s0, %s0, (0)1 +; PIC-NEXT: or %s11, 0, %s9 +; PIC-NEXT: ld %s16, 32(, %s11) +; PIC-NEXT: ld %s15, 24(, %s11) +; PIC-NEXT: ld %s10, 8(, %s11) +; PIC-NEXT: ld %s9, (, %s11) +; PIC-NEXT: b.l.t (, %s10) switch i32 %0, label %5 [ i32 1, label %6 i32 2, label %2