[SystemZ] Use VGM whenever possible to load FP immediates.

isFPImmLegal() has been extended to recognize certain FP immediates that can
be built with VGM (Vector Generate Mask).

These scalar FP immediates (that were previously loaded from the constant
pool) are now selected as VGMF/VGMG in Select().

Review: Ulrich Weigand
https://reviews.llvm.org/D58003

llvm-svn: 353867
This commit is contained in:
Jonas Paulsson 2019-02-12 18:06:06 +00:00
parent 0205828be4
commit 34bead750c
5 changed files with 122 additions and 3 deletions

View File

@ -1541,6 +1541,30 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
break;
}
case ISD::ConstantFP: {
  // Select a scalar FP immediate as a VGM (Vector Generate Mask) instruction
  // and extract the scalar from the high part of the vector result.
  APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF();
  // Positive and negative zero are not handled here; leave the switch.
  if (Imm.isZero() || Imm.isNegZero())
    break;
  const SystemZInstrInfo *TII = getInstrInfo();
  EVT VT = Node->getValueType(0);
  unsigned Start, End;
  unsigned BitWidth = VT.getSizeInBits();
  // Any other FP constant reaching this point is expected to have been
  // accepted by isFPImmLegal(), so the analysis must succeed.
  bool Success =
      SystemZTargetLowering::analyzeFPImm(Imm, BitWidth, Start, End, TII);
  // Success is only read by the assert below; mark it used so that builds
  // with assertions disabled do not warn about an unused variable.
  (void)Success;
  assert(Success && "Expected legal FP immediate");
  SDLoc DL(Node);
  // VGMF generates 32-bit elements, VGMG generates 64-bit elements.
  unsigned Opcode = (BitWidth == 32 ? SystemZ::VGMF : SystemZ::VGMG);
  SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT,
                    CurDAG->getTargetConstant(Start, DL, MVT::i32),
                    CurDAG->getTargetConstant(End, DL, MVT::i32));
  // Take the scalar value out of the high 32 or 64 bits of the result.
  unsigned SubRegIdx = (BitWidth == 32 ? SystemZ::subreg_h32
                                       : SystemZ::subreg_h64);
  Res = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SDValue(Res, 0))
            .getNode();
  ReplaceNode(Node, Res);
  return;
}
case ISD::STORE: {
if (tryFoldLoadStoreIntoMemOperand(Node))
return;

View File

@ -577,9 +577,39 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
// Return true if Imm can be generated with a vector instruction, such as VGM.
// BitWidth is the size in bits of the scalar FP type. On success, Start and
// End hold the bit numbers reported by isRxSBGMask, renumbered so that 0
// denotes the most significant bit of a BitWidth-bit value.
bool SystemZTargetLowering::
analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start,
             unsigned &End, const SystemZInstrInfo *TII) {
  // Instruction selection only distinguishes a 32-bit form (VGMF) from a
  // 64-bit form (VGMG). Reject every other width up front so that a wider
  // immediate whose bit pattern happens to fit in 64 bits is never reported
  // as legal.
  if (BitWidth != 32 && BitWidth != 64)
    return false;

  APInt IntImm = Imm.bitcastToAPInt();
  if (IntImm.getActiveBits() > 64)
    return false;

  // See if this immediate could be generated with VGM.
  if (!TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End))
    return false;

  // isRxSBGMask returns the bit numbers for a full 64-bit value,
  // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
  // bit numbers for a BitWidth-bit value, so that 0 denotes
  // 1 << (BitWidth-1).
  const unsigned Offset = 64 - BitWidth;
  Start -= Offset;
  End -= Offset;
  return true;
}
// Return true if the FP immediate Imm of type VT can be materialized directly
// instead of being loaded from the constant pool.
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  // Every other immediate needs a vector instruction to build it.
  if (!Subtarget.hasVector())
    return false;

  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  // Only the yes/no answer matters here; the bit range is discarded.
  unsigned Start, End;
  return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII);
}
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {

View File

@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
#include "SystemZ.h"
#include "SystemZInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
@ -513,7 +514,8 @@ public:
}
static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask);
// Return true if the FP immediate Imm, of size BitWidth bits, can be
// generated with a vector instruction such as VGM. On success, Start and
// End receive the bit range of the mask, numbered so that 0 denotes the
// most significant bit of a BitWidth-bit value. TII supplies isRxSBGMask.
static bool analyzeFPImm(const APFloat &Imm, unsigned BitWidth,
unsigned &Start, unsigned &End, const SystemZInstrInfo *TII);
private:
const SystemZSubtarget &Subtarget;

View File

@ -0,0 +1,63 @@
; Test loads of FP constants with VGM.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; double 1.0 is 0x3FF0000000000000, i.e. bits 2-11 set when bit 0 is the
; most significant bit, so it is one contiguous run that VGMG can generate.
define double @f1() {
; CHECK-LABEL: f1:
; CHECK: vgmg %v0, 2, 11
ret double 1.0
}
; double 2.0 is 0x4000000000000000: only bit 1 is set (bit 0 is the most
; significant bit), so the run starts and ends at bit 1.
define double @f2() {
; CHECK-LABEL: f2:
; CHECK: vgmg %v0, 1, 1
ret double 2.0
}
; double -2.0 is 0xC000000000000000: the sign bit (bit 0) and bit 1 are set,
; giving the run 0-1.
define double @f3() {
; CHECK-LABEL: f3:
; CHECK: vgmg %v0, 0, 1
ret double -2.0
}
; double 0.5 is 0x3FE0000000000000: bits 2-10 set (bit 0 is the most
; significant bit).
define double @f4() {
; CHECK-LABEL: f4:
; CHECK: vgmg %v0, 2, 10
ret double 0.5
}
; double 0.125 is 0x3FC0000000000000: bits 2-9 set (bit 0 is the most
; significant bit).
define double @f5() {
; CHECK-LABEL: f5:
; CHECK: vgmg %v0, 2, 9
ret double 0.125
}
; float 1.0 is 0x3F800000: bits 2-8 set when bit 0 is the most significant
; bit of the 32-bit value, so VGMF (32-bit elements) is used.
define float @f6() {
; CHECK-LABEL: f6:
; CHECK: vgmf %v0, 2, 8
ret float 1.0
}
; float 2.0 is 0x40000000: only bit 1 is set (bit 0 is the most significant
; bit of the 32-bit value).
define float @f7() {
; CHECK-LABEL: f7:
; CHECK: vgmf %v0, 1, 1
ret float 2.0
}
; float -2.0 is 0xC0000000: the sign bit (bit 0) and bit 1 are set, giving
; the run 0-1.
define float @f8() {
; CHECK-LABEL: f8:
; CHECK: vgmf %v0, 0, 1
ret float -2.0
}
; float 0.5 is 0x3F000000: bits 2-7 set (bit 0 is the most significant bit
; of the 32-bit value).
define float @f9() {
; CHECK-LABEL: f9:
; CHECK: vgmf %v0, 2, 7
ret float 0.5
}
; float 0.125 is 0x3E000000: bits 2-6 set (bit 0 is the most significant bit
; of the 32-bit value).
define float @f10() {
; CHECK-LABEL: f10:
; CHECK: vgmf %v0, 2, 6
ret float 0.125
}

View File

@ -1,7 +1,7 @@
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -systemz-subreg-liveness < %s | FileCheck %s
; Check for successful compilation.
; CHECK: meeb %f0, 0(%r1)
; CHECK: meebr %f1, %f0
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
target triple = "s390x-ibm-linux"