2013-05-07 00:15:19 +08:00
|
|
|
//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Type profiles
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Type profile derived from SDCallSeqStart; value 0 is constrained to i64.
def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>]>;
|
|
|
|
// Type profile derived from SDCallSeqEnd; values 0 and 1 are both
// constrained to i64.
def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>,
|
|
|
|
SDTCisVT<1, i64>]>;
|
|
|
|
// No results and an operand count of -1 (a variable number of operands in
// SDTypeProfile); operand 0 must have the pointer type.
def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
|
|
|
|
// No results, two operands; both operands must have the same type.
def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
|
2013-09-06 19:51:39 +08:00
|
|
|
// No results, three operands; operands 0 and 1 share a type and
// operand 2 is an i32.
def SDT_ZICmp : SDTypeProfile<0, 3,
|
|
|
|
[SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisVT<2, i32>]>;
|
[SystemZ] Be more careful about inverting CC masks
System z branches have a mask to select which of the 4 CC values should
cause the branch to be taken. We can invert a branch by inverting the mask.
However, not all instructions can produce all 4 CC values, so inverting
the branch like this can lead to some oddities. For example, integer
comparisons only produce a CC of 0 (equal), 1 (less) or 2 (greater).
If an integer EQ is reversed to NE before instruction selection,
the branch will test for 1 or 2. If instead the branch is reversed
after instruction selection (by inverting the mask), it will test for
1, 2 or 3. Both are correct, but the second isn't really canonical.
This patch therefore keeps track of which CC values are possible
and uses this when inverting a mask.
Although this is mostly cosmetic, it fixes undefined behavior
for the CIJNLH in branch-08.ll. Another fix would have been
to mask out bit 0 when generating the fused compare and branch,
but the point of this patch is that we shouldn't need to do that
in the first place.
The patch also makes it easier to reuse CC results from other instructions.
llvm-svn: 187495
2013-07-31 20:30:20 +08:00
|
|
|
def SDT_ZBRCCMask : SDTypeProfile<0, 3,
|
2014-07-10 18:52:51 +08:00
|
|
|
[SDTCisVT<0, i32>,
|
|
|
|
SDTCisVT<1, i32>,
|
[SystemZ] Be more careful about inverting CC masks
System z branches have a mask to select which of the 4 CC values should
cause the branch to be taken. We can invert a branch by inverting the mask.
However, not all instructions can produce all 4 CC values, so inverting
the branch like this can lead to some oddities. For example, integer
comparisons only produce a CC of 0 (equal), 1 (less) or 2 (greater).
If an integer EQ is reversed to NE before instruction selection,
the branch will test for 1 or 2. If instead the branch is reversed
after instruction selection (by inverting the mask), it will test for
1, 2 or 3. Both are correct, but the second isn't really canonical.
This patch therefore keeps track of which CC values are possible
and uses this when inverting a mask.
Although this is mostly cosmetic, it fixes undefined behavior
for the CIJNLH in branch-08.ll. Another fix would have been
to mask out bit 0 when generating the fused compare and branch,
but the point of this patch is that we shouldn't need to do that
in the first place.
The patch also makes it easier to reuse CC results from other instructions.
llvm-svn: 187495
2013-07-31 20:30:20 +08:00
|
|
|
SDTCisVT<2, OtherVT>]>;
|
|
|
|
// One result, four operands. The result (index 0) and the first two
// operands (indices 1 and 2) all share a type; the last two operands
// (indices 3 and 4) are i32.
def SDT_ZSelectCCMask : SDTypeProfile<1, 4,
|
2013-05-07 00:15:19 +08:00
|
|
|
[SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisSameAs<1, 2>,
|
2014-07-10 18:52:51 +08:00
|
|
|
SDTCisVT<3, i32>,
|
|
|
|
SDTCisVT<4, i32>]>;
|
2013-05-07 00:15:19 +08:00
|
|
|
// One result, one operand; the result and the operand have the same type,
// which must be the pointer type.
def SDT_ZWrapPtr : SDTypeProfile<1, 1,
|
|
|
|
[SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisPtrTy<0>]>;
|
2013-09-27 23:14:04 +08:00
|
|
|
// One result, two operands; the result and both operands share a single
// type, which must be the pointer type.
def SDT_ZWrapOffset : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisSameAs<0, 2>,
|
|
|
|
SDTCisPtrTy<0>]>;
|
2013-05-07 00:15:19 +08:00
|
|
|
// One i64 result and no operands.
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
|
|
|
|
// One i32 result computed from one i32 operand.
def SDT_ZExtractAccess : SDTypeProfile<1, 1,
|
|
|
|
[SDTCisVT<0, i32>,
|
2014-07-10 18:52:51 +08:00
|
|
|
SDTCisVT<1, i32>]>;
|
2013-05-07 00:15:19 +08:00
|
|
|
// One result, two operands; the result and the first operand are
// `untyped`, and the second operand is an i32.
def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVT<0, untyped>,
|
|
|
|
SDTCisVT<1, untyped>,
|
|
|
|
SDTCisVT<2, i32>]>;
|
|
|
|
// One result, two operands; the result and the first operand are
// `untyped`, and the second operand is an i64.
def SDT_ZGR128Binary64 : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVT<0, untyped>,
|
|
|
|
SDTCisVT<1, untyped>,
|
|
|
|
SDTCisVT<2, i64>]>;
|
|
|
|
// One i32 result, five operands; the first operand (index 1) has the
// pointer type and the remaining four operands (indices 2-5) are i32.
def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
|
|
|
|
[SDTCisVT<0, i32>,
|
|
|
|
SDTCisPtrTy<1>,
|
|
|
|
SDTCisVT<2, i32>,
|
|
|
|
SDTCisVT<3, i32>,
|
|
|
|
SDTCisVT<4, i32>,
|
|
|
|
SDTCisVT<5, i32>]>;
|
|
|
|
// One i32 result, six operands; the first operand (index 1) has the
// pointer type and the remaining five operands (indices 2-6) are i32.
def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
|
|
|
|
[SDTCisVT<0, i32>,
|
|
|
|
SDTCisPtrTy<1>,
|
|
|
|
SDTCisVT<2, i32>,
|
|
|
|
SDTCisVT<3, i32>,
|
|
|
|
SDTCisVT<4, i32>,
|
|
|
|
SDTCisVT<5, i32>,
|
|
|
|
SDTCisVT<6, i32>]>;
|
2013-08-12 18:17:33 +08:00
|
|
|
// No results, three operands; operands 0 and 1 have the pointer type and
// operand 2 is an i64.
def SDT_ZMemMemLength : SDTypeProfile<0, 3,
|
2013-07-08 17:35:23 +08:00
|
|
|
[SDTCisPtrTy<0>,
|
|
|
|
SDTCisPtrTy<1>,
|
2013-08-27 17:54:29 +08:00
|
|
|
SDTCisVT<2, i64>]>;
|
|
|
|
// No results, four operands; operands 0 and 1 have the pointer type and
// operands 2 and 3 are i64.
def SDT_ZMemMemLoop : SDTypeProfile<0, 4,
|
|
|
|
[SDTCisPtrTy<0>,
|
|
|
|
SDTCisPtrTy<1>,
|
|
|
|
SDTCisVT<2, i64>,
|
|
|
|
SDTCisVT<3, i64>]>;
|
2013-08-16 19:21:54 +08:00
|
|
|
// One result, three operands; the result (index 0) and the first two
// operands (indices 1 and 2) have the pointer type, and the last operand
// (index 3) is an i32.
def SDT_ZString : SDTypeProfile<1, 3,
|
|
|
|
[SDTCisPtrTy<0>,
|
|
|
|
SDTCisPtrTy<1>,
|
|
|
|
SDTCisPtrTy<2>,
|
|
|
|
SDTCisVT<3, i32>]>;
|
2013-08-12 18:28:10 +08:00
|
|
|
// One i32 result and no operands.
def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
|
2013-08-23 19:36:42 +08:00
|
|
|
// No results, two operands; operand 0 is an i32 and operand 1 has the
// pointer type.
def SDT_ZPrefetch : SDTypeProfile<0, 2,
|
2014-07-10 18:52:51 +08:00
|
|
|
[SDTCisVT<0, i32>,
|
2013-08-23 19:36:42 +08:00
|
|
|
SDTCisPtrTy<1>]>;
|
2016-05-17 04:32:22 +08:00
|
|
|
// One result, two operands; the result is an integer type, the first
// operand (index 1) has the pointer type and the second operand (index 2)
// is constrained to OtherVT.
def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisInt<0>,
|
|
|
|
SDTCisPtrTy<1>,
|
|
|
|
SDTCisVT<2, OtherVT>]>;
|
|
|
|
// No results, three operands; operand 0 is an integer type, operand 1 has
// the pointer type and operand 2 is constrained to OtherVT.
def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
|
|
|
|
[SDTCisInt<0>,
|
|
|
|
SDTCisPtrTy<1>,
|
|
|
|
SDTCisVT<2, OtherVT>]>;
|
[SystemZ] Support transactional execution on zEC12
The zEC12 provides the transactional-execution facility. This is exposed
to users via a set of builtin routines on other compilers. This patch
adds LLVM support to enable those builtins. In particular, the patch:
- adds the transactional-execution and processor-assist facilities
- adds MC support for all instructions provided by those facilities
- adds LLVM intrinsics for those instructions and hooks them up for CodeGen
- adds CodeGen support to optimize CC return value checking
Since this is the first use of target-specific intrinsics on the platform,
the patch creates the include/llvm/IR/IntrinsicsSystemZ.td file and
hooks it up in Intrinsics.td. I've also changed Triple::getArchTypePrefix
to return "s390" instead of "systemz", since the naming convention for
GCC intrinsics uses "s390" on the platform, and it seemed more straight-
forward to use the same convention for LLVM IR intrinsics.
An associated clang patch makes the intrinsics (and command line switches)
available at the source-language level.
For reference, the transactional-execution instructions are documented
in the z/Architecture Principles of Operation for the zEC12:
http://publibfp.boulder.ibm.com/cgi-bin/bookmgr/download/DZ9ZR009.pdf
The associated builtins are documented in the GCC manual:
http://gcc.gnu.org/onlinedocs/gcc/S_002f390-System-z-Built-in-Functions.html
Index: llvm-head/lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZOperators.td
+++ llvm-head/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,9 @@ def SDT_ZI32Intrinsic : SDTypeProf
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
+def SDT_ZTBegin : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -180,6 +183,15 @@ def z_prefetch : SDNode<"System
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
+def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
Index: llvm-head/lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZInstrFormats.td
+++ llvm-head/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -473,6 +473,17 @@ class InstSS<bits<8> op, dag outs, dag i
let Inst{15-0} = BD2;
}
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD2;
+
+ let Inst{31-16} = op;
+ let Inst{15-0} = BD2;
+}
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
Index: llvm-head/lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZInstrInfo.td
+++ llvm-head/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1362,6 +1362,60 @@ let Defs = [CC] in {
}
//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureTransactionalExecution] in {
+ // Transaction Begin
+ let hasSideEffects = 1, mayStore = 1,
+ usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : InstSIL<0xE560,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbegin\t$BD1, $I2",
+ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
+ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ [(z_tbegin_nofloat bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ def TBEGINC : InstSIL<0xE561,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbeginc\t$BD1, $I2",
+ [(int_s390_tbeginc bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ }
+
+ // Transaction End
+ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
+ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
+
+ // Transaction Abort
+ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
+ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
+ "tabort\t$BD2",
+ [(int_s390_tabort bdaddr12only:$BD2)]>;
+
+ // Nontransactional Store
+ let hasSideEffects = 1 in
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+
+ // Extract Transaction Nesting Depth
+ let hasSideEffects = 1 in
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureProcessorAssist] in {
+ let hasSideEffects = 1, R4 = 0 in
+ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
+ "ppa\t$R1, $R2, $R3", []>;
+ def : Pat<(int_s390_ppa_txassist GR32:$src),
+ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ 0, 1)>;
+}
+
+//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
Index: llvm-head/lib/Target/SystemZ/SystemZProcessors.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZProcessors.td
+++ llvm-head/lib/Target/SystemZ/SystemZProcessors.td
@@ -60,6 +60,16 @@ def FeatureMiscellaneousExtensions : Sys
"Assume that the miscellaneous-extensions facility is installed"
>;
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
@@ -70,4 +80,5 @@ def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
FeatureFPExtension, FeaturePopulationCount,
FeatureFastSerialization, FeatureInterlockedAccess1,
- FeatureMiscellaneousExtensions]>;
+ FeatureMiscellaneousExtensions,
+ FeatureTransactionalExecution, FeatureProcessorAssist]>;
Index: llvm-head/lib/Target/SystemZ/SystemZSubtarget.cpp
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ llvm-head/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -40,6 +40,7 @@ SystemZSubtarget::SystemZSubtarget(const
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
HasPopulationCount(false), HasFastSerialization(false),
HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
+ HasTransactionalExecution(false), HasProcessorAssist(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
Index: llvm-head/lib/Target/SystemZ/SystemZSubtarget.h
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZSubtarget.h
+++ llvm-head/lib/Target/SystemZ/SystemZSubtarget.h
@@ -42,6 +42,8 @@ protected:
bool HasFastSerialization;
bool HasInterlockedAccess1;
bool HasMiscellaneousExtensions;
+ bool HasTransactionalExecution;
+ bool HasProcessorAssist;
private:
Triple TargetTriple;
@@ -102,6 +104,12 @@ public:
return HasMiscellaneousExtensions;
}
+ // Return true if the target has the transactional-execution facility.
+ bool hasTransactionalExecution() const { return HasTransactionalExecution; }
+
+ // Return true if the target has the processor-assist facility.
+ bool hasProcessorAssist() const { return HasProcessorAssist; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
Index: llvm-head/lib/Support/Triple.cpp
===================================================================
--- llvm-head.orig/lib/Support/Triple.cpp
+++ llvm-head/lib/Support/Triple.cpp
@@ -92,7 +92,7 @@ const char *Triple::getArchTypePrefix(Ar
case sparcv9:
case sparc: return "sparc";
- case systemz: return "systemz";
+ case systemz: return "s390";
case x86:
case x86_64: return "x86";
Index: llvm-head/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm-head.orig/include/llvm/IR/Intrinsics.td
+++ llvm-head/include/llvm/IR/Intrinsics.td
@@ -634,3 +634,4 @@ include "llvm/IR/IntrinsicsNVVM.td"
include "llvm/IR/IntrinsicsMips.td"
include "llvm/IR/IntrinsicsR600.td"
include "llvm/IR/IntrinsicsBPF.td"
+include "llvm/IR/IntrinsicsSystemZ.td"
Index: llvm-head/include/llvm/IR/IntrinsicsSystemZ.td
===================================================================
--- /dev/null
+++ llvm-head/include/llvm/IR/IntrinsicsSystemZ.td
@@ -0,0 +1,46 @@
+//===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the SystemZ-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Transactional-execution intrinsics
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "s390" in {
+ def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tabort : Intrinsic<[], [llvm_i64_ty],
+ [IntrNoReturn, Throws]>;
+
+ def int_s390_tend : GCCBuiltin<"__builtin_tend">,
+ Intrinsic<[llvm_i32_ty], []>;
+
+ def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+
+ def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty],
+ [IntrReadWriteArgMem]>;
+
+ def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">,
+ Intrinsic<[], [llvm_i32_ty]>;
+}
+
Index: llvm-head/lib/Target/SystemZ/SystemZ.h
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZ.h
+++ llvm-head/lib/Target/SystemZ/SystemZ.h
@@ -68,6 +68,18 @@ const unsigned CCMASK_TM_MSB_0 = C
const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
const unsigned CCMASK_TM = CCMASK_ANY;
+// Condition-code mask assignments for TRANSACTION_BEGIN.
+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0;
+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2;
+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3;
+const unsigned CCMASK_TBEGIN = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_END.
+const unsigned CCMASK_TEND_TX = CCMASK_0;
+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
Index: llvm-head/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm-head/lib/Target/SystemZ/SystemZISelLowering.h
@@ -146,6 +146,15 @@ enum {
// Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
SERIALIZE,
+ // Transaction begin. The first operand is the chain, the second
+ // the TDB pointer, and the third the immediate control field.
+ // Returns chain and glue.
+ TBEGIN,
+ TBEGIN_NOFLOAT,
+
+ // Transaction end. Just the chain operand. Returns chain and glue.
+ TEND,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -318,6 +327,7 @@ private:
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -355,6 +365,10 @@ private:
MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
+ MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const;
};
} // end namespace llvm
Index: llvm-head/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm-head/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
#include <cctype>
using namespace llvm;
@@ -304,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowe
// Codes for which we want to perform some z-specific combinations.
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ // Handle intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
MaxStoresPerMemcpyOptSize = 0;
@@ -1031,6 +1035,53 @@ prepareVolatileOrAtomicLoad(SDValue Chai
return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
+// Return true if Op is an intrinsic node with chain that returns the CC value
+// as its only (other) argument. Provide the associated SystemZISD opcode and
+// the mask of valid CC values if so.
+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
+ unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_tbegin:
+ Opcode = SystemZISD::TBEGIN;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tbegin_nofloat:
+ Opcode = SystemZISD::TBEGIN_NOFLOAT;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tend:
+ Opcode = SystemZISD::TEND;
+ CCValid = SystemZ::CCMASK_TEND;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Emit an intrinsic with chain with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ Ops.push_back(Op.getOperand(0));
+ for (unsigned I = 2; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue OldChain = SDValue(Op.getNode(), 1);
+ SDValue NewChain = SDValue(Intr.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
+ return Intr;
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
@@ -1588,9 +1639,53 @@ static void adjustForTestUnderMask(Selec
C.CCMask = NewCCMask;
}
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ C.CCMask &= CCValid;
+ return C;
+}
+
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
ISD::CondCode Cond) {
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
+ unsigned Opcode, CCValid;
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ }
Comparison C(CmpOp0, CmpOp1);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
@@ -1632,6 +1727,17 @@ static Comparison getCmp(SelectionDAG &D
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (!C.Op1.getNode()) {
+ SDValue Op;
+ switch (C.Op0.getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
+ break;
+ default:
+ llvm_unreachable("Invalid comparison operands");
+ }
+ return SDValue(Op.getNode(), Op->getNumValues() - 1);
+ }
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
DAG.getConstant(C.ICmpType, MVT::i32));
@@ -1713,7 +1819,6 @@ SDValue SystemZTargetLowering::lowerSETC
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
SDValue CmpOp1 = Op.getOperand(3);
@@ -1723,7 +1828,7 @@ SDValue SystemZTargetLowering::lowerBR_C
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
SDValue Glue = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Chain, DAG.getConstant(C.CCValid, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32),
DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
}
@@ -2561,6 +2666,30 @@ SDValue SystemZTargetLowering::lowerPREF
Node->getMemoryVT(), Node->getMemOperand());
}
+// Return an i32 that contains the value of CC immediately after After,
+// whose final operand must be MVT::Glue.
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
+ SDValue Glue = SDValue(After, After->getNumValues() - 1);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue);
+ return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2634,6 +2763,8 @@ SDValue SystemZTargetLowering::LowerOper
return lowerSTACKRESTORE(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return lowerINTRINSIC_W_CHAIN(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -2674,6 +2805,9 @@ const char *SystemZTargetLowering::getTa
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(SERIALIZE);
+ OPCODE(TBEGIN);
+ OPCODE(TBEGIN_NOFLOAT);
+ OPCODE(TEND);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -3501,6 +3635,50 @@ SystemZTargetLowering::emitStringWrapper
return DoneMBB;
}
+// Update TBEGIN instruction with final opcode and register clobbers.
+MachineBasicBlock *
+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const {
+ MachineFunction &MF = *MBB->getParent();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // Update opcode.
+ MI->setDesc(TII->get(Opcode));
+
+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+ // Make sure to add the corresponding GRSM bits if they are missing.
+ uint64_t Control = MI->getOperand(2).getImm();
+ static const unsigned GPRControlBit[16] = {
+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+ };
+ Control |= GPRControlBit[15];
+ if (TFI->hasFP(MF))
+ Control |= GPRControlBit[11];
+ MI->getOperand(2).setImm(Control);
+
+ // Add GPR clobbers.
+ for (int I = 0; I < 16; I++) {
+ if ((Control & GPRControlBit[I]) == 0) {
+ unsigned Reg = SystemZMC::GR64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ // Add FPR clobbers.
+ if (!NoFloat && (Control & 4) != 0) {
+ for (int I = 0; I < 16; I++) {
+ unsigned Reg = SystemZMC::FP64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -3742,6 +3920,12 @@ EmitInstrWithCustomInserter(MachineInstr
return emitStringWrapper(MI, MBB, SystemZ::MVST);
case SystemZ::SRSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::SRST);
+ case SystemZ::TBEGIN:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+ case SystemZ::TBEGIN_nofloat:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+ case SystemZ::TBEGINC:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
default:
llvm_unreachable("Unexpected instr type to insert");
}
Index: llvm-head/test/CodeGen/SystemZ/htm-intrinsics.ll
===================================================================
--- /dev/null
+++ llvm-head/test/CodeGen/SystemZ/htm-intrinsics.ll
@@ -0,0 +1,352 @@
+; Test transactional-execution intrinsics.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
+
+declare i32 @llvm.s390.tbegin(i8 *, i32)
+declare i32 @llvm.s390.tbegin.nofloat(i8 *, i32)
+declare void @llvm.s390.tbeginc(i8 *, i32)
+declare i32 @llvm.s390.tend()
+declare void @llvm.s390.tabort(i64)
+declare void @llvm.s390.ntstg(i64, i64 *)
+declare i32 @llvm.s390.etnd()
+declare void @llvm.s390.ppa.txassist(i32)
+
+; TBEGIN.
+define void @test_tbegin() {
+; CHECK-LABEL: test_tbegin:
+; CHECK-NOT: stmg
+; CHECK: std %f8,
+; CHECK: std %f9,
+; CHECK: std %f10,
+; CHECK: std %f11,
+; CHECK: std %f12,
+; CHECK: std %f13,
+; CHECK: std %f14,
+; CHECK: std %f15,
+; CHECK: tbegin 0, 65292
+; CHECK: ld %f8,
+; CHECK: ld %f9,
+; CHECK: ld %f10,
+; CHECK: ld %f11,
+; CHECK: ld %f12,
+; CHECK: ld %f13,
+; CHECK: ld %f14,
+; CHECK: ld %f15,
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat).
+define void @test_tbegin_nofloat1() {
+; CHECK-LABEL: test_tbegin_nofloat1:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with integer CC return value.
+define i32 @test_tbegin_nofloat2() {
+; CHECK-LABEL: test_tbegin_nofloat2:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with implicit CC check.
+define void @test_tbegin_nofloat3(i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat3:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: jnh {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TBEGIN (nofloat) with dual CC use.
+define i32 @test_tbegin_nofloat4(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat4:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with register.
+define void @test_tbegin_nofloat5(i8 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat5:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0(%r2), 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *%ptr, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0x0f00.
+define void @test_tbegin_nofloat6() {
+; CHECK-LABEL: test_tbegin_nofloat6:
+; CHECK: stmg %r6, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 3840
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 3840)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xf100.
+define void @test_tbegin_nofloat7() {
+; CHECK-LABEL: test_tbegin_nofloat7:
+; CHECK: stmg %r8, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 61696
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 61696)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfe00 -- stack pointer added automatically.
+define void @test_tbegin_nofloat8() {
+; CHECK-LABEL: test_tbegin_nofloat8:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65024)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- no frame pointer needed.
+define void @test_tbegin_nofloat9() {
+; CHECK-LABEL: test_tbegin_nofloat9:
+; CHECK: stmg %r10, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 64256
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- frame pointer added automatically.
+define void @test_tbegin_nofloat10(i64 %n) {
+; CHECK-LABEL: test_tbegin_nofloat10:
+; CHECK: stmg %r11, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ %buf = alloca i8, i64 %n
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGINC.
+define void @test_tbeginc() {
+; CHECK-LABEL: test_tbeginc:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbeginc 0, 65288
+; CHECK: br %r14
+ call void @llvm.s390.tbeginc(i8 *null, i32 65288)
+ ret void
+}
+
+; TEND with integer CC return value.
+define i32 @test_tend1() {
+; CHECK-LABEL: test_tend1:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ ret i32 %res
+}
+
+; TEND with implicit CC check.
+define void @test_tend3(i32 *%ptr) {
+; CHECK-LABEL: test_tend3:
+; CHECK: tend
+; CHECK: je {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TEND with dual CC use.
+define i32 @test_tend2(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tend2:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TABORT with register only.
+define void @test_tabort1(i64 %val) {
+; CHECK-LABEL: test_tabort1:
+; CHECK: tabort 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 %val)
+ ret void
+}
+
+; TABORT with immediate only.
+define void @test_tabort2(i64 %val) {
+; CHECK-LABEL: test_tabort2:
+; CHECK: tabort 1234
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 1234)
+ ret void
+}
+
+; TABORT with register + immediate.
+define void @test_tabort3(i64 %val) {
+; CHECK-LABEL: test_tabort3:
+; CHECK: tabort 1234(%r2)
+; CHECK: br %r14
+ %sum = add i64 %val, 1234
+ call void @llvm.s390.tabort(i64 %sum)
+ ret void
+}
+
+; TABORT with out-of-range immediate.
+define void @test_tabort4(i64 %val) {
+; CHECK-LABEL: test_tabort4:
+; CHECK: tabort 0({{%r[1-5]}})
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 4096)
+ ret void
+}
+
+; NTSTG with base pointer only.
+define void @test_ntstg1(i64 *%ptr, i64 %val) {
+; CHECK-LABEL: test_ntstg1:
+; CHECK: ntstg %r3, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with base and index.
+; Check that NTSTG allows an index.
+define void @test_ntstg2(i64 *%base, i64 %index, i64 %val) {
+; CHECK-LABEL: test_ntstg2:
+; CHECK: sllg [[REG:%r[1-5]]], %r3, 3
+; CHECK: ntstg %r4, 0([[REG]],%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 %index
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the highest in-range displacement.
+define void @test_ntstg3(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg3:
+; CHECK: ntstg %r3, 524280(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65535
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range positive displacement.
+define void @test_ntstg4(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg4:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the lowest in-range displacement.
+define void @test_ntstg5(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg5:
+; CHECK: ntstg %r3, -524288(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range negative displacement.
+define void @test_ntstg6(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg6:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; ETND.
+define i32 @test_etnd() {
+; CHECK-LABEL: test_etnd:
+; CHECK: etnd %r2
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.etnd()
+ ret i32 %res
+}
+
+; PPA (Transaction-Abort Assist)
+define void @test_ppa_txassist(i32 %val) {
+; CHECK-LABEL: test_ppa_txassist:
+; CHECK: ppa %r2, 0, 1
+; CHECK: br %r14
+ call void @llvm.s390.ppa.txassist(i32 %val)
+ ret void
+}
+
Index: llvm-head/test/MC/SystemZ/insn-bad-zEC12.s
===================================================================
--- llvm-head.orig/test/MC/SystemZ/insn-bad-zEC12.s
+++ llvm-head/test/MC/SystemZ/insn-bad-zEC12.s
@@ -3,6 +3,22 @@
# RUN: FileCheck < %t %s
#CHECK: error: invalid operand
+#CHECK: ntstg %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ntstg %r0, 524288
+
+ ntstg %r0, -524289
+ ntstg %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: ppa %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: ppa %r0, %r0, 16
+
+ ppa %r0, %r0, -1
+ ppa %r0, %r0, 16
+
+#CHECK: error: invalid operand
#CHECK: risbgn %r0,%r0,0,0,-1
#CHECK: error: invalid operand
#CHECK: risbgn %r0,%r0,0,0,64
@@ -22,3 +38,47 @@
risbgn %r0,%r0,-1,0,0
risbgn %r0,%r0,256,0,0
+#CHECK: error: invalid operand
+#CHECK: tabort -1
+#CHECK: error: invalid operand
+#CHECK: tabort 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tabort 0(%r1,%r2)
+
+ tabort -1
+ tabort 4096
+ tabort 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: tbegin -1, 0
+#CHECK: error: invalid operand
+#CHECK: tbegin 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tbegin 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tbegin 0, -1
+#CHECK: error: invalid operand
+#CHECK: tbegin 0, 65536
+
+ tbegin -1, 0
+ tbegin 4096, 0
+ tbegin 0(%r1,%r2), 0
+ tbegin 0, -1
+ tbegin 0, 65536
+
+#CHECK: error: invalid operand
+#CHECK: tbeginc -1, 0
+#CHECK: error: invalid operand
+#CHECK: tbeginc 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tbeginc 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tbeginc 0, -1
+#CHECK: error: invalid operand
+#CHECK: tbeginc 0, 65536
+
+ tbeginc -1, 0
+ tbeginc 4096, 0
+ tbeginc 0(%r1,%r2), 0
+ tbeginc 0, -1
+ tbeginc 0, 65536
Index: llvm-head/test/MC/SystemZ/insn-good-zEC12.s
===================================================================
--- llvm-head.orig/test/MC/SystemZ/insn-good-zEC12.s
+++ llvm-head/test/MC/SystemZ/insn-good-zEC12.s
@@ -1,6 +1,48 @@
# For zEC12 and above.
# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 -show-encoding %s | FileCheck %s
+#CHECK: etnd %r0 # encoding: [0xb2,0xec,0x00,0x00]
+#CHECK: etnd %r15 # encoding: [0xb2,0xec,0x00,0xf0]
+#CHECK: etnd %r7 # encoding: [0xb2,0xec,0x00,0x70]
+
+ etnd %r0
+ etnd %r15
+ etnd %r7
+
+#CHECK: ntstg %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x25]
+#CHECK: ntstg %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x25]
+#CHECK: ntstg %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x25]
+#CHECK: ntstg %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x25]
+#CHECK: ntstg %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x25]
+#CHECK: ntstg %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x25]
+#CHECK: ntstg %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x25]
+
+ ntstg %r0, -524288
+ ntstg %r0, -1
+ ntstg %r0, 0
+ ntstg %r0, 1
+ ntstg %r0, 524287
+ ntstg %r0, 0(%r1)
+ ntstg %r0, 0(%r15)
+ ntstg %r0, 524287(%r1,%r15)
+ ntstg %r0, 524287(%r15,%r1)
+ ntstg %r15, 0
+
+#CHECK: ppa %r0, %r0, 0 # encoding: [0xb2,0xe8,0x00,0x00]
+#CHECK: ppa %r0, %r0, 15 # encoding: [0xb2,0xe8,0xf0,0x00]
+#CHECK: ppa %r0, %r15, 0 # encoding: [0xb2,0xe8,0x00,0x0f]
+#CHECK: ppa %r4, %r6, 7 # encoding: [0xb2,0xe8,0x70,0x46]
+#CHECK: ppa %r15, %r0, 0 # encoding: [0xb2,0xe8,0x00,0xf0]
+
+ ppa %r0, %r0, 0
+ ppa %r0, %r0, 15
+ ppa %r0, %r15, 0
+ ppa %r4, %r6, 7
+ ppa %r15, %r0, 0
+
#CHECK: risbgn %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x59]
#CHECK: risbgn %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x59]
#CHECK: risbgn %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x59]
@@ -17,3 +59,68 @@
risbgn %r15,%r0,0,0,0
risbgn %r4,%r5,6,7,8
+#CHECK: tabort 0 # encoding: [0xb2,0xfc,0x00,0x00]
+#CHECK: tabort 0(%r1) # encoding: [0xb2,0xfc,0x10,0x00]
+#CHECK: tabort 0(%r15) # encoding: [0xb2,0xfc,0xf0,0x00]
+#CHECK: tabort 4095 # encoding: [0xb2,0xfc,0x0f,0xff]
+#CHECK: tabort 4095(%r1) # encoding: [0xb2,0xfc,0x1f,0xff]
+#CHECK: tabort 4095(%r15) # encoding: [0xb2,0xfc,0xff,0xff]
+
+ tabort 0
+ tabort 0(%r1)
+ tabort 0(%r15)
+ tabort 4095
+ tabort 4095(%r1)
+ tabort 4095(%r15)
+
+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
+#CHECK: tbegin 4095, 0 # encoding: [0xe5,0x60,0x0f,0xff,0x00,0x00]
+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
+#CHECK: tbegin 0, 1 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x01]
+#CHECK: tbegin 0, 32767 # encoding: [0xe5,0x60,0x00,0x00,0x7f,0xff]
+#CHECK: tbegin 0, 32768 # encoding: [0xe5,0x60,0x00,0x00,0x80,0x00]
+#CHECK: tbegin 0, 65535 # encoding: [0xe5,0x60,0x00,0x00,0xff,0xff]
+#CHECK: tbegin 0(%r1), 42 # encoding: [0xe5,0x60,0x10,0x00,0x00,0x2a]
+#CHECK: tbegin 0(%r15), 42 # encoding: [0xe5,0x60,0xf0,0x00,0x00,0x2a]
+#CHECK: tbegin 4095(%r1), 42 # encoding: [0xe5,0x60,0x1f,0xff,0x00,0x2a]
+#CHECK: tbegin 4095(%r15), 42 # encoding: [0xe5,0x60,0xff,0xff,0x00,0x2a]
+
+ tbegin 0, 0
+ tbegin 4095, 0
+ tbegin 0, 0
+ tbegin 0, 1
+ tbegin 0, 32767
+ tbegin 0, 32768
+ tbegin 0, 65535
+ tbegin 0(%r1), 42
+ tbegin 0(%r15), 42
+ tbegin 4095(%r1), 42
+ tbegin 4095(%r15), 42
+
+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
+#CHECK: tbeginc 4095, 0 # encoding: [0xe5,0x61,0x0f,0xff,0x00,0x00]
+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
+#CHECK: tbeginc 0, 1 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x01]
+#CHECK: tbeginc 0, 32767 # encoding: [0xe5,0x61,0x00,0x00,0x7f,0xff]
+#CHECK: tbeginc 0, 32768 # encoding: [0xe5,0x61,0x00,0x00,0x80,0x00]
+#CHECK: tbeginc 0, 65535 # encoding: [0xe5,0x61,0x00,0x00,0xff,0xff]
+#CHECK: tbeginc 0(%r1), 42 # encoding: [0xe5,0x61,0x10,0x00,0x00,0x2a]
+#CHECK: tbeginc 0(%r15), 42 # encoding: [0xe5,0x61,0xf0,0x00,0x00,0x2a]
+#CHECK: tbeginc 4095(%r1), 42 # encoding: [0xe5,0x61,0x1f,0xff,0x00,0x2a]
+#CHECK: tbeginc 4095(%r15), 42 # encoding: [0xe5,0x61,0xff,0xff,0x00,0x2a]
+
+ tbeginc 0, 0
+ tbeginc 4095, 0
+ tbeginc 0, 0
+ tbeginc 0, 1
+ tbeginc 0, 32767
+ tbeginc 0, 32768
+ tbeginc 0, 65535
+ tbeginc 0(%r1), 42
+ tbeginc 0(%r15), 42
+ tbeginc 4095(%r1), 42
+ tbeginc 4095(%r15), 42
+
+#CHECK: tend # encoding: [0xb2,0xf8,0x00,0x00]
+
+ tend
Index: llvm-head/test/MC/SystemZ/insn-bad-z196.s
===================================================================
--- llvm-head.orig/test/MC/SystemZ/insn-bad-z196.s
+++ llvm-head/test/MC/SystemZ/insn-bad-z196.s
@@ -244,6 +244,11 @@
cxlgbr %f0, 16, %r0, 0
cxlgbr %f2, 0, %r0, 0
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: etnd %r7
+
+ etnd %r7
+
#CHECK: error: invalid operand
#CHECK: fidbra %f0, 0, %f0, -1
#CHECK: error: invalid operand
@@ -546,6 +551,16 @@
locr %r0,%r0,-1
locr %r0,%r0,16
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: ntstg %r0, 524287(%r1,%r15)
+
+ ntstg %r0, 524287(%r1,%r15)
+
+#CHECK: error: {{(instruction requires: processor-assist)?}}
+#CHECK: ppa %r4, %r6, 7
+
+ ppa %r4, %r6, 7
+
#CHECK: error: {{(instruction requires: miscellaneous-extensions)?}}
#CHECK: risbgn %r1, %r2, 0, 0, 0
@@ -690,3 +705,24 @@
stocg %r0,-524289,1
stocg %r0,524288,1
stocg %r0,0(%r1,%r2),1
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tabort 4095(%r1)
+
+ tabort 4095(%r1)
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tbegin 4095(%r1), 42
+
+ tbegin 4095(%r1), 42
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tbeginc 4095(%r1), 42
+
+ tbeginc 4095(%r1), 42
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tend
+
+ tend
+
Index: llvm-head/test/MC/Disassembler/SystemZ/insns.txt
===================================================================
--- llvm-head.orig/test/MC/Disassembler/SystemZ/insns.txt
+++ llvm-head/test/MC/Disassembler/SystemZ/insns.txt
@@ -2503,6 +2503,15 @@
# CHECK: ear %r15, %a15
0xb2 0x4f 0x00 0xff
+# CHECK: etnd %r0
+0xb2 0xec 0x00 0x00
+
+# CHECK: etnd %r15
+0xb2 0xec 0x00 0xf0
+
+# CHECK: etnd %r7
+0xb2 0xec 0x00 0x70
+
# CHECK: fidbr %f0, 0, %f0
0xb3 0x5f 0x00 0x00
@@ -6034,6 +6043,36 @@
# CHECK: ny %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x54
+# CHECK: ntstg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x25
+
+# CHECK: ntstg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x25
+
+# CHECK: ntstg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x25
+
+# CHECK: ntstg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x25
+
+# CHECK: ntstg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x25
+
+# CHECK: ntstg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x25
+
+# CHECK: ntstg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x25
+
# CHECK: oc 0(1), 0
0xd6 0x00 0x00 0x00 0x00 0x00
@@ -6346,6 +6385,21 @@
# CHECK: popcnt %r7, %r8
0xb9 0xe1 0x00 0x78
+# CHECK: ppa %r0, %r0, 0
+0xb2 0xe8 0x00 0x00
+
+# CHECK: ppa %r0, %r0, 15
+0xb2 0xe8 0xf0 0x00
+
+# CHECK: ppa %r0, %r15, 0
+0xb2 0xe8 0x00 0x0f
+
+# CHECK: ppa %r4, %r6, 7
+0xb2 0xe8 0x70 0x46
+
+# CHECK: ppa %r15, %r0, 0
+0xb2 0xe8 0x00 0xf0
+
# CHECK: risbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x55
@@ -8062,6 +8116,93 @@
# CHECK: sy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x5b
+# CHECK: tabort 0
+0xb2 0xfc 0x00 0x00
+
+# CHECK: tabort 0(%r1)
+0xb2 0xfc 0x10 0x00
+
+# CHECK: tabort 0(%r15)
+0xb2 0xfc 0xf0 0x00
+
+# CHECK: tabort 4095
+0xb2 0xfc 0x0f 0xff
+
+# CHECK: tabort 4095(%r1)
+0xb2 0xfc 0x1f 0xff
+
+# CHECK: tabort 4095(%r15)
+0xb2 0xfc 0xff 0xff
+
+# CHECK: tbegin 0, 0
+0xe5 0x60 0x00 0x00 0x00 0x00
+
+# CHECK: tbegin 4095, 0
+0xe5 0x60 0x0f 0xff 0x00 0x00
+
+# CHECK: tbegin 0, 0
+0xe5 0x60 0x00 0x00 0x00 0x00
+
+# CHECK: tbegin 0, 1
+0xe5 0x60 0x00 0x00 0x00 0x01
+
+# CHECK: tbegin 0, 32767
+0xe5 0x60 0x00 0x00 0x7f 0xff
+
+# CHECK: tbegin 0, 32768
+0xe5 0x60 0x00 0x00 0x80 0x00
+
+# CHECK: tbegin 0, 65535
+0xe5 0x60 0x00 0x00 0xff 0xff
+
+# CHECK: tbegin 0(%r1), 42
+0xe5 0x60 0x10 0x00 0x00 0x2a
+
+# CHECK: tbegin 0(%r15), 42
+0xe5 0x60 0xf0 0x00 0x00 0x2a
+
+# CHECK: tbegin 4095(%r1), 42
+0xe5 0x60 0x1f 0xff 0x00 0x2a
+
+# CHECK: tbegin 4095(%r15), 42
+0xe5 0x60 0xff 0xff 0x00 0x2a
+
+# CHECK: tbeginc 0, 0
+0xe5 0x61 0x00 0x00 0x00 0x00
+
+# CHECK: tbeginc 4095, 0
+0xe5 0x61 0x0f 0xff 0x00 0x00
+
+# CHECK: tbeginc 0, 0
+0xe5 0x61 0x00 0x00 0x00 0x00
+
+# CHECK: tbeginc 0, 1
+0xe5 0x61 0x00 0x00 0x00 0x01
+
+# CHECK: tbeginc 0, 32767
+0xe5 0x61 0x00 0x00 0x7f 0xff
+
+# CHECK: tbeginc 0, 32768
+0xe5 0x61 0x00 0x00 0x80 0x00
+
+# CHECK: tbeginc 0, 65535
+0xe5 0x61 0x00 0x00 0xff 0xff
+
+# CHECK: tbeginc 0(%r1), 42
+0xe5 0x61 0x10 0x00 0x00 0x2a
+
+# CHECK: tbeginc 0(%r15), 42
+0xe5 0x61 0xf0 0x00 0x00 0x2a
+
+# CHECK: tbeginc 4095(%r1), 42
+0xe5 0x61 0x1f 0xff 0x00 0x2a
+
+# CHECK: tbeginc 4095(%r15), 42
+0xe5 0x61 0xff 0xff 0x00 0x2a
+
+# CHECK: tend
+0xb2 0xf8 0x00 0x00
+
# CHECK: tm 0, 0
0x91 0x00 0x00 0x00
llvm-svn: 233803
2015-04-01 20:51:43 +08:00
|
|
|
def SDT_ZTBegin : SDTypeProfile<0, 2,
|
|
|
|
[SDTCisPtrTy<0>,
|
|
|
|
SDTCisVT<1, i32>]>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
def SDT_ZInsertVectorElt : SDTypeProfile<1, 3,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisVT<3, i32>]>;
|
|
|
|
def SDT_ZExtractVectorElt : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVec<1>,
|
|
|
|
SDTCisVT<2, i32>]>;
|
|
|
|
def SDT_ZReplicate : SDTypeProfile<1, 1,
|
|
|
|
[SDTCisVec<0>]>;
|
2015-05-06 03:27:45 +08:00
|
|
|
def SDT_ZVecUnaryConv : SDTypeProfile<1, 1,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisVec<1>]>;
|
2015-05-06 03:31:09 +08:00
|
|
|
def SDT_ZVecUnary : SDTypeProfile<1, 1,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisSameAs<0, 1>]>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
def SDT_ZVecBinary : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisSameAs<0, 2>]>;
|
|
|
|
def SDT_ZVecBinaryInt : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisVT<2, i32>]>;
|
|
|
|
def SDT_ZVecBinaryConv : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisVec<1>,
|
|
|
|
SDTCisSameAs<1, 2>]>;
|
2015-05-06 03:31:09 +08:00
|
|
|
def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisVec<1>,
|
|
|
|
SDTCisVT<2, i32>]>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
def SDT_ZRotateMask : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisVT<1, i32>,
|
|
|
|
SDTCisVT<2, i32>]>;
|
|
|
|
def SDT_ZJoinDwords : SDTypeProfile<1, 2,
|
|
|
|
[SDTCisVT<0, v2i64>,
|
|
|
|
SDTCisVT<1, i64>,
|
|
|
|
SDTCisVT<2, i64>]>;
|
|
|
|
def SDT_ZVecTernary : SDTypeProfile<1, 3,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisSameAs<0, 2>,
|
|
|
|
SDTCisSameAs<0, 3>]>;
|
|
|
|
def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisSameAs<0, 2>,
|
|
|
|
SDTCisVT<3, i32>]>;
|
2015-05-06 03:31:09 +08:00
|
|
|
def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4,
|
|
|
|
[SDTCisVec<0>,
|
|
|
|
SDTCisSameAs<0, 1>,
|
|
|
|
SDTCisSameAs<0, 2>,
|
|
|
|
SDTCisSameAs<0, 3>,
|
|
|
|
SDTCisVT<4, i32>]>;
|
2013-05-07 00:15:19 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Node definitions
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// These are target-independent nodes, but have target-specific formats.
|
|
|
|
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
|
|
|
|
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
|
|
|
|
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
|
|
|
|
[SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
|
|
|
|
SDNPOutGlue]>;
|
2015-02-18 17:13:27 +08:00
|
|
|
def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>;
|
2013-05-07 00:15:19 +08:00
|
|
|
|
|
|
|
// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details.
|
|
|
|
def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
|
|
|
|
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
|
|
|
|
def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
|
|
|
|
SDNPVariadic]>;
|
2013-08-19 20:42:31 +08:00
|
|
|
def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
|
|
|
|
SDNPVariadic]>;
|
2015-02-18 17:13:27 +08:00
|
|
|
def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall,
|
|
|
|
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
|
|
|
|
SDNPVariadic]>;
|
|
|
|
def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall,
|
|
|
|
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
|
|
|
|
SDNPVariadic]>;
|
2013-05-07 00:15:19 +08:00
|
|
|
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
|
2013-09-27 23:14:04 +08:00
|
|
|
def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
|
|
|
|
SDT_ZWrapOffset, []>;
|
2013-12-13 23:35:00 +08:00
|
|
|
def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>;
|
2013-09-06 19:51:39 +08:00
|
|
|
def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>;
|
|
|
|
def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>;
|
2013-09-10 18:20:32 +08:00
|
|
|
def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>;
|
2013-05-07 00:15:19 +08:00
|
|
|
def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
|
|
|
|
[SDNPHasChain, SDNPInGlue]>;
|
|
|
|
def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
|
|
|
|
[SDNPInGlue]>;
|
|
|
|
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
|
|
|
|
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
|
|
|
|
SDT_ZExtractAccess>;
|
2015-03-31 20:56:33 +08:00
|
|
|
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
|
2013-05-07 00:15:19 +08:00
|
|
|
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
|
2013-07-02 23:40:22 +08:00
|
|
|
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
|
2013-05-07 00:15:19 +08:00
|
|
|
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
|
|
|
|
def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
|
|
|
|
def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
|
|
|
|
|
2013-12-10 18:36:34 +08:00
|
|
|
def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
|
|
|
|
[SDNPHasChain, SDNPMayStore]>;
|
2016-04-04 20:45:44 +08:00
|
|
|
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
|
|
|
|
[SDNPHasChain, SDNPSideEffect]>;
|
2013-12-10 18:36:34 +08:00
|
|
|
|
2016-05-17 04:32:22 +08:00
|
|
|
def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
|
|
|
|
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
|
|
|
def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
|
|
|
|
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
// Defined because the index is an i32 rather than a pointer.
|
|
|
|
def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
|
|
|
|
SDT_ZInsertVectorElt>;
|
|
|
|
def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
|
|
|
|
SDT_ZExtractVectorElt>;
|
|
|
|
def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
|
|
|
|
def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
|
|
|
|
def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
|
|
|
|
def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
|
|
|
|
def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>;
|
|
|
|
def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>;
|
|
|
|
def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>;
|
|
|
|
def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>;
|
|
|
|
def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS",
|
|
|
|
SDT_ZVecTernaryInt>;
|
|
|
|
def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
|
|
|
|
def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
|
2015-05-06 03:31:09 +08:00
|
|
|
def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv,
|
|
|
|
[SDNPOutGlue]>;
|
2015-05-06 03:29:21 +08:00
|
|
|
def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
|
|
|
|
def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
|
|
|
|
def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
|
|
|
|
def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR",
|
|
|
|
SDT_ZVecBinaryInt>;
|
|
|
|
def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
|
|
|
|
SDT_ZVecBinaryInt>;
|
|
|
|
def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
|
|
|
|
SDT_ZVecBinaryInt>;
|
|
|
|
def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
|
|
|
|
def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
|
|
|
|
def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
|
|
|
|
def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
|
2015-05-06 03:31:09 +08:00
|
|
|
def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary,
|
|
|
|
[SDNPOutGlue]>;
|
2015-05-06 03:26:48 +08:00
|
|
|
def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
|
|
|
|
def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
|
|
|
|
def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
|
2015-05-06 03:31:09 +08:00
|
|
|
def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv,
|
|
|
|
[SDNPOutGlue]>;
|
2015-05-06 03:27:45 +08:00
|
|
|
def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
|
|
|
|
def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
|
2015-05-06 03:31:09 +08:00
|
|
|
def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>;
|
|
|
|
def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt,
|
|
|
|
[SDNPOutGlue]>;
|
|
|
|
def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
|
|
|
|
SDT_ZVecQuaternaryInt, [SDNPOutGlue]>;
|
|
|
|
def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt,
|
|
|
|
[SDNPOutGlue]>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
|
|
|
|
: SDNode<"SystemZISD::"##name, profile,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
|
|
|
|
|
|
|
|
def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">;
|
|
|
|
def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">;
|
|
|
|
def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">;
|
|
|
|
def z_atomic_loadw_and : AtomicWOp<"ATOMIC_LOADW_AND">;
|
|
|
|
def z_atomic_loadw_or : AtomicWOp<"ATOMIC_LOADW_OR">;
|
|
|
|
def z_atomic_loadw_xor : AtomicWOp<"ATOMIC_LOADW_XOR">;
|
|
|
|
def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">;
|
|
|
|
def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">;
|
|
|
|
def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">;
|
|
|
|
def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
|
|
|
|
def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
|
|
|
|
def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
|
|
|
|
|
2013-08-12 18:17:33 +08:00
|
|
|
def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
|
2013-07-08 17:35:23 +08:00
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
2013-08-27 17:54:29 +08:00
|
|
|
def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
2013-09-05 18:36:45 +08:00
|
|
|
def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
|
|
|
def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
|
|
|
def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
|
|
|
def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
|
|
|
def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
|
|
|
def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
2013-08-12 18:17:33 +08:00
|
|
|
def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength,
|
2013-08-12 18:28:10 +08:00
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
|
2013-08-27 17:54:29 +08:00
|
|
|
def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
|
2013-08-16 19:21:54 +08:00
|
|
|
def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
|
2013-08-16 19:29:37 +08:00
|
|
|
def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
|
|
|
|
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
|
2013-08-16 19:41:43 +08:00
|
|
|
def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
|
2013-08-12 18:28:10 +08:00
|
|
|
def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
|
|
|
|
[SDNPInGlue]>;
|
2013-08-23 19:36:42 +08:00
|
|
|
def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
|
|
|
|
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
|
|
|
|
SDNPMemOperand]>;
|
2013-07-08 17:35:23 +08:00
|
|
|
|
[SystemZ] Support transactional execution on zEC12
The zEC12 provides the transactional-execution facility. This is exposed
to users via a set of builtin routines on other compilers. This patch
adds LLVM support to enable those builtins. In particular, the patch:
- adds the transactional-execution and processor-assist facilities
- adds MC support for all instructions provided by those facilities
- adds LLVM intrinsics for those instructions and hooks them up for CodeGen
- adds CodeGen support to optimize CC return value checking
Since this is the first use of target-specific intrinsics on the platform,
the patch creates the include/llvm/IR/IntrinsicsSystemZ.td file and
hooks it up in Intrinsics.td. I've also changed Triple::getArchTypePrefix
to return "s390" instead of "systemz", since the naming convention for
GCC intrinsics uses "s390" on the platform, and it seemed more straight-
forward to use the same convention for LLVM IR intrinsics.
An associated clang patch makes the intrinsics (and command line switches)
available at the source-language level.
For reference, the transactional-execution instructions are documented
in the z/Architecture Principles of Operation for the zEC12:
http://publibfp.boulder.ibm.com/cgi-bin/bookmgr/download/DZ9ZR009.pdf
The associated builtins are documented in the GCC manual:
http://gcc.gnu.org/onlinedocs/gcc/S_002f390-System-z-Built-in-Functions.html
Index: llvm-head/lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZOperators.td
+++ llvm-head/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,9 @@ def SDT_ZI32Intrinsic : SDTypeProf
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
+def SDT_ZTBegin : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -180,6 +183,15 @@ def z_prefetch : SDNode<"System
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
+def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
Index: llvm-head/lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZInstrFormats.td
+++ llvm-head/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -473,6 +473,17 @@ class InstSS<bits<8> op, dag outs, dag i
let Inst{15-0} = BD2;
}
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD2;
+
+ let Inst{31-16} = op;
+ let Inst{15-0} = BD2;
+}
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
Index: llvm-head/lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZInstrInfo.td
+++ llvm-head/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1362,6 +1362,60 @@ let Defs = [CC] in {
}
//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureTransactionalExecution] in {
+ // Transaction Begin
+ let hasSideEffects = 1, mayStore = 1,
+ usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : InstSIL<0xE560,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbegin\t$BD1, $I2",
+ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
+ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ [(z_tbegin_nofloat bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ def TBEGINC : InstSIL<0xE561,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbeginc\t$BD1, $I2",
+ [(int_s390_tbeginc bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ }
+
+ // Transaction End
+ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
+ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
+
+ // Transaction Abort
+ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
+ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
+ "tabort\t$BD2",
+ [(int_s390_tabort bdaddr12only:$BD2)]>;
+
+ // Nontransactional Store
+ let hasSideEffects = 1 in
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+
+ // Extract Transaction Nesting Depth
+ let hasSideEffects = 1 in
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureProcessorAssist] in {
+ let hasSideEffects = 1, R4 = 0 in
+ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
+ "ppa\t$R1, $R2, $R3", []>;
+ def : Pat<(int_s390_ppa_txassist GR32:$src),
+ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ 0, 1)>;
+}
+
+//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
Index: llvm-head/lib/Target/SystemZ/SystemZProcessors.td
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZProcessors.td
+++ llvm-head/lib/Target/SystemZ/SystemZProcessors.td
@@ -60,6 +60,16 @@ def FeatureMiscellaneousExtensions : Sys
"Assume that the miscellaneous-extensions facility is installed"
>;
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
@@ -70,4 +80,5 @@ def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
FeatureFPExtension, FeaturePopulationCount,
FeatureFastSerialization, FeatureInterlockedAccess1,
- FeatureMiscellaneousExtensions]>;
+ FeatureMiscellaneousExtensions,
+ FeatureTransactionalExecution, FeatureProcessorAssist]>;
Index: llvm-head/lib/Target/SystemZ/SystemZSubtarget.cpp
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ llvm-head/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -40,6 +40,7 @@ SystemZSubtarget::SystemZSubtarget(const
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
HasPopulationCount(false), HasFastSerialization(false),
HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
+ HasTransactionalExecution(false), HasProcessorAssist(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
Index: llvm-head/lib/Target/SystemZ/SystemZSubtarget.h
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZSubtarget.h
+++ llvm-head/lib/Target/SystemZ/SystemZSubtarget.h
@@ -42,6 +42,8 @@ protected:
bool HasFastSerialization;
bool HasInterlockedAccess1;
bool HasMiscellaneousExtensions;
+ bool HasTransactionalExecution;
+ bool HasProcessorAssist;
private:
Triple TargetTriple;
@@ -102,6 +104,12 @@ public:
return HasMiscellaneousExtensions;
}
+ // Return true if the target has the transactional-execution facility.
+ bool hasTransactionalExecution() const { return HasTransactionalExecution; }
+
+ // Return true if the target has the processor-assist facility.
+ bool hasProcessorAssist() const { return HasProcessorAssist; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
Index: llvm-head/lib/Support/Triple.cpp
===================================================================
--- llvm-head.orig/lib/Support/Triple.cpp
+++ llvm-head/lib/Support/Triple.cpp
@@ -92,7 +92,7 @@ const char *Triple::getArchTypePrefix(Ar
case sparcv9:
case sparc: return "sparc";
- case systemz: return "systemz";
+ case systemz: return "s390";
case x86:
case x86_64: return "x86";
Index: llvm-head/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm-head.orig/include/llvm/IR/Intrinsics.td
+++ llvm-head/include/llvm/IR/Intrinsics.td
@@ -634,3 +634,4 @@ include "llvm/IR/IntrinsicsNVVM.td"
include "llvm/IR/IntrinsicsMips.td"
include "llvm/IR/IntrinsicsR600.td"
include "llvm/IR/IntrinsicsBPF.td"
+include "llvm/IR/IntrinsicsSystemZ.td"
Index: llvm-head/include/llvm/IR/IntrinsicsSystemZ.td
===================================================================
--- /dev/null
+++ llvm-head/include/llvm/IR/IntrinsicsSystemZ.td
@@ -0,0 +1,46 @@
+//===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the SystemZ-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Transactional-execution intrinsics
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "s390" in {
+ def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoDuplicate]>;
+
+ def int_s390_tabort : Intrinsic<[], [llvm_i64_ty],
+ [IntrNoReturn, Throws]>;
+
+ def int_s390_tend : GCCBuiltin<"__builtin_tend">,
+ Intrinsic<[llvm_i32_ty], []>;
+
+ def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+
+ def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty],
+ [IntrReadWriteArgMem]>;
+
+ def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">,
+ Intrinsic<[], [llvm_i32_ty]>;
+}
+
Index: llvm-head/lib/Target/SystemZ/SystemZ.h
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZ.h
+++ llvm-head/lib/Target/SystemZ/SystemZ.h
@@ -68,6 +68,18 @@ const unsigned CCMASK_TM_MSB_0 = C
const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
const unsigned CCMASK_TM = CCMASK_ANY;
+// Condition-code mask assignments for TRANSACTION_BEGIN.
+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0;
+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2;
+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3;
+const unsigned CCMASK_TBEGIN = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_END.
+const unsigned CCMASK_TEND_TX = CCMASK_0;
+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
Index: llvm-head/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm-head/lib/Target/SystemZ/SystemZISelLowering.h
@@ -146,6 +146,15 @@ enum {
// Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
SERIALIZE,
+ // Transaction begin. The first operand is the chain, the second
+ // the TDB pointer, and the third the immediate control field.
+ // Returns chain and glue.
+ TBEGIN,
+ TBEGIN_NOFLOAT,
+
+ // Transaction end. Just the chain operand. Returns chain and glue.
+ TEND,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -318,6 +327,7 @@ private:
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -355,6 +365,10 @@ private:
MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
+ MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const;
};
} // end namespace llvm
Index: llvm-head/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm-head.orig/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm-head/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
#include <cctype>
using namespace llvm;
@@ -304,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowe
// Codes for which we want to perform some z-specific combinations.
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ // Handle intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
MaxStoresPerMemcpyOptSize = 0;
@@ -1031,6 +1035,53 @@ prepareVolatileOrAtomicLoad(SDValue Chai
return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
+// Return true if Op is an intrinsic node with chain that returns the CC value
+// as its only (other) argument. Provide the associated SystemZISD opcode and
+// the mask of valid CC values if so.
+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
+ unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_tbegin:
+ Opcode = SystemZISD::TBEGIN;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tbegin_nofloat:
+ Opcode = SystemZISD::TBEGIN_NOFLOAT;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tend:
+ Opcode = SystemZISD::TEND;
+ CCValid = SystemZ::CCMASK_TEND;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Emit an intrinsic with chain with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ Ops.push_back(Op.getOperand(0));
+ for (unsigned I = 2; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue OldChain = SDValue(Op.getNode(), 1);
+ SDValue NewChain = SDValue(Intr.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
+ return Intr;
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
@@ -1588,9 +1639,53 @@ static void adjustForTestUnderMask(Selec
C.CCMask = NewCCMask;
}
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ C.CCMask &= CCValid;
+ return C;
+}
+
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
ISD::CondCode Cond) {
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
+ unsigned Opcode, CCValid;
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ }
Comparison C(CmpOp0, CmpOp1);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
@@ -1632,6 +1727,17 @@ static Comparison getCmp(SelectionDAG &D
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (!C.Op1.getNode()) {
+ SDValue Op;
+ switch (C.Op0.getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
+ break;
+ default:
+ llvm_unreachable("Invalid comparison operands");
+ }
+ return SDValue(Op.getNode(), Op->getNumValues() - 1);
+ }
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
DAG.getConstant(C.ICmpType, MVT::i32));
@@ -1713,7 +1819,6 @@ SDValue SystemZTargetLowering::lowerSETC
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
SDValue CmpOp1 = Op.getOperand(3);
@@ -1723,7 +1828,7 @@ SDValue SystemZTargetLowering::lowerBR_C
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
SDValue Glue = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Chain, DAG.getConstant(C.CCValid, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32),
DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
}
@@ -2561,6 +2666,30 @@ SDValue SystemZTargetLowering::lowerPREF
Node->getMemoryVT(), Node->getMemOperand());
}
+// Return an i32 that contains the value of CC immediately after After,
+// whose final operand must be MVT::Glue.
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
+ SDValue Glue = SDValue(After, After->getNumValues() - 1);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue);
+ return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2634,6 +2763,8 @@ SDValue SystemZTargetLowering::LowerOper
return lowerSTACKRESTORE(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return lowerINTRINSIC_W_CHAIN(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -2674,6 +2805,9 @@ const char *SystemZTargetLowering::getTa
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(SERIALIZE);
+ OPCODE(TBEGIN);
+ OPCODE(TBEGIN_NOFLOAT);
+ OPCODE(TEND);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -3501,6 +3635,50 @@ SystemZTargetLowering::emitStringWrapper
return DoneMBB;
}
+// Update TBEGIN instruction with final opcode and register clobbers.
+MachineBasicBlock *
+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const {
+ MachineFunction &MF = *MBB->getParent();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // Update opcode.
+ MI->setDesc(TII->get(Opcode));
+
+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+ // Make sure to add the corresponding GRSM bits if they are missing.
+ uint64_t Control = MI->getOperand(2).getImm();
+ static const unsigned GPRControlBit[16] = {
+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+ };
+ Control |= GPRControlBit[15];
+ if (TFI->hasFP(MF))
+ Control |= GPRControlBit[11];
+ MI->getOperand(2).setImm(Control);
+
+ // Add GPR clobbers.
+ for (int I = 0; I < 16; I++) {
+ if ((Control & GPRControlBit[I]) == 0) {
+ unsigned Reg = SystemZMC::GR64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ // Add FPR clobbers.
+ if (!NoFloat && (Control & 4) != 0) {
+ for (int I = 0; I < 16; I++) {
+ unsigned Reg = SystemZMC::FP64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -3742,6 +3920,12 @@ EmitInstrWithCustomInserter(MachineInstr
return emitStringWrapper(MI, MBB, SystemZ::MVST);
case SystemZ::SRSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::SRST);
+ case SystemZ::TBEGIN:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+ case SystemZ::TBEGIN_nofloat:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+ case SystemZ::TBEGINC:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
default:
llvm_unreachable("Unexpected instr type to insert");
}
Index: llvm-head/test/CodeGen/SystemZ/htm-intrinsics.ll
===================================================================
--- /dev/null
+++ llvm-head/test/CodeGen/SystemZ/htm-intrinsics.ll
@@ -0,0 +1,352 @@
+; Test transactional-execution intrinsics.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
+
+declare i32 @llvm.s390.tbegin(i8 *, i32)
+declare i32 @llvm.s390.tbegin.nofloat(i8 *, i32)
+declare void @llvm.s390.tbeginc(i8 *, i32)
+declare i32 @llvm.s390.tend()
+declare void @llvm.s390.tabort(i64)
+declare void @llvm.s390.ntstg(i64, i64 *)
+declare i32 @llvm.s390.etnd()
+declare void @llvm.s390.ppa.txassist(i32)
+
+; TBEGIN.
+define void @test_tbegin() {
+; CHECK-LABEL: test_tbegin:
+; CHECK-NOT: stmg
+; CHECK: std %f8,
+; CHECK: std %f9,
+; CHECK: std %f10,
+; CHECK: std %f11,
+; CHECK: std %f12,
+; CHECK: std %f13,
+; CHECK: std %f14,
+; CHECK: std %f15,
+; CHECK: tbegin 0, 65292
+; CHECK: ld %f8,
+; CHECK: ld %f9,
+; CHECK: ld %f10,
+; CHECK: ld %f11,
+; CHECK: ld %f12,
+; CHECK: ld %f13,
+; CHECK: ld %f14,
+; CHECK: ld %f15,
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat).
+define void @test_tbegin_nofloat1() {
+; CHECK-LABEL: test_tbegin_nofloat1:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with integer CC return value.
+define i32 @test_tbegin_nofloat2() {
+; CHECK-LABEL: test_tbegin_nofloat2:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with implicit CC check.
+define void @test_tbegin_nofloat3(i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat3:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: jnh {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TBEGIN (nofloat) with dual CC use.
+define i32 @test_tbegin_nofloat4(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat4:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with register.
+define void @test_tbegin_nofloat5(i8 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat5:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0(%r2), 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *%ptr, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0x0f00.
+define void @test_tbegin_nofloat6() {
+; CHECK-LABEL: test_tbegin_nofloat6:
+; CHECK: stmg %r6, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 3840
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 3840)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xf100.
+define void @test_tbegin_nofloat7() {
+; CHECK-LABEL: test_tbegin_nofloat7:
+; CHECK: stmg %r8, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 61696
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 61696)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfe00 -- stack pointer added automatically.
+define void @test_tbegin_nofloat8() {
+; CHECK-LABEL: test_tbegin_nofloat8:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65024)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- no frame pointer needed.
+define void @test_tbegin_nofloat9() {
+; CHECK-LABEL: test_tbegin_nofloat9:
+; CHECK: stmg %r10, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 64256
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- frame pointer added automatically.
+define void @test_tbegin_nofloat10(i64 %n) {
+; CHECK-LABEL: test_tbegin_nofloat10:
+; CHECK: stmg %r11, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ %buf = alloca i8, i64 %n
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGINC.
+define void @test_tbeginc() {
+; CHECK-LABEL: test_tbeginc:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbeginc 0, 65288
+; CHECK: br %r14
+ call void @llvm.s390.tbeginc(i8 *null, i32 65288)
+ ret void
+}
+
+; TEND with integer CC return value.
+define i32 @test_tend1() {
+; CHECK-LABEL: test_tend1:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ ret i32 %res
+}
+
+; TEND with implicit CC check.
+define void @test_tend3(i32 *%ptr) {
+; CHECK-LABEL: test_tend3:
+; CHECK: tend
+; CHECK: je {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TEND with dual CC use.
+define i32 @test_tend2(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tend2:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TABORT with register only.
+define void @test_tabort1(i64 %val) {
+; CHECK-LABEL: test_tabort1:
+; CHECK: tabort 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 %val)
+ ret void
+}
+
+; TABORT with immediate only.
+define void @test_tabort2(i64 %val) {
+; CHECK-LABEL: test_tabort2:
+; CHECK: tabort 1234
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 1234)
+ ret void
+}
+
+; TABORT with register + immediate.
+define void @test_tabort3(i64 %val) {
+; CHECK-LABEL: test_tabort3:
+; CHECK: tabort 1234(%r2)
+; CHECK: br %r14
+ %sum = add i64 %val, 1234
+ call void @llvm.s390.tabort(i64 %sum)
+ ret void
+}
+
+; TABORT with out-of-range immediate.
+define void @test_tabort4(i64 %val) {
+; CHECK-LABEL: test_tabort4:
+; CHECK: tabort 0({{%r[1-5]}})
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 4096)
+ ret void
+}
+
+; NTSTG with base pointer only.
+define void @test_ntstg1(i64 *%ptr, i64 %val) {
+; CHECK-LABEL: test_ntstg1:
+; CHECK: ntstg %r3, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with base and index.
+; Check that NTSTG accepts an index register.
+define void @test_ntstg2(i64 *%base, i64 %index, i64 %val) {
+; CHECK-LABEL: test_ntstg2:
+; CHECK: sllg [[REG:%r[1-5]]], %r3, 3
+; CHECK: ntstg %r4, 0([[REG]],%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 %index
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the highest in-range displacement.
+define void @test_ntstg3(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg3:
+; CHECK: ntstg %r3, 524280(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65535
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range positive displacement.
+define void @test_ntstg4(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg4:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the lowest in-range displacement.
+define void @test_ntstg5(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg5:
+; CHECK: ntstg %r3, -524288(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range negative displacement.
+define void @test_ntstg6(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg6:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; ETND.
+define i32 @test_etnd() {
+; CHECK-LABEL: test_etnd:
+; CHECK: etnd %r2
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.etnd()
+ ret i32 %res
+}
+
+; PPA (Transaction-Abort Assist)
+define void @test_ppa_txassist(i32 %val) {
+; CHECK-LABEL: test_ppa_txassist:
+; CHECK: ppa %r2, 0, 1
+; CHECK: br %r14
+ call void @llvm.s390.ppa.txassist(i32 %val)
+ ret void
+}
+
Index: llvm-head/test/MC/SystemZ/insn-bad-zEC12.s
===================================================================
--- llvm-head.orig/test/MC/SystemZ/insn-bad-zEC12.s
+++ llvm-head/test/MC/SystemZ/insn-bad-zEC12.s
@@ -3,6 +3,22 @@
# RUN: FileCheck < %t %s
#CHECK: error: invalid operand
+#CHECK: ntstg %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ntstg %r0, 524288
+
+ ntstg %r0, -524289
+ ntstg %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: ppa %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: ppa %r0, %r0, 16
+
+ ppa %r0, %r0, -1
+ ppa %r0, %r0, 16
+
+#CHECK: error: invalid operand
#CHECK: risbgn %r0,%r0,0,0,-1
#CHECK: error: invalid operand
#CHECK: risbgn %r0,%r0,0,0,64
@@ -22,3 +38,47 @@
risbgn %r0,%r0,-1,0,0
risbgn %r0,%r0,256,0,0
+#CHECK: error: invalid operand
+#CHECK: tabort -1
+#CHECK: error: invalid operand
+#CHECK: tabort 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tabort 0(%r1,%r2)
+
+ tabort -1
+ tabort 4096
+ tabort 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: tbegin -1, 0
+#CHECK: error: invalid operand
+#CHECK: tbegin 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tbegin 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tbegin 0, -1
+#CHECK: error: invalid operand
+#CHECK: tbegin 0, 65536
+
+ tbegin -1, 0
+ tbegin 4096, 0
+ tbegin 0(%r1,%r2), 0
+ tbegin 0, -1
+ tbegin 0, 65536
+
+#CHECK: error: invalid operand
+#CHECK: tbeginc -1, 0
+#CHECK: error: invalid operand
+#CHECK: tbeginc 4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: tbeginc 0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: tbeginc 0, -1
+#CHECK: error: invalid operand
+#CHECK: tbeginc 0, 65536
+
+ tbeginc -1, 0
+ tbeginc 4096, 0
+ tbeginc 0(%r1,%r2), 0
+ tbeginc 0, -1
+ tbeginc 0, 65536
Index: llvm-head/test/MC/SystemZ/insn-good-zEC12.s
===================================================================
--- llvm-head.orig/test/MC/SystemZ/insn-good-zEC12.s
+++ llvm-head/test/MC/SystemZ/insn-good-zEC12.s
@@ -1,6 +1,48 @@
# For zEC12 and above.
# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 -show-encoding %s | FileCheck %s
+#CHECK: etnd %r0 # encoding: [0xb2,0xec,0x00,0x00]
+#CHECK: etnd %r15 # encoding: [0xb2,0xec,0x00,0xf0]
+#CHECK: etnd %r7 # encoding: [0xb2,0xec,0x00,0x70]
+
+ etnd %r0
+ etnd %r15
+ etnd %r7
+
+#CHECK: ntstg %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x25]
+#CHECK: ntstg %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x25]
+#CHECK: ntstg %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x25]
+#CHECK: ntstg %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x25]
+#CHECK: ntstg %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x25]
+#CHECK: ntstg %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x25]
+#CHECK: ntstg %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x25]
+#CHECK: ntstg %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x25]
+
+ ntstg %r0, -524288
+ ntstg %r0, -1
+ ntstg %r0, 0
+ ntstg %r0, 1
+ ntstg %r0, 524287
+ ntstg %r0, 0(%r1)
+ ntstg %r0, 0(%r15)
+ ntstg %r0, 524287(%r1,%r15)
+ ntstg %r0, 524287(%r15,%r1)
+ ntstg %r15, 0
+
+#CHECK: ppa %r0, %r0, 0 # encoding: [0xb2,0xe8,0x00,0x00]
+#CHECK: ppa %r0, %r0, 15 # encoding: [0xb2,0xe8,0xf0,0x00]
+#CHECK: ppa %r0, %r15, 0 # encoding: [0xb2,0xe8,0x00,0x0f]
+#CHECK: ppa %r4, %r6, 7 # encoding: [0xb2,0xe8,0x70,0x46]
+#CHECK: ppa %r15, %r0, 0 # encoding: [0xb2,0xe8,0x00,0xf0]
+
+ ppa %r0, %r0, 0
+ ppa %r0, %r0, 15
+ ppa %r0, %r15, 0
+ ppa %r4, %r6, 7
+ ppa %r15, %r0, 0
+
#CHECK: risbgn %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x59]
#CHECK: risbgn %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x59]
#CHECK: risbgn %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x59]
@@ -17,3 +59,68 @@
risbgn %r15,%r0,0,0,0
risbgn %r4,%r5,6,7,8
+#CHECK: tabort 0 # encoding: [0xb2,0xfc,0x00,0x00]
+#CHECK: tabort 0(%r1) # encoding: [0xb2,0xfc,0x10,0x00]
+#CHECK: tabort 0(%r15) # encoding: [0xb2,0xfc,0xf0,0x00]
+#CHECK: tabort 4095 # encoding: [0xb2,0xfc,0x0f,0xff]
+#CHECK: tabort 4095(%r1) # encoding: [0xb2,0xfc,0x1f,0xff]
+#CHECK: tabort 4095(%r15) # encoding: [0xb2,0xfc,0xff,0xff]
+
+ tabort 0
+ tabort 0(%r1)
+ tabort 0(%r15)
+ tabort 4095
+ tabort 4095(%r1)
+ tabort 4095(%r15)
+
+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
+#CHECK: tbegin 4095, 0 # encoding: [0xe5,0x60,0x0f,0xff,0x00,0x00]
+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
+#CHECK: tbegin 0, 1 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x01]
+#CHECK: tbegin 0, 32767 # encoding: [0xe5,0x60,0x00,0x00,0x7f,0xff]
+#CHECK: tbegin 0, 32768 # encoding: [0xe5,0x60,0x00,0x00,0x80,0x00]
+#CHECK: tbegin 0, 65535 # encoding: [0xe5,0x60,0x00,0x00,0xff,0xff]
+#CHECK: tbegin 0(%r1), 42 # encoding: [0xe5,0x60,0x10,0x00,0x00,0x2a]
+#CHECK: tbegin 0(%r15), 42 # encoding: [0xe5,0x60,0xf0,0x00,0x00,0x2a]
+#CHECK: tbegin 4095(%r1), 42 # encoding: [0xe5,0x60,0x1f,0xff,0x00,0x2a]
+#CHECK: tbegin 4095(%r15), 42 # encoding: [0xe5,0x60,0xff,0xff,0x00,0x2a]
+
+ tbegin 0, 0
+ tbegin 4095, 0
+ tbegin 0, 0
+ tbegin 0, 1
+ tbegin 0, 32767
+ tbegin 0, 32768
+ tbegin 0, 65535
+ tbegin 0(%r1), 42
+ tbegin 0(%r15), 42
+ tbegin 4095(%r1), 42
+ tbegin 4095(%r15), 42
+
+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
+#CHECK: tbeginc 4095, 0 # encoding: [0xe5,0x61,0x0f,0xff,0x00,0x00]
+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
+#CHECK: tbeginc 0, 1 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x01]
+#CHECK: tbeginc 0, 32767 # encoding: [0xe5,0x61,0x00,0x00,0x7f,0xff]
+#CHECK: tbeginc 0, 32768 # encoding: [0xe5,0x61,0x00,0x00,0x80,0x00]
+#CHECK: tbeginc 0, 65535 # encoding: [0xe5,0x61,0x00,0x00,0xff,0xff]
+#CHECK: tbeginc 0(%r1), 42 # encoding: [0xe5,0x61,0x10,0x00,0x00,0x2a]
+#CHECK: tbeginc 0(%r15), 42 # encoding: [0xe5,0x61,0xf0,0x00,0x00,0x2a]
+#CHECK: tbeginc 4095(%r1), 42 # encoding: [0xe5,0x61,0x1f,0xff,0x00,0x2a]
+#CHECK: tbeginc 4095(%r15), 42 # encoding: [0xe5,0x61,0xff,0xff,0x00,0x2a]
+
+ tbeginc 0, 0
+ tbeginc 4095, 0
+ tbeginc 0, 0
+ tbeginc 0, 1
+ tbeginc 0, 32767
+ tbeginc 0, 32768
+ tbeginc 0, 65535
+ tbeginc 0(%r1), 42
+ tbeginc 0(%r15), 42
+ tbeginc 4095(%r1), 42
+ tbeginc 4095(%r15), 42
+
+#CHECK: tend # encoding: [0xb2,0xf8,0x00,0x00]
+
+ tend
Index: llvm-head/test/MC/SystemZ/insn-bad-z196.s
===================================================================
--- llvm-head.orig/test/MC/SystemZ/insn-bad-z196.s
+++ llvm-head/test/MC/SystemZ/insn-bad-z196.s
@@ -244,6 +244,11 @@
cxlgbr %f0, 16, %r0, 0
cxlgbr %f2, 0, %r0, 0
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: etnd %r7
+
+ etnd %r7
+
#CHECK: error: invalid operand
#CHECK: fidbra %f0, 0, %f0, -1
#CHECK: error: invalid operand
@@ -546,6 +551,16 @@
locr %r0,%r0,-1
locr %r0,%r0,16
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: ntstg %r0, 524287(%r1,%r15)
+
+ ntstg %r0, 524287(%r1,%r15)
+
+#CHECK: error: {{(instruction requires: processor-assist)?}}
+#CHECK: ppa %r4, %r6, 7
+
+ ppa %r4, %r6, 7
+
#CHECK: error: {{(instruction requires: miscellaneous-extensions)?}}
#CHECK: risbgn %r1, %r2, 0, 0, 0
@@ -690,3 +705,24 @@
stocg %r0,-524289,1
stocg %r0,524288,1
stocg %r0,0(%r1,%r2),1
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tabort 4095(%r1)
+
+ tabort 4095(%r1)
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tbegin 4095(%r1), 42
+
+ tbegin 4095(%r1), 42
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tbeginc 4095(%r1), 42
+
+ tbeginc 4095(%r1), 42
+
+#CHECK: error: {{(instruction requires: transactional-execution)?}}
+#CHECK: tend
+
+ tend
+
Index: llvm-head/test/MC/Disassembler/SystemZ/insns.txt
===================================================================
--- llvm-head.orig/test/MC/Disassembler/SystemZ/insns.txt
+++ llvm-head/test/MC/Disassembler/SystemZ/insns.txt
@@ -2503,6 +2503,15 @@
# CHECK: ear %r15, %a15
0xb2 0x4f 0x00 0xff
+# CHECK: etnd %r0
+0xb2 0xec 0x00 0x00
+
+# CHECK: etnd %r15
+0xb2 0xec 0x00 0xf0
+
+# CHECK: etnd %r7
+0xb2 0xec 0x00 0x70
+
# CHECK: fidbr %f0, 0, %f0
0xb3 0x5f 0x00 0x00
@@ -6034,6 +6043,36 @@
# CHECK: ny %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x54
+# CHECK: ntstg %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x25
+
+# CHECK: ntstg %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x25
+
+# CHECK: ntstg %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x25
+
+# CHECK: ntstg %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x25
+
+# CHECK: ntstg %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x25
+
+# CHECK: ntstg %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x25
+
+# CHECK: ntstg %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x25
+
+# CHECK: ntstg %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x25
+
# CHECK: oc 0(1), 0
0xd6 0x00 0x00 0x00 0x00 0x00
@@ -6346,6 +6385,21 @@
# CHECK: popcnt %r7, %r8
0xb9 0xe1 0x00 0x78
+# CHECK: ppa %r0, %r0, 0
+0xb2 0xe8 0x00 0x00
+
+# CHECK: ppa %r0, %r0, 15
+0xb2 0xe8 0xf0 0x00
+
+# CHECK: ppa %r0, %r15, 0
+0xb2 0xe8 0x00 0x0f
+
+# CHECK: ppa %r4, %r6, 7
+0xb2 0xe8 0x70 0x46
+
+# CHECK: ppa %r15, %r0, 0
+0xb2 0xe8 0x00 0xf0
+
# CHECK: risbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x55
@@ -8062,6 +8116,93 @@
# CHECK: sy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x5b
+# CHECK: tabort 0
+0xb2 0xfc 0x00 0x00
+
+# CHECK: tabort 0(%r1)
+0xb2 0xfc 0x10 0x00
+
+# CHECK: tabort 0(%r15)
+0xb2 0xfc 0xf0 0x00
+
+# CHECK: tabort 4095
+0xb2 0xfc 0x0f 0xff
+
+# CHECK: tabort 4095(%r1)
+0xb2 0xfc 0x1f 0xff
+
+# CHECK: tabort 4095(%r15)
+0xb2 0xfc 0xff 0xff
+
+# CHECK: tbegin 0, 0
+0xe5 0x60 0x00 0x00 0x00 0x00
+
+# CHECK: tbegin 4095, 0
+0xe5 0x60 0x0f 0xff 0x00 0x00
+
+# CHECK: tbegin 0, 0
+0xe5 0x60 0x00 0x00 0x00 0x00
+
+# CHECK: tbegin 0, 1
+0xe5 0x60 0x00 0x00 0x00 0x01
+
+# CHECK: tbegin 0, 32767
+0xe5 0x60 0x00 0x00 0x7f 0xff
+
+# CHECK: tbegin 0, 32768
+0xe5 0x60 0x00 0x00 0x80 0x00
+
+# CHECK: tbegin 0, 65535
+0xe5 0x60 0x00 0x00 0xff 0xff
+
+# CHECK: tbegin 0(%r1), 42
+0xe5 0x60 0x10 0x00 0x00 0x2a
+
+# CHECK: tbegin 0(%r15), 42
+0xe5 0x60 0xf0 0x00 0x00 0x2a
+
+# CHECK: tbegin 4095(%r1), 42
+0xe5 0x60 0x1f 0xff 0x00 0x2a
+
+# CHECK: tbegin 4095(%r15), 42
+0xe5 0x60 0xff 0xff 0x00 0x2a
+
+# CHECK: tbeginc 0, 0
+0xe5 0x61 0x00 0x00 0x00 0x00
+
+# CHECK: tbeginc 4095, 0
+0xe5 0x61 0x0f 0xff 0x00 0x00
+
+# CHECK: tbeginc 0, 0
+0xe5 0x61 0x00 0x00 0x00 0x00
+
+# CHECK: tbeginc 0, 1
+0xe5 0x61 0x00 0x00 0x00 0x01
+
+# CHECK: tbeginc 0, 32767
+0xe5 0x61 0x00 0x00 0x7f 0xff
+
+# CHECK: tbeginc 0, 32768
+0xe5 0x61 0x00 0x00 0x80 0x00
+
+# CHECK: tbeginc 0, 65535
+0xe5 0x61 0x00 0x00 0xff 0xff
+
+# CHECK: tbeginc 0(%r1), 42
+0xe5 0x61 0x10 0x00 0x00 0x2a
+
+# CHECK: tbeginc 0(%r15), 42
+0xe5 0x61 0xf0 0x00 0x00 0x2a
+
+# CHECK: tbeginc 4095(%r1), 42
+0xe5 0x61 0x1f 0xff 0x00 0x2a
+
+# CHECK: tbeginc 4095(%r15), 42
+0xe5 0x61 0xff 0xff 0x00 0x2a
+
+# CHECK: tend
+0xb2 0xf8 0x00 0x00
+
# CHECK: tm 0, 0
0x91 0x00 0x00 0x00
llvm-svn: 233803
2015-04-01 20:51:43 +08:00
|
|
|
// Transaction-begin node for SystemZISD::TBEGIN, typed by SDT_ZTBegin.
// Chained, produces output glue, may store, and has side effects.
def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPMayStore,
|
|
|
|
SDNPSideEffect]>;
|
|
|
|
// Same profile and properties as z_tbegin, but bound to the separate
// SystemZISD::TBEGIN_NOFLOAT opcode.
def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPMayStore,
|
|
|
|
SDNPSideEffect]>;
|
|
|
|
// Transaction-end node for SystemZISD::TEND. Uses the SDTNone profile and,
// unlike the TBEGIN nodes, is not marked SDNPMayStore.
def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
|
|
|
|
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
|
|
|
|
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
// Shift nodes declared over the generic ISD::SHL / ISD::SRA / ISD::SRL
// opcodes but with the SDT_ZVecBinary type profile.
// NOTE(review): presumably so patterns can match shifts whose amount operand
// is itself a vector -- confirm against the definition of SDT_ZVecBinary.
def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>;
|
|
|
|
def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>;
|
|
|
|
def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
|
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Pattern fragments
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-05-17 04:32:22 +08:00
|
|
|
// Byte-swapping loads: z_loadbswap with its type operand fixed to
// i16, i32 and i64 respectively.
def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
|
|
|
|
def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
|
|
|
|
def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
|
|
|
|
|
|
|
|
// Byte-swapping stores: z_storebswap with its type operand fixed to
// i16, i32 and i64 respectively.
def z_strvh : PatFrag<(ops node:$src, node:$addr),
|
|
|
|
(z_storebswap node:$src, node:$addr, i16)>;
|
|
|
|
def z_strv : PatFrag<(ops node:$src, node:$addr),
|
|
|
|
(z_storebswap node:$src, node:$addr, i32)>;
|
|
|
|
def z_strvg : PatFrag<(ops node:$src, node:$addr),
|
|
|
|
(z_storebswap node:$src, node:$addr, i64)>;
|
|
|
|
|
2013-09-06 19:51:39 +08:00
|
|
|
// Signed and unsigned comparisons.
|
|
|
|
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
|
|
|
|
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
|
|
|
|
return Type != SystemZICMP::UnsignedOnly;
|
|
|
|
}]>;
|
|
|
|
def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
|
|
|
|
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
|
|
|
|
return Type != SystemZICMP::SignedOnly;
|
|
|
|
}]>;
|
|
|
|
|
2013-09-10 18:20:32 +08:00
|
|
|
// Register- and memory-based TEST UNDER MASK.
|
|
|
|
def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
|
|
|
|
def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
|
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
|
|
|
|
def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
|
|
|
|
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
|
|
|
|
def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
|
|
|
|
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
// Match extensions of an i32 to an i64, followed by an in-register sign
|
|
|
|
// extension from a sub-i32 value.
|
|
|
|
def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>;
|
|
|
|
def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>;
|
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
// Register zero-extend operations. Sub-32-bit values are represented as i32s.
|
|
|
|
def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
|
|
|
|
def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
|
|
|
|
def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
|
|
|
|
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
// Match extensions of an i32 to an i64, followed by an AND of the low
|
|
|
|
// i8 or i16 part.
|
|
|
|
def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>;
|
|
|
|
def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>;
|
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
// Typed floating-point loads.
|
|
|
|
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
|
|
|
|
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
|
|
|
|
|
2013-09-16 17:03:10 +08:00
|
|
|
// Extending loads in which the extension type can be signed.
|
|
|
|
def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
|
|
|
|
unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
|
|
|
|
return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD;
|
|
|
|
}]>;
|
|
|
|
def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
|
|
|
|
}]>;
|
|
|
|
def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
|
|
|
|
}]>;
|
|
|
|
def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
|
|
|
}]>;
|
|
|
|
|
|
|
|
// Extending loads in which the extension type can be unsigned.
|
|
|
|
def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
|
|
|
|
unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
|
|
|
|
return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
|
|
|
|
}]>;
|
|
|
|
def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
|
|
|
|
}]>;
|
|
|
|
def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
|
|
|
|
}]>;
|
|
|
|
def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
|
|
|
}]>;
|
|
|
|
|
2013-06-27 17:27:40 +08:00
|
|
|
// Extending loads in which the extension type doesn't matter.
|
|
|
|
def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
|
|
|
|
}]>;
|
|
|
|
def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
|
|
|
|
}]>;
|
|
|
|
def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
|
|
|
|
}]>;
|
|
|
|
def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
|
|
|
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
|
|
|
}]>;
|
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
// Aligned loads.
|
|
|
|
class AlignedLoad<SDPatternOperator load>
|
|
|
|
: PatFrag<(ops node:$addr), (load node:$addr), [{
|
2014-03-06 19:22:58 +08:00
|
|
|
auto *Load = cast<LoadSDNode>(N);
|
2013-05-07 00:15:19 +08:00
|
|
|
return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
|
|
|
|
}]>;
|
2013-09-16 17:03:10 +08:00
|
|
|
def aligned_load : AlignedLoad<load>;
|
|
|
|
def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
|
|
|
|
def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
|
|
|
|
def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
|
|
|
|
def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
|
2013-05-07 00:15:19 +08:00
|
|
|
|
|
|
|
// Aligned stores: wraps another store operator and additionally requires the
// store's alignment to be at least the store size of its memory type.
class AlignedStore<SDPatternOperator store>
  : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
  const auto *St = cast<StoreSDNode>(N);
  const auto AccessBytes = St->getMemoryVT().getStoreSize();
  return St->getAlignment() >= AccessBytes;
}]>;

// Aligned variants of the plain store and of the 16-bit and 32-bit
// truncating stores.
def aligned_store         : AlignedStore<store>;
def aligned_truncstorei16 : AlignedStore<truncstorei16>;
def aligned_truncstorei32 : AlignedStore<truncstorei32>;
|
|
|
|
|
2013-05-31 21:25:22 +08:00
|
|
|
// Non-volatile loads.  These are for instructions that might access the
// storage location more than once, so a volatile load must not match.
class NonvolatileLoad<SDPatternOperator load>
  : PatFrag<(ops node:$addr), (load node:$addr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  if (Ld->isVolatile())
    return false;
  return true;
}]>;

// Non-volatile variants of the plain load and of the anyext loads.
def nonvolatile_load          : NonvolatileLoad<load>;
def nonvolatile_anyextloadi8  : NonvolatileLoad<anyextloadi8>;
def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
|
2013-05-31 21:25:22 +08:00
|
|
|
|
|
|
|
// Non-volatile stores: wraps another store operator and rejects any store
// that is marked volatile.
class NonvolatileStore<SDPatternOperator store>
  : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
  const auto *St = cast<StoreSDNode>(N);
  if (St->isVolatile())
    return false;
  return true;
}]>;

// Non-volatile variants of the plain store and of the truncating stores.
def nonvolatile_store         : NonvolatileStore<store>;
def nonvolatile_truncstorei8  : NonvolatileStore<truncstorei8>;
def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
|
2013-05-31 21:25:22 +08:00
|
|
|
|
2013-09-05 18:36:45 +08:00
|
|
|
// A store of a loaded value that can be implemented using MVC.  The decision
// is delegated to storeLoadCanUseMVC, which is handed the store node.
def mvc_store
  : PatFrag<(ops node:$value, node:$addr),
            (unindexedstore node:$value, node:$addr), [{
  return storeLoadCanUseMVC(N);
}]>;
|
|
|
|
|
|
|
|
// Binary read-modify-write operations on memory where the other operand is
// also in memory, so that block operations such as NC can be used.  Each
// operator gets two fragments, one per position of the load from $addr:
//   "1": the load from $addr is the second operand of the operator;
//   "2": the load from $addr is the first operand of the operator.
// The integer handed to storeLoadCanUseBlockBinary is 0 for "1" and 1 for "2".
multiclass block_op<SDPatternOperator operator> {
  def "1"
    : PatFrag<(ops node:$value, node:$addr),
              (unindexedstore
                (operator node:$value, (unindexedload node:$addr)),
                node:$addr), [{
    return storeLoadCanUseBlockBinary(N, 0);
  }]>;

  def "2"
    : PatFrag<(ops node:$value, node:$addr),
              (unindexedstore
                (operator (unindexedload node:$addr), node:$value),
                node:$addr), [{
    return storeLoadCanUseBlockBinary(N, 1);
  }]>;
}

// Instantiations for the three bitwise operators.
defm block_and : block_op<and>;
defm block_or  : block_op<or>;
defm block_xor : block_op<xor>;
|
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
// Insertions.  Each fragment ANDs $src1 with a mask that clears one field
// and ORs the result with $src2.  Bit positions below count from the least
// significant bit.

// Mask -256 clears the low 8 bits.
def inserti8
  : PatFrag<(ops node:$src1, node:$src2),
            (or (and node:$src1, -256), node:$src2)>;

// Mask clears bits 0-15.
def insertll
  : PatFrag<(ops node:$src1, node:$src2),
            (or (and node:$src1, 0xffffffffffff0000), node:$src2)>;

// Mask clears bits 16-31.
def insertlh
  : PatFrag<(ops node:$src1, node:$src2),
            (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>;

// Mask clears bits 32-47.
def inserthl
  : PatFrag<(ops node:$src1, node:$src2),
            (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>;

// Mask clears bits 48-63.
def inserthh
  : PatFrag<(ops node:$src1, node:$src2),
            (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>;

// Mask clears the low 32 bits.
def insertlf
  : PatFrag<(ops node:$src1, node:$src2),
            (or (and node:$src1, 0xffffffff00000000), node:$src2)>;

// Mask clears the high 32 bits.
def inserthf
  : PatFrag<(ops node:$src1, node:$src2),
            (or (and node:$src1, 0x00000000ffffffff), node:$src2)>;
|
|
|
|
|
|
|
|
// ORs that can be treated as insertions: the low 8 bits of the first
// operand are known to be zero.
def or_as_inserti8
  : PatFrag<(ops node:$src1, node:$src2),
            (or node:$src1, node:$src2), [{
  EVT ScalarVT = N->getValueType(0).getScalarType();
  APInt LowByte = APInt::getLowBitsSet(ScalarVT.getSizeInBits(), 8);
  return CurDAG->MaskedValueIsZero(N->getOperand(0), LowByte);
}]>;
|
|
|
|
|
|
|
|
// ORs that can be treated as reversed insertions: the low 8 bits of the
// second operand are known to be zero.
def or_as_revinserti8
  : PatFrag<(ops node:$src1, node:$src2),
            (or node:$src1, node:$src2), [{
  EVT ScalarVT = N->getValueType(0).getScalarType();
  APInt LowByte = APInt::getLowBitsSet(ScalarVT.getSizeInBits(), 8);
  return CurDAG->MaskedValueIsZero(N->getOperand(1), LowByte);
}]>;
|
|
|
|
|
2013-12-13 23:35:00 +08:00
|
|
|
// Negative integer absolute: the integer negation of z_iabs.
def z_inegabs
  : PatFrag<(ops node:$src),
            (ineg (z_iabs node:$src))>;
|
|
|
|
|
2013-08-19 20:48:54 +08:00
|
|
|
// Integer absolute in the canonical form generated by DAGCombiner:
//   (x + (x >>s (bits - 1))) ^ (x >>s (bits - 1))
// where >>s is an arithmetic shift right.
def z_iabs32
  : PatFrag<(ops node:$src),
            (xor (add node:$src, (sra node:$src, (i32 31))),
                 (sra node:$src, (i32 31)))>;
def z_iabs64
  : PatFrag<(ops node:$src),
            (xor (add node:$src, (sra node:$src, (i32 63))),
                 (sra node:$src, (i32 63)))>;

// Integer negation of the absolute-value forms above.
def z_inegabs32
  : PatFrag<(ops node:$src),
            (ineg (z_iabs32 node:$src))>;
def z_inegabs64
  : PatFrag<(ops node:$src),
            (ineg (z_iabs64 node:$src))>;
|
2013-08-19 20:48:54 +08:00
|
|
|
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
// Integer multiply-and-add: ($src1 * $src2) + $src3.
def z_muladd
  : PatFrag<(ops node:$src1, node:$src2, node:$src3),
            (add (mul node:$src1, node:$src2),
                 node:$src3)>;
|
|
|
|
|
2015-05-06 03:26:48 +08:00
|
|
|
// Fused multiply-subtract in the natural operand order: an fma of $src1 and
// $src2 whose addend is the negation of $src3.
def fms
  : PatFrag<(ops node:$src1, node:$src2, node:$src3),
            (fma node:$src1, node:$src2,
                 (fneg node:$src3))>;
|
|
|
|
|
2013-05-07 00:15:19 +08:00
|
|
|
// Fused multiply-add and multiply-subtract with the operands ordered to
// match SystemZ's MA and MS instructions: $src1 is the addend (negated for
// z_fms) and $src2, $src3 are the factors.
def z_fma
  : PatFrag<(ops node:$src1, node:$src2, node:$src3),
            (fma node:$src2, node:$src3,
                 node:$src1)>;
def z_fms
  : PatFrag<(ops node:$src1, node:$src2, node:$src3),
            (fma node:$src2, node:$src3,
                 (fneg node:$src1))>;
|
|
|
|
|
|
|
|
// Floating-point negative absolute: fneg applied to fabs of the operand.
def fnabs
  : PatFrag<(ops node:$src),
            (fneg (fabs node:$src))>;
|
|
|
|
|
|
|
|
// Builds a unary operator that first loads from $addr and then applies
// "operator" to the loaded value.  The load operator defaults to plain
// "load".
class loadu<SDPatternOperator operator, SDPatternOperator load = load>
  : PatFrag<(ops node:$addr),
            (operator (load node:$addr))>;
|
|
|
|
|
|
|
|
// Builds a store operator that applies the unary "operator" to $value and
// stores the result to $addr.  The store operator defaults to plain "store".
class storeu<SDPatternOperator operator, SDPatternOperator store = store>
  : PatFrag<(ops node:$value, node:$addr),
            (store (operator node:$value),
                   node:$addr)>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
|
|
|
|
// Vector representation of all-zeros and all-ones, written as a bitconvert
// of a v16i8 z_byte_mask.  The all-ones mask 65535 is 0xffff, i.e. sixteen
// set bits (presumably one per byte of the v16i8; confirm against the
// z_byte_mask definition).
def z_vzero
  : PatFrag<(ops),
            (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
def z_vones
  : PatFrag<(ops),
            (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
|
|
|
|
|
|
|
|
// Load a scalar of type "scalartype" with "load" and replicate it into all
// elements of a vector.
class z_replicate_load<ValueType scalartype, SDPatternOperator load>
  : PatFrag<(ops node:$addr),
            (z_replicate (scalartype (load node:$addr)))>;

// The i8 and i16 forms use an extending load that produces an i32.
def z_replicate_loadi8  : z_replicate_load<i32, anyextloadi8>;
def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
def z_replicate_loadi32 : z_replicate_load<i32, load>;
def z_replicate_loadi64 : z_replicate_load<i64, load>;
def z_replicate_loadf32 : z_replicate_load<f32, load>;
def z_replicate_loadf64 : z_replicate_load<f64, load>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
|
|
|
|
// Load a scalar of type "scalartype" with "load" and insert it into element
// $index of vector $vec.
class z_vle<ValueType scalartype, SDPatternOperator load>
  : PatFrag<(ops node:$vec, node:$addr, node:$index),
            (z_vector_insert node:$vec,
                             (scalartype (load node:$addr)),
                             node:$index)>;

// The i8 and i16 forms use an extending load that produces an i32.
def z_vlei8  : z_vle<i32, anyextloadi8>;
def z_vlei16 : z_vle<i32, anyextloadi16>;
def z_vlei32 : z_vle<i32, load>;
def z_vlei64 : z_vle<i64, load>;
def z_vlef32 : z_vle<f32, load>;
def z_vlef64 : z_vle<f64, load>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
|
|
|
|
// Load a scalar and insert it into the low element of the high i64 of a
// zeroed vector.  "index" selects that element for the given element width.
class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
  : PatFrag<(ops node:$addr),
            (z_vector_insert (z_vzero),
                             (scalartype (load node:$addr)),
                             (i32 index))>;

def z_vllezi8  : z_vllez<i32, anyextloadi8, 7>;
def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
def z_vllezi32 : z_vllez<i32, load, 1>;

// For i64 the loaded value is the whole high doubleword, so the vector is
// formed by joining it with a zero low doubleword.
def z_vllezi64
  : PatFrag<(ops node:$addr),
            (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
|
2015-05-06 03:27:45 +08:00
|
|
|
// A v4f32 is formed from four f32s using high merges.  When zero is
// propagated into every element except index 1, the result is the
// expression below.
def z_vllezf32
  : PatFrag<(ops node:$addr),
            (bitconvert
              (z_merge_high
                (v2i64
                  (z_unpackl_high
                    (v4i32
                      (bitconvert
                        (v4f32 (scalar_to_vector
                                 (f32 (load node:$addr)))))))),
                (v2i64 (z_vzero))))>;
|
2015-05-06 03:26:48 +08:00
|
|
|
// f64 form: a high merge of the loaded f64, placed in a vector with
// scalar_to_vector, and an all-zeros vector.
def z_vllezf64
  : PatFrag<(ops node:$addr),
            (z_merge_high (scalar_to_vector (f64 (load node:$addr))),
                          (z_vzero))>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
|
|
|
|
// Store one element of a vector: extract element $index of $vec as
// "scalartype" and write it to $addr using "store".
class z_vste<ValueType scalartype, SDPatternOperator store>
  : PatFrag<(ops node:$vec, node:$addr, node:$index),
            (store (scalartype (z_vector_extract node:$vec, node:$index)),
                   node:$addr)>;

// The i8 and i16 forms extract an i32 and use a truncating store.
def z_vstei8  : z_vste<i32, truncstorei8>;
def z_vstei16 : z_vste<i32, truncstorei16>;
def z_vstei32 : z_vste<i32, store>;
def z_vstei64 : z_vste<i64, store>;
def z_vstef32 : z_vste<f32, store>;
def z_vstef64 : z_vste<f64, store>;
|
[SystemZ] Add CodeGen support for integer vector types
This is the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
|
|
|
|
// Arithmetic negation on vectors, expressed as (all-zeros - $x).
def z_vneg
  : PatFrag<(ops node:$x),
            (sub (z_vzero), node:$x)>;
|
|
|
|
|
|
|
|
// Bitwise negation on vectors, expressed as ($x XOR all-ones).
def z_vnot
  : PatFrag<(ops node:$x),
            (xor node:$x, (z_vones))>;
|
|
|
|
|
|
|
|
// Signed "integer greater than zero" on vectors: z_vicmph with $x as the
// first operand and an all-zeros vector as the second.
def z_vicmph_zero
  : PatFrag<(ops node:$x),
            (z_vicmph node:$x, (z_vzero))>;
|
|
|
|
|
|
|
|
// Signed "integer less than zero" on vectors: the same z_vicmph comparison
// with the operands swapped, so the all-zeros vector comes first.
def z_vicmpl_zero
  : PatFrag<(ops node:$x),
            (z_vicmph (z_vzero), node:$x)>;
|
|
|
|
|
|
|
|
// Integer absolute on vectors, using the add/xor form
//   (x + (x >>s shift)) ^ (x >>s shift)
// where >>s is z_vsra_by_scalar.  "shift" is the element width minus one.
class z_viabs<int shift>
  : PatFrag<(ops node:$src),
            (xor (add node:$src,
                      (z_vsra_by_scalar node:$src, (i32 shift))),
                 (z_vsra_by_scalar node:$src, (i32 shift)))>;

def z_viabs8  : z_viabs<7>;
def z_viabs16 : z_viabs<15>;
def z_viabs32 : z_viabs<31>;
def z_viabs64 : z_viabs<63>;
|
|
|
|
|
|
|
|
// Sign-extend the i64 elements of a vector: shift each element left by
// "shift" with z_vshl_by_scalar, then shift it right by the same amount
// with z_vsra_by_scalar.
class z_vse<int shift>
  : PatFrag<(ops node:$src),
            (z_vsra_by_scalar
              (z_vshl_by_scalar node:$src, shift),
              shift)>;

// Shift amounts are 64 minus the width of the value being extended.
def z_vsei8  : z_vse<56>;
def z_vsei16 : z_vse<48>;
def z_vsei32 : z_vse<32>;
|
|
|
|
|
|
|
|
// The same sign extensions, but done on individual i64 scalars: elements
// index1 and index2 of $src are extracted, "operator" is applied to each,
// and the two results are combined with z_join_dwords.
class z_vse_by_parts<SDPatternOperator operator, int index1, int index2>
  : PatFrag<(ops node:$src),
            (z_join_dwords
              (operator (z_vector_extract node:$src, index1)),
              (operator (z_vector_extract node:$src, index2)))>;

def z_vsei8_by_parts  : z_vse_by_parts<sext8dbl, 7, 15>;
def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>;
def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>;
|