[ARM] Improve codegen of volatile load/store of i64
Summary: Instead of generating two i32 instructions for each load or store of a volatile i64 value (two LDRs or STRs), emit a single LDRD or STRD. These improvements cover architectures implementing ARMv5TE or Thumb-2. The code generation explicitly avoids the register-offset variant of LDRD/STRD: in that variant, the register allocated to the register offset cannot be reused in any of the remaining operands, and enforcing such a restriction appears to be non-trivial in LLVM, so it is left as a to-do.

Differential Revision: https://reviews.llvm.org/D70072
commit c010d4d195 (parent 23ac16cf9b)
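For illustration only (this snippet is not part of the patch), a C++ source-level access of the kind the change affects; the names are made up:

    // Compiled for ARMv5TE or Thumb-2, each volatile access of the 64-bit
    // globals below is now emitted as a single LDRD or STRD instead of a
    // pair of 32-bit LDR/STR instructions.
    volatile long long x, y;

    void copy_volatile() {
      y = x; // one volatile i64 load, then one volatile i64 store
    }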
@@ -2735,6 +2735,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       MI.eraseFromParent();
       return true;
     }
+    case ARM::LOADDUAL:
+    case ARM::STOREDUAL: {
+      Register PairReg = MI.getOperand(0).getReg();
+
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                  TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
+              .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
+                      Opcode == ARM::LOADDUAL ? RegState::Define : 0)
+              .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
+                      Opcode == ARM::LOADDUAL ? RegState::Define : 0);
+      for (unsigned i = 1; i < MI.getNumOperands(); i++)
+        MIB.add(MI.getOperand(i));
+      MIB.add(predOps(ARMCC::AL));
+      MIB.cloneMemRefs(MI);
+      MI.eraseFromParent();
+      return true;
+    }
     }
   }
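As a rough illustration of the expansion above (a sketch, not LLVM code): a GPRPair register such as R0_R1 decomposes into gsub_0 (the even, low register) and gsub_1 (the odd, high register), which become the two explicit register operands of LDRD/STRD; the remaining operands (the addrmode3 address) are copied over unchanged and an AL predicate is appended.

    #include <cstdio>

    // Sketch of the sub-register split: a pair is identified here by its
    // even register number; gsub_0/gsub_1 map to the even/odd halves.
    struct GPRPair { unsigned Even; };
    static unsigned gsub0(GPRPair P) { return P.Even; }
    static unsigned gsub1(GPRPair P) { return P.Even + 1; }

    int main() {
      GPRPair P{0}; // R0_R1
      std::printf("ldrd r%u, r%u, [rN]\n", gsub0(P), gsub1(P));
      return 0;
    }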
@@ -145,6 +145,8 @@ public:
 
   // Thumb 2 Addressing Modes:
   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+  template <unsigned Shift>
+  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                             SDValue &OffImm);
   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@@ -1312,6 +1314,33 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
   return true;
 }
 
+template <unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+                                           SDValue &OffImm) {
+  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
+    int RHSC;
+    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
+      Base = N.getOperand(0);
+      if (Base.getOpcode() == ISD::FrameIndex) {
+        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+        Base = CurDAG->getTargetFrameIndex(
+            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+      }
+
+      if (N.getOpcode() == ISD::SUB)
+        RHSC = -RHSC;
+      OffImm =
+          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
+      return true;
+    }
+  }
+
+  // Base only.
+  Base = N;
+  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
   // Match simple R - imm8 operands.
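The template parameter controls the scaling of the matched offset. A standalone sketch of the accepted range (assumed helper name, not LLVM code): a byte offset matches when it is a multiple of (1 << Shift) and the scaled value lies in [-255, 255]; for Shift = 2 (t2addrmode_imm8s4) that means offsets in [-1020, 1020] divisible by 4, which lines up with the test cases at the end of this commit.

    #include <cstdio>

    // Mirrors the isScaledConstantInRange(..., 1 << Shift, -255, 256, ...)
    // check in SelectT2AddrModeImm8<Shift> above.
    static bool matchesT2Imm8Shifted(int ByteOffset, unsigned Shift) {
      int Scale = 1 << Shift;
      if (ByteOffset % Scale != 0)
        return false;
      int Scaled = ByteOffset / Scale;
      return Scaled >= -255 && Scaled <= 255;
    }

    int main() {
      std::printf("%d\n", matchesT2Imm8Shifted(-4, 2));   // 1: ldrd ..., #-4
      std::printf("%d\n", matchesT2Imm8Shifted(1020, 2)); // 1: largest offset
      std::printf("%d\n", matchesT2Imm8Shifted(255, 2));  // 0: not a multiple of 4
      std::printf("%d\n", matchesT2Imm8Shifted(1024, 2)); // 0: scaled 256 is out of range
      return 0;
    }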
@@ -3655,6 +3684,59 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     CurDAG->RemoveDeadNode(N);
     return;
   }
+  case ARMISD::LDRD: {
+    if (Subtarget->isThumb2())
+      break; // TableGen handles isel in this case.
+    SDValue Base, RegOffset, ImmOffset;
+    const SDValue &Chain = N->getOperand(0);
+    const SDValue &Addr = N->getOperand(1);
+    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+      // The register-offset variant of LDRD mandates that the register
+      // allocated to RegOffset is not reused in any of the remaining operands.
+      // This restriction is currently not enforced. Therefore emitting this
+      // variant is explicitly avoided.
+      Base = Addr;
+      RegOffset = CurDAG->getRegister(0, MVT::i32);
+    }
+    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
+    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
+                                         {MVT::Untyped, MVT::Other}, Ops);
+    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+                                                SDValue(New, 0));
+    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+                                                SDValue(New, 0));
+    transferMemOperands(N, New);
+    ReplaceUses(SDValue(N, 0), Lo);
+    ReplaceUses(SDValue(N, 1), Hi);
+    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
+    CurDAG->RemoveDeadNode(N);
+    return;
+  }
+  case ARMISD::STRD: {
+    if (Subtarget->isThumb2())
+      break; // TableGen handles isel in this case.
+    SDValue Base, RegOffset, ImmOffset;
+    const SDValue &Chain = N->getOperand(0);
+    const SDValue &Addr = N->getOperand(3);
+    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+      // The register-offset variant of STRD mandates that the register
+      // allocated to RegOffset is not reused in any of the remaining operands.
+      // This restriction is currently not enforced. Therefore emitting this
+      // variant is explicitly avoided.
+      Base = Addr;
+      RegOffset = CurDAG->getRegister(0, MVT::i32);
+    }
+    SDNode *RegPair =
+        createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
+    SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
+    SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
+    transferMemOperands(N, New);
+    ReplaceUses(SDValue(N, 0), SDValue(New, 0));
+    CurDAG->RemoveDeadNode(N);
+    return;
+  }
   case ARMISD::LOOP_DEC: {
     SDValue Ops[] = { N->getOperand(1),
                       N->getOperand(2),
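The restriction mentioned in the comments above can be stated as a simple predicate (a sketch, not LLVM code; the helper name is made up):

    #include <cstdio>

    // In the register-offset form "ldrd Rt, Rt2, [Rn, Rm]", the register
    // chosen for Rm must not be reused by any of the remaining operands.
    // Teaching the register allocator to guarantee this is the part the
    // commit message leaves as a to-do.
    static bool regOffsetOperandsOk(unsigned Rt, unsigned Rt2,
                                    unsigned Rn, unsigned Rm) {
      return Rm != Rt && Rm != Rt2 && Rm != Rn;
    }

    int main() {
      std::printf("%d\n", regOffsetOperandsOk(0, 1, 2, 3)); // 1: all distinct
      std::printf("%d\n", regOffsetOperandsOk(0, 1, 2, 1)); // 0: Rm aliases Rt2
      return 0;
    }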
@@ -1082,6 +1082,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SRA, MVT::i64, Custom);
     setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+    setOperationAction(ISD::LOAD, MVT::i64, Custom);
+    setOperationAction(ISD::STORE, MVT::i64, Custom);
 
     // MVE lowers 64 bit shifts to lsll and lsrl
     // assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1624,6 +1626,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
   case ARMISD::PRELOAD:     return "ARMISD::PRELOAD";
 
+  case ARMISD::LDRD:        return "ARMISD::LDRD";
+  case ARMISD::STRD:        return "ARMISD::STRD";
+
   case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
   case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
@@ -9151,6 +9156,25 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
   return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
 }
 
+void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                  SelectionDAG &DAG) const {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT MemVT = LD->getMemoryVT();
+  assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
+
+  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+      !Subtarget->isThumb1Only() && LD->isVolatile()) {
+    SDLoc dl(N);
+    SDValue Result = DAG.getMemIntrinsicNode(
+        ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
+        {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
+    SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
+    SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
+    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+    Results.append({Pair, Result.getValue(2)});
+  }
+}
+
 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
   EVT MemVT = ST->getMemoryVT();
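For reference, a plain C++ sketch (not LLVM code) of what ISD::BUILD_PAIR does above with the two i32 results of ARMISD::LDRD, after the endianness-based selection has decided which result is the low half:

    #include <cstdint>
    #include <cstdio>

    // BUILD_PAIR reassembles (Lo, Hi) into one i64, Lo being the low half.
    int main() {
      const uint32_t Lo = 0x55667788u;
      const uint32_t Hi = 0x11223344u;
      const uint64_t V = (static_cast<uint64_t>(Hi) << 32) | Lo;
      std::printf("V = 0x%016llx\n", static_cast<unsigned long long>(V));
      return 0;
    }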
@@ -9180,6 +9204,38 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
                                    ST->getMemOperand());
 }
 
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
+                          const ARMSubtarget *Subtarget) {
+  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+  EVT MemVT = ST->getMemoryVT();
+  assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
+
+  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+      !Subtarget->isThumb1Only() && ST->isVolatile()) {
+    SDNode *N = Op.getNode();
+    SDLoc dl(N);
+
+    SDValue Lo = DAG.getNode(
+        ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+        DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
+                              MVT::i32));
+    SDValue Hi = DAG.getNode(
+        ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+        DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
+                              MVT::i32));
+
+    return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
+                                   {ST->getChain(), Lo, Hi, ST->getBasePtr()},
+                                   MemVT, ST->getMemOperand());
+  } else if (Subtarget->hasMVEIntegerOps() &&
+             ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+               MemVT == MVT::v16i1))) {
+    return LowerPredicateStore(Op, DAG);
+  }
+
+  return SDValue();
+}
+
 static bool isZeroVector(SDValue N) {
   return (ISD::isBuildVectorAllZeros(N.getNode()) ||
           (N->getOpcode() == ARMISD::VMOVIMM &&
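A plain C++ sketch (not LLVM code) of the endianness choice above. EXTRACT_ELEMENT index 0 is always the low 32-bit half of the i64; the lowering picks which half becomes Rt, the register STRD stores at the lower address: the low half on little-endian targets, the high half on big-endian ones.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t V = 0x1122334455667788ULL;
      const bool LittleEndian = true; // assumption for this example
      const uint32_t Low  = static_cast<uint32_t>(V);       // EXTRACT_ELEMENT 0
      const uint32_t High = static_cast<uint32_t>(V >> 32); // EXTRACT_ELEMENT 1
      const uint32_t Rt  = LittleEndian ? Low : High;  // stored at [addr]
      const uint32_t Rt2 = LittleEndian ? High : Low;  // stored at [addr + 4]
      std::printf("Rt = 0x%08x, Rt2 = 0x%08x\n", Rt, Rt2);
      return 0;
    }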
@@ -9414,7 +9470,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::LOAD:
     return LowerPredicateLoad(Op, DAG);
   case ISD::STORE:
-    return LowerPredicateStore(Op, DAG);
+    return LowerSTORE(Op, DAG, Subtarget);
   case ISD::MLOAD:
     return LowerMLOAD(Op, DAG);
   case ISD::ATOMIC_LOAD:
@@ -9518,7 +9574,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::ABS:
     lowerABS(N, Results, DAG);
     return ;
-
+  case ISD::LOAD:
+    LowerLOAD(N, Results, DAG);
+    break;
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -305,7 +305,11 @@ class VectorType;
     VST4_UPD,
     VST2LN_UPD,
     VST3LN_UPD,
-    VST4LN_UPD
+    VST4LN_UPD,
+
+    // Load/Store of dual registers
+    LDRD,
+    STRD
   };
 
 } // end namespace ARMISD
@@ -771,6 +775,8 @@ class VectorType;
     SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
     void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
                   SelectionDAG &DAG) const;
+    void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                   SelectionDAG &DAG) const;
 
     Register getRegisterByName(const char* RegName, LLT VT,
                                const MachineFunction &MF) const override;
@@ -245,6 +245,12 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
 def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
 def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
 
+def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 // Vector operations shared between NEON and MVE
 
 def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -2736,6 +2742,14 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
              Requires<[IsARM, HasV5TE]>;
 }
 
+let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+  def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
+                               64, IIC_iLoad_d_r, []>,
+                 Requires<[IsARM, HasV5TE]> {
+    let AM = AddrMode3;
+  }
+}
+
 def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
                    NoItinerary, "lda", "\t$Rt, $addr", []>;
 def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -3014,6 +3028,14 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
   }
 }
 
+let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+  def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
+                                64, IIC_iStore_d_r, []>,
+                  Requires<[IsARM, HasV5TE]> {
+    let AM = AddrMode3;
+  }
+}
+
 // Indexed stores
 multiclass AI2_stridx<bit isByte, string opc,
                       InstrItinClass iii, InstrItinClass iir> {
@@ -270,7 +270,8 @@ def t2am_imm8_offset : MemOperand,
 
 // t2addrmode_imm8s4 := reg +/- (imm8 << 2)
 def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : MemOperand {
+class T2AddrMode_Imm8s4 : MemOperand,
+                          ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
   let EncoderMethod = "getT2AddrModeImm8s4OpValue";
   let DecoderMethod = "DecodeT2AddrModeImm8s4";
   let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -1448,7 +1449,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
 // Load doubleword
 def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
                        (ins t2addrmode_imm8s4:$addr),
-                       IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+                       IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
+                       [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
                Sched<[WriteLd]>;
 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
 
@@ -1629,7 +1631,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
 def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
                        (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
-                       IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+                       IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
+                       [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
                Sched<[WriteST]>;
 
 // Indexed stores
@@ -0,0 +1,191 @@
+; RUN: llc -mtriple=armv5e-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV5TE,CHECK
+; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-T2,CHECK
+; RUN: llc -mtriple=armv4t-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV4T,CHECK
+
+@x = common dso_local global i64 0, align 8
+@y = common dso_local global i64 0, align 8
+
+define void @test() {
+entry:
+; CHECK-LABEL: test:
+; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2:      movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #4]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #4]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
+  %0 = load volatile i64, i64* @x, align 8
+  store volatile i64 %0, i64* @y, align 8
+  ret void
+}
+
+define void @test_offset() {
+entry:
+; CHECK-LABEL: test_offset:
+; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
+; CHECK-T2:      movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
+; CHECK-ARMV4T:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #-4]
+  %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 -4) to i64*), align 8
+  store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 -4) to i64*), align 8
+  ret void
+}
+
+define void @test_offset_1() {
+; CHECK-LABEL: test_offset_1:
+; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #255]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #255]
+; CHECK-T2:      adds [[ADDR0:r[0-9]+]], #255
+; CHECK-T2-NEXT: adds [[ADDR1:r[0-9]+]], #255
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #255]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #259]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #259]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #255]
+entry:
+  %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 255) to i64*), align 8
+  store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 255) to i64*), align 8
+  ret void
+}
+
+define void @test_offset_2() {
+; CHECK-LABEL: test_offset_2:
+; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #256
+; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #256
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2:      movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #256]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #256]
+; CHECK-ARMV4T:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #256]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #260]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #260]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #256]
+entry:
+  %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 256) to i64*), align 8
+  store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 256) to i64*), align 8
+  ret void
+}
+
+define void @test_offset_3() {
+; CHECK-LABEL: test_offset_3:
+; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1020
+; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1020
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2:      movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #1020]
+; CHECK-ARMV4T:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1024]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1020]
+entry:
+  %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1020) to i64*), align 8
+  store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1020) to i64*), align 8
+  ret void
+}
+
+define void @test_offset_4() {
+; CHECK-LABEL: test_offset_4:
+; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE:      ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1024
+; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1024
+; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2:      movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2-NEXT: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
+; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
+; CHECK-T2-NEXT: add.w [[ADDR0]], [[ADDR0]], #1024
+; CHECK-T2-NEXT: add.w [[ADDR1]], [[ADDR1]], #1024
+; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T:      ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
+; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1028]
+; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1028]
+; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1024]
entry:
+  %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1024) to i64*), align 8
+  store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1024) to i64*), align 8
+  ret void
+}
+
+define i64 @test_stack() {
+; CHECK-LABEL: test_stack:
+; CHECK-ARMV5TE:      sub sp, sp, #80
+; CHECK-ARMV5TE-NEXT: mov [[R0:r[0-9]+]], #0
+; CHECK-ARMV5TE-NEXT: mov [[R1:r[0-9]+]], #1
+; CHECK-ARMV5TE-NEXT: strd [[R1]], [[R0]], [sp, #8]
+; CHECK-ARMV5TE-NEXT: ldrd r0, r1, [sp, #8]
+; CHECK-ARMV5TE-NEXT: add sp, sp, #80
+; CHECK-ARMV5TE-NEXT: bx lr
+; CHECK-T2:      sub sp, #80
+; CHECK-T2-NEXT: movs [[R0:r[0-9]+]], #0
+; CHECK-T2-NEXT: movs [[R1:r[0-9]+]], #1
+; CHECK-T2-NEXT: strd [[R1]], [[R0]], [sp, #8]
+; CHECK-T2-NEXT: ldrd r0, r1, [sp, #8]
+; CHECK-T2-NEXT: add sp, #80
+; CHECK-T2-NEXT: bx lr
+; CHECK-ARMV4T:      sub sp, sp, #80
+; CHECK-ARMV4T-NEXT: mov [[R0:r[0-9]+]], #0
+; CHECK-ARMV4T-NEXT: str [[R0]], [sp, #12]
+; CHECK-ARMV4T-NEXT: mov [[R1:r[0-9]+]], #1
+; CHECK-ARMV4T-NEXT: str [[R1]], [sp, #8]
+; CHECK-ARMV4T-NEXT: ldr r0, [sp, #8]
+; CHECK-ARMV4T-NEXT: ldr r1, [sp, #12]
+; CHECK-ARMV4T-NEXT: add sp, sp, #80
+; CHECK-ARMV4T-NEXT: bx lr
+entry:
+  %a = alloca [10 x i64], align 8
+  %arrayidx = getelementptr inbounds [10 x i64], [10 x i64]* %a, i32 0, i32 1
+  store volatile i64 1, i64* %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds [10 x i64], [10 x i64]* %a, i32 0, i32 1
+  %0 = load volatile i64, i64* %arrayidx1, align 8
+  ret i64 %0
+}