[ARM] Improve codegen of volatile load/store of i64

Summary:
Instead of generating two i32 instructions for each load or store of a volatile
i64 value (two LDRs or STRs), now emit LDRD/STRD.

These improvements cover architectures implementing ARMv5TE or Thumb-2.

Reviewers: dmgreen, efriedma, john.brawn

Reviewed By: efriedma

Subscribers: kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70072
This commit is contained in:
Victor Campos 2019-12-16 14:22:15 +00:00
parent eca0c97a6b
commit bbcf1c3496
7 changed files with 311 additions and 6 deletions

View File

@ -1952,6 +1952,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent(); MI.eraseFromParent();
return true; return true;
} }
case ARM::LOADDUAL:
case ARM::STOREDUAL: {
Register PairReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
.addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
Opcode == ARM::LOADDUAL ? RegState::Define : 0)
.addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
Opcode == ARM::LOADDUAL ? RegState::Define : 0);
for (unsigned i = 1; i < MI.getNumOperands(); i++)
MIB.add(MI.getOperand(i));
MIB.add(predOps(ARMCC::AL));
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
}
} }
} }

View File

@ -145,6 +145,8 @@ public:
// Thumb 2 Addressing Modes: // Thumb 2 Addressing Modes:
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
template <unsigned Shift>
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm); SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@ -1294,6 +1296,33 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
return true; return true;
} }
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm) {
if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
OffImm =
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
return true;
}
}
// Base only.
Base = N;
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) { SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands. // Match simple R - imm8 operands.
@ -3486,6 +3515,26 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N); CurDAG->RemoveDeadNode(N);
return; return;
} }
case ARMISD::LDRD: {
if (Subtarget->isThumb2())
break; // TableGen handles isel in this case.
SDValue Base, RegOffset, ImmOffset;
const SDValue &Chain = N->getOperand(0);
const SDValue &Addr = N->getOperand(1);
SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
{MVT::Untyped, MVT::Other}, Ops);
SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
SDValue(New, 0));
SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
SDValue(New, 0));
ReplaceUses(SDValue(N, 0), Lo);
ReplaceUses(SDValue(N, 1), Hi);
ReplaceUses(SDValue(N, 2), SDValue(New, 1));
CurDAG->RemoveDeadNode(N);
return;
}
case ARMISD::LOOP_DEC: { case ARMISD::LOOP_DEC: {
SDValue Ops[] = { N->getOperand(1), SDValue Ops[] = { N->getOperand(1),
N->getOperand(2), N->getOperand(2),

View File

@ -1070,6 +1070,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
// MVE lowers 64 bit shifts to lsll and lsrl // MVE lowers 64 bit shifts to lsll and lsrl
// assuming that ISD::SRL and SRA of i64 are already marked custom // assuming that ISD::SRL and SRA of i64 are already marked custom
@ -1593,6 +1595,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::LDRD: return "ARMISD::LDRD";
case ARMISD::STRD: return "ARMISD::STRD";
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
@ -9081,6 +9086,24 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({Pred, Load.getValue(1)}, dl); return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
} }
void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
!Subtarget->isThumb1Only() && LD->isVolatile()) {
SDLoc dl(N);
SDValue Result = DAG.getMemIntrinsicNode(
ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
{LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
Result.getValue(0), Result.getValue(1));
Results.append({Pair, Result.getValue(2)});
}
}
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT(); EVT MemVT = ST->getMemoryVT();
@ -9110,6 +9133,34 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
ST->getMemOperand()); ST->getMemOperand());
} }
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
!Subtarget->isThumb1Only() && ST->isVolatile()) {
SDNode *N = Op.getNode();
SDLoc dl(N);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
DAG.getTargetConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
DAG.getTargetConstant(1, dl, MVT::i32));
return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
{ST->getChain(), Lo, Hi, ST->getBasePtr()},
MemVT, ST->getMemOperand());
} else if (Subtarget->hasMVEIntegerOps() &&
((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
MemVT == MVT::v16i1))) {
return LowerPredicateStore(Op, DAG);
}
return SDValue();
}
static bool isZeroVector(SDValue N) { static bool isZeroVector(SDValue N) {
return (ISD::isBuildVectorAllZeros(N.getNode()) || return (ISD::isBuildVectorAllZeros(N.getNode()) ||
(N->getOpcode() == ARMISD::VMOVIMM && (N->getOpcode() == ARMISD::VMOVIMM &&
@ -9297,7 +9348,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::LOAD: case ISD::LOAD:
return LowerPredicateLoad(Op, DAG); return LowerPredicateLoad(Op, DAG);
case ISD::STORE: case ISD::STORE:
return LowerPredicateStore(Op, DAG); return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD: case ISD::MLOAD:
return LowerMLOAD(Op, DAG); return LowerMLOAD(Op, DAG);
case ISD::ATOMIC_LOAD: case ISD::ATOMIC_LOAD:
@ -9399,7 +9450,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ABS: case ISD::ABS:
lowerABS(N, Results, DAG); lowerABS(N, Results, DAG);
return ; return ;
case ISD::LOAD:
LowerLOAD(N, Results, DAG);
break;
} }
if (Res.getNode()) if (Res.getNode())
Results.push_back(Res); Results.push_back(Res);

View File

@ -278,7 +278,11 @@ class VectorType;
VST4_UPD, VST4_UPD,
VST2LN_UPD, VST2LN_UPD,
VST3LN_UPD, VST3LN_UPD,
VST4LN_UPD VST4LN_UPD,
// Load/Store of dual registers
LDRD,
STRD
}; };
} // end namespace ARMISD } // end namespace ARMISD
@ -731,6 +735,8 @@ class VectorType;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results, void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const; SelectionDAG &DAG) const;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, EVT VT, Register getRegisterByName(const char* RegName, EVT VT,
const MachineFunction &MF) const override; const MachineFunction &MF) const override;

View File

@ -243,6 +243,12 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
// Vector operations shared between NEON and MVE // Vector operations shared between NEON and MVE
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@ -2695,6 +2701,12 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
Requires<[IsARM, HasV5TE]>; Requires<[IsARM, HasV5TE]>;
} }
let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
64, IIC_iLoad_d_r, []>,
Requires<[IsARM, HasV5TE]>;
}
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "lda", "\t$Rt, $addr", []>; NoItinerary, "lda", "\t$Rt, $addr", []>;
def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@ -2970,6 +2982,17 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
} }
} }
let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
64, IIC_iStore_d_r, []>,
Requires<[IsARM, HasV5TE]>;
}
let Predicates = [IsARM, HasV5TE] in {
def : Pat<(ARMstrd GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
(STOREDUAL (REG_SEQUENCE GPRPair, GPR:$Rt, gsub_0, GPR:$Rt2, gsub_1), addrmode3:$addr)>;
}
// Indexed stores // Indexed stores
multiclass AI2_stridx<bit isByte, string opc, multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> { InstrItinClass iii, InstrItinClass iir> {

View File

@ -270,7 +270,8 @@ def t2am_imm8_offset : MemOperand,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2) // t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
class T2AddrMode_Imm8s4 : MemOperand { class T2AddrMode_Imm8s4 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4"; let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand; let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@ -1412,7 +1413,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword // Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr), (ins t2addrmode_imm8s4:$addr),
IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>, IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
[(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
Sched<[WriteLd]>; Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@ -1593,7 +1595,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
[(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
Sched<[WriteST]>; Sched<[WriteST]>;
// Indexed stores // Indexed stores

View File

@ -0,0 +1,153 @@
; RUN: llc -mtriple=armv5e-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV5TE,CHECK
; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-T2,CHECK
; RUN: llc -mtriple=armv4t-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV4T,CHECK
@x = common dso_local global i64 0, align 8
@y = common dso_local global i64 0, align 8
define void @test() {
entry:
; CHECK-LABEL: test:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #4]
; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #4]
; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
%0 = load volatile i64, i64* @x, align 8
store volatile i64 %0, i64* @y, align 8
ret void
}
define void @test_offset() {
entry:
; CHECK-LABEL: test_offset:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #-4]
; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]]
; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #-4]
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 -4) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 -4) to i64*), align 8
ret void
}
define void @test_offset_1() {
; CHECK-LABEL: test_offset_1:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #255]
; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #255]
; CHECK-T2: adds [[ADDR0:r[0-9]+]], #255
; CHECK-T2-NEXT: adds [[ADDR1:r[0-9]+]], #255
; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #255]
; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #259]
; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #259]
; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #255]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 255) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 255) to i64*), align 8
ret void
}
define void @test_offset_2() {
; CHECK-LABEL: test_offset_2:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: mov [[OFFSET0:r[0-9]+]], #256
; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], [[OFFSET0]]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], [[OFFSET0]]]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #256]
; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #256]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #256]
; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #260]
; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #260]
; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #256]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 256) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 256) to i64*), align 8
ret void
}
define void @test_offset_3() {
; CHECK-LABEL: test_offset_3:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: mov [[OFFSET0:r[0-9]+]], #1020
; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], [[OFFSET0]]]
; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], [[OFFSET0]]]
; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #1020]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1020]
; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1024]
; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1020]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1020) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1020) to i64*), align 8
ret void
}
define void @test_offset_4() {
; CHECK-LABEL: test_offset_4:
; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: mov [[OFFSET0:r[0-9]+]], #1024
; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], [[OFFSET0]]]
; CHECK-ARMV5TE: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], [[OFFSET0]]]
; CHECK-T2: movw [[ADDR1:r[0-9]+]], :lower16:y
; CHECK-T2-NEXT: movw [[ADDR0:r[0-9]+]], :lower16:x
; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y
; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x
; CHECK-T2-NEXT: add.w [[ADDR0]], [[ADDR0]], #1024
; CHECK-T2-NEXT: add.w [[ADDR1]], [[ADDR1]], #1024
; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]]
; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1024]
; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1028]
; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1028]
; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1024]
entry:
%0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1024) to i64*), align 8
store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1024) to i64*), align 8
ret void
}