forked from OSchip/llvm-project
[AArch64] Fix halfword load merging for big-endian targets
For big-endian targets, when we merge two halfword loads into a word load, the order of the halfwords in the loaded value is reversed compared to little-endian, so the load-store optimiser needs to swap the destination registers. This does not affect merging of two word loads, as we use ldp, which treats the memory as two separate 32-bit words. llvm-svn: 252597
This commit is contained in:
parent
bf5210f8ed
commit
d414c99b9c
|
@ -84,6 +84,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
|
|||
|
||||
const AArch64InstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
const AArch64Subtarget *Subtarget;
|
||||
|
||||
// Scan the instructions looking for a load/store that can be combined
|
||||
// with the current instruction into a load/store pair.
|
||||
|
@ -537,6 +538,10 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
|||
if (!IsUnscaled)
|
||||
OffsetImm /= 2;
|
||||
MachineInstr *RtNewDest = MergeForward ? I : Paired;
|
||||
// When merging small (< 32 bit) loads for big-endian targets, the order of
|
||||
// the component parts gets swapped.
|
||||
if (!Subtarget->isLittleEndian())
|
||||
std::swap(RtMI, Rt2MI);
|
||||
// Construct the new load instruction.
|
||||
// FIXME: currently we support only halfword unsigned load. We need to
|
||||
// handle byte type, signed, and store instructions as well.
|
||||
|
@ -560,7 +565,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
|||
DEBUG((NewMemMI)->print(dbgs()));
|
||||
|
||||
MachineInstr *ExtDestMI = MergeForward ? Paired : I;
|
||||
if (ExtDestMI == Rt2MI) {
|
||||
if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
|
||||
// Create the bitfield extract for high half.
|
||||
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(AArch64::UBFMWri))
|
||||
|
@ -1388,8 +1393,9 @@ bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
|
|||
}
|
||||
|
||||
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
||||
TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
|
||||
TRI = Fn.getSubtarget().getRegisterInfo();
|
||||
Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
|
||||
TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
|
||||
TRI = Subtarget->getRegisterInfo();
|
||||
|
||||
bool Modified = false;
|
||||
bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
|
||||
|
|
|
@ -1,36 +1,51 @@
|
|||
; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
|
||||
; RUN: llc < %s -march=aarch64_be -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE
|
||||
|
||||
; CHECK-LABEL: Ldrh_merge
|
||||
; CHECK-NOT: ldrh
|
||||
; CHECK: ldr [[NEW_DEST:w[0-9]+]]
|
||||
; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
|
||||
; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]]
|
||||
; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
|
||||
; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
|
||||
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
|
||||
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
|
||||
define i16 @Ldrh_merge(i16* nocapture readonly %p) {
|
||||
%1 = load i16, i16* %p, align 2
|
||||
%arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
|
||||
%2 = load i16, i16* %arrayidx2, align 2
|
||||
%add = add nuw nsw i16 %1, %2
|
||||
%add = sub nuw nsw i16 %1, %2
|
||||
ret i16 %add
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Ldurh_merge
|
||||
; CHECK-NOT: ldurh
|
||||
; CHECK: ldur [[NEW_DEST:w[0-9]+]]
|
||||
; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff
|
||||
; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]]
|
||||
; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
|
||||
; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]]
|
||||
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
|
||||
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
|
||||
define i16 @Ldurh_merge(i16* nocapture readonly %p) {
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
|
||||
%0 = load i16, i16* %arrayidx
|
||||
%arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
|
||||
%1 = load i16, i16* %arrayidx3
|
||||
%add = add nuw nsw i16 %0, %1
|
||||
%add = sub nuw nsw i16 %0, %1
|
||||
ret i16 %add
|
||||
}
|
||||
|
||||
; CHECK-LABEL: Ldrh_4_merge
|
||||
; CHECK-NOT: ldrh
|
||||
; CHECK: ldp [[NEW_DEST:w[0-9]+]]
|
||||
; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0]
|
||||
; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff
|
||||
; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16
|
||||
; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff
|
||||
; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16
|
||||
; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]]
|
||||
; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]]
|
||||
; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]]
|
||||
; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]]
|
||||
; LE: sub w0, [[TEMP2]], [[WORD2HI]]
|
||||
; BE: sub w0, [[TEMP2]], [[WORD2LO]]
|
||||
define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
|
||||
%arrayidx = getelementptr inbounds i16, i16* %P, i64 0
|
||||
%l0 = load i16, i16* %arrayidx
|
||||
|
@ -40,8 +55,8 @@ define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
|
|||
%l2 = load i16, i16* %arrayidx7
|
||||
%arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
|
||||
%l3 = load i16, i16* %arrayidx12
|
||||
%add4 = add nuw nsw i16 %l1, %l0
|
||||
%add9 = add nuw nsw i16 %add4, %l2
|
||||
%add14 = add nuw nsw i16 %add9, %l3
|
||||
%add4 = sub nuw nsw i16 %l1, %l0
|
||||
%add9 = udiv i16 %add4, %l2
|
||||
%add14 = sub nuw nsw i16 %add9, %l3
|
||||
ret i16 %add14
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue