forked from OSchip/llvm-project
Lower thumbv4t & thumbv5 lo->lo copies through a push-pop sequence
On pre-v6 hardware, 'MOV lo, lo' gives undefined results, so such copies need to be avoided. This patch opts for a simple, quick-to-implement fix at the expense of performance. As they say: correctness first, then performance. See http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html for a few ideas on how to make this better. llvm-svn: 216138
This commit is contained in:
parent
a56749064a
commit
44937d98a3
|
@ -11,6 +11,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "ARMSubtarget.h"
|
||||
#include "Thumb1InstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
|
@ -41,10 +42,30 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const {
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc)));
|
||||
// Need to check the arch.
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>();
|
||||
|
||||
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
|
||||
"Thumb1 can only copy GPR registers");
|
||||
|
||||
if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
|
||||
|| !ARM::tGPRRegClass.contains(DestReg))
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc)));
|
||||
else {
|
||||
// FIXME: The performance consequences of this are going to be atrocious.
|
||||
// Some things to try that should be better:
|
||||
// * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
|
||||
// * 'movs $dst, $src' if cpsr isn't live
|
||||
// See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html
|
||||
|
||||
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
|
||||
.addReg(DestReg, getDefRegState(true));
|
||||
}
|
||||
}
|
||||
|
||||
void Thumb1InstrInfo::
|
||||
|
|
|
@ -20,12 +20,15 @@ entry:
|
|||
|
||||
; CHECK-THUMB-LABEL: test_cmpxchg_res_i8
|
||||
; CHECK-THUMB: bl __sync_val_compare_and_swap_1
|
||||
; CHECK-THUMB: mov [[R1:r[0-9]+]], r0
|
||||
; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0
|
||||
; CHECK-THUMB: push {r0}
|
||||
; CHECK-THUMB: pop {[[R1:r[0-7]]]}
|
||||
; CHECK-THUMB: movs r0, #1
|
||||
; CHECK-THUMB: movs [[R2:r[0-9]+]], #0
|
||||
; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}}
|
||||
; CHECK-THUMB: beq
|
||||
; CHECK-THUMB: mov r0, [[R2]]
|
||||
; CHECK-THUMB: push {[[R2]]}
|
||||
; CHECK-THUMB: pop {r0}
|
||||
|
||||
; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8
|
||||
; CHECK-ARMV7: ldrexb [[R3:r[0-9]+]], [r0]
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
; RUN: llc -mtriple=armv4-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
|
||||
; RUN: llc -mtriple=armv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
|
||||
; RUN: llc -mtriple=armv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
|
||||
; RUN: llc -mtriple=armv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
|
||||
; RUN: llc -mtriple=armv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
|
||||
; RUN: llc -mtriple=thumbv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
|
||||
; RUN: llc -mtriple=thumbv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
|
||||
; CHECK-LOLOMOV-LABEL: foo
|
||||
; CHECK-LOLOMOV: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
|
||||
; CHECK-LOLOMOV-NEXT: mov [[SRC1]], [[SRC2:r[01]]]
|
||||
; CHECK-LOLOMOV-NEXT: mov [[SRC2]], [[TMP]]
|
||||
; CHECK-LOLOMOV-LABEL: bar
|
||||
; CHECK-LOLOMOV-LABEL: fnend
|
||||
;
|
||||
; 'MOV lo, lo' in Thumb mode produces undefined results on pre-v6 hardware
|
||||
; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV
|
||||
; RUN: llc -mtriple=thumbv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV
|
||||
; CHECK-NOLOLOMOV-LABEL: foo
|
||||
; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
|
||||
; CHECK-NOLOLOMOV: push {[[SRC1:r[01]]]}
|
||||
; CHECK-NOLOLOMOV-NEXT: pop {[[TMP:r[0-7]]]}
|
||||
; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
|
||||
; CHECK-NOLOLOMOV: push {[[SRC2:r[01]]]}
|
||||
; CHECK-NOLOLOMOV-NEXT: pop {[[SRC1]]}
|
||||
; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
|
||||
; CHECK-NOLOLOMOV: push {[[TMP]]}
|
||||
; CHECK-NOLOLOMOV-NEXT: pop {[[SRC2]]}
|
||||
; CHECK-NOLOLOMOV-LABEL: bar
|
||||
; CHECK-NOLOLOMOV-LABEL: fnend
|
||||
|
||||
declare void @bar(i32, i32)
|
||||
|
||||
define void @foo(i32 %a, i32 %b) {
|
||||
entry:
|
||||
call void @bar(i32 %b, i32 %a);
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue