forked from OSchip/llvm-project
[ARM] Add a SelectTAddrModeImm7 for MVE narrow loads and stores
We were previously using the SelectT2AddrModeImm7 for both normal and narrowing MVE loads/stores. As the narrowing instructions do not accept sp as a register, it makes little sense to optimise a FrameIndex into the load, only to have to recover that later on. This adds a SelectTAddrModeImm7 which does not do that folding, and uses it for narrowing load/store patterns. Differential Revision: https://reviews.llvm.org/D67489 llvm-svn: 372134
This commit is contained in:
parent
c42ca16cfa
commit
91724b8530
|
@ -139,6 +139,8 @@ public:
|
|||
bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
|
||||
SDValue &OffImm);
|
||||
bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
|
||||
template <unsigned Shift>
|
||||
bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
|
||||
|
||||
// Thumb 2 Addressing Modes:
|
||||
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
|
||||
|
@ -1151,6 +1153,28 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Address-mode selector for a base register plus a scaled immediate offset,
// used (per the commit description) by the narrowing MVE load/store patterns.
//
// Template parameter:
//   Shift  - log2 of the offset scale; the constant is checked against a
//            scale of (1 << Shift) and the emitted offset is RHSC << Shift.
// Parameters:
//   N      - the address node being matched.
//   Base   - out: the node to use as the base register.
//   OffImm - out: an i32 target constant holding the byte offset.
// Returns true on every path: either a base+offset split is produced, or N
// itself becomes the base with a zero offset.
//
// Unlike the T2 variant mentioned in the commit description, nothing here
// rewrites a FrameIndex base; Base is always passed through as a plain node.
template <unsigned Shift>
|
||||
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
|
||||
SDValue &OffImm) {
|
||||
// Try to split N into (base, constant) when it is a subtraction or an
// add-like node with a constant right-hand side.
if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
|
||||
int RHSC;
|
||||
// NOTE(review): presumably accepts operand 1 only if it is a constant that
// is a multiple of (1 << Shift) whose scaled value lies between -0x7f and
// 0x80, storing the scaled value in RHSC - confirm bound inclusivity
// against isScaledConstantInRange.
if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
|
||||
RHSC)) {
|
||||
Base = N.getOperand(0);
|
||||
// A SUB subtracts the constant, so the encoded offset is its negation.
if (N.getOpcode() == ISD::SUB)
|
||||
RHSC = -RHSC;
|
||||
// Scale RHSC back up by (1 << Shift) before emitting the offset operand.
OffImm =
|
||||
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Base only.
|
||||
// No usable constant offset was found: use N as the base with offset 0.
Base = N;
|
||||
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Thumb 2 Addressing Modes
|
||||
|
|
|
@ -160,7 +160,8 @@ class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
|
|||
let RenderMethod = "addMemImmOffsetOperands";
|
||||
}
|
||||
|
||||
class taddrmode_imm7<int shift> : MemOperand {
|
||||
class taddrmode_imm7<int shift> : MemOperand,
|
||||
ComplexPattern<i32, 2, "SelectTAddrModeImm7<"#shift#">", []> {
|
||||
let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;
|
||||
// They are printed the same way as the T2 imm8 version
|
||||
let PrintMethod = "printT2AddrModeImm8Operand<false>";
|
||||
|
@ -5157,12 +5158,12 @@ let MinAlignment = 2 in {
|
|||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
|
||||
(MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
|
||||
def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr),
|
||||
(MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>;
|
||||
def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr),
|
||||
(MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>;
|
||||
|
||||
def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
|
||||
(MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
|
||||
|
@ -5204,9 +5205,9 @@ multiclass MVEExtLoad<string DestLanes, string DestElemBits,
|
|||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
defm : MVEExtLoad<"4", "32", "8", "B", "", t2addrmode_imm7<0>>;
|
||||
defm : MVEExtLoad<"8", "16", "8", "B", "", t2addrmode_imm7<0>>;
|
||||
defm : MVEExtLoad<"4", "32", "16", "H", "_align2", t2addrmode_imm7<1>>;
|
||||
defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>;
|
||||
defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>;
|
||||
defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -77,10 +77,9 @@ define arm_aapcs_vfpcc void @vstrh32() {
|
|||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x6
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstrh.32 q0, [r0, #4]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: bl func
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -101,10 +100,9 @@ define arm_aapcs_vfpcc void @vstrb32() {
|
|||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x6
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstrb.32 q0, [r0, #6]
|
||||
; CHECK-NEXT: add r0, sp, #4
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x6
|
||||
; CHECK-NEXT: vstrb.32 q0, [r0, #2]
|
||||
; CHECK-NEXT: bl func
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -125,10 +123,9 @@ define arm_aapcs_vfpcc void @vstrb16() {
|
|||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vmov.i32 q0, #0x0
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vstrb.16 q0, [r0, #2]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: bl func
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
|
@ -212,16 +209,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i16> @vldrh32() {
|
||||
; CHECK-LABEL: vldrh32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: mov r4, sp
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl func
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
|
||||
; CHECK-NEXT: vldrh.u32 q0, [r4, #4]
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%d = alloca [4 x i16], align 2
|
||||
%arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %d, i32 0, i32 0
|
||||
|
@ -235,16 +232,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <4 x i8> @vldrb32() {
|
||||
; CHECK-LABEL: vldrb32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: add r0, sp, #4
|
||||
; CHECK-NEXT: add r4, sp, #4
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl func
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r0, #6]
|
||||
; CHECK-NEXT: vldrb.u32 q0, [r4, #2]
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%d = alloca [4 x i8], align 2
|
||||
%arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %d, i32 0, i32 0
|
||||
|
@ -258,16 +255,16 @@ entry:
|
|||
define arm_aapcs_vfpcc <8 x i8> @vldrb16() {
|
||||
; CHECK-LABEL: vldrb16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: .save {r4, lr}
|
||||
; CHECK-NEXT: push {r4, lr}
|
||||
; CHECK-NEXT: .pad #8
|
||||
; CHECK-NEXT: sub sp, #8
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: mov r4, sp
|
||||
; CHECK-NEXT: mov r0, r4
|
||||
; CHECK-NEXT: bl func
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; CHECK-NEXT: vldrb.u16 q0, [r0, #2]
|
||||
; CHECK-NEXT: vldrb.u16 q0, [r4, #2]
|
||||
; CHECK-NEXT: add sp, #8
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
; CHECK-NEXT: pop {r4, pc}
|
||||
entry:
|
||||
%d = alloca [8 x i8], align 2
|
||||
%arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %d, i32 0, i32 0
|
||||
|
|
Loading…
Reference in New Issue