forked from OSchip/llvm-project
ARM: fix Thumb2 CodeGen for ldrex with folded frame-index.
Because t2LDREX (& t2STREX) were marked as AddrModeNone, but did allow a FrameIndex operand, rewriteT2FrameIndex asserted. This gives them a proper addressing-mode and tells the rewriter about it so that encodable offsets are exploited and others are rejected. Should fix PR38828. llvm-svn: 341642
This commit is contained in:
parent
d49c32ce3f
commit
bb7d7b3d33
|
@ -1516,6 +1516,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
|
|||
break;
|
||||
case ARMII::AddrMode5:
|
||||
case ARMII::AddrModeT2_i8s4:
|
||||
case ARMII::AddrModeT2_ldrex:
|
||||
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
|
||||
break;
|
||||
case ARMII::AddrModeT2_i12:
|
||||
|
|
|
@ -109,6 +109,7 @@ def AddrModeT2_pc : AddrMode<14>;
|
|||
def AddrModeT2_i8s4 : AddrMode<15>;
|
||||
def AddrMode_i12 : AddrMode<16>;
|
||||
def AddrMode5FP16 : AddrMode<17>;
|
||||
def AddrModeT2_ldrex : AddrMode<18>;
|
||||
|
||||
// Load / store index mode.
|
||||
class IndexMode<bits<2> val> {
|
||||
|
|
|
@ -3275,7 +3275,7 @@ def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
|
|||
[(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]>;
|
||||
def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
|
||||
AddrModeNone, 4, NoItinerary,
|
||||
AddrModeT2_ldrex, 4, NoItinerary,
|
||||
"ldrex", "\t$Rt, $addr", "",
|
||||
[(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]> {
|
||||
|
@ -3354,7 +3354,7 @@ def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
|
|||
|
||||
def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
|
||||
t2addrmode_imm0_1020s4:$addr),
|
||||
AddrModeNone, 4, NoItinerary,
|
||||
AddrModeT2_ldrex, 4, NoItinerary,
|
||||
"strex", "\t$Rd, $Rt, $addr", "",
|
||||
[(set rGPR:$Rd,
|
||||
(strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>,
|
||||
|
|
|
@ -201,7 +201,8 @@ namespace ARMII {
|
|||
AddrModeT2_pc = 14, // +/- i12 for pc relative data
|
||||
AddrModeT2_i8s4 = 15, // i8 * 4
|
||||
AddrMode_i12 = 16,
|
||||
AddrMode5FP16 = 17 // i8 * 2
|
||||
AddrMode5FP16 = 17, // i8 * 2
|
||||
AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
|
||||
};
|
||||
|
||||
inline static const char *AddrModeToString(AddrMode addrmode) {
|
||||
|
@ -224,6 +225,7 @@ namespace ARMII {
|
|||
case AddrModeT2_pc: return "AddrModeT2_pc";
|
||||
case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
|
||||
case AddrMode_i12: return "AddrMode_i12";
|
||||
case AddrModeT2_ldrex:return "AddrModeT2_ldrex";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -621,6 +621,11 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
|||
// MCInst operand expects already scaled value.
|
||||
Scale = 1;
|
||||
assert((Offset & 3) == 0 && "Can't encode this offset!");
|
||||
} else if (AddrMode == ARMII::AddrModeT2_ldrex) {
|
||||
Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
|
||||
NumBits = 8; // 8 bits scaled by 4
|
||||
Scale = 4;
|
||||
assert((Offset & 3) == 0 && "Can't encode this offset!");
|
||||
} else {
|
||||
llvm_unreachable("Unsupported addressing mode!");
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
; RUN: llc -mtriple=thumbv7-linux-gnueabi -o - %s | FileCheck %s
|
||||
|
||||
; This alloca is just large enough that FrameLowering decides it needs a frame
|
||||
; to guarantee access, based on the range of ldrex.
|
||||
|
||||
; The actual alloca size is a bit of black magic, unfortunately: the real
|
||||
; maximum accessible is 1020, but FrameLowering adds 16 bytes to its estimated
|
||||
; stack size just because, so the alloca is not actually what the limit gets
|
||||
; compared to. The important point is that we don't go up to ~4096, which is the
|
||||
; default with no strange instructions.
|
||||
define void @test_large_frame() {
|
||||
; CHECK-LABEL: test_large_frame:
|
||||
; CHECK: push
|
||||
; CHECK: sub.w sp, sp, #1004
|
||||
|
||||
%ptr = alloca i32, i32 251
|
||||
|
||||
%addr = getelementptr i32, i32* %ptr, i32 1
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
|
||||
ret void
|
||||
}
|
||||
|
||||
; This alloca is just the other side of the limit, so no frame is needed.
|
||||
define void @test_small_frame() {
|
||||
; CHECK-LABEL: test_small_frame:
|
||||
; CHECK-NOT: push
|
||||
; CHECK: sub.w sp, sp, #1000
|
||||
|
||||
%ptr = alloca i32, i32 250
|
||||
|
||||
%addr = getelementptr i32, i32* %ptr, i32 1
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.arm.ldrex.p0i32(i32*)
|
|
@ -142,6 +142,91 @@ define void @excl_addrmode() {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @test_excl_addrmode_folded() {
|
||||
; CHECK-LABEL: test_excl_addrmode_folded:
|
||||
%local = alloca i8, i32 4096
|
||||
|
||||
%local.0 = getelementptr i8, i8* %local, i32 4
|
||||
%local32.0 = bitcast i8* %local.0 to i32*
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
|
||||
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
|
||||
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #4]
|
||||
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #4]
|
||||
|
||||
%local.1 = getelementptr i8, i8* %local, i32 1020
|
||||
%local32.1 = bitcast i8* %local.1 to i32*
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %local32.1)
|
||||
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.1)
|
||||
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #1020]
|
||||
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #1020]
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_excl_addrmode_range() {
|
||||
; CHECK-LABEL: test_excl_addrmode_range:
|
||||
%local = alloca i8, i32 4096
|
||||
|
||||
%local.0 = getelementptr i8, i8* %local, i32 1024
|
||||
%local32.0 = bitcast i8* %local.0 to i32*
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
|
||||
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
|
||||
; CHECK-T2ADDRMODE: mov r[[TMP:[0-9]+]], sp
|
||||
; CHECK-T2ADDRMODE: add.w r[[ADDR:[0-9]+]], r[[TMP]], #1024
|
||||
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
|
||||
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_excl_addrmode_align() {
|
||||
; CHECK-LABEL: test_excl_addrmode_align:
|
||||
%local = alloca i8, i32 4096
|
||||
|
||||
%local.0 = getelementptr i8, i8* %local, i32 2
|
||||
%local32.0 = bitcast i8* %local.0 to i32*
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
|
||||
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
|
||||
; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp
|
||||
; CHECK-T2ADDRMODE: adds r[[ADDR:[0-9]+]], #2
|
||||
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
|
||||
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_excl_addrmode_sign() {
|
||||
; CHECK-LABEL: test_excl_addrmode_sign:
|
||||
%local = alloca i8, i32 4096
|
||||
|
||||
%local.0 = getelementptr i8, i8* %local, i32 -4
|
||||
%local32.0 = bitcast i8* %local.0 to i32*
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
|
||||
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
|
||||
; CHECK-T2ADDRMODE: mov r[[ADDR:[0-9]+]], sp
|
||||
; CHECK-T2ADDRMODE: subs r[[ADDR:[0-9]+]], #4
|
||||
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
|
||||
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_excl_addrmode_combination() {
|
||||
; CHECK-LABEL: test_excl_addrmode_combination:
|
||||
%local = alloca i8, i32 4096
|
||||
%unused = alloca i8, i32 64
|
||||
|
||||
%local.0 = getelementptr i8, i8* %local, i32 4
|
||||
%local32.0 = bitcast i8* %local.0 to i32*
|
||||
call i32 @llvm.arm.ldrex.p0i32(i32* %local32.0)
|
||||
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %local32.0)
|
||||
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [sp, #68]
|
||||
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [sp, #68]
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; LLVM should know, even across basic blocks, that ldrex is setting the high
|
||||
; bits of its i32 to 0. There should be no zero-extend operation.
|
||||
define zeroext i8 @test_cross_block_zext_i8(i1 %tst, i8* %addr) {
|
||||
|
|
Loading…
Reference in New Issue