forked from OSchip/llvm-project
[AArch64][GlobalISel] Select XRO addressing mode with wide immediates
Port the wide immediate case from AArch64DAGToDAGISel::SelectAddrModeXRO. If we have a wide immediate which can't be represented in an add, we can end up with code like this: ``` mov x0, imm add x1, base, x0 ldr x2, [x1, 0] ``` If we use the [base, xN] addressing mode instead, we can produce this: ``` mov x0, imm ldr x2, [base, x0] ``` This saves 0.4% code size on 7zip at -O3, and gives a geomean code size improvement of 0.1% on CTMark. Differential Revision: https://reviews.llvm.org/D84784
This commit is contained in:
parent
766cb615a3
commit
7ff9575594
|
@ -5083,11 +5083,59 @@ InstructionSelector::ComplexRendererFns
|
|||
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
|
||||
unsigned SizeInBytes) const {
|
||||
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
|
||||
|
||||
// If we have a constant offset, then we probably don't want to match a
|
||||
// register offset.
|
||||
if (isBaseWithConstantOffset(Root, MRI))
|
||||
if (!Root.isReg())
|
||||
return None;
|
||||
MachineInstr *PtrAdd =
|
||||
getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
|
||||
if (!PtrAdd)
|
||||
return None;
|
||||
|
||||
// Check for an immediate which cannot be encoded in the [base + imm]
|
||||
// addressing mode, and can't be encoded in an add/sub. If this happens, we'll
|
||||
// end up with code like:
|
||||
//
|
||||
// mov x0, wide
|
||||
// add x1, base, x0
|
||||
// ldr x2, [x1, 0]
|
||||
//
|
||||
// In this situation, we can use the [base, xreg] addressing mode to save an
|
||||
// add/sub:
|
||||
//
|
||||
// mov x0, wide
|
||||
// ldr x2, [base, x0]
|
||||
auto ValAndVReg =
|
||||
getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
|
||||
if (ValAndVReg) {
|
||||
unsigned Scale = Log2_32(SizeInBytes);
|
||||
int64_t ImmOff = ValAndVReg->Value;
|
||||
|
||||
// Skip immediates that can be selected in the load/store addressing
|
||||
// mode.
|
||||
if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
|
||||
ImmOff < (0x1000 << Scale))
|
||||
return None;
|
||||
|
||||
// Helper lambda to decide whether or not it is preferable to emit an add.
|
||||
auto isPreferredADD = [](int64_t ImmOff) {
|
||||
// Constants in [0x0, 0xfff] can be encoded in an add.
|
||||
if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
|
||||
return true;
|
||||
|
||||
// Can it be encoded in an add lsl #12?
|
||||
if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
|
||||
return false;
|
||||
|
||||
// It can be encoded in an add lsl #12, but we may not want to. If it is
|
||||
// possible to select this as a single movz, then prefer that. A single
|
||||
// movz is faster than an add with a shift.
|
||||
return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
|
||||
(ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
|
||||
};
|
||||
|
||||
// If the immediate can be encoded in a single add/sub, then bail out.
|
||||
if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
|
||||
return None;
|
||||
}
|
||||
|
||||
// Try to fold shifts into the addressing mode.
|
||||
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
|
||||
|
|
|
@ -0,0 +1,211 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||
#
|
||||
# Test using the xro addressing mode with immediates. This should be done for
|
||||
# wide constants which are preferably selected using a mov rather than an add.
|
||||
|
||||
...
|
||||
---
|
||||
name: use_xro_cannot_encode_add_lsl
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Check that we use the XRO addressing mode when the constant cannot be
|
||||
; represented using an add + lsl.
|
||||
;
|
||||
; cst = 0000000111000000
|
||||
; cst & 000fffffff000000 != 0
|
||||
;
|
||||
; CHECK-LABEL: name: use_xro_cannot_encode_add_lsl
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64sp = COPY $x0
|
||||
; CHECK: %cst:gpr64 = MOVi64imm 4580179968
|
||||
; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 4580179968
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: use_xro_preferred_mov
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Check that we use the XRO addressing mode when the constant can be
|
||||
; represented using a single movk.
|
||||
;
|
||||
; cst = 000000000000f000
|
||||
; cst & 000fffffff000000 == 0
|
||||
; cst & ffffffffffff0fff != 0
|
||||
;
|
||||
; CHECK-LABEL: name: use_xro_preferred_mov
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64sp = COPY $x0
|
||||
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 61440
|
||||
; CHECK: %cst:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
|
||||
; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 61440
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: use_xro_negative_imm
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Check that this works even if we have a negative immediate.
|
||||
;
|
||||
; CHECK-LABEL: name: use_xro_negative_imm
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64sp = COPY $x0
|
||||
; CHECK: %cst:gpr64 = MOVi64imm -61440
|
||||
; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 -61440
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_use_xro_selectable_imm
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Immediates that can be encoded in a LDRXui should be skipped.
|
||||
;
|
||||
; CHECK-LABEL: name: dont_use_xro_selectable_imm
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64sp = COPY $x0
|
||||
; CHECK: %load:gpr64 = LDRXui %copy, 2 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 16
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_use_xro_selectable_negative_imm
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Immediates that can be encoded in a LDRXui should be skipped.
|
||||
;
|
||||
; CHECK-LABEL: name: dont_use_xro_selectable_negative_imm
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64sp = COPY $x0
|
||||
; CHECK: %load:gpr64 = LDURXi %copy, -16 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 -16
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_use_xro_zero
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Immediates that can be encoded in a LDRXui should be skipped.
|
||||
;
|
||||
; CHECK-LABEL: name: dont_use_xro_zero
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64sp = COPY $x0
|
||||
; CHECK: %load:gpr64 = LDRXui %copy, 0 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 0
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_use_xro_in_range
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Check that we skip constants which can be encoded in an add.
|
||||
; 17 is in [0x0, 0xfff]
|
||||
;
|
||||
; CHECK-LABEL: name: dont_use_xro_in_range
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64sp = COPY $x0
|
||||
; CHECK: %load:gpr64 = LDURXi %copy, 17 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 17
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_use_xro_add_lsl
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; Check that we skip when we have an add with an lsl which cannot be
|
||||
; represented as a movk.
|
||||
;
|
||||
; cst = 0x0000000000111000
|
||||
; cst & 000fffffff000000 = 0
|
||||
; cst & ffffffffff00ffff != 0
|
||||
; cst & ffffffffffff0fff != 0
|
||||
;
|
||||
; CHECK-LABEL: name: dont_use_xro_add_lsl
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr64 = COPY $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %copy
|
||||
; CHECK: %addr:gpr64sp = ADDXri [[COPY]], 273, 12
|
||||
; CHECK: %load:gpr64 = LDRXui %addr, 0 :: (volatile load 8)
|
||||
; CHECK: RET_ReallyLR
|
||||
%copy:gpr(p0) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 1118208
|
||||
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
|
||||
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
|
||||
RET_ReallyLR
|
Loading…
Reference in New Issue