[AArch64][GlobalISel] Select XRO addressing mode with wide immediates

Port the wide immediate case from AArch64DAGToDAGISel::SelectAddrModeXRO.

If we have a wide immediate which can't be represented in an add, we can end up
with code like this:

```
mov  x0, imm
add  x1, base, x0
ldr  x2, [x1, 0]
```

If we use the [base, xN] addressing mode instead, we can produce this:

```
mov  x0, imm
ldr  x2, [base, x0]
```

This saves 0.4% code size on 7zip at -O3, and gives a geomean code size
improvement of 0.1% on CTMark.
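For illustration (this snippet is not from the commit; the function name is made up and the offset is borrowed from the new MIR test below), a load with a wide offset like this one hits the pattern:

```cpp
// 0x111000000 bytes neither fits LDR's scaled 12-bit immediate field nor
// encodes as an add/sub immediate (with or without lsl #12), so it has to
// be materialized into a register first.
long load_wide(const char *base) {
  // Schematically (the wide constant really expands to movz/movk, and
  // register numbers will vary):
  //   before: mov x8, imm ; add x9, x0, x8 ; ldr x0, [x9]
  //   after:  mov x8, imm ; ldr x0, [x0, x8]
  return *reinterpret_cast<const long *>(base + 0x111000000LL);
}
```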

Differential Revision: https://reviews.llvm.org/D84784
Author: Jessica Paquette
Date: 2020-07-28 11:33:39 -07:00
Commit: 7ff9575594 (parent 766cb615a3)
2 changed files with 263 additions and 4 deletions


@@ -5083,11 +5083,59 @@ InstructionSelector::ComplexRendererFns
 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                               unsigned SizeInBytes) const {
   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
-
-  // If we have a constant offset, then we probably don't want to match a
-  // register offset.
-  if (isBaseWithConstantOffset(Root, MRI))
+  if (!Root.isReg())
     return None;
+  MachineInstr *PtrAdd =
+      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+  if (!PtrAdd)
+    return None;
+
+  // Check for immediates which cannot be encoded in the [base + imm]
+  // addressing mode, and can't be encoded in an add/sub. If this happens,
+  // we'll end up with code like:
+  //
+  //   mov  x0, wide
+  //   add  x1, base, x0
+  //   ldr  x2, [x1, 0]
+  //
+  // In this situation, we can use the [base, xreg] addressing mode to save an
+  // add/sub:
+  //
+  //   mov  x0, wide
+  //   ldr  x2, [base, x0]
+  auto ValAndVReg =
+      getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+  if (ValAndVReg) {
+    unsigned Scale = Log2_32(SizeInBytes);
+    int64_t ImmOff = ValAndVReg->Value;
+
+    // Skip immediates that can be selected in the load/store addressing
+    // mode.
+    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
+        ImmOff < (0x1000 << Scale))
+      return None;
+
+    // Helper lambda to decide whether or not it is preferable to emit an add.
+    auto isPreferredADD = [](int64_t ImmOff) {
+      // Constants in [0x0, 0xfff] can be encoded in an add.
+      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+        return true;
+
+      // Can it be encoded in an add lsl #12?
+      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
+        return false;
+
+      // It can be encoded in an add lsl #12, but we may not want to. If it is
+      // possible to select this as a single movz, then prefer that. A single
+      // movz is faster than an add with a shift.
+      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+    };
+
+    // If the immediate can be encoded in a single add/sub, then bail out.
+    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+      return None;
+  }
 
   // Try to fold shifts into the addressing mode.
   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
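The bitmask logic above is easy to sanity-check outside LLVM. The sketch below is a hedged, standalone reimplementation, not the patch itself: `shouldUseXRO` is an invented name, and `__builtin_ctz` stands in for LLVM's `Log2_32`. The constants come from the new MIR test.

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the isPreferredADD lambda: true if ImmOff fits a plain add/sub
// imm12, or fits add imm12 lsl #12 and is not better done as a single movz.
static bool isPreferredADD(int64_t ImmOff) {
  if ((ImmOff & 0xfffffffffffff000LL) == 0) // fits imm12
    return true;
  if ((ImmOff & 0xffffffffff000fffLL) != 0) // doesn't fit imm12 lsl #12
    return false;
  // Fits add lsl #12; still prefer a single movz when one suffices.
  return (ImmOff & 0xffffffffff00ffffLL) != 0 &&
         (ImmOff & 0xffffffffffff0fffLL) != 0;
}

// True when the register-offset (XRO) form should be used for this offset.
static bool shouldUseXRO(int64_t ImmOff, unsigned SizeInBytes) {
  unsigned Scale = __builtin_ctz(SizeInBytes); // Log2_32 stand-in
  // Selectable directly in the scaled [base + imm12] load/store form.
  if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
      ImmOff < (0x1000LL << Scale))
    return false;
  // Encodable as a single add/sub: keep the add instead.
  if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
    return false;
  return true;
}

int main() {
  assert(shouldUseXRO(0x111000000, 8)); // can't encode as add lsl #12
  assert(shouldUseXRO(0xf000, 8));      // add lsl #12 fits, but movz wins
  assert(shouldUseXRO(-0xf000, 8));     // negative wide offset
  assert(!shouldUseXRO(16, 8));         // fits LDRXui directly
  assert(!shouldUseXRO(-16, 8));        // a single sub #16 handles it
  assert(!shouldUseXRO(0x111000, 8));   // add lsl #12 is preferable
}
```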


@@ -0,0 +1,211 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# Test using the XRO addressing mode with immediates. This should be done for
# wide constants which are preferably materialized with a mov rather than
# folded into an add.
...
---
name: use_xro_cannot_encode_add_lsl
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Check that we use the XRO addressing mode when the constant cannot be
; represented using an add + lsl.
;
; cst = 0000000111000000
; cst & 000fffffff000000 != 0
;
; CHECK-LABEL: name: use_xro_cannot_encode_add_lsl
; CHECK: liveins: $x0
; CHECK: %copy:gpr64sp = COPY $x0
; CHECK: %cst:gpr64 = MOVi64imm 4580179968
; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 4580179968
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
---
name: use_xro_preferred_mov
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Check that we use the XRO addressing mode when the constant can be
; represented using a single movz.
;
; cst = 000000000000f000
; cst & 000fffffff000000 == 0
; cst & ffffffffffff0fff == 0
;
; CHECK-LABEL: name: use_xro_preferred_mov
; CHECK: liveins: $x0
; CHECK: %copy:gpr64sp = COPY $x0
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 61440
; CHECK: %cst:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 61440
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
---
name: use_xro_negative_imm
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Check that this works even if we have a negative immediate.
;
; CHECK-LABEL: name: use_xro_negative_imm
; CHECK: liveins: $x0
; CHECK: %copy:gpr64sp = COPY $x0
; CHECK: %cst:gpr64 = MOVi64imm -61440
; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 -61440
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
---
name: dont_use_xro_selectable_imm
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Immediates that can be encoded in a LDRXui should be skipped.
;
; CHECK-LABEL: name: dont_use_xro_selectable_imm
; CHECK: liveins: $x0
; CHECK: %copy:gpr64sp = COPY $x0
; CHECK: %load:gpr64 = LDRXui %copy, 2 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 16
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
---
name: dont_use_xro_selectable_negative_imm
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Negative immediates that can be selected as an unscaled LDURXi should be
; skipped.
;
; CHECK-LABEL: name: dont_use_xro_selectable_negative_imm
; CHECK: liveins: $x0
; CHECK: %copy:gpr64sp = COPY $x0
; CHECK: %load:gpr64 = LDURXi %copy, -16 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 -16
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
---
name: dont_use_xro_zero
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Immediates that can be encoded in a LDRXui should be skipped.
;
; CHECK-LABEL: name: dont_use_xro_zero
; CHECK: liveins: $x0
; CHECK: %copy:gpr64sp = COPY $x0
; CHECK: %load:gpr64 = LDRXui %copy, 0 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 0
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
---
name: dont_use_xro_in_range
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Check that we skip constants which can be encoded in an add.
; 17 is in [0x0, 0xfff]
;
; CHECK-LABEL: name: dont_use_xro_in_range
; CHECK: liveins: $x0
; CHECK: %copy:gpr64sp = COPY $x0
; CHECK: %load:gpr64 = LDURXi %copy, 17 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 17
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
---
name: dont_use_xro_add_lsl
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $x0
; Check that we skip when we have an add with an lsl which cannot be
; represented as a single movz.
;
; cst = 0x0000000000111000
; cst & 000fffffff000000 == 0
; cst & ffffffffff00ffff != 0
; cst & ffffffffffff0fff != 0
;
; CHECK-LABEL: name: dont_use_xro_add_lsl
; CHECK: liveins: $x0
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %copy
; CHECK: %addr:gpr64sp = ADDXri [[COPY]], 273, 12
; CHECK: %load:gpr64 = LDRXui %addr, 0 :: (volatile load 8)
; CHECK: RET_ReallyLR
%copy:gpr(p0) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 1118208
%addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
%load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
RET_ReallyLR
...
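To tie the mask comments in these tests back to the selector's heuristic, here is a hedged standalone check (simplified to positive constants, ignoring the scaled-imm12 load case and negated adds; names are illustrative):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative only: re-evaluates the masks quoted in the test comments for
// the positive constants above (the scaled-imm12 and negative cases are
// handled separately in the selector).
int main() {
  const struct { const char *test; int64_t cst; } cases[] = {
      {"use_xro_cannot_encode_add_lsl", 0x111000000}, // 4580179968
      {"use_xro_preferred_mov", 0xf000},              // 61440
      {"dont_use_xro_add_lsl", 0x111000},             // 1118208
  };
  for (const auto &c : cases) {
    bool fitsAddLsl12 = (c.cst & 0xffffffffff000fffLL) == 0;
    bool singleMovz = (c.cst & 0xffffffffff00ffffLL) == 0 ||
                      (c.cst & 0xffffffffffff0fffLL) == 0;
    // The add is kept only when it encodes as add lsl #12 and a single
    // movz would not do; otherwise the XRO form wins.
    bool useXRO = !fitsAddLsl12 || singleMovz;
    std::printf("%-30s add+lsl12=%d single-movz=%d => %s\n", c.test,
                (int)fitsAddLsl12, (int)singleMovz,
                useXRO ? "XRO" : "fold into add");
  }
}
```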