forked from OSchip/llvm-project
[AArch64][GlobalISel] Select arith extended add/sub in manual selection code
The manual selection code for add/sub was not checking if it was possible to fold in shifts + extends (the *rx opcode variants). As a result, we could never select things like ``` cmp x1, w0, uxtw #2 ``` Because we don't import any patterns for compares. This adds support for the arithmetic shifted register forms and updates tests for instructions selected using `emitADD`, `emitADDS`, and `emitSUBS`. This is a 0.1% geomean code size improvement on SPECINT2000 at -Os. Differential Revision: https://reviews.llvm.org/D91207
This commit is contained in:
parent
20de182246
commit
c42053f79b
|
@ -188,7 +188,8 @@ private:
|
|||
/// {{AArch64::ADDXri, AArch64::ADDWri},
|
||||
/// {AArch64::ADDXrs, AArch64::ADDWrs},
|
||||
/// {AArch64::ADDXrr, AArch64::ADDWrr},
|
||||
/// {AArch64::SUBXri, AArch64::SUBWri}}};
|
||||
/// {AArch64::SUBXri, AArch64::SUBWri},
|
||||
/// {AArch64::ADDXrx, AArch64::ADDWrx}}};
|
||||
/// \endcode
|
||||
///
|
||||
/// Each row in the table corresponds to a different addressing mode. Each
|
||||
|
@ -199,6 +200,7 @@ private:
|
|||
/// - Row 1: The rs opcode variants
|
||||
/// - Row 2: The rr opcode variants
|
||||
/// - Row 3: The ri opcode variants for negative immediates
|
||||
/// - Row 4: The rx opcode variants
|
||||
///
|
||||
/// \attention Columns must be structured as follows:
|
||||
/// - Column 0: The 64-bit opcode variants
|
||||
|
@ -208,7 +210,7 @@ private:
|
|||
/// \p LHS is the left-hand operand of the binop to emit.
|
||||
/// \p RHS is the right-hand operand of the binop to emit.
|
||||
MachineInstr *emitAddSub(
|
||||
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
|
||||
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
|
||||
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
|
||||
MachineIRBuilder &MIRBuilder) const;
|
||||
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
|
||||
|
@ -3821,7 +3823,7 @@ MachineInstr *AArch64InstructionSelector::emitInstr(
|
|||
}
|
||||
|
||||
MachineInstr *AArch64InstructionSelector::emitAddSub(
|
||||
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
|
||||
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
|
||||
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
|
||||
MachineIRBuilder &MIRBuilder) const {
|
||||
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
|
||||
|
@ -3842,6 +3844,11 @@ MachineInstr *AArch64InstructionSelector::emitAddSub(
|
|||
return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
|
||||
MIRBuilder, Fns);
|
||||
|
||||
// INSTRrx form.
|
||||
if (auto Fns = selectArithExtendedRegister(RHS))
|
||||
return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
|
||||
MIRBuilder, Fns);
|
||||
|
||||
// INSTRrs form.
|
||||
if (auto Fns = selectShiftedRegister(RHS))
|
||||
return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
|
||||
|
@ -3854,11 +3861,12 @@ MachineInstr *
|
|||
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
|
||||
MachineOperand &RHS,
|
||||
MachineIRBuilder &MIRBuilder) const {
|
||||
const std::array<std::array<unsigned, 2>, 4> OpcTable{
|
||||
const std::array<std::array<unsigned, 2>, 5> OpcTable{
|
||||
{{AArch64::ADDXri, AArch64::ADDWri},
|
||||
{AArch64::ADDXrs, AArch64::ADDWrs},
|
||||
{AArch64::ADDXrr, AArch64::ADDWrr},
|
||||
{AArch64::SUBXri, AArch64::SUBWri}}};
|
||||
{AArch64::SUBXri, AArch64::SUBWri},
|
||||
{AArch64::ADDXrx, AArch64::ADDWrx}}};
|
||||
return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
|
||||
}
|
||||
|
||||
|
@ -3866,11 +3874,12 @@ MachineInstr *
|
|||
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
|
||||
MachineOperand &RHS,
|
||||
MachineIRBuilder &MIRBuilder) const {
|
||||
const std::array<std::array<unsigned, 2>, 4> OpcTable{
|
||||
const std::array<std::array<unsigned, 2>, 5> OpcTable{
|
||||
{{AArch64::ADDSXri, AArch64::ADDSWri},
|
||||
{AArch64::ADDSXrs, AArch64::ADDSWrs},
|
||||
{AArch64::ADDSXrr, AArch64::ADDSWrr},
|
||||
{AArch64::SUBSXri, AArch64::SUBSWri}}};
|
||||
{AArch64::SUBSXri, AArch64::SUBSWri},
|
||||
{AArch64::ADDSXrx, AArch64::ADDSWrx}}};
|
||||
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
|
||||
}
|
||||
|
||||
|
@ -3878,11 +3887,12 @@ MachineInstr *
|
|||
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
|
||||
MachineOperand &RHS,
|
||||
MachineIRBuilder &MIRBuilder) const {
|
||||
const std::array<std::array<unsigned, 2>, 4> OpcTable{
|
||||
const std::array<std::array<unsigned, 2>, 5> OpcTable{
|
||||
{{AArch64::SUBSXri, AArch64::SUBSWri},
|
||||
{AArch64::SUBSXrs, AArch64::SUBSWrs},
|
||||
{AArch64::SUBSXrr, AArch64::SUBSWrr},
|
||||
{AArch64::ADDSXri, AArch64::ADDSWri}}};
|
||||
{AArch64::ADDSXri, AArch64::ADDSWri},
|
||||
{AArch64::SUBSXrx, AArch64::SUBSWrx}}};
|
||||
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
|
||||
}
|
||||
|
||||
|
|
|
@ -603,3 +603,36 @@ body: |
|
|||
%cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
|
||||
$w0 = COPY %cmp(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: cmn_arith_extended_shl
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $x0, $x1
|
||||
; We should be able to fold away the extend + shift and select ADDSXrx.
|
||||
|
||||
; CHECK-LABEL: name: cmn_arith_extended_shl
|
||||
; CHECK: liveins: $w0, $x0, $x1
|
||||
; CHECK: %reg0:gpr64sp = COPY $x0
|
||||
; CHECK: %reg1:gpr32 = COPY $w0
|
||||
; CHECK: $xzr = ADDSXrx %reg0, %reg1, 50, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s64) = COPY $x0
|
||||
%zero:gpr(s64) = G_CONSTANT i64 0
|
||||
%sub:gpr(s64) = G_SUB %zero, %reg0
|
||||
|
||||
%reg1:gpr(s32) = COPY $w0
|
||||
%ext:gpr(s64) = G_SEXT %reg1(s32)
|
||||
%cst:gpr(s64) = G_CONSTANT i64 2
|
||||
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
|
||||
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ne), %sub(s64), %shift
|
||||
$w0 = COPY %cmp(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
|
|
@ -182,3 +182,91 @@ body: |
|
|||
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
|
||||
$w0 = COPY %cmp(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: cmp_arith_extended_s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $x1
|
||||
|
||||
; CHECK-LABEL: name: cmp_arith_extended_s64
|
||||
; CHECK: liveins: $w0, $x1
|
||||
; CHECK: %reg0:gpr32 = COPY $w0
|
||||
; CHECK: %reg1:gpr64sp = COPY $x1
|
||||
; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s32) = COPY $w0
|
||||
%reg1:gpr(s64) = COPY $x1
|
||||
%ext:gpr(s64) = G_ZEXT %reg0(s32)
|
||||
%cst:gpr(s64) = G_CONSTANT i64 2
|
||||
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
|
||||
$w0 = COPY %cmp(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: cmp_arith_extended_s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1, $h0
|
||||
|
||||
; CHECK-LABEL: name: cmp_arith_extended_s32
|
||||
; CHECK: liveins: $w0, $w1, $h0
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
|
||||
; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: %reg1:gpr32sp = COPY $w1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
|
||||
; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s16) = COPY $h0
|
||||
%reg1:gpr(s32) = COPY $w1
|
||||
%ext:gpr(s32) = G_ZEXT %reg0(s16)
|
||||
%cst:gpr(s32) = G_CONSTANT i32 2
|
||||
%shift:gpr(s32) = G_SHL %ext, %cst(s32)
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s32), %shift
|
||||
$w0 = COPY %cmp(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: cmp_arith_extended_shl_too_large
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $x1
|
||||
|
||||
; The constant on the G_SHL is > 4, so we won't select SUBSXrx
|
||||
|
||||
; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
|
||||
; CHECK: liveins: $w0, $x1
|
||||
; CHECK: %reg0:gpr32 = COPY $w0
|
||||
; CHECK: %reg1:gpr64 = COPY $x1
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg0, %subreg.sub_32
|
||||
; CHECK: %ext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
|
||||
; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s32) = COPY $w0
|
||||
%reg1:gpr(s64) = COPY $x1
|
||||
%ext:gpr(s64) = G_ZEXT %reg0(s32)
|
||||
%cst:gpr(s64) = G_CONSTANT i64 5
|
||||
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
|
||||
$w0 = COPY %cmp(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
|
|
|
@ -89,3 +89,24 @@ body: |
|
|||
%2:gpr(p0) = G_PTR_ADD %0, %1(s64)
|
||||
$x0 = COPY %2(p0)
|
||||
...
|
||||
---
|
||||
name: ptr_add_arith_extended
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0
|
||||
; CHECK-LABEL: name: ptr_add_arith_extended
|
||||
; CHECK: %reg0:gpr32 = COPY $w0
|
||||
; CHECK: %ptr:gpr64 = COPY $x1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %ptr
|
||||
; CHECK: %ptr_add:gpr64sp = ADDXrx [[COPY]], %reg0, 18
|
||||
; CHECK: $x0 = COPY %ptr_add
|
||||
%reg0:gpr(s32) = COPY $w0
|
||||
%ptr:gpr(p0) = COPY $x1
|
||||
%ext:gpr(s64) = G_ZEXT %reg0(s32)
|
||||
%cst:gpr(s64) = G_CONSTANT i64 2
|
||||
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
|
||||
%ptr_add:gpr(p0) = G_PTR_ADD %ptr, %shift(s64)
|
||||
$x0 = COPY %ptr_add(p0)
|
||||
...
|
||||
|
|
|
@ -136,3 +136,31 @@ body: |
|
|||
%add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
|
||||
$w0 = COPY %add(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: uaddo_arith_extended
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $w0, $x0
|
||||
; Check that we get ADDSXrx.
|
||||
; CHECK-LABEL: name: uaddo_arith_extended
|
||||
; CHECK: liveins: $w0, $x0
|
||||
; CHECK: %reg0:gpr64sp = COPY $x0
|
||||
; CHECK: %reg1:gpr32 = COPY $w0
|
||||
; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
|
||||
; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
|
||||
; CHECK: $x0 = COPY %add
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%reg0:gpr(s64) = COPY $x0
|
||||
%reg1:gpr(s32) = COPY $w0
|
||||
%ext:gpr(s64) = G_ZEXT %reg1(s32)
|
||||
%cst:gpr(s64) = G_CONSTANT i64 2
|
||||
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
|
||||
%add:gpr(s64), %flags:gpr(s1) = G_UADDO %reg0, %shift
|
||||
$x0 = COPY %add(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
|
Loading…
Reference in New Issue