[AArch64][GlobalISel] Manually select G_DUP with s8/s16 gpr scalar operands.

The imported patterns don't select these, and preventing them from being
generated in the first place is more hassle than it's worth (until we have fp
types in GlobalISel).
This commit is contained in:
Amara Emerson 2020-09-25 01:28:50 -07:00
parent ade6fa46f9
commit f7b36b35b6
2 changed files with 64 additions and 0 deletions

View File

@ -2971,6 +2971,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
} }
} }
case AArch64::G_DUP: {
  // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
  // imported patterns, so select it manually here. Avoiding generating s8/s16
  // gprs is difficult because at RBS we may end up pessimizing the fpr case if
  // we decided to add an anyextend to fix this. Manual selection is the most
  // robust solution for now.
  const Register SrcReg = I.getOperand(1).getReg();
  if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
    return false; // We expect the fpr regbank case to be imported.

  // A DUP opcode fixes both the lane width and the total vector width, so the
  // choice has to take the destination vector size into account as well as
  // the scalar size. Picking only on the scalar size would select a 128-bit
  // DUP for a 64-bit destination (e.g. <4 x s16> or <8 x s8>) and constrain
  // the result to the wrong register class.
  const unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
  const unsigned DstSize =
      MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  unsigned Opc;
  if (SrcSize == 16 && DstSize == 128)
    Opc = AArch64::DUPv8i16gpr;
  else if (SrcSize == 16 && DstSize == 64)
    Opc = AArch64::DUPv4i16gpr;
  else if (SrcSize == 8 && DstSize == 128)
    Opc = AArch64::DUPv16i8gpr;
  else if (SrcSize == 8 && DstSize == 64)
    Opc = AArch64::DUPv8i8gpr;
  else
    return false; // Any other scalar/vector combination is not handled here.

  // Rewrite the generic instruction in place and constrain its operands to
  // the register classes required by the chosen DUP.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI); return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND: case TargetOpcode::G_INTRINSIC_ROUND:

View File

@ -223,6 +223,29 @@ body: |
$q0 = COPY %dup(<8 x s16>) $q0 = COPY %dup(<8 x s16>)
RET_ReallyLR implicit $q0 RET_ReallyLR implicit $q0
...
---
name: DUPv8i16gpr_s16_src
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; Checks that we can still select the gpr variant if the scalar is an s16.
; CHECK-LABEL: name: DUPv8i16gpr_s16_src
; CHECK: liveins: $w0
; CHECK: %copy:gpr32 = COPY $w0
; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
; CHECK: $q0 = COPY %dup
; CHECK: RET_ReallyLR implicit $q0
%copy:gpr(s32) = COPY $w0
%trunc:gpr(s16) = G_TRUNC %copy
%dup:fpr(<8 x s16>) = G_DUP %trunc(s16)
$q0 = COPY %dup(<8 x s16>)
RET_ReallyLR implicit $q0
... ...
--- ---
name: DUPv8i8gpr name: DUPv8i8gpr
@ -264,3 +287,26 @@ body: |
%dup:fpr(<16 x s8>) = G_DUP %copy(s32) %dup:fpr(<16 x s8>) = G_DUP %copy(s32)
$q0 = COPY %dup(<16 x s8>) $q0 = COPY %dup(<16 x s8>)
RET_ReallyLR implicit $q0 RET_ReallyLR implicit $q0
...
---
name: DUPv16i8gpr_s8_src
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; Check we still select the gpr variant when scalar is an s8.
; CHECK-LABEL: name: DUPv16i8gpr_s8_src
; CHECK: liveins: $w0
; CHECK: %copy:gpr32 = COPY $w0
; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
; CHECK: $q0 = COPY %dup
; CHECK: RET_ReallyLR implicit $q0
%copy:gpr(s32) = COPY $w0
%trunc:gpr(s8) = G_TRUNC %copy
%dup:fpr(<16 x s8>) = G_DUP %trunc(s8)
$q0 = COPY %dup(<16 x s8>)
RET_ReallyLR implicit $q0
...