forked from OSchip/llvm-project
[AArch64][GlobalISel] Manually select G_DUP with s8/s16 gpr scalar operands.
These are not selected by the imported patterns, and avoiding generating them in the first place is more hassle than it is worth (at least until we have fp types in GlobalISel).
This commit is contained in:
parent
ade6fa46f9
commit
f7b36b35b6
|
@ -2971,6 +2971,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
|||
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
|
||||
}
|
||||
}
|
||||
case AArch64::G_DUP: {
|
||||
// When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
|
||||
// imported patterns. Do it manually here. Avoiding generating s16 gpr is
|
||||
// difficult because at RBS we may end up pessimizing the fpr case if we
|
||||
// decided to add an anyextend to fix this. Manual selection is the most
|
||||
// robust solution for now.
|
||||
Register SrcReg = I.getOperand(1).getReg();
|
||||
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
|
||||
return false; // We expect the fpr regbank case to be imported.
|
||||
LLT SrcTy = MRI.getType(SrcReg);
|
||||
if (SrcTy.getSizeInBits() == 16)
|
||||
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
|
||||
else if (SrcTy.getSizeInBits() == 8)
|
||||
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
|
||||
else
|
||||
return false;
|
||||
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
||||
}
|
||||
case TargetOpcode::G_INTRINSIC_TRUNC:
|
||||
return selectIntrinsicTrunc(I, MRI);
|
||||
case TargetOpcode::G_INTRINSIC_ROUND:
|
||||
|
|
|
@ -223,6 +223,29 @@ body: |
|
|||
$q0 = COPY %dup(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: DUPv8i16gpr_s16_src
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
; Checks that we can still select the gpr variant if the scalar is an s16.
|
||||
; CHECK-LABEL: name: DUPv8i16gpr_s16_src
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
|
||||
; CHECK: $q0 = COPY %dup
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%copy:gpr(s32) = COPY $w0
|
||||
%trunc:gpr(s16) = G_TRUNC %copy
|
||||
%dup:fpr(<8 x s16>) = G_DUP %trunc(s16)
|
||||
$q0 = COPY %dup(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: DUPv8i8gpr
|
||||
|
@ -264,3 +287,26 @@ body: |
|
|||
%dup:fpr(<16 x s8>) = G_DUP %copy(s32)
|
||||
$q0 = COPY %dup(<16 x s8>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
---
|
||||
name: DUPv16i8gpr_s8_src
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
; Check we still select the gpr variant when the scalar is an s8.
|
||||
; CHECK-LABEL: name: DUPv16i8gpr_s8_src
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
|
||||
; CHECK: $q0 = COPY %dup
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%copy:gpr(s32) = COPY $w0
|
||||
%trunc:gpr(s8) = G_TRUNC %copy
|
||||
%dup:fpr(<16 x s8>) = G_DUP %trunc(s8)
|
||||
$q0 = COPY %dup(<16 x s8>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue