forked from OSchip/llvm-project
[AArch64][GlobalISel] Manually select G_DUP with s8/s16 gpr scalar operands.
These don't get selected by the imported patterns, and avoiding generating them is more hassle than it is worth (at least until we have FP types in GlobalISel).
This commit is contained in:
parent
ade6fa46f9
commit
f7b36b35b6
|
@ -2971,6 +2971,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
||||||
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
|
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case AArch64::G_DUP: {
|
||||||
|
// When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
|
||||||
|
// imported patterns. Do it manually here. Avoiding generating s16 gpr is
|
||||||
|
// difficult because at RBS we may end up pessimizing the fpr case if we
|
||||||
|
// decide to add an anyextend to fix this. Manual selection is the most
|
||||||
|
// robust solution for now.
|
||||||
|
Register SrcReg = I.getOperand(1).getReg();
|
||||||
|
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
|
||||||
|
return false; // We expect the fpr regbank case to be imported.
|
||||||
|
LLT SrcTy = MRI.getType(SrcReg);
|
||||||
|
if (SrcTy.getSizeInBits() == 16)
|
||||||
|
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
|
||||||
|
else if (SrcTy.getSizeInBits() == 8)
|
||||||
|
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
||||||
|
}
|
||||||
case TargetOpcode::G_INTRINSIC_TRUNC:
|
case TargetOpcode::G_INTRINSIC_TRUNC:
|
||||||
return selectIntrinsicTrunc(I, MRI);
|
return selectIntrinsicTrunc(I, MRI);
|
||||||
case TargetOpcode::G_INTRINSIC_ROUND:
|
case TargetOpcode::G_INTRINSIC_ROUND:
|
||||||
|
|
|
@ -223,6 +223,29 @@ body: |
|
||||||
$q0 = COPY %dup(<8 x s16>)
|
$q0 = COPY %dup(<8 x s16>)
|
||||||
RET_ReallyLR implicit $q0
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: DUPv8i16gpr_s16_src
|
||||||
|
alignment: 4
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
liveins: $w0
|
||||||
|
; Checks that we can still select the gpr variant if the scalar is an s16.
|
||||||
|
; CHECK-LABEL: name: DUPv8i16gpr_s16_src
|
||||||
|
; CHECK: liveins: $w0
|
||||||
|
; CHECK: %copy:gpr32 = COPY $w0
|
||||||
|
; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
|
||||||
|
; CHECK: $q0 = COPY %dup
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%copy:gpr(s32) = COPY $w0
|
||||||
|
%trunc:gpr(s16) = G_TRUNC %copy
|
||||||
|
%dup:fpr(<8 x s16>) = G_DUP %trunc(s16)
|
||||||
|
$q0 = COPY %dup(<8 x s16>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
...
|
...
|
||||||
---
|
---
|
||||||
name: DUPv8i8gpr
|
name: DUPv8i8gpr
|
||||||
|
@ -264,3 +287,26 @@ body: |
|
||||||
%dup:fpr(<16 x s8>) = G_DUP %copy(s32)
|
%dup:fpr(<16 x s8>) = G_DUP %copy(s32)
|
||||||
$q0 = COPY %dup(<16 x s8>)
|
$q0 = COPY %dup(<16 x s8>)
|
||||||
RET_ReallyLR implicit $q0
|
RET_ReallyLR implicit $q0
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: DUPv16i8gpr_s8_src
|
||||||
|
alignment: 4
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
liveins: $w0
|
||||||
|
; Check that we still select the gpr variant when the scalar is an s8.
|
||||||
|
; CHECK-LABEL: name: DUPv16i8gpr_s8_src
|
||||||
|
; CHECK: liveins: $w0
|
||||||
|
; CHECK: %copy:gpr32 = COPY $w0
|
||||||
|
; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
|
||||||
|
; CHECK: $q0 = COPY %dup
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%copy:gpr(s32) = COPY $w0
|
||||||
|
%trunc:gpr(s8) = G_TRUNC %copy
|
||||||
|
%dup:fpr(<16 x s8>) = G_DUP %trunc(s8)
|
||||||
|
$q0 = COPY %dup(<16 x s8>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
...
|
||||||
|
|
Loading…
Reference in New Issue