forked from OSchip/llvm-project
[AArch64][GlobalISel] Fix manual selection for v4s16 and v8s8 G_DUP
The manual G_DUP selection code would produce DUPv16i8gpr for v8s8 and DUPv8i16gpr for v4s16 destinations. This adds the missing cases to the manual selection code, and makes it return false when the destination vector type is unexpected. Update select-dup.mir to reflect the change. Differential Revision: https://reviews.llvm.org/D97240
This commit is contained in:
parent
086670d367
commit
e339bba637
|
@ -3194,14 +3194,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
|||
// difficult because at RBS we may end up pessimizing the fpr case if we
|
||||
// decided to add an anyextend to fix this. Manual selection is the most
|
||||
// robust solution for now.
|
||||
Register SrcReg = I.getOperand(1).getReg();
|
||||
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
|
||||
if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
|
||||
AArch64::GPRRegBankID)
|
||||
return false; // We expect the fpr regbank case to be imported.
|
||||
LLT SrcTy = MRI.getType(SrcReg);
|
||||
if (SrcTy.getSizeInBits() == 16)
|
||||
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
|
||||
else if (SrcTy.getSizeInBits() == 8)
|
||||
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
|
||||
if (VecTy == LLT::vector(8, 8))
|
||||
I.setDesc(TII.get(AArch64::DUPv8i8gpr));
|
||||
else if (VecTy == LLT::vector(16, 8))
|
||||
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
|
||||
else if (VecTy == LLT::vector(4, 16))
|
||||
I.setDesc(TII.get(AArch64::DUPv4i16gpr));
|
||||
else if (VecTy == LLT::vector(8, 16))
|
||||
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
|
||||
else
|
||||
return false;
|
||||
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
||||
|
|
|
@ -246,6 +246,28 @@ body: |
|
|||
$q0 = COPY %dup(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: DUPv4s16gpr_s16_src
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: DUPv4s16gpr_s16_src
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: %dup:fpr64 = DUPv4i16gpr %copy
|
||||
; CHECK: $d0 = COPY %dup
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%copy:gpr(s32) = COPY $w0
|
||||
%trunc:gpr(s16) = G_TRUNC %copy
|
||||
%dup:fpr(<4 x s16>) = G_DUP %trunc(s16)
|
||||
$d0 = COPY %dup(<4 x s16>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: DUPv8i8gpr
|
||||
|
@ -267,6 +289,28 @@ body: |
|
|||
$d0 = COPY %dup(<8 x s8>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: DUPv8i8gpr_s8_src
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: DUPv8i8gpr_s8_src
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: %dup:fpr64 = DUPv8i8gpr %copy
|
||||
; CHECK: $d0 = COPY %dup
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%copy:gpr(s32) = COPY $w0
|
||||
%trunc:gpr(s8) = G_TRUNC %copy(s32)
|
||||
%dup:fpr(<8 x s8>) = G_DUP %trunc(s8)
|
||||
$d0 = COPY %dup(<8 x s8>)
|
||||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: DUPv16i8gpr
|
||||
|
|
Loading…
Reference in New Issue