forked from OSchip/llvm-project
[AArch64][GlobalISel] Add selection support for v2s32 and v2s64 reductions for FADD/ADD.
We'll need legalizer lower() support for the other types to work. Differential Revision: https://reviews.llvm.org/D89159
This commit is contained in:
parent
53b69820f4
commit
39c05a1a71
|
@ -152,6 +152,7 @@ private:
|
|||
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
|
||||
unsigned emitConstantPoolEntry(const Constant *CPVal,
|
||||
MachineFunction &MF) const;
|
||||
|
@ -2959,11 +2960,52 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
|||
return selectConcatVectors(I, MRI);
|
||||
case TargetOpcode::G_JUMP_TABLE:
|
||||
return selectJumpTable(I, MRI);
|
||||
case TargetOpcode::G_VECREDUCE_FADD:
|
||||
case TargetOpcode::G_VECREDUCE_ADD:
|
||||
return selectReduction(I, MRI);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Select a G_VECREDUCE_ADD or G_VECREDUCE_FADD by mutating \p I in place.
///
/// The generic reduction is turned into the matching AArch64 instruction by
/// swapping its instruction descriptor; the operands are left where they are
/// and are then constrained to the register classes the new opcode requires.
///
/// Supported source vector types (operand 1):
///   - G_VECREDUCE_ADD:  <16 x s8>, <8 x s16>, <4 x s32>, <2 x s64>
///   - G_VECREDUCE_FADD: <2 x s32>, <2 x s64>
///
/// \param I   The reduction instruction to select.
/// \param MRI Register info used to look up the type of the vector operand.
/// \returns true if \p I was rewritten and its operands were successfully
///          constrained; false if the opcode or vector type is not handled
///          here, in which case \p I is left untouched.
bool AArch64InstructionSelector::selectReduction(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const Register VecReg = I.getOperand(1).getReg();
  const LLT VecTy = MRI.getType(VecReg);

  // Pick the target opcode first; the rewrite itself is identical for every
  // supported reduction, so it is done once after the switch.
  unsigned Opc = 0;
  switch (I.getOpcode()) {
  case TargetOpcode::G_VECREDUCE_ADD:
    if (VecTy == LLT::vector(16, 8))
      Opc = AArch64::ADDVv16i8v;
    else if (VecTy == LLT::vector(8, 16))
      Opc = AArch64::ADDVv8i16v;
    else if (VecTy == LLT::vector(4, 32))
      Opc = AArch64::ADDVv4i32v;
    else if (VecTy == LLT::vector(2, 64))
      // The two-lane 64-bit case uses the ADDP (pairwise) opcode rather than
      // an ADDV one.
      Opc = AArch64::ADDPv2i64p;
    else {
      LLVM_DEBUG(dbgs() << "Unhandled type for add reduction\n");
      return false;
    }
    break;

  case TargetOpcode::G_VECREDUCE_FADD:
    if (VecTy == LLT::vector(2, 32))
      Opc = AArch64::FADDPv2i32p;
    else if (VecTy == LLT::vector(2, 64))
      Opc = AArch64::FADDPv2i64p;
    else {
      LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction\n");
      return false;
    }
    break;

  default:
    // Not a reduction this routine knows how to select.
    return false;
  }

  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
|
||||
|
||||
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
|
||||
MachineRegisterInfo &MRI) const {
|
||||
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
|
||||
---
|
||||
name: add_B
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: add_B
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
|
||||
; CHECK: [[ADDVv16i8v:%[0-9]+]]:fpr8 = ADDVv16i8v [[LDRQui]]
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[ADDVv16i8v]], %subreg.bsub
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: $w0 = COPY [[COPY1]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:gpr(p0) = COPY $x0
|
||||
%1:fpr(<16 x s8>) = G_LOAD %0(p0) :: (load 16)
|
||||
%2:fpr(s8) = G_VECREDUCE_ADD %1(<16 x s8>)
|
||||
%4:gpr(s8) = COPY %2(s8)
|
||||
%3:gpr(s32) = G_ANYEXT %4(s8)
|
||||
$w0 = COPY %3(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: add_H
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: add_H
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
|
||||
; CHECK: [[ADDVv8i16v:%[0-9]+]]:fpr16 = ADDVv8i16v [[LDRQui]]
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[ADDVv8i16v]], %subreg.hsub
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: $w0 = COPY [[COPY1]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:gpr(p0) = COPY $x0
|
||||
%1:fpr(<8 x s16>) = G_LOAD %0(p0) :: (load 16)
|
||||
%2:fpr(s16) = G_VECREDUCE_ADD %1(<8 x s16>)
|
||||
%4:gpr(s16) = COPY %2(s16)
|
||||
%3:gpr(s32) = G_ANYEXT %4(s16)
|
||||
$w0 = COPY %3(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: add_S
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: add_S
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
|
||||
; CHECK: [[ADDVv4i32v:%[0-9]+]]:fpr32 = ADDVv4i32v [[LDRQui]]
|
||||
; CHECK: $w0 = COPY [[ADDVv4i32v]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:gpr(p0) = COPY $x0
|
||||
%1:fpr(<4 x s32>) = G_LOAD %0(p0) :: (load 16)
|
||||
%2:fpr(s32) = G_VECREDUCE_ADD %1(<4 x s32>)
|
||||
$w0 = COPY %2(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: add_D
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$x0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: add_D
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
|
||||
; CHECK: [[ADDPv2i64p:%[0-9]+]]:fpr64 = ADDPv2i64p [[LDRQui]]
|
||||
; CHECK: $x0 = COPY [[ADDPv2i64p]]
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%0:gpr(p0) = COPY $x0
|
||||
%1:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load 16)
|
||||
%2:fpr(s64) = G_VECREDUCE_ADD %1(<2 x s64>)
|
||||
$x0 = COPY %2(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
|
@ -0,0 +1,44 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
|
||||
---
|
||||
name: fadd_v2s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $d0
|
||||
|
||||
; CHECK-LABEL: name: fadd_v2s32
|
||||
; CHECK: liveins: $d0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
|
||||
; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]]
|
||||
; CHECK: $w0 = COPY [[FADDPv2i32p]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%0:fpr(<2 x s32>) = COPY $d0
|
||||
%1:fpr(s32) = G_VECREDUCE_FADD %0(<2 x s32>)
|
||||
$w0 = COPY %1(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: fadd_v2s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: fadd_v2s64
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||
; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]]
|
||||
; CHECK: $x0 = COPY [[FADDPv2i64p]]
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%0:fpr(<2 x s64>) = COPY $q0
|
||||
%2:fpr(s64) = G_VECREDUCE_FADD %0(<2 x s64>)
|
||||
$x0 = COPY %2(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
|
@ -142,7 +142,7 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
|
|||
}
|
||||
|
||||
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
|
||||
declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
|
||||
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
|
||||
|
||||
define i16 @uabd16b_rdx(<16 x i8>* %a, <16 x i8>* %b) {
|
||||
; CHECK-LABEL: uabd16b_rdx
|
||||
|
@ -168,7 +168,7 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
|
|||
%abcmp = icmp slt <16 x i32> %abdiff, zeroinitializer
|
||||
%ababs = sub nsw <16 x i32> zeroinitializer, %abdiff
|
||||
%absel = select <16 x i1> %abcmp, <16 x i32> %ababs, <16 x i32> %abdiff
|
||||
%reduced_v = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %absel)
|
||||
%reduced_v = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %absel)
|
||||
ret i32 %reduced_v
|
||||
}
|
||||
|
||||
|
@ -181,13 +181,13 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
|
|||
%abcmp = icmp slt <16 x i32> %abdiff, zeroinitializer
|
||||
%ababs = sub nsw <16 x i32> zeroinitializer, %abdiff
|
||||
%absel = select <16 x i1> %abcmp, <16 x i32> %ababs, <16 x i32> %abdiff
|
||||
%reduced_v = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %absel)
|
||||
%reduced_v = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %absel)
|
||||
ret i32 %reduced_v
|
||||
}
|
||||
|
||||
|
||||
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
|
||||
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
|
||||
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
|
||||
|
||||
define i32 @uabd8h_rdx(<8 x i16>* %a, <8 x i16>* %b) {
|
||||
; CHECK-LABEL: uabd8h_rdx
|
||||
|
@ -219,19 +219,22 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
|
|||
|
||||
define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
|
||||
; CHECK-LABEL: uabdl4s_rdx_i32
|
||||
; CHECK: uabdl.4s
|
||||
; DAG: uabdl.4s
|
||||
|
||||
; GISel doesn't match this pattern yet.
|
||||
; GISEL: addv.4s
|
||||
%aext = zext <4 x i16> %a to <4 x i32>
|
||||
%bext = zext <4 x i16> %b to <4 x i32>
|
||||
%abdiff = sub nsw <4 x i32> %aext, %bext
|
||||
%abcmp = icmp slt <4 x i32> %abdiff, zeroinitializer
|
||||
%ababs = sub nsw <4 x i32> zeroinitializer, %abdiff
|
||||
%absel = select <4 x i1> %abcmp, <4 x i32> %ababs, <4 x i32> %abdiff
|
||||
%reduced_v = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %absel)
|
||||
%reduced_v = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %absel)
|
||||
ret i32 %reduced_v
|
||||
}
|
||||
|
||||
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
|
||||
declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
|
||||
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
|
||||
|
||||
define i64 @uabd4s_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
|
||||
; CHECK: uabd4s_rdx
|
||||
|
@ -263,14 +266,17 @@ define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
|
|||
|
||||
define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK-LABEL: uabdl2d_rdx_i64
|
||||
; CHECK: uabdl.2d
|
||||
; DAG: uabdl.2d
|
||||
|
||||
; GISel doesn't match this pattern yet
|
||||
; GISEL: addp.2d
|
||||
%aext = zext <2 x i32> %a to <2 x i64>
|
||||
%bext = zext <2 x i32> %b to <2 x i64>
|
||||
%abdiff = sub nsw <2 x i64> %aext, %bext
|
||||
%abcmp = icmp slt <2 x i64> %abdiff, zeroinitializer
|
||||
%ababs = sub nsw <2 x i64> zeroinitializer, %abdiff
|
||||
%absel = select <2 x i1> %abcmp, <2 x i64> %ababs, <2 x i64> %abdiff
|
||||
%reduced_v = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %absel)
|
||||
%reduced_v = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %absel)
|
||||
ret i64 %reduced_v
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue