[GlobalISel] Add support for lowering of vector G_SELECT and use for AArch64.

The lowering is a port of the SDAG expansion.

Differential Revision: https://reviews.llvm.org/D88364
This commit is contained in:
Amara Emerson 2020-09-26 10:02:39 -07:00
parent 25affb04aa
commit 082321909e
6 changed files with 102 additions and 46 deletions

View File

@ -368,6 +368,8 @@ public:
LegalizeResult lowerBitreverse(MachineInstr &MI);
LegalizeResult lowerReadWriteRegister(MachineInstr &MI);
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI);
LegalizeResult lowerSelect(MachineInstr &MI);
};
/// Helper function that creates a libcall to the given \p Name using the given

View File

@ -3110,6 +3110,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
MI.eraseFromParent();
return Legalized;
}
case G_SELECT:
return lowerSelect(MI);
}
}
@ -6176,3 +6178,26 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
// Implement vector G_SELECT in terms of XOR, AND, OR.
Register DstReg = MI.getOperand(0).getReg();
Register MaskReg = MI.getOperand(1).getReg();
Register Op1Reg = MI.getOperand(2).getReg();
Register Op2Reg = MI.getOperand(3).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT MaskTy = MRI.getType(MaskReg);
LLT Op1Ty = MRI.getType(Op1Reg);
if (!DstTy.isVector())
return UnableToLegalize;
if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
return UnableToLegalize;
auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
MI.eraseFromParent();
return Legalized;
}

View File

@ -426,14 +426,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
// Select
// FIXME: We can probably do a bit better than just scalarizing vector
// selects.
getActionDefinitionsBuilder(G_SELECT)
.legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
.scalarize(0);
.minScalarEltSameAsIf(isVector(0), 1, 0)
.lowerIf(isVector(0));
// Pointer-handling
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

View File

@ -16,15 +16,18 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
; CHECK: [[ICMP:%[0-9]+]]:_(<2 x s64>) = G_ICMP intpred(sgt), [[COPY]](<2 x s64>), [[BUILD_VECTOR]]
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ICMP]](<2 x s64>)
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s64)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s64)
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[UV2]], [[UV4]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC1]](s1), [[UV3]], [[UV5]]
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
; CHECK: $q0 = COPY [[BUILD_VECTOR1]](<2 x s64>)
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY [[ICMP]](<2 x s64>)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
; CHECK: [[SHL:%[0-9]+]]:_(<2 x s64>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<2 x s64>)
; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s64>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s64>)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
; CHECK: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
; CHECK: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY1]], [[ASHR]]
; CHECK: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY]], [[XOR]]
; CHECK: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
; CHECK: $q0 = COPY [[OR]](<2 x s64>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
@ -51,15 +54,18 @@ body: |
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
; CHECK: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(sgt), [[COPY]](<2 x s32>), [[BUILD_VECTOR]]
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV2]], [[UV4]]
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC1]](s1), [[UV3]], [[UV5]]
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
; CHECK: $d0 = COPY [[BUILD_VECTOR1]](<2 x s32>)
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[ICMP]](<2 x s32>)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
; CHECK: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<2 x s32>)
; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s32>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s32>)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32)
; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[ASHR]]
; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[XOR]]
; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
; CHECK: $d0 = COPY [[OR]](<2 x s32>)
; CHECK: RET_ReallyLR implicit $d0
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
@ -71,3 +77,40 @@ body: |
RET_ReallyLR implicit $d0
...
---
name: v16s8
alignment: 4
tracksRegLiveness: true
body: |
bb.0:
liveins: $q0, $q1
; CHECK-LABEL: name: v16s8
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
; CHECK: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(sgt), [[COPY]](<16 x s8>), [[BUILD_VECTOR]]
; CHECK: [[COPY2:%[0-9]+]]:_(<16 x s8>) = COPY [[ICMP]](<16 x s8>)
; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
; CHECK: [[SHL:%[0-9]+]]:_(<16 x s8>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<16 x s8>)
; CHECK: [[ASHR:%[0-9]+]]:_(<16 x s8>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<16 x s8>)
; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
; CHECK: [[XOR:%[0-9]+]]:_(<16 x s8>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
; CHECK: [[AND:%[0-9]+]]:_(<16 x s8>) = G_AND [[COPY1]], [[ASHR]]
; CHECK: [[AND1:%[0-9]+]]:_(<16 x s8>) = G_AND [[COPY]], [[XOR]]
; CHECK: [[OR:%[0-9]+]]:_(<16 x s8>) = G_OR [[AND]], [[AND1]]
; CHECK: $q0 = COPY [[OR]](<16 x s8>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<16 x s8>) = COPY $q0
%1:_(<16 x s8>) = COPY $q1
%3:_(s8) = G_CONSTANT i8 0
%2:_(<16 x s8>) = G_BUILD_VECTOR %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
%4:_(<16 x s1>) = G_ICMP intpred(sgt), %0(<16 x s8>), %2
%5:_(<16 x s8>) = G_SELECT %4(<16 x s1>), %1, %0
$q0 = COPY %5(<16 x s8>)
RET_ReallyLR implicit $q0
...

View File

@ -288,8 +288,8 @@
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_UADDO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 2, OK

View File

@ -958,8 +958,7 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
; GISEL: neg.2s
; GISEL: cmge.2s
; GISEL: fcsel
; GISEL: fcsel
; GISEL: bif.8b
%tmp1neg = sub <2 x i32> zeroinitializer, %a
%b = icmp sge <2 x i32> %a, zeroinitializer
%abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
@ -974,10 +973,7 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
; For GlobalISel, this generates terrible code until we can pattern match this to abs.
; GISEL-DAG: neg.4h
; GISEL-DAG: cmgt.4h
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: bif.8b
%tmp1neg = sub <4 x i16> zeroinitializer, %a
%b = icmp sgt <4 x i16> %a, zeroinitializer
%abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
@ -1000,10 +996,7 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
; DAG-NEXT: ret
; GISEL: cmge.4s
; GISEL: fcsel
; GISEL: fcsel
; GISEL: fcsel
; GISEL: fcsel
; GISEL: bif.16b
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sge <4 x i32> %a, zeroinitializer
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
@ -1017,14 +1010,7 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
; GISEL-DAG: cmgt.8h
; GISEL-DAG: neg.8h
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: csel
; GISEL: bif.16b
%tmp1neg = sub <8 x i16> zeroinitializer, %a
%b = icmp sgt <8 x i16> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
@ -1033,8 +1019,11 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
; CHECK-LABEL: abspattern6:
; CHECK: abs.16b
; CHECK-NEXT: ret
; DAG: abs.16b
; DAG-NEXT: ret
; GISEL: cmgt.16b
; GISEL: bit.16b
%tmp1neg = sub <16 x i8> zeroinitializer, %a
%b = icmp slt <16 x i8> %a, zeroinitializer
%abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
@ -1048,8 +1037,7 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
; GISEL: neg.2d
; GISEL: cmge.2d
; GISEL: fcsel
; GISEL: fcsel
; GISEL: bit.16b
%tmp1neg = sub <2 x i64> zeroinitializer, %a
%b = icmp sle <2 x i64> %a, zeroinitializer
%abs = select <2 x i1> %b, <2 x i64> %tmp1neg, <2 x i64> %a