forked from OSchip/llvm-project
[GlobalISel] Add support for lowering of vector G_SELECT and use for AArch64.
The lowering is a port of the SDAG expansion. Differential Revision: https://reviews.llvm.org/D88364
This commit is contained in:
parent
25affb04aa
commit
082321909e
|
@ -368,6 +368,8 @@ public:
|
|||
LegalizeResult lowerBitreverse(MachineInstr &MI);
|
||||
LegalizeResult lowerReadWriteRegister(MachineInstr &MI);
|
||||
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI);
|
||||
LegalizeResult lowerSelect(MachineInstr &MI);
|
||||
|
||||
};
|
||||
|
||||
/// Helper function that creates a libcall to the given \p Name using the given
|
||||
|
|
|
@ -3110,6 +3110,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
|
|||
MI.eraseFromParent();
|
||||
return Legalized;
|
||||
}
|
||||
case G_SELECT:
|
||||
return lowerSelect(MI);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6176,3 +6178,26 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
|
|||
MI.eraseFromParent();
|
||||
return Legalized;
|
||||
}
|
||||
|
||||
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Lower a vector G_SELECT into bitwise operations:
  //   Dst = (Op1 & Mask) | (Op2 & ~Mask)
  // This is a port of the SelectionDAG VSELECT expansion and relies on the
  // mask lanes being all-ones / all-zeros sign-extended condition values.
  Register Dst = MI.getOperand(0).getReg();
  Register Mask = MI.getOperand(1).getReg();
  Register TrueVal = MI.getOperand(2).getReg();
  Register FalseVal = MI.getOperand(3).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT MaskTy = MRI.getType(Mask);
  LLT TrueTy = MRI.getType(TrueVal);

  // Only vector selects are handled here; scalar selects are legal or
  // handled elsewhere.
  if (!DstTy.isVector())
    return UnableToLegalize;

  // The mask must be as wide as the selected operands so the AND/OR
  // expansion below is well formed bit-for-bit.
  if (MaskTy.getSizeInBits() != TrueTy.getSizeInBits())
    return UnableToLegalize;

  auto InvMask = MIRBuilder.buildNot(MaskTy, Mask);
  auto TruePart = MIRBuilder.buildAnd(MaskTy, TrueVal, Mask);
  auto FalsePart = MIRBuilder.buildAnd(MaskTy, FalseVal, InvMask);
  MIRBuilder.buildOr(Dst, TruePart, FalsePart);

  MI.eraseFromParent();
  return Legalized;
}
|
|
@ -426,14 +426,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
|
|||
getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
|
||||
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
|
||||
|
||||
// Select
|
||||
// FIXME: We can probably do a bit better than just scalarizing vector
|
||||
// selects.
|
||||
getActionDefinitionsBuilder(G_SELECT)
|
||||
.legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
|
||||
.clampScalar(0, s32, s64)
|
||||
.widenScalarToNextPow2(0)
|
||||
.scalarize(0);
|
||||
.minScalarEltSameAsIf(isVector(0), 1, 0)
|
||||
.lowerIf(isVector(0));
|
||||
|
||||
// Pointer-handling
|
||||
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
|
||||
|
|
|
@ -16,15 +16,18 @@ body: |
|
|||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(<2 x s64>) = G_ICMP intpred(sgt), [[COPY]](<2 x s64>), [[BUILD_VECTOR]]
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ICMP]](<2 x s64>)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s64)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s64)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[UV2]], [[UV4]]
|
||||
; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC1]](s1), [[UV3]], [[UV5]]
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
|
||||
; CHECK: $q0 = COPY [[BUILD_VECTOR1]](<2 x s64>)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY [[ICMP]](<2 x s64>)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(<2 x s64>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<2 x s64>)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s64>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s64>)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
|
||||
; CHECK: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY1]], [[ASHR]]
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY]], [[XOR]]
|
||||
; CHECK: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
|
||||
; CHECK: $q0 = COPY [[OR]](<2 x s64>)
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%0:_(<2 x s64>) = COPY $q0
|
||||
%1:_(<2 x s64>) = COPY $q1
|
||||
|
@ -51,15 +54,18 @@ body: |
|
|||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(sgt), [[COPY]](<2 x s32>), [[BUILD_VECTOR]]
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV2]], [[UV4]]
|
||||
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC1]](s1), [[UV3]], [[UV5]]
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
|
||||
; CHECK: $d0 = COPY [[BUILD_VECTOR1]](<2 x s32>)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[ICMP]](<2 x s32>)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<2 x s32>)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s32>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s32>)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32)
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
|
||||
; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[ASHR]]
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[XOR]]
|
||||
; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
|
||||
; CHECK: $d0 = COPY [[OR]](<2 x s32>)
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%0:_(<2 x s32>) = COPY $d0
|
||||
%1:_(<2 x s32>) = COPY $d1
|
||||
|
@ -71,3 +77,40 @@ body: |
|
|||
RET_ReallyLR implicit $d0
|
||||
|
||||
...
|
||||
---
|
||||
name: v16s8
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $q0, $q1
|
||||
|
||||
; CHECK-LABEL: name: v16s8
|
||||
; CHECK: liveins: $q0, $q1
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
|
||||
; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
|
||||
; CHECK: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(sgt), [[COPY]](<16 x s8>), [[BUILD_VECTOR]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<16 x s8>) = COPY [[ICMP]](<16 x s8>)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(<16 x s8>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<16 x s8>)
|
||||
; CHECK: [[ASHR:%[0-9]+]]:_(<16 x s8>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<16 x s8>)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(<16 x s8>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
|
||||
; CHECK: [[AND:%[0-9]+]]:_(<16 x s8>) = G_AND [[COPY1]], [[ASHR]]
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(<16 x s8>) = G_AND [[COPY]], [[XOR]]
|
||||
; CHECK: [[OR:%[0-9]+]]:_(<16 x s8>) = G_OR [[AND]], [[AND1]]
|
||||
; CHECK: $q0 = COPY [[OR]](<16 x s8>)
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
%0:_(<16 x s8>) = COPY $q0
|
||||
%1:_(<16 x s8>) = COPY $q1
|
||||
%3:_(s8) = G_CONSTANT i8 0
|
||||
%2:_(<16 x s8>) = G_BUILD_VECTOR %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
|
||||
%4:_(<16 x s1>) = G_ICMP intpred(sgt), %0(<16 x s8>), %2
|
||||
%5:_(<16 x s8>) = G_SELECT %4(<16 x s1>), %1, %0
|
||||
$q0 = COPY %5(<16 x s8>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
|
|
|
@ -288,8 +288,8 @@
|
|||
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
|
||||
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
|
||||
# DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
|
||||
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
|
||||
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
|
||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
||||
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
|
||||
# DEBUG-NEXT: G_UADDO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
|
||||
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
|
||||
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
|
||||
|
|
|
@ -958,8 +958,7 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
|
|||
|
||||
; GISEL: neg.2s
|
||||
; GISEL: cmge.2s
|
||||
; GISEL: fcsel
|
||||
; GISEL: fcsel
|
||||
; GISEL: bif.8b
|
||||
%tmp1neg = sub <2 x i32> zeroinitializer, %a
|
||||
%b = icmp sge <2 x i32> %a, zeroinitializer
|
||||
%abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
|
||||
|
@ -974,10 +973,7 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
|
|||
; For GlobalISel, this generates terrible code until we can pattern match this to abs.
|
||||
; GISEL-DAG: neg.4h
|
||||
; GISEL-DAG: cmgt.4h
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: bif.8b
|
||||
%tmp1neg = sub <4 x i16> zeroinitializer, %a
|
||||
%b = icmp sgt <4 x i16> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
|
||||
|
@ -1000,10 +996,7 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
|
|||
; DAG-NEXT: ret
|
||||
|
||||
; GISEL: cmge.4s
|
||||
; GISEL: fcsel
|
||||
; GISEL: fcsel
|
||||
; GISEL: fcsel
|
||||
; GISEL: fcsel
|
||||
; GISEL: bif.16b
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sge <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
|
||||
|
@ -1017,14 +1010,7 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
|
|||
|
||||
; GISEL-DAG: cmgt.8h
|
||||
; GISEL-DAG: neg.8h
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: csel
|
||||
; GISEL: bif.16b
|
||||
%tmp1neg = sub <8 x i16> zeroinitializer, %a
|
||||
%b = icmp sgt <8 x i16> %a, zeroinitializer
|
||||
%abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
|
||||
|
@ -1033,8 +1019,11 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
|
|||
|
||||
define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
|
||||
; CHECK-LABEL: abspattern6:
|
||||
; CHECK: abs.16b
|
||||
; CHECK-NEXT: ret
|
||||
; DAG: abs.16b
|
||||
; DAG-NEXT: ret
|
||||
|
||||
; GISEL: cmgt.16b
|
||||
; GISEL: bit.16b
|
||||
%tmp1neg = sub <16 x i8> zeroinitializer, %a
|
||||
%b = icmp slt <16 x i8> %a, zeroinitializer
|
||||
%abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
|
||||
|
@ -1048,8 +1037,7 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
|
|||
|
||||
; GISEL: neg.2d
|
||||
; GISEL: cmge.2d
|
||||
; GISEL: fcsel
|
||||
; GISEL: fcsel
|
||||
; GISEL: bit.16b
|
||||
%tmp1neg = sub <2 x i64> zeroinitializer, %a
|
||||
%b = icmp sle <2 x i64> %a, zeroinitializer
|
||||
%abs = select <2 x i1> %b, <2 x i64> %tmp1neg, <2 x i64> %a
|
||||
|
|
Loading…
Reference in New Issue