[CodeGen][AArch64] Ensure isSExtCheaperThanZExt returns true for negative constants
When we know the value we're extending is a negative constant, it makes sense to use SIGN_EXTEND because this may improve code quality in some cases, particularly when doing a constant splat of an unpacked vector type. For example, for SVE, when splatting the value -1 into all elements of a vector of type <vscale x 2 x i32>, the element type gets promoted from i32 -> i64. In this case we want the splat value to sign-extend from (i32 -1) -> (i64 -1), whereas currently it zero-extends from (i32 -1) -> (i64 0xFFFFFFFF). Sign-extending the constant means we can use a single mov immediate instruction.

New tests added here:

  CodeGen/AArch64/sve-vector-splat.ll

I believe we also see some code quality improvements in these existing tests:

  CodeGen/AArch64/reduce-and.ll
  CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll

The apparent regressions in CodeGen/AArch64/fast-isel-cmp-vec.ll only occur because that test disables codegen prepare and branch folding.

Differential Revision: https://reviews.llvm.org/D114357
commit 197f3c0deb
parent 5f2edada68
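To make the splat example in the message concrete, here is a small standalone C++ sketch (not part of the patch; plain integer casts stand in for the DAG's SIGN_EXTEND/ZERO_EXTEND constant folding) showing why only sign-extension preserves the -1 splat value when i32 is promoted to i64:

// Standalone illustration only (not LLVM code from this patch): plain
// integer casts model SIGN_EXTEND and ZERO_EXTEND of an i32 constant to i64.
#include <cassert>
#include <cstdint>

int main() {
  int32_t SplatVal = -1;  // element value of the <vscale x 2 x i32> splat
  // SIGN_EXTEND: (i32 -1) -> (i64 -1)
  int64_t SExt = static_cast<int64_t>(SplatVal);
  // ZERO_EXTEND: (i32 -1) -> (i64 0xFFFFFFFF)
  int64_t ZExt = static_cast<int64_t>(static_cast<uint32_t>(SplatVal));
  assert(SExt == -1);            // still a simple all-ones immediate
  assert(ZExt == 0xFFFFFFFFLL);  // no longer a splat of -1
  return 0;
}

The sign-extended form is what lets the backend emit a single "mov z0.d, #-1" instead of first materialising the constant in a general-purpose register, as the sve-vector-splat.ll diffs below show.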
@@ -2647,9 +2647,9 @@ public:
                                getApproximateEVTForLLT(ToTy, DL, Ctx));
   }
 
-  /// Return true if sign-extension from FromTy to ToTy is cheaper than
-  /// zero-extension.
-  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
+  /// Return true if sign-extension of value \p V from FromTy to ToTy is
+  /// cheaper than zero-extension, where \p V can be SDValue() if unknown.
+  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy, SDValue V) const {
     return false;
   }
 
@@ -7004,7 +7004,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
   // matching the argument extension instead.
   Instruction::CastOps ExtType = Instruction::ZExt;
   // Some targets prefer SExt over ZExt.
-  if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
+  if (TLI->isSExtCheaperThanZExt(OldVT, RegType, SDValue()))
     ExtType = Instruction::SExt;
 
   if (auto *Arg = dyn_cast<Argument>(Cond)) {
@@ -1704,7 +1704,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &LHS, SDValue &RHS,
   SDValue OpL = GetPromotedInteger(LHS);
   SDValue OpR = GetPromotedInteger(RHS);
 
-  if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) {
+  if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType(), LHS)) {
     // The target would prefer to promote the comparison operand with sign
     // extension. Honor that unless the promoted values are already zero
     // extended.
@@ -283,7 +283,7 @@ private:
     EVT OldVT = Op.getValueType();
     SDLoc DL(Op);
     Op = GetPromotedInteger(Op);
-    if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
+    if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType(), Op))
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
                          DAG.getValueType(OldVT));
     return DAG.getZeroExtendInReg(Op, DL, OldVT);
@@ -4765,7 +4765,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                          C->isTargetOpcode(), C->isOpaque());
     case ISD::ANY_EXTEND:
       // Some targets like RISCV prefer to sign extend some types.
-      if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT))
+      if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT, Operand))
         return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                            C->isTargetOpcode(), C->isOpaque());
       return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
@@ -3844,7 +3844,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
              !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
-                                    OpVT)) {
+                                    OpVT, N0.getOperand(1))) {
     EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
     unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
     EVT ExtDstTy = N0.getValueType();
@@ -1138,6 +1138,14 @@ private:
 
   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                               LLT Ty2) const override;
+
+  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override {
+    if (!V || SrcVT.getScalarType() == MVT::i1)
+      return false;
+    if (ConstantSDNode *C = isConstOrConstSplat(V))
+      return C->getAPIntValue().isNegative();
+    return false;
+  }
 };
 
 namespace AArch64 {
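A minimal standalone model of the decision the AArch64 override above encodes, assuming std::optional<int64_t> stands in for the result of isConstOrConstSplat and a bit width stands in for the source EVT; preferSExtForSplat is a hypothetical name used only for this sketch:

// Sketch only, not LLVM code: prefer sign-extension exactly when the value
// being extended is a known negative constant (and the source is not i1).
#include <cassert>
#include <cstdint>
#include <optional>

static bool preferSExtForSplat(std::optional<int64_t> SplatVal, unsigned SrcBits) {
  if (!SplatVal || SrcBits == 1)  // unknown value, or an i1 source element type
    return false;
  return *SplatVal < 0;           // only negative constants benefit from SIGN_EXTEND
}

int main() {
  assert(preferSExtForSplat(-1, 32));             // splat of -1: sign-extend
  assert(!preferSExtForSplat(255, 8));            // non-negative constant: keep zero-extension
  assert(!preferSExtForSplat(std::nullopt, 32));  // no known constant: default to false
  return 0;
}

This is also why the funnel-shift checks below now expect negative immediates (for example #-61 and #-2): the promoted constants are sign- rather than zero-extended.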
@@ -1198,7 +1198,8 @@ bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   return TargetLowering::isZExtFree(Val, VT2);
 }
 
-bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
+bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT,
+                                                SDValue V) const {
   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
 }
 
@@ -326,7 +326,7 @@ public:
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
-  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT, SDValue V) const override;
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
   bool hasAndNotCompare(SDValue Y) const override;
@@ -93,7 +93,7 @@ declare i7 @llvm.fshl.i7(i7, i7, i7)
 define i7 @fshl_i7_const_fold() {
 ; CHECK-LABEL: fshl_i7_const_fold:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #67
+; CHECK-NEXT:    mov w0, #-61
 ; CHECK-NEXT:    ret
   %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
   ret i7 %f
@@ -102,7 +102,7 @@ define i7 @fshl_i7_const_fold() {
 define i8 @fshl_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #128
+; CHECK-NEXT:    mov w0, #-128
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
   ret i8 %f
@@ -164,7 +164,7 @@ define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
 define i8 @fshl_i8_const_fold() {
 ; CHECK-LABEL: fshl_i8_const_fold:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #128
+; CHECK-NEXT:    mov w0, #-128
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
   ret i8 %f
@@ -241,7 +241,7 @@ define i7 @fshr_i7_const_fold() {
 define i8 @fshr_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #254
+; CHECK-NEXT:    mov w0, #-2
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
   ret i8 %f
@@ -250,7 +250,7 @@ define i8 @fshr_i8_const_fold_overshift_1() {
 define i8 @fshr_i8_const_fold_overshift_2() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #225
+; CHECK-NEXT:    mov w0, #-31
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
   ret i8 %f
@@ -259,7 +259,7 @@ define i8 @fshr_i8_const_fold_overshift_2() {
 define i8 @fshr_i8_const_fold_overshift_3() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #255
+; CHECK-NEXT:    mov w0, #-1
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
   ret i8 %f
@@ -303,7 +303,7 @@ define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
 define i8 @fshr_i8_const_fold() {
 ; CHECK-LABEL: fshr_i8_const_fold:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #254
+; CHECK-NEXT:    mov w0, #-2
 ; CHECK-NEXT:    ret
   %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
   ret i8 %f
@@ -223,8 +223,7 @@ define i8 @test_redand_v3i8(<3 x i8> %a) {
 ; CHECK-LABEL: test_redand_v3i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    and w0, w8, w2
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redand_v3i8:
@@ -119,8 +119,7 @@ define <vscale x 8 x i8> @sve_splat_8xi8(i8 %val) {
 define <vscale x 8 x i8> @sve_splat_8xi8_imm() {
 ; CHECK-LABEL: sve_splat_8xi8_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #255
-; CHECK-NEXT:    mov z0.h, w8
+; CHECK-NEXT:    mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
   %splat = shufflevector <vscale x 8 x i8> %ins, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -151,8 +150,7 @@ define <vscale x 4 x i16> @sve_splat_4xi16(i16 %val) {
 define <vscale x 4 x i16> @sve_splat_4xi16_imm() {
 ; CHECK-LABEL: sve_splat_4xi16_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
-; CHECK-NEXT:    mov z0.s, w8
+; CHECK-NEXT:    mov z0.s, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
   %splat = shufflevector <vscale x 4 x i16> %ins, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -173,8 +171,7 @@ define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
 define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
 ; CHECK-LABEL: sve_splat_2xi32_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1
-; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    mov z0.d, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
   %splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -29,11 +29,7 @@ define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
 define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0x0000ff000000ff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <2 x i8> %x, %mask
   %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
@@ -61,11 +57,7 @@ define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwin
 define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <4 x i8> %x, %mask
   %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
@@ -77,11 +69,7 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
 ; CHECK-LABEL: out_v4i8_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <4 x i8> %x, %mask
   %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
@@ -93,11 +81,7 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
 define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
 ; CHECK-LABEL: out_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d3, #0x00ffff0000ffff
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    ret
   %mx = and <2 x i16> %x, %mask
   %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
@@ -86,8 +86,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v3i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    and w0, w8, w2
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
   ret i8 %b