forked from OSchip/llvm-project
[CodeGen][AArch64][SVE] Canonicalize intrinsic rdffr{ => _z}
Follow up to D101357 / 3fa6510f6
.
Supersedes D102330.
Goal: Use the flag-setting rdffrs instead of rdffr + ptest.
Problem: RDFFR_P doesn't have a flag-setting equivalent.
Solution: in instcombine, canonicalize to RDFFR_PP at the IR level, and
rely on RDFFR_PP+PTEST => RDFFRS_PP optimization in
AArch64InstrInfo::optimizePTestInstr.
While here:
* Test that rdffr.z+ptest generates a rdffrs.
* Use update_{test,llc}_checks.py on the tests.
* Use sve attribute on functions.
Differential Revision: https://reviews.llvm.org/D102623
This commit is contained in:
parent
f34311c402
commit
2d574a1104
|
@ -7,7 +7,8 @@
|
||||||
svbool_t test_svrdffr()
|
svbool_t test_svrdffr()
|
||||||
{
|
{
|
||||||
// CHECK-LABEL: test_svrdffr
|
// CHECK-LABEL: test_svrdffr
|
||||||
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
|
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>
|
||||||
|
// CHECK-NOT: rdffr
|
||||||
// CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
|
// CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]]
|
||||||
return svrdffr();
|
return svrdffr();
|
||||||
}
|
}
|
||||||
|
|
|
@ -470,6 +470,23 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
|
||||||
return IC.replaceInstUsesWith(II, Extract);
|
return IC.replaceInstUsesWith(II, Extract);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Canonicalize the unpredicated SVE `rdffr` intrinsic call \p II into the
/// predicated `rdffr.z` form.
///
/// A `ptrue` call using the `all` predicate pattern (overloaded on II's type)
/// is built at II's position and passed as the only operand of a new
/// `rdffr.z` call. Returns the result of asking \p IC to replace all uses of
/// II with that new call.
static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
|
||||||
|
IntrinsicInst &II) {
|
||||||
|
LLVMContext &Ctx = II.getContext();
|
||||||
|
IRBuilder<> Builder(Ctx);
|
||||||
|
Builder.SetInsertPoint(&II);
|
||||||
|
// Replace rdffr with predicated rdffr.z intrinsic, so that optimizePTestInstr
|
||||||
|
// can work with RDFFR_PP for ptest elimination.
|
||||||
|
auto *AllPat =
|
||||||
|
ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
|
||||||
|
auto *PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
|
||||||
|
{II.getType()}, {AllPat});
|
||||||
|
auto *RDFFR =
|
||||||
|
Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {}, {PTrue});
|
||||||
|
// Carry II's name over to the replacement call before swapping the uses.
RDFFR->takeName(&II);
|
||||||
|
return IC.replaceInstUsesWith(II, RDFFR);
|
||||||
|
}
|
||||||
|
|
||||||
Optional<Instruction *>
|
Optional<Instruction *>
|
||||||
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||||
IntrinsicInst &II) const {
|
IntrinsicInst &II) const {
|
||||||
|
@ -481,6 +498,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||||
return instCombineConvertFromSVBool(IC, II);
|
return instCombineConvertFromSVBool(IC, II);
|
||||||
case Intrinsic::aarch64_sve_dup:
|
case Intrinsic::aarch64_sve_dup:
|
||||||
return instCombineSVEDup(IC, II);
|
return instCombineSVEDup(IC, II);
|
||||||
|
case Intrinsic::aarch64_sve_rdffr:
|
||||||
|
return instCombineRDFFR(IC, II);
|
||||||
case Intrinsic::aarch64_sve_lasta:
|
case Intrinsic::aarch64_sve_lasta:
|
||||||
case Intrinsic::aarch64_sve_lastb:
|
case Intrinsic::aarch64_sve_lastb:
|
||||||
return instCombineSVELast(IC, II);
|
return instCombineSVELast(IC, II);
|
||||||
|
|
|
@ -1,33 +1,51 @@
|
||||||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc < %s | FileCheck %s
|
||||||
|
|
||||||
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
;
|
;
|
||||||
; RDFFR
|
; RDFFR
|
||||||
;
|
;
|
||||||
|
|
||||||
define <vscale x 16 x i1> @rdffr() {
|
define <vscale x 16 x i1> @rdffr() #0 {
|
||||||
; CHECK-LABEL: rdffr:
|
; CHECK-LABEL: rdffr:
|
||||||
; CHECK: rdffr p0.b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rdffr p0.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
|
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
|
||||||
ret <vscale x 16 x i1> %out
|
ret <vscale x 16 x i1> %out
|
||||||
}
|
}
|
||||||
|
|
||||||
define <vscale x 16 x i1> @rdffr_z(<vscale x 16 x i1> %pg) {
|
define <vscale x 16 x i1> @rdffr_z(<vscale x 16 x i1> %pg) #0 {
|
||||||
; CHECK-LABEL: rdffr_z:
|
; CHECK-LABEL: rdffr_z:
|
||||||
; CHECK: rdffr p0.b, p0/z
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: rdffr p0.b, p0/z
|
||||||
|
; CHECK-NEXT: ret
|
||||||
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
|
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
|
||||||
ret <vscale x 16 x i1> %out
|
ret <vscale x 16 x i1> %out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Test that rdffr.z followed by ptest optimizes to flags-setting rdffrs.
|
||||||
|
define i1 @rdffr_z_ptest(<vscale x 16 x i1> %pg) #0 {
|
||||||
|
; CHECK-LABEL: rdffr_z_ptest:
|
||||||
|
; CHECK: // %bb.0:
|
||||||
|
; CHECK-NEXT: rdffrs p0.b, p0/z
|
||||||
|
; CHECK-NEXT: cset w0, ne
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%rdffr = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
|
||||||
|
%out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %rdffr)
|
||||||
|
ret i1 %out
|
||||||
|
}
|
||||||
|
|
||||||
;
|
;
|
||||||
; SETFFR
|
; SETFFR
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @set_ffr() {
|
define void @set_ffr() #0 {
|
||||||
; CHECK-LABEL: set_ffr:
|
; CHECK-LABEL: set_ffr:
|
||||||
; CHECK: setffr
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: setffr
|
||||||
|
; CHECK-NEXT: ret
|
||||||
call void @llvm.aarch64.sve.setffr()
|
call void @llvm.aarch64.sve.setffr()
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
@ -36,10 +54,11 @@ define void @set_ffr() {
|
||||||
; WRFFR
|
; WRFFR
|
||||||
;
|
;
|
||||||
|
|
||||||
define void @wrffr(<vscale x 16 x i1> %a) {
|
define void @wrffr(<vscale x 16 x i1> %a) #0 {
|
||||||
; CHECK-LABEL: wrffr:
|
; CHECK-LABEL: wrffr:
|
||||||
; CHECK: wrffr p0.b
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: wrffr p0.b
|
||||||
|
; CHECK-NEXT: ret
|
||||||
call void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1> %a)
|
call void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1> %a)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
@ -48,3 +67,7 @@ declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
|
||||||
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
|
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
|
||||||
declare void @llvm.aarch64.sve.setffr()
|
declare void @llvm.aarch64.sve.setffr()
|
||||||
declare void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1>)
|
declare void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1>)
|
||||||
|
|
||||||
|
declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
|
||||||
|
|
||||||
|
attributes #0 = { "target-features"="+sve" }
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
|
; RUN: opt -S -instcombine < %s | FileCheck %s
|
||||||
|
|
||||||
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
; Test that rdffr is replaced with its predicated form, which enables ptest optimization later.
|
||||||
|
define <vscale x 16 x i1> @predicate_rdffr() #0 {
|
||||||
|
; CHECK-LABEL: @predicate_rdffr(
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
|
||||||
|
; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP1]])
|
||||||
|
; CHECK-NEXT: ret <vscale x 16 x i1> [[OUT]]
|
||||||
|
;
|
||||||
|
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
|
||||||
|
ret <vscale x 16 x i1> %out
|
||||||
|
}
|
||||||
|
|
||||||
|
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
|
||||||
|
|
||||||
|
attributes #0 = { "target-features"="+sve" }
|
Loading…
Reference in New Issue