[ARM] Constant fold VCTP intrinsics
We can sometimes get into the situation where the operand to a vctp intrinsic becomes constant, such as after a loop is fully unrolled. This adds the constant folding needed for these intrinsics, allowing them to simplify away and hopefully simplify the remaining instructions.

Differential Revision: https://reviews.llvm.org/D84110
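For illustration, a hypothetical snippet mirroring the vctp32 tests added below: once the element-count operand of a vctp32 has become the constant 2, the call folds to a constant predicate with the first two lanes set.

  %p = call <4 x i1> @llvm.arm.mve.vctp32(i32 2)
  ; folds to: <4 x i1> <i1 true, i1 true, i1 false, i1 false>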
parent e37b220442
commit becaa6803a
@@ -41,6 +41,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -1456,6 +1457,11 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::experimental_vector_reduce_smax:
   case Intrinsic::experimental_vector_reduce_umin:
   case Intrinsic::experimental_vector_reduce_umax:
+  // Target intrinsics
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64:
     return true;

   // Floating point operations cannot be folded in strictfp functions in
@@ -2719,7 +2725,8 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = FVTy->getElementType();

-  if (IntrinsicID == Intrinsic::masked_load) {
+  switch (IntrinsicID) {
+  case Intrinsic::masked_load: {
     auto *SrcPtr = Operands[0];
     auto *Mask = Operands[2];
     auto *Passthru = Operands[3];
@@ -2757,6 +2764,32 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
       return nullptr;
     return ConstantVector::get(NewElements);
   }
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64: {
+    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+      unsigned Lanes = FVTy->getNumElements();
+      uint64_t Limit = Op->getZExtValue();
+      // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
+      // sure we get the limit right in that case and set all relevant lanes.
+      if (IntrinsicID == Intrinsic::arm_mve_vctp64)
+        Limit *= 2;
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (i < Limit)
+          NCs.push_back(ConstantInt::getTrue(Ty));
+        else
+          NCs.push_back(ConstantInt::getFalse(Ty));
+      }
+      return ConstantVector::get(NCs);
+    }
+    break;
+  }
+  default:
+    break;
+  }

   for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
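As the comment in the vctp64 case notes, the intrinsic is modelled as returning a v4i1, so each 64-bit element corresponds to two predicate lanes and Limit is doubled before the lanes are filled in. A hypothetical snippet, matching the vctp64_1 test below:

  %p = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
  ; folds to: <4 x i1> <i1 true, i1 true, i1 false, i1 false>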
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instsimplify -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define <16 x i1> @vctp8_0() {
+; CHECK-LABEL: @vctp8_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_1() {
+; CHECK-LABEL: @vctp8_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_8() {
+; CHECK-LABEL: @vctp8_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_15() {
+; CHECK-LABEL: @vctp8_15(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_16() {
+; CHECK-LABEL: @vctp8_16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_100() {
+; CHECK-LABEL: @vctp8_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_m1() {
+; CHECK-LABEL: @vctp8_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
+  ret <16 x i1> %int
+}
+
+
+define <8 x i1> @vctp16_0() {
+; CHECK-LABEL: @vctp16_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_1() {
+; CHECK-LABEL: @vctp16_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_4() {
+; CHECK-LABEL: @vctp16_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_7() {
+; CHECK-LABEL: @vctp16_7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_8() {
+; CHECK-LABEL: @vctp16_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_100() {
+; CHECK-LABEL: @vctp16_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_m1() {
+; CHECK-LABEL: @vctp16_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
+  ret <8 x i1> %int
+}
+
+
+define <4 x i1> @vctp32_0() {
+; CHECK-LABEL: @vctp32_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_1() {
+; CHECK-LABEL: @vctp32_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_3() {
+; CHECK-LABEL: @vctp32_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_4() {
+; CHECK-LABEL: @vctp32_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_100() {
+; CHECK-LABEL: @vctp32_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_m1() {
+; CHECK-LABEL: @vctp32_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
+  ret <4 x i1> %int
+}
+
+
+define <4 x i1> @vctp64_0() {
+; CHECK-LABEL: @vctp64_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_1() {
+; CHECK-LABEL: @vctp64_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_2() {
+; CHECK-LABEL: @vctp64_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_100() {
+; CHECK-LABEL: @vctp64_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_m1() {
+; CHECK-LABEL: @vctp64_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
+  ret <4 x i1> %int
+}
+
+
+declare <4 x i1> @llvm.arm.mve.vctp64(i32)
+declare <4 x i1> @llvm.arm.mve.vctp32(i32)
+declare <8 x i1> @llvm.arm.mve.vctp16(i32)
+declare <16 x i1> @llvm.arm.mve.vctp8(i32)