[ARM] Constant fold VCTP intrinsics

We can sometimes get into the situation where the operand to a vctp
intrinsic becomes constant, such as after a loop is fully unrolled. This
adds the constant folding needed for these intrinsics, allowing them to
fold away entirely and, with luck, enabling further simplification of the
remaining instructions.

Differential Revision: https://reviews.llvm.org/D84110
This commit is contained in:
David Green 2020-07-21 08:24:37 +01:00
parent e37b220442
commit becaa6803a
3 changed files with 303 additions and 1 deletions

View File

@ -41,6 +41,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@ -1456,6 +1457,11 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::experimental_vector_reduce_smax:
case Intrinsic::experimental_vector_reduce_umin:
case Intrinsic::experimental_vector_reduce_umax:
// Target intrinsics
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
case Intrinsic::arm_mve_vctp64:
return true;
// Floating point operations cannot be folded in strictfp functions in
@ -2719,7 +2725,8 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
SmallVector<Constant *, 4> Lane(Operands.size());
Type *Ty = FVTy->getElementType();
if (IntrinsicID == Intrinsic::masked_load) {
switch (IntrinsicID) {
case Intrinsic::masked_load: {
auto *SrcPtr = Operands[0];
auto *Mask = Operands[2];
auto *Passthru = Operands[3];
@ -2757,6 +2764,32 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
return nullptr;
return ConstantVector::get(NewElements);
}
// Constant fold the ARM MVE vctp intrinsics: vctp<size>(n) produces a
// predicate vector whose first 'n' lanes are true and the rest false.
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
case Intrinsic::arm_mve_vctp64: {
  // Only fold when the element-count operand is a compile-time constant.
  if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
    unsigned Lanes = FVTy->getNumElements();
    // The count is zero-extended, so a "negative" operand such as -1 becomes
    // a huge limit and sets every lane.
    uint64_t Limit = Op->getZExtValue();
    // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
    // sure we get the limit right in that case and set all relevant lanes.
    if (IntrinsicID == Intrinsic::arm_mve_vctp64)
      Limit *= 2;
    SmallVector<Constant *, 16> NCs;
    // Lanes below the limit are true; the remainder are false.
    for (unsigned i = 0; i < Lanes; i++) {
      if (i < Limit)
        NCs.push_back(ConstantInt::getTrue(Ty));
      else
        NCs.push_back(ConstantInt::getFalse(Ty));
    }
    return ConstantVector::get(NCs);
  }
  // Operand is not a constant; no fold is possible here.
  break;
}
default:
break;
}
for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
// Gather a column of constants.

View File

@ -0,0 +1,2 @@
# Disable these tests when the ARM target is not built into this LLVM.
if 'ARM' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,267 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instsimplify -S -o - %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
; A count of zero enables no lanes: the predicate folds to all-false.
define <16 x i1> @vctp8_0() {
; CHECK-LABEL: @vctp8_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <16 x i1> zeroinitializer
;
entry:
%int = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
ret <16 x i1> %int
}
; A count of one sets only the first of the 16 lanes.
define <16 x i1> @vctp8_1() {
; CHECK-LABEL: @vctp8_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
;
entry:
%int = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
ret <16 x i1> %int
}
; A count of eight sets the low half of the 16 lanes.
define <16 x i1> @vctp8_8() {
; CHECK-LABEL: @vctp8_8(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
;
entry:
%int = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
ret <16 x i1> %int
}
; A count of 15 sets every lane except the last.
define <16 x i1> @vctp8_15() {
; CHECK-LABEL: @vctp8_15(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
;
entry:
%int = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
ret <16 x i1> %int
}
; A count equal to the vector width sets all 16 lanes.
define <16 x i1> @vctp8_16() {
; CHECK-LABEL: @vctp8_16(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
ret <16 x i1> %int
}
; A count beyond the vector width saturates: all lanes set.
define <16 x i1> @vctp8_100() {
; CHECK-LABEL: @vctp8_100(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
ret <16 x i1> %int
}
; -1 is treated as an unsigned count (zero-extended), so all lanes are set.
define <16 x i1> @vctp8_m1() {
; CHECK-LABEL: @vctp8_m1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
ret <16 x i1> %int
}
; A count of zero enables no lanes: the predicate folds to all-false.
define <8 x i1> @vctp16_0() {
; CHECK-LABEL: @vctp16_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <8 x i1> zeroinitializer
;
entry:
%int = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
ret <8 x i1> %int
}
; A count of one sets only the first of the 8 lanes.
define <8 x i1> @vctp16_1() {
; CHECK-LABEL: @vctp16_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
;
entry:
%int = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
ret <8 x i1> %int
}
; A count of four sets the low half of the 8 lanes.
define <8 x i1> @vctp16_4() {
; CHECK-LABEL: @vctp16_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
;
entry:
%int = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
ret <8 x i1> %int
}
; A count of seven sets every lane except the last.
define <8 x i1> @vctp16_7() {
; CHECK-LABEL: @vctp16_7(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
;
entry:
%int = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
ret <8 x i1> %int
}
; A count equal to the vector width sets all 8 lanes.
define <8 x i1> @vctp16_8() {
; CHECK-LABEL: @vctp16_8(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
ret <8 x i1> %int
}
; A count beyond the vector width saturates: all lanes set.
define <8 x i1> @vctp16_100() {
; CHECK-LABEL: @vctp16_100(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
ret <8 x i1> %int
}
; -1 is treated as an unsigned count (zero-extended), so all lanes are set.
define <8 x i1> @vctp16_m1() {
; CHECK-LABEL: @vctp16_m1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
ret <8 x i1> %int
}
; A count of zero enables no lanes: the predicate folds to all-false.
define <4 x i1> @vctp32_0() {
; CHECK-LABEL: @vctp32_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> zeroinitializer
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
ret <4 x i1> %int
}
; A count of one sets only the first of the 4 lanes.
define <4 x i1> @vctp32_1() {
; CHECK-LABEL: @vctp32_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
ret <4 x i1> %int
}
; A count of three sets every lane except the last.
define <4 x i1> @vctp32_3() {
; CHECK-LABEL: @vctp32_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
ret <4 x i1> %int
}
; A count equal to the vector width sets all 4 lanes.
define <4 x i1> @vctp32_4() {
; CHECK-LABEL: @vctp32_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
ret <4 x i1> %int
}
; A count beyond the vector width saturates: all lanes set.
define <4 x i1> @vctp32_100() {
; CHECK-LABEL: @vctp32_100(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
ret <4 x i1> %int
}
; -1 is treated as an unsigned count (zero-extended), so all lanes are set.
define <4 x i1> @vctp32_m1() {
; CHECK-LABEL: @vctp32_m1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
ret <4 x i1> %int
}
; vctp64 is modelled as returning a v4i1; a count of zero still folds to all-false.
define <4 x i1> @vctp64_0() {
; CHECK-LABEL: @vctp64_0(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> zeroinitializer
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
ret <4 x i1> %int
}
; Each i64 element covers two of the modelled v4i1 lanes, so a count of one
; sets the first two lanes.
define <4 x i1> @vctp64_1() {
; CHECK-LABEL: @vctp64_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 false, i1 false>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
ret <4 x i1> %int
}
; A count of two covers both i64 elements: all four modelled lanes set.
define <4 x i1> @vctp64_2() {
; CHECK-LABEL: @vctp64_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
ret <4 x i1> %int
}
; A count beyond the element count saturates: all lanes set.
define <4 x i1> @vctp64_100() {
; CHECK-LABEL: @vctp64_100(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
ret <4 x i1> %int
}
; -1 is treated as an unsigned count (zero-extended), so all lanes are set.
define <4 x i1> @vctp64_m1() {
; CHECK-LABEL: @vctp64_m1(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
;
entry:
%int = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
ret <4 x i1> %int
}
declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)