[InstSimplify] Properly constrain {insert,extract}_subvector intrinsic fold
The previous rule:

    (insert_vector _, (extract_vector X, 0), 0) -> X

is not quite correct. The correct fold should be:

    (insert_vector Y, (extract_vector X, 0), 0) -> X
    where: Y is X, or Y is undef

This commit updates the pattern.

Reviewed By: peterwaller-arm, paulwalker-arm

Differential Revision: https://reviews.llvm.org/D102699
parent 35490329cb
commit 5a476987f7
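The fold is only sound when every lane of the insert's result is already a lane of X. That holds when the destination vector Y is X itself, or when Y is undef (replacing undef lanes with X's lanes is a legal refinement). As a minimal sketch, reusing the intrinsic signatures from the test file below (the function and value names are illustrative, not taken from the patch), the following chain simplifies to a plain return of %x:

; Illustrative only, not part of the patch: the insert destination is %x itself,
; so the whole extract/insert chain can be replaced by %x.
define <vscale x 16 x i8> @fold_when_dest_is_source(<vscale x 16 x i8> %x) {
  %sub = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8> %x, i64 0)
  %res = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> %x, <32 x i8> %sub, i64 0)
  ret <vscale x 16 x i8> %res
}

declare <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8>, <32 x i8>, i64)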
@@ -5734,16 +5734,19 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
     return nullptr;
   }
   case Intrinsic::experimental_vector_insert: {
+    Value *Vec = Call->getArgOperand(0);
     Value *SubVec = Call->getArgOperand(1);
     Value *Idx = Call->getArgOperand(2);
     Type *ReturnType = F->getReturnType();
 
-    // (insert_vector _, (extract_vector X, 0), 0) -> X
+    // (insert_vector Y, (extract_vector X, 0), 0) -> X
+    // where: Y is X, or Y is undef
     unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
     Value *X = nullptr;
     if (match(SubVec, m_Intrinsic<Intrinsic::experimental_vector_extract>(
                           m_Value(X), m_Zero())) &&
-        IdxN == 0 && X->getType() == ReturnType)
+        (Q.isUndefValue(Vec) || Vec == X) && IdxN == 0 &&
+        X->getType() == ReturnType)
       return X;
 
     return nullptr;
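The new (Q.isUndefValue(Vec) || Vec == X) clause is what rejects the unsound case: if the destination Y is some unrelated vector, then whenever the scalable result is wider than the inserted fixed-width subvector, the trailing lanes of the result come from Y rather than X, so returning X would change them. A sketch of that shape, with illustrative names, mirroring the non_redundant_extract_insert_chain_1 test added below:

; Illustrative only: %y is neither %x nor undef, so the chain must not fold to %x.
; (Intrinsic declarations as in the sketch above.)
define <vscale x 16 x i8> @no_fold_when_dest_differs(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
  %sub = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8> %x, i64 0)
  %res = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> %y, <32 x i8> %sub, i64 0)
  ret <vscale x 16 x i8> %res
}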
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
 
-define <vscale x 16 x i8> @redundant_extract_insert_chain(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+define <vscale x 16 x i8> @redundant_extract_insert_chain(<vscale x 16 x i8> %x) {
 ; CHECK-LABEL: @redundant_extract_insert_chain(
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[X:%.*]]
 ;
@@ -10,8 +10,8 @@ define <vscale x 16 x i8> @redundant_extract_insert_chain(<vscale x 16 x i1> %pg
   ret <vscale x 16 x i8> %inserted
 }
 
-define <vscale x 16 x i8> @non_redundant_extract_insert_chain(<vscale x 16 x i1> %pg, <vscale x 32 x i8> %x) {
-; CHECK-LABEL: @non_redundant_extract_insert_chain(
+define <vscale x 16 x i8> @non_redundant_extract_insert_chain_0(<vscale x 32 x i8> %x) {
+; CHECK-LABEL: @non_redundant_extract_insert_chain_0(
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv32i8(<vscale x 32 x i8> [[X:%.*]], i64 0)
 ; CHECK-NEXT:    [[INSERTED:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> undef, <32 x i8> [[EXTRACTED]], i64 0)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[INSERTED]]
@@ -21,6 +21,17 @@ define <vscale x 16 x i8> @non_redundant_extract_insert_chain(<vscale x 16 x i1>
   ret <vscale x 16 x i8> %inserted
 }
 
+define <vscale x 16 x i8> @non_redundant_extract_insert_chain_1(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: @non_redundant_extract_insert_chain_1(
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8> [[X:%.*]], i64 0)
+; CHECK-NEXT:    [[INSERTED:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> [[Y:%.*]], <32 x i8> [[EXTRACTED]], i64 0)
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[INSERTED]]
+;
+  %extracted = call <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8> %x, i64 0)
+  %inserted = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8> %y, <32 x i8> %extracted, i64 0)
+  ret <vscale x 16 x i8> %inserted
+}
+
 declare <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv16i8(<vscale x 16 x i8>, i64)
 declare <32 x i8> @llvm.experimental.vector.extract.v32i8.nxv32i8(<vscale x 32 x i8>, i64)
 declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v32i8(<vscale x 16 x i8>, <32 x i8>, i64)