[InstCombine] Fold vector.extract of vector.insert when the extracted type matches the inserted value type

This patch implements these optimizations:

 extract.vector(insert.vector(Vector, Value, Idx), Idx) --> Value

 extract.vector(insert.vector(Vector, Value, InsertIndex), ExtractIndex)
  --> extract.vector(Vector, ExtractIndex)

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D132137
This commit is contained in:
Caroline Concatto 2022-08-19 12:08:14 +01:00
parent e99f6df726
commit 09afe4155b
2 changed files with 69 additions and 1 deletions

View File

@ -2409,7 +2409,31 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *Vec = II->getArgOperand(0);
Value *Idx = II->getArgOperand(1);
auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
Type *ReturnType = II->getType();
// (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
// ExtractIdx)
unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
Value *InsertTuple, *InsertIdx, *InsertValue;
if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
m_Value(InsertValue),
m_Value(InsertIdx))) &&
InsertValue->getType() == ReturnType) {
unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
// Case where we get the same index right after setting it.
// extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
// InsertValue
if (ExtractIdx == Index)
return replaceInstUsesWith(CI, InsertValue);
// If the extract index differs from the index used by the
// insert.vector intrinsic, the inserted value cannot overlap the
// extracted range (both subvectors have the same type), so extract
// directly from the input tuple further up the chain:
// extract.vector(insert.vector(InsertTuple, InsertValue, InsertIndex),
//                ExtractIndex)
// --> extract.vector(InsertTuple, ExtractIndex)
else
return replaceOperand(CI, 0, InsertTuple);
}
auto *DstTy = dyn_cast<FixedVectorType>(ReturnType);
auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
// Only canonicalize if the destination vector and Vec are fixed

View File

@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
; Check that the redundant sequences of extract/insert are eliminated.
; extract.vector(insert.vector(Tuple, Value, Idx), Idx) --> Value
define <vscale x 16 x i8> @test_extract_insert_same_idx(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1) {
; CHECK-LABEL: @test_extract_insert_same_idx(
; CHECK-NEXT: ret <vscale x 16 x i8> [[V1:%.*]]
;
; Insert and extract use the same index (48) and the same subvector type
; (<vscale x 16 x i8>), so the extract folds directly to the inserted
; value %v1 and both intrinsic calls are removed.
  %vec.ins = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1, i64 48)
  %vec.ext = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> %vec.ins, i64 48)
  ret <vscale x 16 x i8> %vec.ext
}
; extract.vector(insert.vector(Vector, Value, InsertIndex), ExtractIndex)
; --> extract.vector(Vector, ExtractIndex)
define <vscale x 16 x i8> @test_extract_insert_dif_idx(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1) {
; CHECK-LABEL: @test_extract_insert_dif_idx(
; CHECK-NEXT: [[VEC_EXT:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V0:%.*]], i64 0)
; CHECK-NEXT: ret <vscale x 16 x i8> [[VEC_EXT]]
;
; The extract index (0) differs from the insert index (48) and both
; subvectors share the same type, so the extracted range cannot overlap
; the inserted value; the extract is rewritten to read from %v0 and the
; insert becomes dead.
  %vec.ins = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1, i64 48)
  %vec.ext = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> %vec.ins, i64 0)
  ret <vscale x 16 x i8> %vec.ext
}
; Negative test
; The extracted vector-size != inserted vector-size
define <vscale x 32 x i8> @neg_test_extract_insert_same_idx_dif_ret_size(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1) {
; CHECK-LABEL: @neg_test_extract_insert_same_idx_dif_ret_size(
; CHECK-NEXT: [[VEC_INS:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[V0:%.*]], <vscale x 16 x i8> [[V1:%.*]], i64 32)
; CHECK-NEXT: [[VEC_EXT:%.*]] = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv64i8(<vscale x 64 x i8> [[VEC_INS]], i64 32)
; CHECK-NEXT: ret <vscale x 32 x i8> [[VEC_EXT]]
;
; Negative test: the extracted type (<vscale x 32 x i8>) differs from the
; inserted type (<vscale x 16 x i8>), so the guard
; InsertValue->getType() == ReturnType fails and no fold happens even
; though the indices match — both calls must remain.
  %vec.ins = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1, i64 32)
  %vec.ext = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv64i8(<vscale x 64 x i8> %vec.ins, i64 32)
  ret <vscale x 32 x i8> %vec.ext
}
declare <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8>, <vscale x 16 x i8>, i64)
declare <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8>, i64)
declare <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv64i8(<vscale x 64 x i8>, i64)