[InstCombine] Fold vector.extract of vector.insert when the extracted type matches the inserted value type

This patch implements these optimizations:

 extract.vector(insert.vector(Vector, Value, Idx), Idx) --> Value

 extract.vector(insert.vector(Vector, Value, InsertIndex), ExtractIndex)
  --> extract.vector(Vector, ExtractIndex)

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D132137
This commit is contained in:
Caroline Concatto 2022-08-19 12:08:14 +01:00
parent e99f6df726
commit 09afe4155b
2 changed files with 69 additions and 1 deletions

View File

@ -2409,7 +2409,31 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *Vec = II->getArgOperand(0);
Value *Idx = II->getArgOperand(1);
auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
Type *ReturnType = II->getType();
// (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
// ExtractIdx)
unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
Value *InsertTuple, *InsertIdx, *InsertValue;
if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
m_Value(InsertValue),
m_Value(InsertIdx))) &&
InsertValue->getType() == ReturnType) {
unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
// Case where we get the same index right after setting it.
// extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
// InsertValue
if (ExtractIdx == Index)
return replaceInstUsesWith(CI, InsertValue);
// If the extract index differs from the index used by the
// insert.vector intrinsic, the inserted value cannot overlap the
// extracted range (both subvectors have the same type), so extract
// directly from the input tuple further up the chain:
// extract.vector(insert.vector(InsertTuple, InsertValue, InsertIndex),
//                ExtractIndex)
// --> extract.vector(InsertTuple, ExtractIndex)
else
return replaceOperand(CI, 0, InsertTuple);
}
auto *DstTy = dyn_cast<FixedVectorType>(ReturnType);
auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
// Only canonicalize if the destination vector and Vec are fixed

View File

@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
; Check that the redundant sequences of extract/insert are eliminated.
; extract.vector(insert.vector(Tuple, Value, Idx), Idx) --> Value
define <vscale x 16 x i8> @test_extract_insert_same_idx(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1) {
; CHECK-LABEL: @test_extract_insert_same_idx(
; CHECK-NEXT: ret <vscale x 16 x i8> [[V1:%.*]]
;
; Insert and extract use the same index (48) and the same subvector type
; (<vscale x 16 x i8>), so the extract folds directly to the inserted
; value %v1 and both intrinsic calls are removed.
  %vec.ins = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1, i64 48)
  %vec.ext = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> %vec.ins, i64 48)
  ret <vscale x 16 x i8> %vec.ext
}
; extract.vector(insert.vector(Vector, Value, InsertIndex), ExtractIndex)
; --> extract.vector(Vector, ExtractIndex)
define <vscale x 16 x i8> @test_extract_insert_dif_idx(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1) {
; CHECK-LABEL: @test_extract_insert_dif_idx(
; CHECK-NEXT: [[VEC_EXT:%.*]] = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V0:%.*]], i64 0)
; CHECK-NEXT: ret <vscale x 16 x i8> [[VEC_EXT]]
;
; The extract index (0) differs from the insert index (48) and both
; subvectors share the same type, so the extracted range cannot overlap
; the inserted value; the extract is rewritten to read from %v0 and the
; insert becomes dead.
  %vec.ins = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1, i64 48)
  %vec.ext = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> %vec.ins, i64 0)
  ret <vscale x 16 x i8> %vec.ext
}
; Negative test
; The extracted vector-size != inserted vector-size
define <vscale x 32 x i8> @neg_test_extract_insert_same_idx_dif_ret_size(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1) {
; CHECK-LABEL: @neg_test_extract_insert_same_idx_dif_ret_size(
; CHECK-NEXT: [[VEC_INS:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[V0:%.*]], <vscale x 16 x i8> [[V1:%.*]], i64 32)
; CHECK-NEXT: [[VEC_EXT:%.*]] = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv64i8(<vscale x 64 x i8> [[VEC_INS]], i64 32)
; CHECK-NEXT: ret <vscale x 32 x i8> [[VEC_EXT]]
;
; Negative test: the extracted type (<vscale x 32 x i8>) differs from the
; inserted type (<vscale x 16 x i8>), so the guard
; InsertValue->getType() == ReturnType fails and no fold happens even
; though the indices match — both calls must remain.
  %vec.ins = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %v0, <vscale x 16 x i8> %v1, i64 32)
  %vec.ext = call <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv64i8(<vscale x 64 x i8> %vec.ins, i64 32)
  ret <vscale x 32 x i8> %vec.ext
}
declare <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8>, <vscale x 16 x i8>, i64)
declare <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8>, i64)
declare <vscale x 32 x i8> @llvm.vector.extract.nxv32i8.nxv64i8(<vscale x 64 x i8>, i64)