forked from OSchip/llvm-project
[LV] Add support for insertelt/extractelt processing during type truncation
Summary: While shrinking types according to the required bits, we can encounter insertelement/extractelement instructions. Previously these were not handled, so we reached an llvm_unreachable statement. This change adds support for truncating insertelement/extractelement operations, and adds a regression test. Reviewers: jmolloy Subscribers: mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D17078 llvm-svn: 260893
This commit is contained in:
parent
036c08874a
commit
ec7063ac77
|
@ -3187,6 +3187,9 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
|
||||||
if (TruncatedTy == OriginalTy)
|
if (TruncatedTy == OriginalTy)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (!isa<Instruction>(I))
|
||||||
|
continue;
|
||||||
|
|
||||||
IRBuilder<> B(cast<Instruction>(I));
|
IRBuilder<> B(cast<Instruction>(I));
|
||||||
auto ShrinkOperand = [&](Value *V) -> Value* {
|
auto ShrinkOperand = [&](Value *V) -> Value* {
|
||||||
if (auto *ZI = dyn_cast<ZExtInst>(V))
|
if (auto *ZI = dyn_cast<ZExtInst>(V))
|
||||||
|
@ -3242,6 +3245,17 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
|
||||||
} else if (isa<LoadInst>(I)) {
|
} else if (isa<LoadInst>(I)) {
|
||||||
// Don't do anything with the operands, just extend the result.
|
// Don't do anything with the operands, just extend the result.
|
||||||
continue;
|
continue;
|
||||||
|
} else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
|
||||||
|
auto Elements = IE->getOperand(0)->getType()->getVectorNumElements();
|
||||||
|
auto *O0 = B.CreateZExtOrTrunc(
|
||||||
|
IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
|
||||||
|
auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
|
||||||
|
NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
|
||||||
|
} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
|
||||||
|
auto Elements = EE->getOperand(0)->getType()->getVectorNumElements();
|
||||||
|
auto *O0 = B.CreateZExtOrTrunc(
|
||||||
|
EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
|
||||||
|
NewI = B.CreateExtractElement(O0, EE->getOperand(2));
|
||||||
} else {
|
} else {
|
||||||
llvm_unreachable("Unhandled instruction type!");
|
llvm_unreachable("Unhandled instruction type!");
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
; RUN: opt -S < %s -loop-vectorize -force-vector-width=4 | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||||
|
target triple = "aarch64--linux-gnu"
|
||||||
|
|
||||||
|
; CHECK-LABEL: test0
|
||||||
|
; test0: single-block loop of 16 iterations that widens an i16 to i32, adds to
; it, truncates the sum back to i16 and stores it through %M3.
; Both inputs of the add derive from undef, and the store address does not
; depend on the induction variable.
; NOTE(review): presumably the zext/add/trunc chain is what the vectorizer
; shrinks back to i16 during minimal-bitwidth truncation -- confirm against
; the pass.
define void @test0(i16* noalias %M3) {
|
||||||
|
entry:
|
||||||
|
br label %if.then1165.us
|
||||||
|
|
||||||
|
if.then1165.us: ; preds = %if.then1165.us, %entry
|
||||||
|
; Induction variable: starts at 0 and is incremented by 1 at the bottom of the loop.
%indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ]
|
||||||
|
; i32 arithmetic whose result is immediately truncated to i16 below.
%conv1177.us = zext i16 undef to i32
|
||||||
|
%add1178.us = add nsw i32 %conv1177.us, undef
|
||||||
|
%conv1179.us = trunc i32 %add1178.us to i16
|
||||||
|
; The element index is computed from undef; it does not use the induction variable.
%idxprom1181.us = ashr exact i64 undef, 32
|
||||||
|
%arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us
|
||||||
|
store i16 %conv1179.us, i16* %arrayidx1185.us, align 2
|
||||||
|
%indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1
|
||||||
|
; Leave the loop once the incremented counter reaches 16.
%exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16
|
||||||
|
br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us
|
||||||
|
|
||||||
|
for.inc1286.loopexit: ; preds = %if.then1165.us
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test1
|
||||||
|
; test1: single-block loop of 16 iterations that widens an i16 to i32, adds an
; i32 loaded from memory, truncates the sum back to i16 and stores it through
; %M3. The store address does not depend on the induction variable.
; NOTE(review): presumably the load feeding the add is what makes this case
; exercise the "just extend the result" load handling during minimal-bitwidth
; truncation -- confirm against the pass.
define void @test1(i16* noalias %M3) {
|
||||||
|
entry:
|
||||||
|
br label %if.then1165.us
|
||||||
|
|
||||||
|
if.then1165.us: ; preds = %if.then1165.us, %entry
|
||||||
|
; Induction variable: starts at 0 and is incremented by 1 at the bottom of the loop.
%indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ]
|
||||||
|
; The add's second input is an i32 loaded through an undef pointer.
%fptr = load i32, i32* undef, align 4
|
||||||
|
; i32 arithmetic whose result is immediately truncated to i16 below.
%conv1177.us = zext i16 undef to i32
|
||||||
|
%add1178.us = add nsw i32 %conv1177.us, %fptr
|
||||||
|
%conv1179.us = trunc i32 %add1178.us to i16
|
||||||
|
; The element index is computed from undef; it does not use the induction variable.
%idxprom1181.us = ashr exact i64 undef, 32
|
||||||
|
%arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us
|
||||||
|
store i16 %conv1179.us, i16* %arrayidx1185.us, align 2
|
||||||
|
%indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1
|
||||||
|
; Leave the loop once the incremented counter reaches 16.
%exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16
|
||||||
|
br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us
|
||||||
|
|
||||||
|
for.inc1286.loopexit: ; preds = %if.then1165.us
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue