Precommit analysis/etc tests for inselt poison placeholder

This adds tests in directories missing from https://reviews.llvm.org/rGdb7a2f347f132b3920415013d62d1adfb18d8d58
This commit is contained in:
Juneyoung Lee 2020-12-24 12:14:19 +09:00
parent db7a2f347f
commit 3036547248
15 changed files with 9753 additions and 1 deletions

View File

@ -0,0 +1,26 @@
; RUN: opt < %s -cost-model -analyze -mcpu=kryo | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; CHECK-LABEL: vectorInstrCost
; Cost-model probe for lane 0 and lane 1 of a <2 x i64> vector: two
; extractelement instructions read from an undef vector and two insertelement
; instructions write an undef scalar into a poison base vector. The function
; computes nothing useful; only the reported per-instruction costs matter.
define void @vectorInstrCost() {
; Vector extracts - extracting the first element should have a zero cost;
; all other elements should have a cost of two.
;
; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0
; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1
%t1 = extractelement <2 x i64> undef, i32 0
%t2 = extractelement <2 x i64> undef, i32 1
; Vector inserts - inserting the first element should have a zero cost; all
; other elements should have a cost of two.
;
; CHECK: cost of 0 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 0
; CHECK: cost of 2 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 1
%t3 = insertelement <2 x i64> poison, i64 undef, i32 0
%t4 = insertelement <2 x i64> poison, i64 undef, i32 1
ret void
}

View File

@ -0,0 +1,187 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck --check-prefix=CHECK-P7 %s
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck --check-prefix=CHECK-P8LE %s
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -mattr=+vsx | FileCheck --check-prefix=CHECK-P9BE %s
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -mattr=+vsx | FileCheck --check-prefix=CHECK-P9LE %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Inserts the scalar argument into lane 0 of a poison <4 x i32>. The expected
; cost differs per subtarget and is pinned separately for each RUN
; configuration by the per-prefix assertions below. The inserted vector is
; not used; the function returns undef.
define i32 @insert(i32 %arg) {
; CHECK-P7-LABEL: 'insert'
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-P8LE-LABEL: 'insert'
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-P9BE-LABEL: 'insert'
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-P9LE-LABEL: 'insert'
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x = insertelement <4 x i32> poison, i32 %arg, i32 0
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%x = insertelement <4 x i32> poison, i32 %arg, i32 0
ret i32 undef
}
; Extracts lane 0 of a <4 x i32> argument and returns it. The expected cost
; differs per subtarget and is pinned separately for each RUN configuration
; by the per-prefix assertions below.
define i32 @extract(<4 x i32> %arg) {
; CHECK-P7-LABEL: 'extract'
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 0
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
;
; CHECK-P8LE-LABEL: 'extract'
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 0
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
;
; CHECK-P9BE-LABEL: 'extract'
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 0
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
;
; CHECK-P9LE-LABEL: 'extract'
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 0
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
;
%x = extractelement <4 x i32> %arg, i32 0
ret i32 %x
}
; Extracts both lanes of a <2 x double> argument. The assertions below show
; that which lane is free (cost 0) depends on the subtarget configuration:
; lane 0 for the pwr7 and pwr9 big-endian runs, lane 1 for the two
; little-endian runs.
define void @test2xdouble(<2 x double> %arg1) {
; CHECK-P7-LABEL: 'test2xdouble'
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = extractelement <2 x double> %arg1, i32 1
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P8LE-LABEL: 'test2xdouble'
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement <2 x double> %arg1, i32 1
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9BE-LABEL: 'test2xdouble'
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = extractelement <2 x double> %arg1, i32 1
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9LE-LABEL: 'test2xdouble'
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = extractelement <2 x double> %arg1, i32 0
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement <2 x double> %arg1, i32 1
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v1 = extractelement <2 x double> %arg1, i32 0
%v2 = extractelement <2 x double> %arg1, i32 1
ret void
}
; Inserts a scalar argument into lane 2 of an existing (non-poison)
; <4 x i32> argument. The expected per-subtarget costs below are the same
; numbers as for the lane-0 insert into poison in @insert.
define void @test4xi32(<4 x i32> %v1, i32 %x1) {
; CHECK-P7-LABEL: 'test4xi32'
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P8LE-LABEL: 'test4xi32'
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9BE-LABEL: 'test4xi32'
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9LE-LABEL: 'test4xi32'
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
ret void
}
; Extracts every lane (0..3) of a <4 x i32> argument. Per the assertions
; below, the pwr7 and pwr8 runs report a uniform cost of 3, while the pwr9
; runs report 2 for all lanes except one cheaper lane (lane 1 big-endian,
; lane 2 little-endian).
define void @vexti32(<4 x i32> %p1) {
; CHECK-P7-LABEL: 'vexti32'
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P8LE-LABEL: 'vexti32'
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9BE-LABEL: 'vexti32'
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9LE-LABEL: 'vexti32'
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <4 x i32> %p1, i32 0
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <4 x i32> %p1, i32 1
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i3 = extractelement <4 x i32> %p1, i32 2
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = extractelement <4 x i32> %p1, i32 3
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%i1 = extractelement <4 x i32> %p1, i32 0
%i2 = extractelement <4 x i32> %p1, i32 1
%i3 = extractelement <4 x i32> %p1, i32 2
%i4 = extractelement <4 x i32> %p1, i32 3
ret void
}
; Extracts both lanes of a <2 x i64> argument. Per the assertions below, the
; pwr7 and pwr8 runs report cost 3 for either lane, while the pwr9 runs
; report 1 for one lane and 2 for the other, with the cheap lane swapping
; between the big-endian and little-endian runs.
define void @vexti64(<2 x i64> %p1) {
; CHECK-P7-LABEL: 'vexti64'
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P8LE-LABEL: 'vexti64'
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9BE-LABEL: 'vexti64'
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9LE-LABEL: 'vexti64'
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <2 x i64> %p1, i32 0
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = extractelement <2 x i64> %p1, i32 1
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%i1 = extractelement <2 x i64> %p1, i32 0
%i2 = extractelement <2 x i64> %p1, i32 1
ret void
}
; Extracts lane 0 from the narrow-element vector types <8 x i16> and
; <16 x i8>. Per the assertions below, both extracts cost 3 in the pwr7 and
; pwr8 runs and 2 in both pwr9 runs.
define void @vext(<8 x i16> %p1, <16 x i8> %p2) {
; CHECK-P7-LABEL: 'vext'
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0
; CHECK-P7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P8LE-LABEL: 'vext'
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0
; CHECK-P8LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9BE-LABEL: 'vext'
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0
; CHECK-P9BE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-P9LE-LABEL: 'vext'
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i1 = extractelement <8 x i16> %p1, i32 0
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = extractelement <16 x i8> %p2, i32 0
; CHECK-P9LE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%i1 = extractelement <8 x i16> %p1, i32 0
%i2 = extractelement <16 x i8> %p2, i32 0
ret void
}

View File

@ -0,0 +1,56 @@
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
; CHECK: vecinstrs
; Cost-model probe for extractelement and insertelement on the z13 vector
; types. All instructions are deliberately left unnamed: the expectations
; after the `ret` refer to them by their implicit numbering (%1 .. %18), so
; adding, removing or reordering an instruction requires renumbering every
; expectation that follows it.
define void @vecinstrs() {
;; Extract element is penalized somewhat with a cost of 2 for index 0.
extractelement <16 x i8> undef, i32 0
extractelement <16 x i8> undef, i32 1
extractelement <8 x i16> undef, i32 0
extractelement <8 x i16> undef, i32 1
extractelement <4 x i32> undef, i32 0
extractelement <4 x i32> undef, i32 1
extractelement <2 x i64> undef, i32 0
extractelement <2 x i64> undef, i32 1
extractelement <2 x double> undef, i32 0
extractelement <2 x double> undef, i32 1
; Extraction of i1 means extract + test under mask before branch.
extractelement <2 x i1> undef, i32 0
extractelement <4 x i1> undef, i32 1
extractelement <8 x i1> undef, i32 2
;; Insert element
insertelement <16 x i8> poison, i8 undef, i32 0
insertelement <8 x i16> poison, i16 undef, i32 0
insertelement <4 x i32> poison, i32 undef, i32 0
; vlvgp will do two grs into a vector register: only add cost half of the time.
insertelement <2 x i64> poison, i64 undef, i32 0
insertelement <2 x i64> poison, i64 undef, i32 1
ret void
; Expected costs, one line per instruction above, in program order.
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <16 x i8> undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = extractelement <16 x i8> undef, i32 1
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %3 = extractelement <8 x i16> undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = extractelement <8 x i16> undef, i32 1
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %5 = extractelement <4 x i32> undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = extractelement <4 x i32> undef, i32 1
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %7 = extractelement <2 x i64> undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = extractelement <2 x i64> undef, i32 1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = extractelement <2 x double> undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = extractelement <2 x double> undef, i32 1
; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %11 = extractelement <2 x i1> undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %12 = extractelement <4 x i1> undef, i32 1
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %13 = extractelement <8 x i1> undef, i32 2
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = insertelement <16 x i8> poison, i8 undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = insertelement <8 x i16> poison, i16 undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = insertelement <4 x i32> poison, i32 undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %17 = insertelement <2 x i64> poison, i64 undef, i32 0
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %18 = insertelement <2 x i64> poison, i64 undef, i32 1
}

View File

@ -0,0 +1,40 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Cost-model probe for extractelement (%A..%F) and insertelement (%G..%M).
; Each instruction is preceded by the cost expected for it. The expectations
; match only on the cost and the opcode prefix ("extract" / "insert"), so they
; are order-sensitive: each one is matched against the output that follows the
; previous match.
; %F uses the variable index %arg; all other indices are constants.
define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
;CHECK: cost of 0 {{.*}} extract
%A = extractelement <4 x float> undef, i32 0
;CHECK: cost of 1 {{.*}} extract
%B = extractelement <4 x i32> undef, i32 0
;CHECK: cost of 1 {{.*}} extract
%C = extractelement <4 x float> undef, i32 1
;CHECK: cost of 0 {{.*}} extract
%D = extractelement <8 x float> undef, i32 0
;CHECK: cost of 1 {{.*}} extract
%E = extractelement <8 x float> undef, i32 1
;CHECK: cost of 1 {{.*}} extract
%F = extractelement <8 x float> undef, i32 %arg
;CHECK: cost of 0 {{.*}} insert
%G = insertelement <4 x float> poison, float %fl, i32 0
;CHECK: cost of 1 {{.*}} insert
%H = insertelement <4 x float> poison, float %fl, i32 1
;CHECK: cost of 1 {{.*}} insert
%I = insertelement <4 x i32> poison, i32 %arg, i32 0
;CHECK: cost of 0 {{.*}} insert
%J = insertelement <4 x double> poison, double undef, i32 0
;CHECK: cost of 0 {{.*}} insert
%K = insertelement <8 x double> poison, double undef, i32 4
;CHECK: cost of 0 {{.*}} insert
%L = insertelement <16 x double> poison, double undef, i32 8
;CHECK: cost of 1 {{.*}} insert
%M = insertelement <16 x double> poison, double undef, i32 9
ret i32 0
}

View File

@ -0,0 +1,39 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Loop that performs an indirect, two-lanes-at-a-time reduction over %A:
; each iteration loads a <2 x i32> from %A at %index, sign-extends it to
; <2 x i64>, uses each lane as an index back into %A, loads the two scalars,
; rebuilds a <2 x i32> with two insertelement instructions and adds it to the
; running vector sum. %index advances by 2 and the loop exits when it equals
; 192. Only the two extracts and two inserts inside the loop carry cost
; expectations; the extracts in for.end are not verified.
define i32 @foo(i32* nocapture %A) nounwind uwtable readonly ssp {
vector.ph:
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%0 = getelementptr inbounds i32, i32* %A, i64 %index
%1 = bitcast i32* %0 to <2 x i32>*
%2 = load <2 x i32>, <2 x i32>* %1, align 4
%3 = sext <2 x i32> %2 to <2 x i64>
;CHECK: cost of 1 {{.*}} extract
%4 = extractelement <2 x i64> %3, i32 0
%5 = getelementptr inbounds i32, i32* %A, i64 %4
;CHECK: cost of 1 {{.*}} extract
%6 = extractelement <2 x i64> %3, i32 1
%7 = getelementptr inbounds i32, i32* %A, i64 %6
%8 = load i32, i32* %5, align 4
;CHECK: cost of 1 {{.*}} insert
%9 = insertelement <2 x i32> poison, i32 %8, i32 0
%10 = load i32, i32* %7, align 4
;CHECK: cost of 1 {{.*}} insert
%11 = insertelement <2 x i32> %9, i32 %10, i32 1
%12 = add nsw <2 x i32> %11, %vec.phi
%index.next = add i64 %index, 2
%13 = icmp eq i64 %index.next, 192
br i1 %13, label %for.end, label %vector.body
for.end: ; preds = %vector.body
; Horizontal sum of the two accumulator lanes.
%14 = extractelement <2 x i32> %12, i32 0
%15 = extractelement <2 x i32> %12, i32 1
%16 = add i32 %14, %15
ret i32 %16
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,39 @@
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=+sse2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; Vector shift-left by a splatted scalar: %scalar is inserted into lane 0 of
; a poison vector and broadcast to all four lanes with a zero shuffle mask,
; then used as the shift amount. The opt run verifies the reported cost of
; the shl; the llc run verifies the emitted movd + pslld sequence.
define <4 x i32> @shl(<4 x i32> %vector, i32 %scalar) {
entry:
; SSE2: 'shl'
; SSE2: cost of 1 {{.*}} shl
; SSE2-CODEGEN: movd %edi, %xmm1
; SSE2-CODEGEN: pslld %xmm1, %xmm0
%insert = insertelement <4 x i32> poison, i32 %scalar, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
%ret = shl <4 x i32> %vector , %splat
ret <4 x i32> %ret
}
; Vector arithmetic shift-right by a splatted scalar: %scalar is inserted
; into lane 0 of a poison vector and broadcast to all four lanes with a zero
; shuffle mask, then used as the shift amount. The opt run verifies the
; reported cost of the ashr; the llc run verifies the emitted movd + psrad
; sequence.
define <4 x i32> @ashr(<4 x i32> %vector, i32 %scalar) {
entry:
; SSE2: 'ashr'
; SSE2: cost of 1 {{.*}} ashr
; SSE2-CODEGEN: movd %edi, %xmm1
; SSE2-CODEGEN: psrad %xmm1, %xmm0
%insert = insertelement <4 x i32> poison, i32 %scalar, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
%ret = ashr <4 x i32> %vector , %splat
ret <4 x i32> %ret
}
; Vector logical shift-right by a splatted scalar: %scalar is inserted into
; lane 0 of a poison vector and broadcast to all four lanes with a zero
; shuffle mask, then used as the shift amount. The opt run verifies the
; reported cost of the lshr; the llc run verifies the emitted movd + psrld
; sequence.
define <4 x i32> @lshr(<4 x i32> %vector, i32 %scalar) {
entry:
; SSE2: 'lshr'
; SSE2: cost of 1 {{.*}} lshr
; SSE2-CODEGEN: movd %edi, %xmm1
; SSE2-CODEGEN: psrld %xmm1, %xmm0
%insert = insertelement <4 x i32> poison, i32 %scalar, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
%ret = lshr <4 x i32> %vector , %splat
ret <4 x i32> %ret
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,17 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-linux-unknown-unknown -mattr=+avx512f | FileCheck %s
%struct.S = type { [1000 x i32] }
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
; Builds a vector of addresses with two vector getelementptr instructions and
; feeds it to a masked gather with an all-true mask and an undef pass-through.
; %base is splatted to <4 x i64> (insert into lane 0 of poison, then a zero
; shuffle mask) and used as the variable index of the second GEP. Both GEPs
; are expected to be reported as free (cost 0).
define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
%temp = insertelement <4 x i64> poison, i64 %base, i32 0
%vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x i32> zeroinitializer
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S
%B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
%arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,136 @@
; RUN: opt -S -demanded-bits -analyze -enable-new-pm=0 < %s | FileCheck %s
; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
; CHECK-DAG: DemandedBits: 0xff00 for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xff00 for %y = or <2 x i32> %b, zeroinitializer
; CHECK-DAG: DemandedBits: 0xff00 for %z = or <2 x i32> %x, %y
; CHECK-DAG: DemandedBits: 0xff for %u = lshr <2 x i32> %z, <i32 8, i32 8>
; CHECK-DAG: DemandedBits: 0xff for %r = trunc <2 x i32> %u to <2 x i8>
; Basic element-wise propagation through vector ops: the result is shifted
; right by 8 and truncated to i8 per lane, so only bits 8..15 (0xff00) of the
; or-results feeding the shift are demanded.
define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) {
%x = or <2 x i32> %a, zeroinitializer
%y = or <2 x i32> %b, zeroinitializer
%z = or <2 x i32> %x, %y
%u = lshr <2 x i32> %z, <i32 8, i32 8>
%r = trunc <2 x i32> %u to <2 x i8>
ret <2 x i8> %r
}
; Vector-specific instructions
; CHECK-DAG: DemandedBits: 0xff for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xf0 for %z = extractelement <2 x i32> %x, i32 1
; CHECK-DAG: DemandedBits: 0xf for %y = extractelement <2 x i32> %x, i32 0
; CHECK-DAG: DemandedBits: 0xffffffff for %u = and i32 %y, 15
; CHECK-DAG: DemandedBits: 0xffffffff for %v = and i32 %z, 240
; CHECK-DAG: DemandedBits: 0xffffffff for %r = or i32 %u, %v
; Two lanes of the same vector are extracted and masked differently (lane 0
; with 15, lane 1 with 240). The demanded bits of the source vector %x are
; expected to be the union of both masks (0xff), since the tracking is per
; value rather than per lane.
define i32 @test_extractelement(<2 x i32> %a) {
%x = or <2 x i32> %a, zeroinitializer
%y = extractelement <2 x i32> %x, i32 0
%z = extractelement <2 x i32> %x, i32 1
%u = and i32 %y, 15
%v = and i32 %z, 240
%r = or i32 %u, %v
ret i32 %r
}
; CHECK-DAG: DemandedBits: 0xff for %x = or i32 %a, 0
; CHECK-DAG: DemandedBits: 0xff for %y = or i32 %b, 0
; CHECK-DAG: DemandedBits: 0xff for %z = insertelement <2 x i32> poison, i32 %x, i32 0
; CHECK-DAG: DemandedBits: 0xff for %u = insertelement <2 x i32> %z, i32 %y, i32 1
; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %u, <i32 255, i32 127>
; Builds a <2 x i32> from two scalars (first insert into a poison base) and
; masks the lanes with 255 and 127. Both scalars and both inserts are
; expected to report 0xff, i.e. the union of the two per-lane masks.
define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
%x = or i32 %a, 0
%y = or i32 %b, 0
%z = insertelement <2 x i32> poison, i32 %x, i32 0
%u = insertelement <2 x i32> %z, i32 %y, i32 1
%r = and <2 x i32> %u, <i32 255, i32 127>
ret <2 x i32> %r
}
; CHECK-DAG: DemandedBits: 0xff for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xff for %y = or <2 x i32> %b, zeroinitializer
; CHECK-DAG: DemandedBits: 0xff for %z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> <i32 0, i32 3, i32 1>
; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <3 x i32> %z, <i32 255, i32 127, i32 0>
; Shuffle that changes the vector length (two <2 x i32> inputs, <3 x i32>
; result). The result is masked per lane with 255, 127 and 0; both shuffle
; operands are expected to report the union of those masks (0xff).
define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) {
%x = or <2 x i32> %a, zeroinitializer
%y = or <2 x i32> %b, zeroinitializer
%z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> <i32 0, i32 3, i32 1>
%r = and <3 x i32> %z, <i32 255, i32 127, i32 0>
ret <3 x i32> %r
}
; Shifts with splat shift amounts
; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xf0 for %y = shl <2 x i32> %x, <i32 4, i32 4>
; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, <i32 240, i32 240>
; Left shift by a splat constant 4 followed by a 0xf0 mask: the demanded bits
; of the shift input are the mask moved right by the shift amount (0xf).
define <2 x i32> @test_shl(<2 x i32> %a) {
%x = or <2 x i32> %a, zeroinitializer
%y = shl <2 x i32> %x, <i32 4, i32 4>
%r = and <2 x i32> %y, <i32 240, i32 240>
ret <2 x i32> %r
}
; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xf0 for %y = ashr <2 x i32> %x, <i32 4, i32 4>
; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, <i32 240, i32 240>
; Arithmetic right shift by a splat constant 4 followed by a 0xf0 mask: the
; demanded bits of the shift input are the mask moved left by the shift
; amount (0xf00).
define <2 x i32> @test_ashr(<2 x i32> %a) {
%x = or <2 x i32> %a, zeroinitializer
%y = ashr <2 x i32> %x, <i32 4, i32 4>
%r = and <2 x i32> %y, <i32 240, i32 240>
ret <2 x i32> %r
}
; CHECK-DAG: DemandedBits: 0xf00 for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xf0 for %y = lshr <2 x i32> %x, <i32 4, i32 4>
; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %y, <i32 240, i32 240>
; Logical right shift by a splat constant 4 followed by a 0xf0 mask: the
; demanded bits of the shift input are the mask moved left by the shift
; amount (0xf00).
define <2 x i32> @test_lshr(<2 x i32> %a) {
%x = or <2 x i32> %a, zeroinitializer
%y = lshr <2 x i32> %x, <i32 4, i32 4>
%r = and <2 x i32> %y, <i32 240, i32 240>
ret <2 x i32> %r
}
; Funnel shifts with splat shift amounts. The intrinsics are declared and
; called with the overload suffix that matches their <2 x i32> operand type
; (.v2i32), which is also the spelling the expectations below match against.
declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
; fshl by 4 followed by a 0xff mask: the low 4 bits of the first operand and
; the high 4 bits of the second operand end up in the demanded byte.
; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer
; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, <i32 255, i32 255>
define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) {
%x = or <2 x i32> %a, zeroinitializer
%y = or <2 x i32> %b, zeroinitializer
%z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
%r = and <2 x i32> %z, <i32 255, i32 255>
ret <2 x i32> %r
}
; fshr by 28 on 32-bit lanes is expected to demand the same operand bits as
; the fshl by 4 above.
; CHECK-DAG: DemandedBits: 0xf for %x = or <2 x i32> %a, zeroinitializer
; CHECK-DAG: DemandedBits: 0xf0000000 for %y = or <2 x i32> %b, zeroinitializer
; CHECK-DAG: DemandedBits: 0xff for %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 28, i32 28>)
; CHECK-DAG: DemandedBits: 0xffffffff for %r = and <2 x i32> %z, <i32 255, i32 255>
define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) {
%x = or <2 x i32> %a, zeroinitializer
%y = or <2 x i32> %b, zeroinitializer
%z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 28, i32 28>)
%r = and <2 x i32> %z, <i32 255, i32 255>
ret <2 x i32> %r
}
; FP / Int conversion. These have different input / output types.
; CHECK-DAG: DemandedBits: 0xffffffff for %x = or <2 x i32> %a, zeroinitializer
; Integer-to-float conversion: the integer input %x feeds a uitofp whose
; result is returned, and all 32 bits of %x are expected to be demanded.
define <2 x float> @test_uitofp(<2 x i32> %a) {
%x = or <2 x i32> %a, zeroinitializer
%r = uitofp <2 x i32> %x to <2 x float>
ret <2 x float> %r
}
; CHECK-DAG: DemandedBits: 0xffffffff for %y = fptoui <2 x float> %x to <2 x i32>
; Float-to-integer conversion: the fptoui result %y is returned directly, so
; all 32 bits of it are expected to be demanded.
; NOTE(review): %r (the 0xff-masked value) is computed but never used, because
; the function returns %y. Confirm whether returning %r was intended; doing
; so would change the expected demanded bits of %y.
define <2 x i32> @test_fptoui(<2 x float> %a) {
%x = fadd <2 x float> %a, <float 1.0, float 1.0>
%y = fptoui <2 x float> %x to <2 x i32>
%r = and <2 x i32> %y, <i32 255, i32 255>
ret <2 x i32> %y
}

View File

@ -0,0 +1,114 @@
; RUN: llc %s -filetype=obj -o - | llvm-dwarfdump -debug-info -debug-loc - | FileCheck %s
; Test that DW_OP_piece is emitted for constants.
;
; // Generated from:
; typedef struct { int a, b; } I;
; I i(int i) {
; I r = {i, 0};
; return r;
; }
;
; typedef struct { float a, b; } F;
; F f(float f) {
; F r = {f, 0};
; return r;
; }
; CHECK: .debug_info contents:
; CHECK: DW_TAG_subprogram
; CHECK: DW_AT_name ("i")
; CHECK: DW_TAG_variable
; CHECK-NEXT: DW_AT_location ([[I:0x[0-9a-f]+]]
; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_reg5 RDI, DW_OP_piece 0x4, DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4)
; CHECK-NEXT: DW_AT_name ("r")
;
; CHECK: DW_TAG_subprogram
; CHECK: DW_AT_name ("f")
; CHECK: DW_TAG_variable
; CHECK-NEXT: DW_AT_location ([[F:0x[0-9a-f]+]]
; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_reg17 XMM0, DW_OP_piece 0x4, {{(DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4|DW_OP_implicit_value 0x4 0x00 0x00 0x00 0x00)}}
; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_piece 0x4, {{(DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4|DW_OP_implicit_value 0x4 0x00 0x00 0x00 0x00)}})
; CHECK-NEXT: DW_AT_name ("r")
;
; CHECK: .debug_loc contents:
; CHECK: [[I]]:
; CHECK-NEXT: ({{.*}}, {{.*}}): DW_OP_reg5 RDI, DW_OP_piece 0x4, DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4
; CHECK: [[F]]:
; CHECK-NEXT: ({{.*}}, {{.*}}): DW_OP_reg17 XMM0, DW_OP_piece 0x4, {{DW_OP_lit0, DW_OP_stack_value, DW_OP_piece 0x4|DW_OP_implicit_value 0x4 0x00 0x00 0x00 0x00}}
source_filename = "stack-value-piece.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
%struct.I = type { i32, i32 }
%struct.F = type { float, float }
; IR for `I i(int i)` from the C source quoted at the top of the file. The
; struct is returned as an i64 whose low 32 bits are %i (zero-extended).
; The two dbg.value calls describe local variable !18 ("r") piecewise:
; fragment !22 (offset 0, 32 bits) is %i and fragment !23 (offset 32, 32 bits)
; is the constant 0.
; Function Attrs: nounwind readnone ssp uwtable
define i64 @i(i32 %i) local_unnamed_addr #0 !dbg !7 {
entry:
tail call void @llvm.dbg.value(metadata i32 %i, metadata !18, metadata !22), !dbg !21
tail call void @llvm.dbg.value(metadata i32 0, metadata !18, metadata !23), !dbg !21
%retval.sroa.0.0.insert.ext = zext i32 %i to i64, !dbg !24
ret i64 %retval.sroa.0.0.insert.ext, !dbg !24
}
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; IR for `F f(float f)` from the C source quoted at the top of the file. The
; struct is returned as a <2 x float> built by inserting %f into lane 0 of a
; poison vector and the constant 0.0 into lane 1.
; The two dbg.value calls describe local variable !36 ("r") piecewise:
; fragment !22 (offset 0, 32 bits) is %f and fragment !23 (offset 32, 32 bits)
; is the constant 0.0.
; Function Attrs: nounwind readnone ssp uwtable
define <2 x float> @f(float %f) local_unnamed_addr #0 !dbg !25 {
entry:
tail call void @llvm.dbg.value(metadata float %f, metadata !36, metadata !22), !dbg !38
tail call void @llvm.dbg.value(metadata float 0.000000e+00, metadata !36, metadata !23), !dbg !38
%retval.sroa.0.0.vec.insert = insertelement <2 x float> poison, float %f, i32 0, !dbg !39
%retval.sroa.0.4.vec.insert = insertelement <2 x float> %retval.sroa.0.0.vec.insert, float 0.000000e+00, i32 1, !dbg !39
ret <2 x float> %retval.sroa.0.4.vec.insert, !dbg !40
}
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
attributes #0 = { nounwind readnone ssp uwtable }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 285655) (llvm/trunk 285654)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "stack-value-piece.c", directory: "/")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"PIC Level", i32 2}
!6 = !{!"clang version 4.0.0 (trunk 285655) (llvm/trunk 285654)"}
!7 = distinct !DISubprogram(name: "i", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
!8 = !DISubroutineType(types: !9)
!9 = !{!10, !14}
!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "I", file: !1, line: 1, baseType: !11)
!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !1, line: 1, size: 64, elements: !12)
!12 = !{!13, !15}
!13 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !11, file: !1, line: 1, baseType: !14, size: 32)
!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !11, file: !1, line: 1, baseType: !14, size: 32, offset: 32)
!18 = !DILocalVariable(name: "r", scope: !7, file: !1, line: 3, type: !10)
!19 = !DIExpression()
!20 = !DILocation(line: 2, column: 9, scope: !7)
!21 = !DILocation(line: 3, column: 5, scope: !7)
!22 = !DIExpression(DW_OP_LLVM_fragment, 0, 32)
!23 = !DIExpression(DW_OP_LLVM_fragment, 32, 32)
!24 = !DILocation(line: 5, column: 1, scope: !7)
!25 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 8, type: !26, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
!26 = !DISubroutineType(types: !27)
!27 = !{!28, !32}
!28 = !DIDerivedType(tag: DW_TAG_typedef, name: "F", file: !1, line: 7, baseType: !29)
!29 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !1, line: 7, size: 64, elements: !30)
!30 = !{!31, !33}
!31 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !29, file: !1, line: 7, baseType: !32, size: 32)
!32 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
!33 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !29, file: !1, line: 7, baseType: !32, size: 32, offset: 32)
!36 = !DILocalVariable(name: "r", scope: !25, file: !1, line: 9, type: !28)
!37 = !DILocation(line: 8, column: 11, scope: !25)
!38 = !DILocation(line: 9, column: 5, scope: !25)
!39 = !DILocation(line: 10, column: 10, scope: !25)
!40 = !DILocation(line: 11, column: 1, scope: !25)

View File

@ -200,6 +200,16 @@ define <vscale x 8 x half> @shufflevector(half %val) {
ret <vscale x 8 x half> %r
}
; Variant of the scalable-vector splat test whose insertelement base is
; poison: %val is inserted into lane 0 and broadcast with a zero shuffle
; mask. The expectations require the IR to be printed back unchanged.
define <vscale x 8 x half> @shufflevector2(half %val) {
; CHECK-LABEL: @shufflevector2
; CHECK: %insvec = insertelement <vscale x 8 x half> poison, half %val, i32 0
; CHECK-NEXT: %r = shufflevector <vscale x 8 x half> %insvec, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: ret <vscale x 8 x half> %r
%insvec = insertelement <vscale x 8 x half> poison, half %val, i32 0
%r = shufflevector <vscale x 8 x half> %insvec, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x half> %r
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Memory Access and Addressing Operations
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;