2016-07-19 22:12:45 +08:00
|
|
|
; RUN: llc < %s -mtriple=aarch64-linux--gnu -aarch64-neon-syntax=generic | FileCheck %s
|
Improve ISel using across lane min/max reduction
In vectorized integer min/max reduction code, the final "reduce" step
is sub-optimal. In AArch64, this change will combine :
%svn0 = vector_shuffle %0, undef<2,3,u,u>
%smax0 = smax %0, %svn0
%svn3 = vector_shuffle %smax0, undef<1,u,u,u>
%sc = setcc %smax0, %svn3, gt
%n0 = extract_vector_elt %sc, #0
%n1 = extract_vector_elt %smax0, #0
%n2 = extract_vector_elt %smax0, #1
%result = select %n0, %n1, %n2
becomes :
%1 = smaxv %0
%result = extract_vector_elt %1, 0
This change extends r246790.
llvm-svn: 247575
2015-09-15 00:19:52 +08:00
|
|
|
|
|
|
|
; Data layout shared by every function in this module.
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Across-vector reduction intrinsics called by the single-register tests in
; this file, grouped by operation. Each takes one vector operand and returns
; a scalar of the element type.

; Signed integer maximum.
declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)

; Unsigned integer maximum.
declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)

; Signed integer minimum.
declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)

; Unsigned integer minimum.
declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)

; Floating-point maximum / minimum.
declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
|
|
|
|
|
Improve ISel using across lane min/max reduction
In vectorized integer min/max reduction code, the final "reduce" step
is sub-optimal. In AArch64, this change will combine :
%svn0 = vector_shuffle %0, undef<2,3,u,u>
%smax0 = smax %0, %svn0
%svn3 = vector_shuffle %smax0, undef<1,u,u,u>
%sc = setcc %smax0, %svn3, gt
%n0 = extract_vector_elt %sc, #0
%n1 = extract_vector_elt %smax0, #0
%n2 = extract_vector_elt %smax0, #1
%result = select %n0, %n1, %n2
becomes :
%1 = smaxv %0
%result = extract_vector_elt %1, 0
This change extends r246790.
llvm-svn: 247575
2015-09-15 00:19:52 +08:00
|
|
|
; Signed-max reduction of a <16 x i8> loaded from %arr.
; Expected output: one across-lanes smaxv writing a b-register from a .16b
; source.
; CHECK-LABEL: smax_B
; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}
|
|
|
|
|
|
|
|
; Signed-max reduction of a <8 x i16> loaded from %arr.
; Expected output: one across-lanes smaxv writing an h-register from a .8h
; source.
; CHECK-LABEL: smax_H
; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
|
|
|
; Signed-max reduction of a <4 x i32> loaded from %arr.
; Expected output: one across-lanes smaxv writing an s-register from a .4s
; source.
; CHECK-LABEL: smax_S
; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smax_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}
|
|
|
|
|
|
|
|
; Unsigned-max reduction of a <16 x i8> loaded from %arr.
; Expected output: one across-lanes umaxv writing a b-register from a .16b
; source.
; CHECK-LABEL: umax_B
; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}
|
|
|
|
|
|
|
|
; Unsigned-max reduction of a <8 x i16> loaded from %arr.
; Expected output: one across-lanes umaxv writing an h-register from a .8h
; source.
; CHECK-LABEL: umax_H
; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
|
|
|
; Unsigned-max reduction of a <4 x i32> loaded from %arr.
; Expected output: one across-lanes umaxv writing an s-register from a .4s
; source.
; CHECK-LABEL: umax_S
; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}
|
|
|
|
|
|
|
|
; Signed-min reduction of a <16 x i8> loaded from %arr.
; Expected output: one across-lanes sminv writing a b-register from a .16b
; source.
; CHECK-LABEL: smin_B
; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}
|
|
|
|
|
|
|
|
; Signed-min reduction of a <8 x i16> loaded from %arr.
; Expected output: one across-lanes sminv writing an h-register from a .8h
; source.
; CHECK-LABEL: smin_H
; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
|
|
|
; Signed-min reduction of a <4 x i32> loaded from %arr.
; Expected output: one across-lanes sminv writing an s-register from a .4s
; source.
; CHECK-LABEL: smin_S
; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}
|
|
|
|
|
|
|
|
; Unsigned-min reduction of a <16 x i8> loaded from %arr.
; Expected output: one across-lanes uminv writing a b-register from a .16b
; source.
; CHECK-LABEL: umin_B
; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
  %arr.load = load <16 x i8>, <16 x i8>* %arr
  %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}
|
|
|
|
|
|
|
|
; Unsigned-min reduction of a <8 x i16> loaded from %arr.
; Expected output: one across-lanes uminv writing an h-register from a .8h
; source.
; CHECK-LABEL: umin_H
; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
  %arr.load = load <8 x i16>, <8 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
|
|
|
; Unsigned-min reduction of a <4 x i32> loaded from %arr.
; Expected output: one across-lanes uminv writing an s-register from a .4s
; source.
; CHECK-LABEL: umin_S
; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
  %arr.load = load <4 x i32>, <4 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}
|
|
|
|
|
2015-10-12 23:34:52 +08:00
|
|
|
; Floating-point max reduction of a <4 x float> loaded from %arr. The call
; carries the nnan fast-math flag.
; Expected output: an fmaxnmv instruction.
; CHECK-LABEL: fmaxnm_S
; CHECK: fmaxnmv
define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
  %arr.load = load <4 x float>, <4 x float>* %arr
  %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %arr.load)
  ret float %r
}
|
|
|
|
|
2015-10-12 23:34:52 +08:00
|
|
|
; Floating-point min reduction of a <4 x float> loaded from %arr. The call
; carries the nnan fast-math flag.
; Expected output: an fminnmv instruction.
; CHECK-LABEL: fminnm_S
; CHECK: fminnmv
define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
  %arr.load = load <4 x float>, <4 x float>* %arr
  %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %arr.load)
  ret float %r
}
|
2015-10-16 23:38:25 +08:00
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)

; Unsigned-max reduction of a <16 x i16> (256 bits) loaded from %arr.
; Expected output: one element-wise umax on .8h operands, whose result
; register then feeds a single across-lanes umaxv.
define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_256
; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: umaxv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)

; Unsigned-max reduction of a <16 x i32> (512 bits) loaded from %arr.
; Expected output: three consecutive element-wise umax instructions, the last
; on .4s operands, immediately followed by an across-lanes umaxv that reads
; the last umax's destination register.
define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_512
; CHECK: umax v
; CHECK-NEXT: umax v
; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}
|
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)

; Unsigned-min reduction of a <16 x i16> (256 bits) loaded from %arr.
; Expected output: one element-wise umin on .8h operands, whose result
; register then feeds a single across-lanes uminv.
define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_256
; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uminv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)

; Unsigned-min reduction of a <16 x i32> (512 bits) loaded from %arr.
; Expected output: three consecutive element-wise umin instructions, the last
; on .4s operands, immediately followed by an across-lanes uminv that reads
; the last umin's destination register.
define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_512
; CHECK: umin v
; CHECK-NEXT: umin v
; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}
|
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)

; Signed-max reduction of a <16 x i16> (256 bits) loaded from %arr.
; Expected output: one element-wise smax on .8h operands, whose result
; register then feeds a single across-lanes smaxv.
define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_256
; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: smaxv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)

; Signed-max reduction of a <16 x i32> (512 bits) loaded from %arr.
; Expected output: three consecutive element-wise smax instructions, the last
; on .4s operands, immediately followed by an across-lanes smaxv that reads
; the last smax's destination register.
define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_512
; CHECK: smax v
; CHECK-NEXT: smax v
; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}
|
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)

; Signed-min reduction of a <16 x i16> (256 bits) loaded from %arr.
; Expected output: one element-wise smin on .8h operands, whose result
; register then feeds a single across-lanes sminv.
define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_256
; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: sminv {{h[0-9]+}}, [[V0]]
  %arr.load = load <16 x i16>, <16 x i16>* %arr
  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}
|
|
|
|
|
2017-05-17 05:29:22 +08:00
|
|
|
declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)

; Signed-min reduction of a <16 x i32> (512 bits) loaded from %arr.
; Expected output: three consecutive element-wise smin instructions, the last
; on .4s operands, immediately followed by an across-lanes sminv that reads
; the last smin's destination register.
define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_512
; CHECK: smin v
; CHECK-NEXT: smin v
; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
  %arr.load = load <16 x i32>, <16 x i32>* %arr
  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}
|