2016-09-09 00:15:21 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2015-04-17 01:52:13 +08:00
|
|
|
; RUN: opt < %s -instcombine -S | FileCheck %s
|
|
|
|
|
|
|
|
; Intrinsic under test: takes two <4 x float> vectors and an i8 control byte.
; Per the Intel INSERTPS definition, the control byte is laid out as:
;   bits [7:6] - index of the element taken from the second vector
;   bits [5:4] - destination lane in the first vector
;   bits [3:0] - zero mask (a set bit forces that result lane to 0.0)
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
|
|
|
|
|
|
|
; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
|
|
|
|
|
|
|
|
; The control byte %c is a function argument rather than a constant, so there
; is nothing to decode; the expected output keeps the intrinsic call as-is.
define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
; CHECK-LABEL: @insertps_non_const_imm(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; If all zero mask bits are set, return a zero vector regardless of the other control bits.
|
|
|
|
|
|
|
|
; imm 15 = 0x0f = 0b0000_1111: all four zero-mask bits (imm[3:0]) are set, so
; every result lane is zeroed and the call folds to a zero vector.
define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0f(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %res
}
|
|
|
|
; imm 255 = 0xff = 0b1111_1111: the zero mask (imm[3:0]) is all ones, so the
; result is a zero vector even though the source/destination select bits
; (imm[7:4]) are also set.
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xff(
; CHECK-NEXT: ret <4 x float> zeroinitializer
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %res
}
|
|
|
|
|
2015-04-26 04:55:25 +08:00
|
|
|
; If only some zero mask bits are set and they do not cover the insertion lane, the call is left unchanged.
|
2015-04-17 01:52:13 +08:00
|
|
|
|
2015-04-26 04:55:25 +08:00
|
|
|
; imm 12 = 0x0c = 0b0000_1100: zero mask covers lanes 2 and 3, while the
; insertion targets lane 0 (imm[5:4] = 0) from %v2 element 0 (imm[7:6] = 0).
; The insertion lane is not zeroed, and the expected output keeps the call.
define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x0c(
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; ...unless both input vectors are the same operand.
|
|
|
|
|
|
|
|
; imm 21 = 0x15 = 0b0001_0101: source element 0 goes to lane 1, and the zero
; mask covers lanes 0 and 2. Both vector operands are %v1, so the expected
; result is one shuffle of %v1 with a constant that supplies the zeros:
; <0.0, %v1[0], 0.0, %v1[3]>.
define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x15_single_input(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; The zero mask overrides the insertion lane.
|
|
|
|
|
|
|
|
; imm 26 = 0x1a = 0b0001_1010: the insertion targets lane 1, but the zero mask
; covers lanes 1 and 3, so the inserted value is overwritten with zero.
; Expected result: <%v1[0], 0.0, %v1[2], 0.0>.
define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
; CHECK-LABEL: @insertps_0x1a_single_input(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; The zero mask overrides the insertion lane, so the second input vector is not used.
|
|
|
|
|
|
|
|
; imm 193 = 0xc1 = 0b1100_0001: source element 3 of %v2 would go to lane 0,
; but the zero mask also covers lane 0, so %v2 is never read.
; Only one constant element is needed, so the expected output is a single
; insertelement rather than a shufflevector with a constant operand
; (the shuffle -> insertelement reduction from D24182 / PR29124).
define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc1(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; If no zero mask bits are set, convert to a shuffle.
|
|
|
|
|
|
|
|
; imm 0 = 0x00: no zero-mask bits; %v2 element 0 is inserted into lane 0.
; Shuffle indices 4-7 refer to %v2, so index 4 in lane 0 is %v2[0].
define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x00(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; imm 16 = 0x10 = 0b0001_0000: no zero-mask bits; %v2 element 0 is inserted
; into lane 1 (imm[5:4] = 1), i.e. shuffle index 4 in position 1.
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x10(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; imm 32 = 0x20 = 0b0010_0000: no zero-mask bits; %v2 element 0 is inserted
; into lane 2 (imm[5:4] = 2), i.e. shuffle index 4 in position 2.
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x20(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; imm 48 = 0x30 = 0b0011_0000: no zero-mask bits; %v2 element 0 is inserted
; into lane 3 (imm[5:4] = 3), i.e. shuffle index 4 in position 3.
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0x30(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; imm 192 = 0xc0 = 0b1100_0000: no zero-mask bits; %v2 element 3
; (imm[7:6] = 3, shuffle index 7) is inserted into lane 0.
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xc0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; imm 208 = 0xd0 = 0b1101_0000: no zero-mask bits; %v2 element 3
; (shuffle index 7) is inserted into lane 1 (imm[5:4] = 1).
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xd0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; imm 224 = 0xe0 = 0b1110_0000: no zero-mask bits; %v2 element 3
; (shuffle index 7) is inserted into lane 2 (imm[5:4] = 2).
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xe0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %res
}
|
|
|
|
|
|
|
|
; imm 240 = 0xf0 = 0b1111_0000: no zero-mask bits; %v2 element 3
; (shuffle index 7) is inserted into lane 3 (imm[5:4] = 3).
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @insertps_0xf0(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %res
}
|
|
|
|
|