llvm-project/llvm/test/CodeGen/X86/sse41-blend.ll

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s

; vsel_float: vector select on <4 x float> with a constant mask. Lane 0 is
; taken from %v1 and lanes 1-3 from %v2. The generated code is expected to
; contain a blendvps followed by a ret.
; CHECK: vsel_float
; CHECK: blendvps
; CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
  %blended = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>,
                    <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %blended
}


; vsel_4xi8: the same constant-mask select (lane 0 from %v1, the rest from
; %v2) applied to <4 x i8> operands. The output is still expected to contain
; blendvps followed by ret.
; NOTE(review): presumably the narrow elements are promoted to 32-bit lanes
; before the select is lowered -- confirm against the type legalizer.
; CHECK: vsel_4xi8
; CHECK: blendvps
; CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
  %blended = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>,
                    <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %blended
}

; vsel_4xi16: constant-mask select (lane 0 from %v1, lanes 1-3 from %v2) on
; <4 x i16> operands. The output is expected to contain blendvps and then ret.
; NOTE(review): presumably relies on the i16 elements being promoted to
; 32-bit lanes -- confirm against the type legalizer.
; CHECK: vsel_4xi16
; CHECK: blendvps
; CHECK: ret
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
  %blended = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>,
                    <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %blended
}


; vsel_i32: constant-mask select (lane 0 from %v1, lanes 1-3 from %v2) on
; <4 x i32> operands. Although the elements are integers, the expected
; instruction is blendvps, followed by ret.
; CHECK: vsel_i32
; CHECK: blendvps
; CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
  %blended = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>,
                    <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %blended
}


; vsel_double: constant-mask select (lane 0 from %v1, lanes 1-3 from %v2) on
; <4 x double> operands, i.e. 256 bits of data per operand. The checks only
; require that at least one blendvpd appears before a ret.
; NOTE(review): presumably the vector is split into 128-bit halves for this
; SSE4.1 configuration -- confirm if the number of blends matters.
; CHECK: vsel_double
; CHECK: blendvpd
; CHECK: ret
define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
  %blended = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>,
                    <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %blended
}


; vsel_i64: constant-mask select (lane 0 from %v1, lanes 1-3 from %v2) on
; <4 x i64> operands. The integer select is expected to be emitted with
; blendvpd; the checks require at least one blendvpd before a ret.
; CHECK: vsel_i64
; CHECK: blendvpd
; CHECK: ret
define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
  %blended = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>,
                    <4 x i64> %v1, <4 x i64> %v2
  ret <4 x i64> %blended
}


; vsel_i8: byte-granularity select on <16 x i8>. The constant mask is true
; for lanes 0, 4, 8 and 12 (taken from %v1) and false for every other lane
; (taken from %v2). The output is expected to contain pblendvb and then ret.
; CHECK: vsel_i8
; CHECK: pblendvb
; CHECK: ret
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
  %blended = select <16 x i1> <i1 true, i1 false, i1 false, i1 false,
                               i1 true, i1 false, i1 false, i1 false,
                               i1 true, i1 false, i1 false, i1 false,
                               i1 true, i1 false, i1 false, i1 false>,
                    <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %blended
}

;; TEST blend + compares
; A: per-lane select on <2 x double> that yields %x where (%x oge %y) holds
; and %y otherwise. The compare is expected to be emitted as cmplepd and the
; select as blendvpd.
; The label pattern carries the trailing colon of the emitted function label:
; a bare single-letter pattern would match almost any line of the output and
; therefore would not prove that this function was emitted at all.
; A trailing ret check is included to mirror the vsel_* tests in this file.
; CHECK: A:
; CHECK: cmplepd
; CHECK: blendvpd
; CHECK: ret
define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

; B: per-lane select on <2 x double> that yields %x where (%x ult %y) holds
; and %y otherwise. The compare is expected to be emitted as cmpnlepd and the
; select as blendvpd.
; NOTE(review): the unordered predicate looks deliberate, so that a
; compare + blend pair is produced rather than a single min/max instruction
; -- confirm before changing the predicate.
; The label pattern carries the trailing colon of the emitted function label:
; a bare single-letter pattern would match almost any line of the output and
; therefore would not prove that this function was emitted at all.
; A trailing ret check is included to mirror the vsel_* tests in this file.
; CHECK: B:
; CHECK: cmpnlepd
; CHECK: blendvpd
; CHECK: ret
define <2 x double> @B(<2 x double> %x, <2 x double> %y) {
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; float_crash: compile-only regression test. It selects between two undef
; <4 x double> vectors under an undef mask, extracts lane 0 of the result and
; stores it through an undef addrspace(1) pointer. The only check is that the
; function name appears in the output, i.e. that code generation completes.
; CHECK: float_crash
define void @float_crash() nounwind {
entry:
  %sel = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
  %lane0 = extractelement <4 x double> %sel, i32 0
  store double %lane0, double addrspace(1)* undef, align 8
  ret void
}
Remove the "-promote-elements" flag. This flag is now enabled by default. llvm-svn: 157925 2012-06-04 19:27:21 +08:00			`; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 \| FileCheck %s`
add a testcase for the previous patch llvm-svn: 139287 2011-09-08 16:31:31 +08:00
			`;CHECK: vsel_float`
			`;CHECK: blendvps`
			`;CHECK: ret`
			`define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {`
			`%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2`
			`ret <4 x float> %vsel`
			`}`


Add integer promotion support for vselect llvm-svn: 139692 2011-09-14 22:42:15 +08:00			`;CHECK: vsel_4xi8`
			`;CHECK: blendvps`
			`;CHECK: ret`
			`define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {`
			`%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2`
			`ret <4 x i8> %vsel`
			`}`

			`;CHECK: vsel_4xi16`
			`;CHECK: blendvps`
			`;CHECK: ret`
			`define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {`
			`%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i16> %v1, <4 x i16> %v2`
			`ret <4 x i16> %vsel`
			`}`


add a testcase for the previous patch llvm-svn: 139287 2011-09-08 16:31:31 +08:00			`;CHECK: vsel_i32`
			`;CHECK: blendvps`
			`;CHECK: ret`
			`define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {`
			`%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2`
			`ret <4 x i32> %vsel`
			`}`


			`;CHECK: vsel_double`
			`;CHECK: blendvpd`
			`;CHECK: ret`
			`define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {`
			`%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %v1, <4 x double> %v2`
			`ret <4 x double> %vsel`
			`}`


			`;CHECK: vsel_i64`
			`;CHECK: blendvpd`
			`;CHECK: ret`
			`define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {`
			`%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v1, <4 x i64> %v2`
			`ret <4 x i64> %vsel`
			`}`


			`;CHECK: vsel_i8`
			`;CHECK: pblendvb`
			`;CHECK: ret`
			`define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {`
			`%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2`
			`ret <16 x i8> %vsel`
			`}`

Not sure how CMPPS and CMPPD ever worked; I guess they didn't. However, with this fix they do now. Basically, the operand order for the x86 target-specific node is not the same as the instruction's, but since the intrinsic needs that specific order at the instruction definition, just change the order during legalization. Also, there were some wrong inversions of condition codes, such as GE => LE and GT => LT; fix those too. Fixes PR10907. llvm-svn: 139528 2011-09-13 03:30:40 +08:00			`;; TEST blend + compares`
			`; CHECK: A`
			`define <2 x double> @A(<2 x double> %x, <2 x double> %y) {`
Revert the wrong part of r139528, and fix testcases. llvm-svn: 139541 2011-09-13 05:24:07 +08:00			`; CHECK: cmplepd`
Not sure how CMPPS and CMPPD ever worked; I guess they didn't. However, with this fix they do now. Basically, the operand order for the x86 target-specific node is not the same as the instruction's, but since the intrinsic needs that specific order at the instruction definition, just change the order during legalization. Also, there were some wrong inversions of condition codes, such as GE => LE and GT => LT; fix those too. Fixes PR10907. llvm-svn: 139528 2011-09-13 03:30:40 +08:00			`; CHECK: blendvpd`
			`%max_is_x = fcmp oge <2 x double> %x, %y`
			`%max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y`
			`ret <2 x double> %max`
			`}`

			`; CHECK: B`
			`define <2 x double> @B(<2 x double> %x, <2 x double> %y) {`
Synthesize x86 max/min instructions also for vectors (i.e. produce maxps and maxpd). This broke the sse41-blend.ll testcase by causing maxpd to be produced rather than a cmp+blend pair, which is the reason I tweaked it. Gives a small speedup on doduc with dragonegg when the GCC vectorizer is used. llvm-svn: 139986 2011-09-18 00:49:39 +08:00			`; CHECK: cmpnlepd`
Not sure how CMPPS and CMPPD ever worked; I guess they didn't. However, with this fix they do now. Basically, the operand order for the x86 target-specific node is not the same as the instruction's, but since the intrinsic needs that specific order at the instruction definition, just change the order during legalization. Also, there were some wrong inversions of condition codes, such as GE => LE and GT => LT; fix those too. Fixes PR10907. llvm-svn: 139528 2011-09-13 03:30:40 +08:00			`; CHECK: blendvpd`
Synthesize x86 max/min instructions also for vectors (i.e. produce maxps and maxpd). This broke the sse41-blend.ll testcase by causing maxpd to be produced rather than a cmp+blend pair, which is the reason I tweaked it. Gives a small speedup on doduc with dragonegg when the GCC vectorizer is used. llvm-svn: 139986 2011-09-18 00:49:39 +08:00			`%min_is_x = fcmp ult <2 x double> %x, %y`
			`%min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y`
			`ret <2 x double> %min`
Not sure how CMPPS and CMPPD ever worked; I guess they didn't. However, with this fix they do now. Basically, the operand order for the x86 target-specific node is not the same as the instruction's, but since the intrinsic needs that specific order at the instruction definition, just change the order during legalization. Also, there were some wrong inversions of condition codes, such as GE => LE and GT => LT; fix those too. Fixes PR10907. llvm-svn: 139528 2011-09-13 03:30:40 +08:00			`}`
add a testcase for the previous patch llvm-svn: 139287 2011-09-08 16:31:31 +08:00
When emulating vselect using OR/AND/XOR make sure to bitcast the result back to the original type. llvm-svn: 154764 2012-04-15 23:08:09 +08:00			`; CHECK: float_crash`
			`define void @float_crash() nounwind {`
			`entry:`
			`%merge205vector_func.i = select <4 x i1> undef, <4 x double> undef, <4 x double> undef`
			`%extract214vector_func.i = extractelement <4 x double> %merge205vector_func.i, i32 0`
			`store double %extract214vector_func.i, double addrspace(1)* undef, align 8`
			`ret void`
			`}`