llvm-project/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll

; RUN: llc  -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
; RUN:        < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s

; Scalar i8 equality compare driving a <8 x i8> select. The checks below expect
; both scalars to be moved into vector registers, compared with a vector cmeq,
; the lane-0 mask broadcast with dup, and the select performed by bsl.
define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %is_eq = icmp eq i8 %a, %b
  %picked = select i1 %is_eq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}

; Scalar float ordered-equal compare driving a <8 x i8> select. The expected
; sequence is a .2s fcmeq on the argument registers, a dup of lane 0 of the
; mask, then bsl, with nothing in between.
define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %is_eq = fcmp oeq float %a, %b
  %picked = select i1 %is_eq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}

; Scalar double ordered-equal compare driving a <8 x i8> select. The checks
; expect a scalar (d-register) fcmeq whose result is used directly as the bsl
; mask, with no dup in between.
define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
  %is_eq = fcmp oeq double %a, %b
  %picked = select i1 %is_eq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}

; Scalar i8 equality compare driving a <16 x i8> select. Same shape as the
; 64-bit variant, but the checks expect the 128-bit (.16b) arrangement for
; cmeq, dup and bsl.
define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is_eq = icmp eq i8 %a, %b
  %picked = select i1 %is_eq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}

; Scalar float ordered-equal compare driving a <16 x i8> select. The expected
; sequence is a .4s fcmeq, a dup of mask lane 0, then a .16b bsl, back to back.
define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is_eq = fcmp oeq float %a, %b
  %picked = select i1 %is_eq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}

; Scalar double ordered-equal compare driving a <16 x i8> select. The expected
; sequence is a .2d fcmeq, a dup of mask lane 0, then a .16b bsl, back to back.
define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is_eq = fcmp oeq double %a, %b
  %picked = select i1 %is_eq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}

; Scalar i16 equality compare driving a <4 x i16> select. The checks expect a
; .4h cmeq on the moved scalars, a dup of halfword lane 0, then an .8b bsl.
define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %is_eq = icmp eq i16 %a, %b
  %picked = select i1 %is_eq, <4 x i16> %c, <4 x i16> %d
  ret <4 x i16> %picked
}

; Scalar i16 equality compare driving a <8 x i16> select. The checks expect a
; .8h cmeq on the moved scalars, a dup of halfword lane 0, then a .16b bsl.
define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is_eq = icmp eq i16 %a, %b
  %picked = select i1 %is_eq, <8 x i16> %c, <8 x i16> %d
  ret <8 x i16> %picked
}

; Scalar i32 equality compare driving a <2 x i32> select. The checks expect a
; .2s cmeq on the moved scalars, a dup of word lane 0, then an .8b bsl.
define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %is_eq = icmp eq i32 %a, %b
  %picked = select i1 %is_eq, <2 x i32> %c, <2 x i32> %d
  ret <2 x i32> %picked
}

; Scalar i32 equality compare driving a <4 x i32> select. The checks expect a
; .4s cmeq on the moved scalars, a dup of word lane 0, then a .16b bsl.
define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is_eq = icmp eq i32 %a, %b
  %picked = select i1 %is_eq, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %picked
}

; Scalar i64 equality compare driving a <1 x i64> select. The checks expect the
; operands to be moved into d registers, compared with a scalar cmeq, and the
; result used directly as the bsl mask (no dup).
define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %is_eq = icmp eq i64 %a, %b
  %picked = select i1 %is_eq, <1 x i64> %c, <1 x i64> %d
  ret <1 x i64> %picked
}

; Scalar i64 equality compare driving a <2 x i64> select. The checks expect a
; .2d cmeq on the moved scalars, a dup of doubleword lane 0, then a .16b bsl.
define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is_eq = icmp eq i64 %a, %b
  %picked = select i1 %is_eq, <2 x i64> %c, <2 x i64> %d
  ret <2 x i64> %picked
}

; Scalar float ordered-equal compare driving a <1 x float> select. The checks
; expect a .2s fcmeq whose result feeds bsl immediately, with no dup.
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
  %is_eq = fcmp oeq float %a, %b
  %picked = select i1 %is_eq, <1 x float> %c, <1 x float> %d
  ret <1 x float> %picked
}

; Scalar float ordered-equal compare driving a <2 x float> select. The checks
; expect a .2s fcmeq, a dup of mask lane 0, then an .8b bsl.
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %is_eq = fcmp oeq float %a, %b
  %picked = select i1 %is_eq, <2 x float> %c, <2 x float> %d
  ret <2 x float> %picked
}

; Scalar float ordered-equal compare driving a <4 x float> select. The checks
; expect a .4s fcmeq, a dup of mask lane 0, then a .16b bsl.
define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is_eq = fcmp oeq float %a, %b
  %picked = select i1 %is_eq, <4 x float> %c, <4 x float> %d
  ret <4 x float> %picked
}

; Integer (i32) equality compare driving a floating-point <4 x float> select.
; The checks expect the integer operands to be moved into vector registers,
; compared with a .4s cmeq, the mask broadcast with dup, then a .16b bsl.
define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is_eq = icmp eq i32 %a, %b
  %picked = select i1 %is_eq, <4 x float> %c, <4 x float> %d
  ret <4 x float> %picked
}

; Scalar double ordered-equal compare driving a <1 x double> select. The checks
; expect a scalar (d-register) fcmeq whose result is used as the bsl mask.
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
  %is_eq = fcmp oeq double %a, %b
  %picked = select i1 %is_eq, <1 x double> %c, <1 x double> %d
  ret <1 x double> %picked
}

; Integer (i64) equality compare driving a <1 x double> select. The checks
; expect the operands to be moved into d registers, compared with a scalar
; cmeq, and the result used directly as the bsl mask.
define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64_icmp:
; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %is_eq = icmp eq i64 %a, %b
  %picked = select i1 %is_eq, <1 x double> %c, <1 x double> %d
  ret <1 x double> %picked
}

; Scalar double ordered-equal compare driving a <2 x double> select. The checks
; expect a .2d fcmeq, a dup of mask lane 0, then a .16b bsl.
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is_eq = fcmp oeq double %a, %b
  %picked = select i1 %is_eq, <2 x double> %c, <2 x double> %d
  ret <2 x double> %picked
}

; Special case (part of PR21549): the select condition is an icmp whose
; operands are i1. The comparison must not be done on vectors here; the checks
; expect a scalar tst/csetm to build the mask in a w register, which is then
; broadcast with dup and consumed by bsl.
define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
; CHECK: tst   w0, #0x1
; CHECK: csetm [[MASK:w[0-9]+]], ne
; CHECK: dup   [[DUPMASK:v[0-9]+]].2s, [[MASK]]
; CHECK: bsl   [[DUPMASK]].8b, v0.8b, v1.8b
; CHECK: mov   v0.16b, [[DUPMASK]].16b
  %is_set = icmp ne i1 %cc, 0
  %picked = select i1 %is_set, <2 x i32> %a, <2 x i32> %b
  ret <2 x i32> %picked
}

; Also make sure we support irregular/non-power-of-2 types such as v3f32.
;
; Scalar float ordered-equal compare driving a <3 x float> select. The expected
; code is exactly: a .4s fcmeq, a dup of mask lane 0, a .16b bsl, a mov of the
; result into v0, and ret.
;
; bsl reads its destination register as the select mask, so it has to operate
; on the very register that dup wrote. The bsl line therefore reuses the
; DUPMASK capture from the dup line instead of capturing a fresh register,
; which would let an unrelated register slip through unnoticed.
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq float %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

; Scalar double ordered-equal compare driving a <3 x float> select, i.e. the
; compare operand type is wider than the vector element type. The expected
; code is exactly: a .2d fcmeq, a dup of mask lane 0, a .16b bsl, a mov of the
; result into v0, and ret.
;
; bsl reads its destination register as the select mask, so it has to operate
; on the very register that dup wrote. The bsl line therefore reuses the
; DUPMASK capture from the dup line instead of capturing a fresh register,
; which would let an unrelated register slip through unnoticed.
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq double %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

; Attribute group referenced as #0 by the v3f32 tests in this file.
attributes #0 = { nounwind }
[AArch64] Don't assert when combining (v3f32 select (setcc f64)). When the setcc has f64 operands, we can't build a vector setcc mask to feed a vselect, because f64 doesn't divide v3f32 evenly. Just bail out when that happens. llvm-svn: 235917 2015-04-28 05:01:20 +08:00			`; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \`
			`; RUN: < %s -verify-machineinstrs -asm-verbose=false \| FileCheck %s`
AArch64/ARM64: optimise vector selects & enable test When performing a scalar comparison that feeds into a vector select, it's actually better to do the comparison on the vector side: the scalar route would be "CMP -> CSEL -> DUP", the vector is "CM -> DUP" since the vector comparisons are all mask based. llvm-svn: 208210 2014-05-07 22:10:27 +08:00
			`define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {`
			`; CHECK-LABEL: test_select_cc_v8i8_i8:`
			`; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0`
			`; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]`
			`; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b`
			`%cmp31 = icmp eq i8 %a, %b`
			`%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d`
			`ret <8x i8> %e`
			`}`

			`define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {`
			`; CHECK-LABEL: test_select_cc_v8i8_f32:`
			`; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s`
			`; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]`
			`; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b`
			`%cmp31 = fcmp oeq float %a, %b`
			`%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d`
			`ret <8x i8> %e`
			`}`

			`define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {`
			`; CHECK-LABEL: test_select_cc_v8i8_f64:`
			`; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1`
			`; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b`
			`%cmp31 = fcmp oeq double %a, %b`
			`%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d`
			`ret <8x i8> %e`
			`}`

			`define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {`
			`; CHECK-LABEL: test_select_cc_v16i8_i8:`
			`; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0`
			`; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]`
			`; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b`
			`%cmp31 = icmp eq i8 %a, %b`
			`%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d`
			`ret <16x i8> %e`
			`}`

			`define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {`
			`; CHECK-LABEL: test_select_cc_v16i8_f32:`
			`; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s`
			`; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]`
			`; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b`
			`%cmp31 = fcmp oeq float %a, %b`
			`%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d`
			`ret <16x i8> %e`
			`}`

			`define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {`
			`; CHECK-LABEL: test_select_cc_v16i8_f64:`
			`; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d`
			`; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]`
			`; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b`
			`%cmp31 = fcmp oeq double %a, %b`
			`%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d`
			`ret <16x i8> %e`
			`}`

			`define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {`
			`; CHECK-LABEL: test_select_cc_v4i16:`
			`; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0`
			`; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]`
			`; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b`
			`%cmp31 = icmp eq i16 %a, %b`
			`%e = select i1 %cmp31, <4x i16> %c, <4x i16> %d`
			`ret <4x i16> %e`
			`}`

			`define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {`
			`; CHECK-LABEL: test_select_cc_v8i16:`
			`; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0`
			`; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]`
			`; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b`
			`%cmp31 = icmp eq i16 %a, %b`
			`%e = select i1 %cmp31, <8x i16> %c, <8x i16> %d`
			`ret <8x i16> %e`
			`}`

			`define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {`
			`; CHECK-LABEL: test_select_cc_v2i32:`
			`; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0`
			`; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]`
			`; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b`
			`%cmp31 = icmp eq i32 %a, %b`
			`%e = select i1 %cmp31, <2x i32> %c, <2x i32> %d`
			`ret <2x i32> %e`
			`}`

			`define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {`
			`; CHECK-LABEL: test_select_cc_v4i32:`
			`; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0`
			`; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]`
			`; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b`
			`%cmp31 = icmp eq i32 %a, %b`
			`%e = select i1 %cmp31, <4x i32> %c, <4x i32> %d`
			`ret <4x i32> %e`
			`}`

			`define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {`
			`; CHECK-LABEL: test_select_cc_v1i64:`
			`; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0`
			`; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1`
			`; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]`
			`; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b`
			`%cmp31 = icmp eq i64 %a, %b`
			`%e = select i1 %cmp31, <1x i64> %c, <1x i64> %d`
			`ret <1x i64> %e`
			`}`

			`define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {`
			`; CHECK-LABEL: test_select_cc_v2i64:`
			`; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0`
			`; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]`
			`; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b`
			`%cmp31 = icmp eq i64 %a, %b`
			`%e = select i1 %cmp31, <2x i64> %c, <2x i64> %d`
			`ret <2x i64> %e`
			`}`

			`define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {`
			`; CHECK-LABEL: test_select_cc_v1f32:`
[codegen,aarch64] Add a target hook to the code generator to control vector type legalization strategies in a more fine grained manner, and change the legalization of several v1iN types and v1f32 to be widening rather than scalarization on AArch64. This fixes an assertion failure caused by scalarizing nodes like "v1i32 trunc v1i64". As v1i64 is legal it will fail to scalarize v1i32. This also provides a foundation for other targets to have more granular control over how vector types are legalized. Patch by Hao Liu, reviewed by Tim Northover. I'm committing it to allow some work to start taking place on top of this patch as it adds some really important hooks to the backend that I'd like to immediately start using. =] http://reviews.llvm.org/D4322 llvm-svn: 212242 2014-07-03 08:23:43 +08:00			`; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s`
			`; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b`
AArch64/ARM64: optimise vector selects & enable test When performing a scalar comparison that feeds into a vector select, it's actually better to do the comparison on the vector side: the scalar route would be "CMP -> CSEL -> DUP", the vector is "CM -> DUP" since the vector comparisons are all mask based. llvm-svn: 208210 2014-05-07 22:10:27 +08:00			`%cmp31 = fcmp oeq float %a, %b`
			`%e = select i1 %cmp31, <1 x float> %c, <1 x float> %d`
			`ret <1 x float> %e`
			`}`

			`define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {`
			`; CHECK-LABEL: test_select_cc_v2f32:`
			`; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]`
			`; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b`
			`%cmp31 = fcmp oeq float %a, %b`
			`%e = select i1 %cmp31, <2 x float> %c, <2 x float> %d`
			`ret <2 x float> %e`
			`}`

			`define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {`
			`; CHECK-LABEL: test_select_cc_v4f32:`
			`; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]`
			`; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b`
			`%cmp31 = fcmp oeq float %a, %b`
			`%e = select i1 %cmp31, <4x float> %c, <4x float> %d`
			`ret <4x float> %e`
			`}`

			`define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) {`
			`; CHECK-LABEL: test_select_cc_v4f32_icmp:`
			`; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0`
			`; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1`
			`; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]`
			`; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b`
			`%cmp31 = icmp eq i32 %a, %b`
			`%e = select i1 %cmp31, <4x float> %c, <4x float> %d`
			`ret <4x float> %e`
			`}`

			`define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {`
			`; CHECK-LABEL: test_select_cc_v1f64:`
			`; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1`
			`; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b`
			`%cmp31 = fcmp oeq double %a, %b`
			`%e = select i1 %cmp31, <1 x double> %c, <1 x double> %d`
			`ret <1 x double> %e`
			`}`

			`define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) {`
			`; CHECK-LABEL: test_select_cc_v1f64_icmp:`
			`; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0`
			`; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1`
			`; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]`
			`; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b`
			`%cmp31 = icmp eq i64 %a, %b`
			`%e = select i1 %cmp31, <1 x double> %c, <1 x double> %d`
			`ret <1 x double> %e`
			`}`

			`define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {`
			`; CHECK-LABEL: test_select_cc_v2f64:`
			`; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]`
			`; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b`
			`%cmp31 = fcmp oeq double %a, %b`
			`%e = select i1 %cmp31, <2 x double> %c, <2 x double> %d`
			`ret <2 x double> %e`
			`}`
[AArch64] Don't combine "select (setcc i1 LHS, RHS), vL, vR". r208210 introduced an optimization that improves the vector select codegen by doing the setcc on vectors directly. This is a problem when the setcc operands are i1s, because the optimization would create vectors of i1, which aren't legal. Part of PR21549. Differential Revision: http://reviews.llvm.org/D6308 llvm-svn: 223075 2014-12-02 04:59:00 +08:00
			`; Special case: when the select condition is an icmp with i1 operands, don't`
			`; do the comparison on vectors.`
			`; Part of PR21549.`
			`define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {`
			`; CHECK-LABEL: test_select_cc_v2i32_icmpi1:`
			`; CHECK: tst w0, #0x1`
			`; CHECK: csetm [[MASK:w[0-9]+]], ne`
			`; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]]`
			`; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b`
			`; CHECK: mov v0.16b, [[DUPMASK]].16b`
			`%cmp = icmp ne i1 %cc, 0`
			`%e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b`
			`ret <2 x i32> %e`
			`}`
[AArch64] Don't assert when combining (v3f32 select (setcc f64)). When the setcc has f64 operands, we can't build a vector setcc mask to feed a vselect, because f64 doesn't divide v3f32 evenly. Just bail out when that happens. llvm-svn: 235917 2015-04-28 05:01:20 +08:00
			`; Also make sure we support irregular/non-power-of-2 types such as v3f32.`
			`define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {`
			`; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:`
			`; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s`
[AArch64] Also combine vector selects fed by non-i1 SETCCs. After legalization, scalar SETCC has an i32 result type on AArch64. The i1 requirement seems too conservative, replace it with an assert. This also means that we now can run after legalization. That should also be fine, since the ops legalizer runs again after each combine, and all types created all have the same sizes as the (legal) inputs. Exposed by r235917; while there, robustize its tests (bsl also uses the register it defines). llvm-svn: 235922 2015-04-28 05:43:12 +08:00			`; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]`
			`; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b`
			`; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b`
[AArch64] Don't assert when combining (v3f32 select (setcc f64)). When the setcc has f64 operands, we can't build a vector setcc mask to feed a vselect, because f64 doesn't divide v3f32 evenly. Just bail out when that happens. llvm-svn: 235917 2015-04-28 05:01:20 +08:00			`; CHECK-NEXT: ret`
			`%cc = fcmp oeq float %c1, %c2`
			`%r = select i1 %cc, <3 x float> %a, <3 x float> %b`
			`ret <3 x float> %r`
			`}`

			`define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {`
			`; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:`
[AArch64] Also combine vector selects fed by non-i1 SETCCs. After legalization, scalar SETCC has an i32 result type on AArch64. The i1 requirement seems too conservative, replace it with an assert. This also means that we now can run after legalization. That should also be fine, since the ops legalizer runs again after each combine, and all types created all have the same sizes as the (legal) inputs. Exposed by r235917; while there, robustize its tests (bsl also uses the register it defines). llvm-svn: 235922 2015-04-28 05:43:12 +08:00			`; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d`
			`; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]`
			`; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b`
			`; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b`
[AArch64] Don't assert when combining (v3f32 select (setcc f64)). When the setcc has f64 operands, we can't build a vector setcc mask to feed a vselect, because f64 doesn't divide v3f32 evenly. Just bail out when that happens. llvm-svn: 235917 2015-04-28 05:01:20 +08:00			`; CHECK-NEXT: ret`
			`%cc = fcmp oeq double %c1, %c2`
			`%r = select i1 %cc, <3 x float> %a, <3 x float> %b`
			`ret <3 x float> %r`
			`}`

			`attributes #0 = { nounwind}`