; llvm-project/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll

; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s

; This is the analogue of AArch64's file of the same name. It mostly tests that
; some form of correct lowering occurs; the tests are a little artificial, but I
; strongly suspect there's room for improved CodeGen (FIXME).

; Signed >= comparison of two <1 x i64> values. The single i1 lane of the
; result is extracted and sign-extended to i64. The expected output contains a
; compare of two x registers followed by an instruction matching "cset".
define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
; CHECK-LABEL: test_sext_extr_cmp_0:
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: cset
  %ge.vec = icmp sge <1 x i64> %v1, %v2
  %ge.bit = extractelement <1 x i1> %ge.vec, i32 0
  %ext = sext i1 %ge.bit to i64
  ret i64 %ext
}

; Ordered-equal comparison of two <1 x double> values. The single i1 lane of
; the result is extracted and sign-extended to i64. The expected output
; contains a floating-point compare of two d registers.
define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
; CHECK-LABEL: test_sext_extr_cmp_1:
; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
  %oeq.vec = fcmp oeq <1 x double> %v1, %v2
  %oeq.bit = extractelement <1 x i1> %oeq.vec, i32 0
  %ext = sext i1 %oeq.bit to i64
  ret i64 %ext
}

; Vector select on <1 x i64> driven by an integer equality mask: yields zero
; when %v1 == %v2 and %v3 otherwise. The expected output is a d-register cmeq
; followed by a bic on .8b vector operands.
define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_0:
; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
  %eq.mask = icmp eq <1 x i64> %v1, %v2
  %sel = select <1 x i1> %eq.mask, <1 x i64> zeroinitializer, <1 x i64> %v3
  ret <1 x i64> %sel
}

; Vector select on <1 x i64> driven by a floating-point (ordered-equal) mask
; computed from two <1 x double> inputs: yields zero when they compare equal
; and %v3 otherwise. The expected output is a d-register fcmeq followed by a
; bic on .8b vector operands.
define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_1:
; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
  %oeq.mask = fcmp oeq <1 x double> %v1, %v2
  %sel = select <1 x i1> %oeq.mask, <1 x i64> zeroinitializer, <1 x i64> %v3
  ret <1 x i64> %sel
}

; Vector select on <1 x double> driven by an integer equality mask computed
; from two <1 x i64> inputs: yields zero when they are equal and %v3
; otherwise. The expected output is a d-register cmeq followed by a bic on
; .8b vector operands.
define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
; CHECK-LABEL: test_select_v1i1_2:
; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
  %eq.mask = icmp eq <1 x i64> %v1, %v2
  %sel = select <1 x i1> %eq.mask, <1 x double> zeroinitializer, <1 x double> %v3
  ret <1 x double> %sel
}

; For v1i64, it's not clear which of the vector or scalar compare is better.
; Let's stick to the vector form, like for all other vector selects fed by a
; scalar setcc.  If anything, it exposes more ILP.
; The select mask here is built from a *scalar* i64 equality compare that is
; inserted into lane 0 of a <1 x i1> vector. The expected output moves both
; scalar operands into d registers (two fmov), then uses the vector cmeq and a
; bic on .8b vector operands.
define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_3:
; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
  %scalar.eq = icmp eq i64 %lhs, %rhs
  %mask = insertelement <1 x i1> undef, i1 %scalar.eq, i32 0
  %sel = select <1 x i1> %mask, <1 x i64> zeroinitializer, <1 x i64> %v3
  ret <1 x i64> %sel
}

; Branches on the single lane extracted from a <1 x i64> equality compare:
; returns 1 when the inputs are equal (if.end) and 0 otherwise (if.then). The
; expected output contains a compare of two x registers.
define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
; CHECK-LABEL: test_br_extr_cmp:
; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
  %eq.vec = icmp eq <1 x i64> %v1, %v2
  %eq.bit = extractelement <1 x i1> %eq.vec, i32 0
  br i1 %eq.bit, label %if.end, label %if.then

if.then:
  ret i32 0

if.end:
  ret i32 1
}
ARM64: handle v1i1 types arising from setcc properly. There were several overlapping problems here, and this solution is closely inspired by the one adopted in AArch64 in r201381. Firstly, scalarisation of v1i1 setcc operations simply fails if the input types are legal. This is fixed in LegalizeVectorTypes.cpp this time, and allows AArch64 code to be simplified slightly. Second, vselect with such a setcc feeding into it ends up in ScalarizeVectorOperand, where it's not handled. I experimented with an implementation, but found that whatever DAG came out was rather horrific. I think Hao's DAG combine approach is a good one for quality, though there are edge cases it won't catch (to be fixed separately). Should fix PR19335. llvm-svn: 205625 2014-04-04 22:49:21 +08:00			`; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu \| FileCheck %s`

			`; This is the analogue of AArch64's file of the same name. It's mostly testing`
			`; some form of correct lowering occurs, the tests are a little artificial but I`
			`; strongly suspect there's room for improved CodeGen (FIXME).`

			`define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {`
			`; CHECK-LABEL: test_sext_extr_cmp_0:`
			`; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}`
TableGen: use PrintMethods to print more aliases llvm-svn: 208607 2014-05-13 02:04:06 +08:00			`; CHECK: cset`
ARM64: handle v1i1 types arising from setcc properly. There were several overlapping problems here, and this solution is closely inspired by the one adopted in AArch64 in r201381. Firstly, scalarisation of v1i1 setcc operations simply fails if the input types are legal. This is fixed in LegalizeVectorTypes.cpp this time, and allows AArch64 code to be simplified slightly. Second, vselect with such a setcc feeding into it ends up in ScalarizeVectorOperand, where it's not handled. I experimented with an implementation, but found that whatever DAG came out was rather horrific. I think Hao's DAG combine approach is a good one for quality, though there are edge cases it won't catch (to be fixed separately). Should fix PR19335. llvm-svn: 205625 2014-04-04 22:49:21 +08:00			`%1 = icmp sge <1 x i64> %v1, %v2`
			`%2 = extractelement <1 x i1> %1, i32 0`
			`%vget_lane = sext i1 %2 to i64`
			`ret i64 %vget_lane`
			`}`

			`define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {`
			`; CHECK-LABEL: test_sext_extr_cmp_1:`
			`; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}`
			`%1 = fcmp oeq <1 x double> %v1, %v2`
			`%2 = extractelement <1 x i1> %1, i32 0`
			`%vget_lane = sext i1 %2 to i64`
			`ret i64 %vget_lane`
			`}`

			`define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {`
			`; CHECK-LABEL: test_select_v1i1_0:`
			`; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}`
			`; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b`
			`%1 = icmp eq <1 x i64> %v1, %v2`
			`%res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3`
			`ret <1 x i64> %res`
			`}`

			`define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {`
			`; CHECK-LABEL: test_select_v1i1_1:`
			`; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}`
			`; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b`
			`%1 = fcmp oeq <1 x double> %v1, %v2`
			`%res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3`
			`ret <1 x i64> %res`
			`}`

			`define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {`
			`; CHECK-LABEL: test_select_v1i1_2:`
			`; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}`
			`; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b`
			`%1 = icmp eq <1 x i64> %v1, %v2`
			`%res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3`
			`ret <1 x double> %res`
			`}`

[AArch64] Also combine vector selects fed by non-i1 SETCCs. After legalization, scalar SETCC has an i32 result type on AArch64. The i1 requirement seems too conservative, replace it with an assert. This also means that we now can run after legalization. That should also be fine, since the ops legalizer runs again after each combine, and all types created all have the same sizes as the (legal) inputs. Exposed by r235917; while there, robustize its tests (bsl also uses the register it defines). llvm-svn: 235922 2015-04-28 05:43:12 +08:00			`; For v1i64, it's not clear which of the vector or scalar compare is better.`
			`; Let's stick to the vector form, like for all other vector selects fed by a`
			`; scalar setcc. If anything, it exposes more ILP.`
DAGLegalize: add last-ditch type-legalization for VSELECT. When LLVM sees something like (v1iN (vselect v1i1, v1iN, v1iN)) it can decide that the result is OK (v1i64 is legal on AArch64, for example) but it still needs scalarising because of that v1i1. There was no code to do this though. AArch64 and ARM64 have DAG combines to produce efficient code and prevent that occurring in most such situations, but there are edge cases that they miss. This adds a legalization to cope with that. llvm-svn: 205626 2014-04-04 22:49:30 +08:00			`define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {`
			`; CHECK-LABEL: test_select_v1i1_3:`
[AArch64] Also combine vector selects fed by non-i1 SETCCs. After legalization, scalar SETCC has an i32 result type on AArch64. The i1 requirement seems too conservative, replace it with an assert. This also means that we now can run after legalization. That should also be fine, since the ops legalizer runs again after each combine, and all types created all have the same sizes as the (legal) inputs. Exposed by r235917; while there, robustize its tests (bsl also uses the register it defines). llvm-svn: 235922 2015-04-28 05:43:12 +08:00			`; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}`
			`; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}`
			`; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}`
			`; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b`
DAGLegalize: add last-ditch type-legalization for VSELECT. When LLVM sees something like (v1iN (vselect v1i1, v1iN, v1iN)) it can decide that the result is OK (v1i64 is legal on AArch64, for example) but it still needs scalarising because of that v1i1. There was no code to do this though. AArch64 and ARM64 have DAG combines to produce efficient code and prevent that occurring in most such situations, but there are edge cases that they miss. This adds a legalization to cope with that. llvm-svn: 205626 2014-04-04 22:49:30 +08:00			`%tst = icmp eq i64 %lhs, %rhs`
			`%evil = insertelement <1 x i1> undef, i1 %tst, i32 0`
			`%res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3`
			`ret <1 x i64> %res`
			`}`

ARM64: handle v1i1 types arising from setcc properly. There were several overlapping problems here, and this solution is closely inspired by the one adopted in AArch64 in r201381. Firstly, scalarisation of v1i1 setcc operations simply fails if the input types are legal. This is fixed in LegalizeVectorTypes.cpp this time, and allows AArch64 code to be simplified slightly. Second, vselect with such a setcc feeding into it ends up in ScalarizeVectorOperand, where it's not handled. I experimented with an implementation, but found that whatever DAG came out was rather horrific. I think Hao's DAG combine approach is a good one for quality, though there are edge cases it won't catch (to be fixed separately). Should fix PR19335. llvm-svn: 205625 2014-04-04 22:49:21 +08:00			`define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {`
			`; CHECK-LABEL: test_br_extr_cmp:`
			`; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}`
			`%1 = icmp eq <1 x i64> %v1, %v2`
			`%2 = extractelement <1 x i1> %1, i32 0`
			`br i1 %2, label %if.end, label %if.then`

			`if.then:`
			`ret i32 0;`

			`if.end:`
			`ret i32 1;`
			`}`