forked from OSchip/llvm-project
[AArch64] Add patterns for FCMLA*_indexed.
This patch adds patterns for the indexed variants of FCMLA. Mostly based on a patch by Tim Northover. Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D92947
This commit is contained in:
parent
23b4198652
commit
d38a0258a5
|
@ -951,6 +951,7 @@ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
|
|||
def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
|
||||
(FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasComplxNum, HasNEON] in {
|
||||
def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
|
||||
(FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
|
||||
|
@ -975,14 +976,34 @@ multiclass FCMLA_PATS<ValueType ty, RegisterClass Reg> {
|
|||
(!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
|
||||
}
|
||||
|
||||
// Selection patterns for the lane-indexed ("_indexed") FCMLA instructions.
// One pattern is defined per vcmla rotation intrinsic (rot0, rot90, rot180,
// rot270); each selects the instruction named "FCMLA" # ty # "_indexed" and
// passes 0, 1, 2 or 3 respectively as the final (rotation) operand.
//   ty     - vector value type of the result and of the $Rd / $Rn operands.
//   Reg    - register class used for $Rd and $Rn.
//   RHSDup - dag matched as the third intrinsic operand. The output patterns
//            reference $Rm and $idx, so RHSDup is expected to bind both (the
//            instantiations of this multiclass do so).
multiclass FCMLA_LANE_PATS<ValueType ty, RegisterClass Reg, dag RHSDup> {
|
||||
def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
|
||||
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
|
||||
def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
|
||||
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
|
||||
def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
|
||||
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
|
||||
def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
|
||||
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
|
||||
}
|
||||
|
||||
|
||||
// Half-precision FCMLA patterns, guarded by HasComplxNum, HasNEON and
// HasFullFP16: the plain v4f16 / v8f16 patterns plus their lane variants.
// Both lane variants describe the duplicated operand as an AArch64duplane32
// of $Rm viewed as v4i32, bitconverted to the half vector type. The v4f16
// variant matches $idx with VectorIndexD, the v8f16 variant with VectorIndexS.
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
|
||||
defm : FCMLA_PATS<v4f16, V64>;
|
||||
defm : FCMLA_PATS<v8f16, V128>;
|
||||
|
||||
defm : FCMLA_LANE_PATS<v4f16, V64,
|
||||
(v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
|
||||
defm : FCMLA_LANE_PATS<v8f16, V128,
|
||||
(v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
|
||||
}
|
||||
// FCMLA patterns guarded by HasComplxNum and HasNEON only: the plain v2f32,
// v4f32 and v2f64 patterns plus a single lane variant for v4f32. The lane
// variant describes the duplicated operand as an AArch64duplane64 of $Rm
// viewed as v2i64 (lane index matched with VectorIndexD), bitconverted to
// v4f32.
let Predicates = [HasComplxNum, HasNEON] in {
|
||||
defm : FCMLA_PATS<v2f32, V64>;
|
||||
defm : FCMLA_PATS<v4f32, V128>;
|
||||
defm : FCMLA_PATS<v2f64, V128>;
|
||||
|
||||
defm : FCMLA_LANE_PATS<v4f32, V128,
|
||||
(v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
|
||||
}
|
||||
|
||||
// v8.3a Pointer Authentication
|
||||
|
|
|
@ -9,6 +9,17 @@ entry:
|
|||
ret <4 x half> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot0 test on <4 x half>: %c is bitcast to <2 x i32>, element 1
; is splatted with shufflevector, and the splat is bitcast back to <4 x half>
; to form the third operand of vcmla.rot0. The FileCheck pattern in the body
; expects an fcmla with a .h[1] lane operand and rotation #0.
define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_16x4_lane_1
|
||||
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[1], #0
|
||||
;
|
||||
%c.cast = bitcast <4 x half> %c to <2 x i32>
|
||||
%c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%c.res = bitcast <2 x i32> %c.dup to <4 x half>
|
||||
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
|
||||
ret <4 x half> %res
|
||||
}
|
||||
|
||||
define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
|
||||
entry:
|
||||
|
@ -19,6 +30,18 @@ entry:
|
|||
ret <4 x half> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot90 test on <4 x half>: %c is bitcast to <2 x i32>, element 0
; is splatted with shufflevector, and the splat is bitcast back to <4 x half>
; to form the third operand of vcmla.rot90. The FileCheck pattern in the body
; expects an fcmla with a .h[0] lane operand and rotation #90.
define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot90_16x4_lane_0
|
||||
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #90
|
||||
;
|
||||
%c.cast = bitcast <4 x half> %c to <2 x i32>
|
||||
%c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> <i32 0, i32 0>
|
||||
%c.res = bitcast <2 x i32> %c.dup to <4 x half>
|
||||
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
|
||||
ret <4 x half> %res
|
||||
}
|
||||
|
||||
define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot180_16x4
|
||||
|
@ -28,6 +51,18 @@ entry:
|
|||
ret <4 x half> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot180 test on <4 x half> where the lane source %c is a full
; <8 x half> vector: %c is bitcast to <4 x i32>, element 0 is splatted into a
; <2 x i32> with shufflevector, and the splat is bitcast to <4 x half> to form
; the third operand of vcmla.rot180. The FileCheck pattern in the body expects
; an fcmla with a .h[0] lane operand and rotation #180.
define <4 x half> @test_rot180_16x4_lane_0(<4 x half> %a, <4 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot180_16x4_lane_0
|
||||
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #180
|
||||
|
||||
%c.cast = bitcast <8 x half> %c to <4 x i32>
|
||||
%c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <2 x i32> <i32 0, i32 0>
|
||||
%c.res = bitcast <2 x i32> %c.dup to <4 x half>
|
||||
%res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res)
|
||||
ret <4 x half> %res
|
||||
}
|
||||
|
||||
define <4 x half> @test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot270_16x4
|
||||
|
@ -82,6 +117,18 @@ entry:
|
|||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot0 test on <8 x half>: %c is bitcast to <4 x i32>, element 0
; is splatted with shufflevector, and the splat is bitcast back to <8 x half>
; to form the third operand of vcmla.rot0. The FileCheck pattern in the body
; expects an fcmla with a .h[0] lane operand and rotation #0.
define <8 x half> @test_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_16x8_lane_0
|
||||
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[0], #0
|
||||
;
|
||||
%c.cast = bitcast <8 x half> %c to <4 x i32>
|
||||
%c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
||||
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
|
||||
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
|
||||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
define <8 x half> @test_rot90_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot90_16x8
|
||||
|
@ -91,6 +138,18 @@ entry:
|
|||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot90 test on <8 x half>: %c is bitcast to <4 x i32>, element 1
; is splatted with shufflevector, and the splat is bitcast back to <8 x half>
; to form the third operand of vcmla.rot90. The FileCheck pattern in the body
; expects an fcmla with a .h[1] lane operand and rotation #90.
define <8 x half> @test_rot90_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot90_16x8_lane_1
|
||||
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[1], #90
|
||||
;
|
||||
%c.cast = bitcast <8 x half> %c to <4 x i32>
|
||||
%c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
|
||||
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
|
||||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
define <8 x half> @test_rot180_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot180_16x8
|
||||
|
@ -100,6 +159,18 @@ entry:
|
|||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot180 test on <8 x half>: %c is bitcast to <4 x i32>, element
; 1 is splatted with shufflevector, and the splat is bitcast back to
; <8 x half> to form the third operand of vcmla.rot180. The FileCheck pattern
; in the body expects an fcmla with a .h[1] lane operand and rotation #180.
define <8 x half> @test_rot180_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot180_16x8_lane_1
|
||||
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[1], #180
|
||||
;
|
||||
%c.cast = bitcast <8 x half> %c to <4 x i32>
|
||||
%c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
|
||||
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
|
||||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
define <8 x half> @test_rot270_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot270_16x8
|
||||
|
@ -109,6 +180,18 @@ entry:
|
|||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot270 test on <8 x half>: %c is bitcast to <4 x i32>, element
; 0 is splatted with shufflevector, and the splat is bitcast back to
; <8 x half> to form the third operand of vcmla.rot270. The FileCheck pattern
; in the body expects an fcmla with a .h[0] lane operand and rotation #270.
define <8 x half> @test_rot270_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot270_16x8_lane_0
|
||||
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[0], #270
|
||||
;
|
||||
%c.cast = bitcast <8 x half> %c to <4 x i32>
|
||||
%c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
|
||||
%c.res = bitcast <4 x i32> %c.dup to <8 x half>
|
||||
%res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c.res)
|
||||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_32x4
|
||||
|
@ -118,6 +201,18 @@ entry:
|
|||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Lane-indexed rot0 test on <4 x float>: %c is bitcast to <2 x i64>, element 0
; is splatted with shufflevector, and the splat is bitcast back to
; <4 x float> to form the third operand of vcmla.rot0. The FileCheck pattern
; in the body expects an fcmla with a .s[0] lane operand and rotation #0.
define <4 x float> @test_32x4_lane_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_32x4_lane_0
|
||||
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0], #0
|
||||
;
|
||||
%c.cast = bitcast <4 x float> %c to <2 x i64>
|
||||
%c.dup = shufflevector <2 x i64> %c.cast , <2 x i64> undef, <2 x i32> <i32 0, i32 0>
|
||||
%c.res = bitcast <2 x i64> %c.dup to <4 x float>
|
||||
%res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c.res)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_rot90_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test_rot90_32x4
|
||||
|
|
Loading…
Reference in New Issue