forked from OSchip/llvm-project
[AArch64] Re-add patterns for (s/u)mull2.
These patterns were added in D46009 but were removed in D54276 because they had no test coverage. This commit re-adds them together with tests.

Differential Revision: https://reviews.llvm.org/D69831
This commit is contained in:
parent
eadb65f273
commit
35cf9a1fc5
|
@ -4441,6 +4441,25 @@ defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
|
|||
// Unsigned instantiation of Neon_mul_widen_patterns: AArch64umull paired with
// the three UMULL instruction variants listed below.
// NOTE(review): the multiclass itself is defined outside this excerpt; consult
// its definition for the exact DAG shapes that are matched.
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
|
||||
UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
|
||||
|
||||
// Patterns for smull2/umull2.
//
// Neon_mul_high_patterns emits three selection patterns. Each one matches
// `opnode` applied to extract_high_* of two V128 values and selects the given
// instruction directly on the full V128 registers $Rn and $Rm, so no separate
// extraction of the operands appears in the selected output.
//
// Template arguments:
//   opnode - the widening-multiply node to match.
//   INST8B - instruction selected for the v8i16 result (sources via
//            extract_high_v16i8).
//   INST4H - instruction selected for the v4i32 result (sources via
//            extract_high_v8i16).
//   INST2S - instruction selected for the v2i64 result (sources via
//            extract_high_v4i32).
// NOTE(review): the INST8B/INST4H/INST2S names do not describe the
// instructions actually passed by the instantiations in this file
// (e.g. SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64) — consider
// whether names reflecting the 128-bit source arrangement would be clearer.
|
||||
multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
|
||||
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
|
||||
// v8i16 result from the extract_high_v16i8 of both operands.
def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
|
||||
(extract_high_v16i8 V128:$Rm))),
|
||||
(INST8B V128:$Rn, V128:$Rm)>;
|
||||
// v4i32 result from the extract_high_v8i16 of both operands.
def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
|
||||
(extract_high_v8i16 V128:$Rm))),
|
||||
(INST4H V128:$Rn, V128:$Rm)>;
|
||||
// v2i64 result from the extract_high_v4i32 of both operands.
def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
|
||||
(extract_high_v4i32 V128:$Rm))),
|
||||
(INST2S V128:$Rn, V128:$Rm)>;
|
||||
}
|
||||
|
||||
// Instantiate the high-half patterns twice: once for the signed node
// (AArch64smull -> SMULL instructions) and once for the unsigned node
// (AArch64umull -> UMULL instructions). In both cases the instructions passed
// are the v16i8/v8i16/v4i32-source variants, which the patterns apply to the
// full V128 operands.
defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
|
||||
SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
|
||||
defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
|
||||
UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
|
||||
|
||||
// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
|
||||
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
|
||||
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
|
||||
|
|
|
@ -327,6 +327,66 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; Zero-extends both <16 x i8> arguments to <16 x i16> and multiplies them.
; The expected output contains one umull2 (.16b sources) and one umull
; (.8b sources), each writing an .8h result; the DAG-style directives accept
; the two instructions in either order.
define <16 x i16> @umull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
|
||||
; CHECK-LABEL: umull2_i8:
|
||||
; CHECK-DAG: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
; CHECK-DAG: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
%arg1_ext = zext <16 x i8> %arg1 to <16 x i16>
|
||||
%arg2_ext = zext <16 x i8> %arg2 to <16 x i16>
|
||||
%mul = mul <16 x i16> %arg1_ext, %arg2_ext
|
||||
ret <16 x i16> %mul
|
||||
}
|
||||
|
||||
; Sign-extends both <16 x i8> arguments to <16 x i16> and multiplies them.
; The expected output contains one smull2 (.16b sources) and one smull
; (.8b sources), each writing an .8h result; the DAG-style directives accept
; the two instructions in either order.
define <16 x i16> @smull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
|
||||
; CHECK-LABEL: smull2_i8:
|
||||
; CHECK-DAG: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
; CHECK-DAG: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
%arg1_ext = sext <16 x i8> %arg1 to <16 x i16>
|
||||
%arg2_ext = sext <16 x i8> %arg2 to <16 x i16>
|
||||
%mul = mul <16 x i16> %arg1_ext, %arg2_ext
|
||||
ret <16 x i16> %mul
|
||||
}
|
||||
|
||||
; Zero-extends both <8 x i16> arguments to <8 x i32> and multiplies them.
; The expected output contains one umull2 (.8h sources) and one umull
; (.4h sources), each writing a .4s result; the DAG-style directives accept
; the two instructions in either order.
define <8 x i32> @umull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
|
||||
; CHECK-LABEL: umull2_i16:
|
||||
; CHECK-DAG: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
|
||||
; CHECK-DAG: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
|
||||
%arg1_ext = zext <8 x i16> %arg1 to <8 x i32>
|
||||
%arg2_ext = zext <8 x i16> %arg2 to <8 x i32>
|
||||
%mul = mul <8 x i32> %arg1_ext, %arg2_ext
|
||||
ret <8 x i32> %mul
|
||||
}
|
||||
|
||||
; Sign-extends both <8 x i16> arguments to <8 x i32> and multiplies them.
; The expected output contains one smull2 (.8h sources) and one smull
; (.4h sources), each writing a .4s result; the DAG-style directives accept
; the two instructions in either order.
define <8 x i32> @smull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
|
||||
; CHECK-LABEL: smull2_i16:
|
||||
; CHECK-DAG: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
|
||||
; CHECK-DAG: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
|
||||
%arg1_ext = sext <8 x i16> %arg1 to <8 x i32>
|
||||
%arg2_ext = sext <8 x i16> %arg2 to <8 x i32>
|
||||
%mul = mul <8 x i32> %arg1_ext, %arg2_ext
|
||||
ret <8 x i32> %mul
|
||||
}
|
||||
|
||||
; Zero-extends both <4 x i32> arguments to <4 x i64> and multiplies them.
; The expected output contains one umull2 (.4s sources) and one umull
; (.2s sources), each writing a .2d result; the DAG-style directives accept
; the two instructions in either order.
define <4 x i64> @umull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
|
||||
; CHECK-LABEL: umull2_i32:
|
||||
; CHECK-DAG: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
; CHECK-DAG: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
%arg1_ext = zext <4 x i32> %arg1 to <4 x i64>
|
||||
%arg2_ext = zext <4 x i32> %arg2 to <4 x i64>
|
||||
%mul = mul <4 x i64> %arg1_ext, %arg2_ext
|
||||
ret <4 x i64> %mul
|
||||
}
|
||||
|
||||
; Sign-extends both <4 x i32> arguments to <4 x i64> and multiplies them.
; The expected output contains one smull2 (.4s sources) and one smull
; (.2s sources), each writing a .2d result; the DAG-style directives accept
; the two instructions in either order.
define <4 x i64> @smull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
|
||||
; CHECK-LABEL: smull2_i32:
|
||||
; CHECK-DAG: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
; CHECK-DAG: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
|
||||
%arg1_ext = sext <4 x i32> %arg1 to <4 x i64>
|
||||
%arg2_ext = sext <4 x i32> %arg2 to <4 x i64>
|
||||
%mul = mul <4 x i64> %arg1_ext, %arg2_ext
|
||||
ret <4 x i64> %mul
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8*, i32) nounwind readonly
|
||||
|
||||
declare void @llvm.aarch64.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
|
||||
|
|
Loading…
Reference in New Issue