[AArch64] Fix scalar imm variants of SIMD shift left instructions

This issue was reported in PR50057: Cannot select:
t10: i64 = AArch64ISD::VSHL t2, Constant:i32<2>

Shift intrinsics (llvm.aarch64.neon.ushl.i64 and sshl) with a constant
shift operand are lowered into AArch64ISD::VSHL in tryCombineShiftImm.
VSHL has i64 and v1i64 patterns for a right shift, but only v1i64 for
a left shift.

This patch adds the missing i64 pattern for AArch64ISD::VSHL, and LIT
tests to cover scalar variants (i64 and v1i64) of all shift
intrinsics (only ushl and sshl cases fail without the patch, others
were just not covered).

Differential Revision: https://reviews.llvm.org/D101580
This commit is contained in:
Andrew Savonichev 2021-04-29 19:34:39 +03:00
parent 3ee826594a
commit 1ee50b4731
2 changed files with 371 additions and 2 deletions

View File

@ -8958,10 +8958,13 @@ multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
FPR64, FPR64, vecshiftL64, asm,
[(set (v1i64 FPR64:$Rd),
(OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
[(set (i64 FPR64:$Rd),
(OpNode (i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
let Inst{21-16} = imm{5-0};
}
def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))),
(!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
}
let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in

View File

@ -27,6 +27,40 @@ define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
ret <2 x i32> %tmp3
}
; Register-operand sqshl on <1 x i64>: value and shift amount are both loaded
; from memory and the scalar D-register form of the instruction is expected.
define <1 x i64> @sqshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: sqshl1d:
;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load <1 x i64>, <1 x i64>* %A
  %amount = load <1 x i64>, <1 x i64>* %B
  %result = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %value, <1 x i64> %amount)
  ret <1 x i64> %result
}
; sqshl on <1 x i64> with a constant shift amount of 1: the immediate form
; (#1) of the scalar instruction is expected.
define <1 x i64> @sqshl1d_constant(<1 x i64>* %A) nounwind {
;CHECK-LABEL: sqshl1d_constant:
;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %value, <1 x i64> <i64 1>)
  ret <1 x i64> %result
}
; Plain i64 variant of sqshl with a register shift amount: the D-register
; form of the instruction is expected.
define i64 @sqshl_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: sqshl_scalar:
;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load i64, i64* %A
  %amount = load i64, i64* %B
  %result = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %value, i64 %amount)
  ret i64 %result
}
; Plain i64 variant of sqshl with a constant shift amount of 1: the
; immediate form (#1) is expected.
define i64 @sqshl_scalar_constant(i64* %A) nounwind {
;CHECK-LABEL: sqshl_scalar_constant:
;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %value, i64 1)
  ret i64 %result
}
define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqshl8b:
;CHECK: uqshl.8b
@ -126,15 +160,52 @@ define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
ret <2 x i64> %tmp3
}
; Register-operand uqshl on <1 x i64>: value and shift amount are both loaded
; from memory and the scalar D-register form of the instruction is expected.
define <1 x i64> @uqshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: uqshl1d:
;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load <1 x i64>, <1 x i64>* %A
  %amount = load <1 x i64>, <1 x i64>* %B
  %result = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %value, <1 x i64> %amount)
  ret <1 x i64> %result
}
; uqshl on <1 x i64> with a constant shift amount of 1: the immediate form
; (#1) of the scalar instruction is expected.
define <1 x i64> @uqshl1d_constant(<1 x i64>* %A) nounwind {
;CHECK-LABEL: uqshl1d_constant:
;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %value, <1 x i64> <i64 1>)
  ret <1 x i64> %result
}
; Plain i64 variant of uqshl with a register shift amount: the D-register
; form of the instruction is expected.
define i64 @uqshl_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: uqshl_scalar:
;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load i64, i64* %A
  %amount = load i64, i64* %B
  %result = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %value, i64 %amount)
  ret i64 %result
}
; Plain i64 variant of uqshl with a constant shift amount of 1: the
; immediate form (#1) is expected.
define i64 @uqshl_scalar_constant(i64* %A) nounwind {
;CHECK-LABEL: uqshl_scalar_constant:
;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %value, i64 1)
  ret i64 %result
}
declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@ -173,6 +244,44 @@ define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
ret <2 x i32> %tmp3
}
; Register-operand srshl on <1 x i64>: value and shift amount are both loaded
; from memory and the scalar D-register form of the instruction is expected.
define <1 x i64> @srshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: srshl1d:
;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load <1 x i64>, <1 x i64>* %A
  %amount = load <1 x i64>, <1 x i64>* %B
  %result = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %value, <1 x i64> %amount)
  ret <1 x i64> %result
}
; srshl on <1 x i64> with a constant shift amount of 1. The expected output
; materializes the constant in a general-purpose register, moves it to a
; D register, and uses that register as the shift operand.
define <1 x i64> @srshl1d_constant(<1 x i64>* %A) nounwind {
;CHECK-LABEL: srshl1d_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %value, <1 x i64> <i64 1>)
  ret <1 x i64> %result
}
; Plain i64 variant of srshl with a register shift amount: the D-register
; form of the instruction is expected.
define i64 @srshl_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: srshl_scalar:
;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load i64, i64* %A
  %amount = load i64, i64* %B
  %result = call i64 @llvm.aarch64.neon.srshl.i64(i64 %value, i64 %amount)
  ret i64 %result
}
; Plain i64 variant of srshl with a constant shift amount of 1. The expected
; output materializes the constant in a general-purpose register, moves it to
; a D register, and uses that register as the shift operand.
define i64 @srshl_scalar_constant(i64* %A) nounwind {
;CHECK-LABEL: srshl_scalar_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.srshl.i64(i64 %value, i64 1)
  ret i64 %result
}
define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: urshl8b:
;CHECK: urshl.8b
@ -200,6 +309,44 @@ define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
ret <2 x i32> %tmp3
}
; Register-operand urshl on <1 x i64>: value and shift amount are both loaded
; from memory and the scalar D-register form of the instruction is expected.
define <1 x i64> @urshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: urshl1d:
;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load <1 x i64>, <1 x i64>* %A
  %amount = load <1 x i64>, <1 x i64>* %B
  %result = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %value, <1 x i64> %amount)
  ret <1 x i64> %result
}
; urshl on <1 x i64> with a constant shift amount of 1. The expected output
; materializes the constant in a general-purpose register, moves it to a
; D register, and uses that register as the shift operand.
define <1 x i64> @urshl1d_constant(<1 x i64>* %A) nounwind {
;CHECK-LABEL: urshl1d_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %value, <1 x i64> <i64 1>)
  ret <1 x i64> %result
}
; Plain i64 variant of urshl with a register shift amount: the D-register
; form of the instruction is expected.
define i64 @urshl_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: urshl_scalar:
;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load i64, i64* %A
  %amount = load i64, i64* %B
  %result = call i64 @llvm.aarch64.neon.urshl.i64(i64 %value, i64 %amount)
  ret i64 %result
}
; Plain i64 variant of urshl with a constant shift amount of 1. The expected
; output materializes the constant in a general-purpose register, moves it to
; a D register, and uses that register as the shift operand.
define i64 @urshl_scalar_constant(i64* %A) nounwind {
;CHECK-LABEL: urshl_scalar_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.urshl.i64(i64 %value, i64 1)
  ret i64 %result
}
define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srshl16b:
;CHECK: srshl.16b
@ -276,11 +423,13 @@ declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind rea
declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@ -382,6 +531,44 @@ define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
ret <2 x i64> %tmp3
}
; Register-operand sqrshl on <1 x i64>: value and shift amount are both loaded
; from memory and the scalar D-register form of the instruction is expected.
define <1 x i64> @sqrshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: sqrshl1d:
;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load <1 x i64>, <1 x i64>* %A
  %amount = load <1 x i64>, <1 x i64>* %B
  %result = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %value, <1 x i64> %amount)
  ret <1 x i64> %result
}
; sqrshl on <1 x i64> with a constant shift amount of 1. The expected output
; materializes the constant in a general-purpose register, moves it to a
; D register, and uses that register as the shift operand.
define <1 x i64> @sqrshl1d_constant(<1 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshl1d_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %value, <1 x i64> <i64 1>)
  ret <1 x i64> %result
}
; Plain i64 variant of sqrshl with a register shift amount: the D-register
; form of the instruction is expected.
define i64 @sqrshl_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: sqrshl_scalar:
;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load i64, i64* %A
  %amount = load i64, i64* %B
  %result = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %value, i64 %amount)
  ret i64 %result
}
; Plain i64 variant of sqrshl with a constant shift amount of 1. The expected
; output materializes the constant in a general-purpose register, moves it to
; a D register, and uses that register as the shift operand.
define i64 @sqrshl_scalar_constant(i64* %A) nounwind {
;CHECK-LABEL: sqrshl_scalar_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %value, i64 1)
  ret i64 %result
}
define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqrshl16b:
;CHECK: uqrshl.16b
@ -418,15 +605,55 @@ define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
ret <2 x i64> %tmp3
}
; Register-operand uqrshl on <1 x i64>: value and shift amount are both loaded
; from memory and the scalar D-register form of the instruction is expected.
define <1 x i64> @uqrshl1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: uqrshl1d:
;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load <1 x i64>, <1 x i64>* %A
  %amount = load <1 x i64>, <1 x i64>* %B
  %result = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %value, <1 x i64> %amount)
  ret <1 x i64> %result
}
; uqrshl on <1 x i64> with a constant shift amount of 1. The expected output
; materializes the constant in a general-purpose register, moves it to a
; D register, and uses that register as the shift operand.
define <1 x i64> @uqrshl1d_constant(<1 x i64>* %A) nounwind {
;CHECK-LABEL: uqrshl1d_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %value, <1 x i64> <i64 1>)
  ret <1 x i64> %result
}
; Plain i64 variant of uqrshl with a register shift amount: the D-register
; form of the instruction is expected.
define i64 @uqrshl_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: uqrshl_scalar:
;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %value = load i64, i64* %A
  %amount = load i64, i64* %B
  %result = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %value, i64 %amount)
  ret i64 %result
}
; Plain i64 variant of uqrshl with a constant shift amount of 1. The expected
; output materializes the constant in a general-purpose register, moves it to
; a D register, and uses that register as the shift operand.
define i64 @uqrshl_scalar_constant(i64* %A) nounwind {
;CHECK-LABEL: uqrshl_scalar_constant:
;CHECK: mov w[[GCONST:[0-9]+]], #1
;CHECK: fmov d[[DCONST:[0-9]+]], x[[GCONST]]
;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, d[[DCONST]]
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %value, i64 1)
  ret i64 %result
}
declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@ -494,6 +721,22 @@ define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
ret <2 x i64> %tmp3
}
; urshl on <1 x i64> with a constant shift amount of -1: the output is
; expected to be the right-shift immediate instruction urshr with #1.
define <1 x i64> @urshr1d(<1 x i64>* %A) nounwind {
;CHECK-LABEL: urshr1d:
;CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %value, <1 x i64> <i64 -1>)
  ret <1 x i64> %result
}
; Plain i64 urshl with a constant shift amount of -1: the output is expected
; to be the right-shift immediate instruction urshr with #1.
define i64 @urshr_scalar(i64* %A) nounwind {
;CHECK-LABEL: urshr_scalar:
;CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.urshl.i64(i64 %value, i64 -1)
  ret i64 %result
}
define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: srshr8b:
;CHECK: srshr.8b
@ -550,6 +793,22 @@ define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
ret <2 x i64> %tmp3
}
; srshl on <1 x i64> with a constant shift amount of -1: the output is
; expected to be the right-shift immediate instruction srshr with #1.
define <1 x i64> @srshr1d(<1 x i64>* %A) nounwind {
;CHECK-LABEL: srshr1d:
;CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %value, <1 x i64> <i64 -1>)
  ret <1 x i64> %result
}
; Plain i64 srshl with a constant shift amount of -1: the output is expected
; to be the right-shift immediate instruction srshr with #1.
define i64 @srshr_scalar(i64* %A) nounwind {
;CHECK-LABEL: srshr_scalar:
;CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.srshl.i64(i64 %value, i64 -1)
  ret i64 %result
}
define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqshlu8b:
;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
@ -606,10 +865,27 @@ define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
ret <2 x i64> %tmp3
}
; sqshlu on <1 x i64> with a constant shift amount of 1: the immediate form
; (#1) of the scalar instruction is expected.
define <1 x i64> @sqshlu1d_constant(<1 x i64>* %A) nounwind {
;CHECK-LABEL: sqshlu1d_constant:
;CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %result = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %value, <1 x i64> <i64 1>)
  ret <1 x i64> %result
}
; Plain i64 variant of sqshlu with a constant shift amount of 1: the
; immediate form (#1) is expected.
define i64 @sqshlu_scalar_constant(i64* %A) nounwind {
;CHECK-LABEL: sqshlu_scalar_constant:
;CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load i64, i64* %A
  %result = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %value, i64 1)
  ret i64 %result
}
declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@ -1196,6 +1472,8 @@ declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)
define <8 x i16> @neon.ushll8h_constant_shift(<8 x i8>* %A) nounwind {
;CHECK-LABEL: neon.ushll8h_constant_shift
@ -1282,6 +1560,24 @@ define <2 x i64> @neon.ushll2d_constant_shift(<2 x i32>* %A) nounwind {
ret <2 x i64> %tmp3
}
; ushl on <1 x i64> with a constant shift amount of 1, applied to a value
; zero-extended from <1 x i32>: the immediate shl on a D register is expected.
; The label pattern ends with ':' so it anchors on the emitted assembly label,
; consistent with the other scalar shift tests in this file.
define <1 x i64> @neon.ushl_vscalar_constant_shift(<1 x i32>* %A) nounwind {
;CHECK-LABEL: neon.ushl_vscalar_constant_shift:
;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %tmp1 = load <1 x i32>, <1 x i32>* %A
  %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}
; Plain i64 ushl with a constant shift amount of 1, applied to a value
; zero-extended from i32: the immediate shl on a D register is expected.
; The label pattern ends with ':' so it anchors on the emitted assembly label,
; consistent with the other scalar shift tests in this file.
define i64 @neon.ushl_scalar_constant_shift(i32* %A) nounwind {
;CHECK-LABEL: neon.ushl_scalar_constant_shift:
;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %tmp1 = load i32, i32* %A
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1)
  ret i64 %tmp3
}
define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sshll8h:
;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
@ -1304,6 +1600,8 @@ declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)
define <16 x i8> @neon.sshl16b_constant_shift(<16 x i8>* %A) nounwind {
;CHECK-LABEL: neon.sshl16b_constant_shift
@ -1400,6 +1698,24 @@ define <2 x i64> @neon.sshll2d_constant_shift(<2 x i32>* %A) nounwind {
ret <2 x i64> %tmp3
}
; sshl on <1 x i64> with a constant shift amount of 1: the immediate shl on a
; D register is expected.
; The label pattern ends with ':' so it anchors on the emitted assembly label,
; consistent with the other scalar shift tests in this file.
; NOTE(review): the operand is widened with zext although the intrinsic under
; test is the signed sshl - confirm that zext (rather than sext) is intended.
define <1 x i64> @neon.sshll_vscalar_constant_shift(<1 x i32>* %A) nounwind {
;CHECK-LABEL: neon.sshll_vscalar_constant_shift:
;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %tmp1 = load <1 x i32>, <1 x i32>* %A
  %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}
; Plain i64 sshl with a constant shift amount of 1: the immediate shl on a
; D register is expected.
; The label pattern ends with ':' so it anchors on the emitted assembly label,
; consistent with the other scalar shift tests in this file.
; NOTE(review): the operand is widened with zext although the intrinsic under
; test is the signed sshl - confirm that zext (rather than sext) is intended.
define i64 @neon.sshll_scalar_constant_shift(i32* %A) nounwind {
;CHECK-LABEL: neon.sshll_scalar_constant_shift:
;CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #1
  %tmp1 = load i32, i32* %A
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
  ret i64 %tmp3
}
; FIXME: should be constant folded.
define <2 x i64> @neon.sshl2d_constant_fold() nounwind {
;CHECK-LABEL: neon.sshl2d_constant_fold
@ -1637,6 +1953,26 @@ define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
ret <2 x i64> %tmp5
}
; urshl by constant -1 on <1 x i64> followed by an add of a second loaded
; value: the pair is expected to become a single ursra with #1.
define <1 x i64> @ursra1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: ursra1d:
;CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %shifted = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %value, <1 x i64> <i64 -1>)
  %addend = load <1 x i64>, <1 x i64>* %B
  %sum = add <1 x i64> %shifted, %addend
  ret <1 x i64> %sum
}
; Plain i64 urshl by constant -1 followed by an add of a second loaded value:
; the pair is expected to become a single ursra with #1.
define i64 @ursra_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: ursra_scalar:
;CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load i64, i64* %A
  %shifted = call i64 @llvm.aarch64.neon.urshl.i64(i64 %value, i64 -1)
  %addend = load i64, i64* %B
  %sum = add i64 %shifted, %addend
  ret i64 %sum
}
define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srsra8b:
;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
@ -1707,6 +2043,26 @@ define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
ret <2 x i64> %tmp5
}
; srshl by constant -1 on <1 x i64> followed by an add of a second loaded
; value: the pair is expected to become a single srsra with #1.
define <1 x i64> @srsra1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: srsra1d:
;CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %shifted = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %value, <1 x i64> <i64 -1>)
  %addend = load <1 x i64>, <1 x i64>* %B
  %sum = add <1 x i64> %shifted, %addend
  ret <1 x i64> %sum
}
; Plain i64 srshl by constant -1 followed by an add of a second loaded value:
; the pair is expected to become a single srsra with #1.
define i64 @srsra_scalar(i64* %A, i64* %B) nounwind {
;CHECK-LABEL: srsra_scalar:
;CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load i64, i64* %A
  %shifted = call i64 @llvm.aarch64.neon.srshl.i64(i64 %value, i64 -1)
  %addend = load i64, i64* %B
  %sum = add i64 %shifted, %addend
  ret i64 %sum
}
define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usra8b:
;CHECK: usra.8b v0, {{v[0-9]+}}, #1
@ -1777,6 +2133,16 @@ define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
ret <2 x i64> %tmp5
}
; Logical shift right by 1 on <1 x i64> followed by an add of a second loaded
; value: the pair is expected to become a single usra with #1.
define <1 x i64> @usra1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: usra1d:
;CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #1
  %value = load <1 x i64>, <1 x i64>* %A
  %shifted = lshr <1 x i64> %value, <i64 1>
  %addend = load <1 x i64>, <1 x i64>* %B
  %sum = add <1 x i64> %shifted, %addend
  ret <1 x i64> %sum
}
define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: ssra8b:
;CHECK: ssra.8b v0, {{v[0-9]+}}, #1