[AArch64][SLP] Precommit tests which would be better not to SLP vectorize. NFC.
parent 7793db35ca
commit a2016dc887

@@ -0,0 +1,108 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -S | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These examples correspond to input code like:
;
; void t(long * __restrict a, long * __restrict b) {
;   a[0] *= b[0];
;   a[1] *= b[1];
; }
;
; If we SLP vectorise this then we end up with something like this because we
; don't have a mul.2d:
;
;   ldr q0, [x1]
;   ldr q1, [x0]
;   fmov x8, d0
;   mov x10, v0.d[1]
;   fmov x9, d1
;   mov x11, v1.d[1]
;   mul x8, x9, x8
;   mul x9, x11, x10
;   fmov d0, x8
;   mov v0.d[1], x9
;   str q0, [x0]
;   ret
;
; but if we don't SLP vectorise these examples we get this, which is smaller
; and faster:
;
;   ldp x8, x9, [x1]
;   ldp x10, x11, [x0]
;   mul x9, x11, x9
;   mul x8, x10, x8
;   stp x8, x9, [x0]
;   ret
;
; FIXME: don't SLP vectorise this.

define void @mul(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[B]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[A]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <2 x i64> [[TMP3]], [[TMP1]]
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[A]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %b, align 8
  %1 = load i64, i64* %a, align 8
  %mul = mul nsw i64 %1, %0
  store i64 %mul, i64* %a, align 8
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
  %2 = load i64, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
  %3 = load i64, i64* %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  store i64 %mul4, i64* %arrayidx3, align 8
  ret void
}
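
; To reproduce the assembly listings in the comment above, the IR can be
; piped through llc; this invocation is only a sketch, not part of the test,
; and the input file name is a placeholder:
;
;   opt -basic-aa -slp-vectorizer -S < mul.ll | llc -mtriple=aarch64--linux-gnu -o -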

; Similar example, but now a multiply-accumulate:
;
; void x (long * __restrict a, long * __restrict b) {
;   a[0] *= b[0];
;   a[1] *= b[1];
;   a[0] += b[0];
;   a[1] += b[1];
; }
;
define void @mac(i64* noalias nocapture %a, i64* noalias nocapture readonly %b) {
; CHECK-LABEL: @mac(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[B]] to <2 x i64>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[A]] to <2 x i64>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <2 x i64> [[TMP3]], [[TMP1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], [[TMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[A]] to <2 x i64>*
; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, i64* %b, align 8
  %1 = load i64, i64* %a, align 8
  %mul = mul nsw i64 %1, %0
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 1
  %2 = load i64, i64* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
  %3 = load i64, i64* %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  %add = add nsw i64 %mul, %0
  store i64 %add, i64* %a, align 8
  %add9 = add nsw i64 %mul4, %2
  store i64 %add9, i64* %arrayidx3, align 8
  ret void
}
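
; NOTE: the CHECK lines above can be regenerated (e.g. after the FIXME is
; addressed) with utils/update_test_checks.py; the opt binary path below is
; an assumption about the local build, and the test path is a placeholder:
;
;   utils/update_test_checks.py --opt-binary=build/bin/opt path/to/this/test.ll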