; RUN: llc -mcpu=cortex-a53 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n8:16:32:64-S128"
target triple = "aarch64--linux-gnu"
declare void @f(i8*, i8*)
declare void @f2(i8*, i8*)
declare void @_Z5setupv()
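; Note: @llvm.memset below uses the pre-LLVM-7 signature, in which the
; alignment is passed as an explicit i32 argument.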
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
define i32 @main() local_unnamed_addr {
; Make sure the stores happen in the correct order (the exact instructions could change).
; CHECK-LABEL: main:
; This exercises the zero-store split optimization ("[AArch64] Split 0
; vector stores into scalar store pairs", https://reviews.llvm.org/D26561):
; a splat of zeros stored to a vector is replaced by scalar stores of
; WZR/XZR, which the load/store optimizer pass then merges into store-pair
; instructions. When the zero constant is not reused, this removes one
; instruction and one register live range compared with materializing the
; vector zero, so the generated code should be
;   stp xzr, xzr, [x0]
; instead of
;   movi v0.2d, #0
;   str q0, [x0]
; (A minimal example of the pattern appears after @main below.)
; CHECK: stp xzr, xzr, [sp, #72]
; CHECK: str w9, [sp, #80]
; CHECK: str q0, [sp, #48]
; CHECK: ldr w8, [sp, #48]
; CHECK: str q0, [sp, #64]
for.body.lr.ph.i.i.i.i.i.i63:
%b1 = alloca [10 x i32], align 16
%x0 = bitcast [10 x i32]* %b1 to i8*
%b2 = alloca [10 x i32], align 16
%x1 = bitcast [10 x i32]* %b2 to i8*
tail call void @_Z5setupv()
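; Zero b1[6..9]: this 16-byte memset is the splat of zeros that should be
; emitted as the "stp xzr, xzr" store pair checked above.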
%x2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 6
%x3 = bitcast i32* %x2 to i8*
call void @llvm.memset.p0i8.i64(i8* %x3, i8 0, i64 16, i32 8, i1 false)
%arraydecay2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 0
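; Store <1, 1, 1, 1> to b1[0..3].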
%x4 = bitcast [10 x i32]* %b1 to <4 x i32>*
store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %x4, align 16
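; Store <1, 1, 1, 1> to b1[4..7]. This overlaps b1[6..7], which the memset
; just zeroed, so this store must not be reordered before the zero stores.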
%incdec.ptr.i7.i.i.i.i.i.i64.3 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 4
%x5 = bitcast i32* %incdec.ptr.i7.i.i.i.i.i.i64.3 to <4 x i32>*
store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>* %x5, align 16
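; Store 1 to b1[8], also within the zeroed region.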
%incdec.ptr.i7.i.i.i.i.i.i64.7 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 8
store i32 1, i32* %incdec.ptr.i7.i.i.i.i.i.i64.7, align 16
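; Reload b1[0], which was set to 1 above; if the stores happened in the
; right order, the compare takes the %for.inc path.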
%x6 = load i32, i32* %arraydecay2, align 16
%cmp6 = icmp eq i32 %x6, 1
br i1 %cmp6, label %for.inc, label %if.then
for.inc:
call void @f(i8* %x0, i8* %x1)
ret i32 0
if.then:
call void @f2(i8* %x0, i8* %x1)
ret i32 0
}
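; A minimal sketch of the pattern the zero-store split targets. The function
; name @zero_splat_example is illustrative and not part of the original test:
; a 16-byte store of an all-zero vector, which should lower to
; "stp xzr, xzr, [x0]" rather than "movi v0.2d, #0" + "str q0, [x0]".
define void @zero_splat_example(<2 x i64>* %p) {
  store <2 x i64> zeroinitializer, <2 x i64>* %p, align 16
  ret void
}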