llvm-project/llvm/test/CodeGen/PowerPC/MergeConsecutiveStores.ll

; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s

;; This test ensures that MergeConsecutiveStores does not attempt to
;; merge stores or loads when doing so would result in unaligned
;; memory operations (unless the target supports those, e.g. X86).

;; This issue happen in other situations for other targets, but PPC
;; with Altivec extensions was chosen for the test because it does not
;; support unaligned access with AltiVec instructions. If the 4
;; load/stores get merged to an v4i32 vector type severely bad code
;; gets generated: it painstakingly copies the values to a temporary
;; location on the stack, with vector ops, in order to then use
;; integer ops to load from the temporary stack location and store to
;; the final location. Yuck!

%struct.X = type { i32, i32, i32, i32 }

@fx = common global %struct.X zeroinitializer, align 4
@fy = common global %struct.X zeroinitializer, align 4

;; In this test case, lvx and stvx instructions should NOT be
;; generated, as the alignment is not sufficient for it to be
;; worthwhile.

;; CHECK-LABEL: f:
;; CHECK-DAG:      lwzu
;; CHECK-DAG:      stwu
;; CHECK-DAG: lwz
;; CHECK-DAG: lwz
;; CHECK-DAG: lwz
;; CHECK-DAG: stw
;; CHECK-DAG: stw
;; CHECK-DAG: stw
;; CHECK-NEXT: blr
define void @f() {
entry:
  %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
  %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
  %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
  %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
  store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
  store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
  store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
  store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
  ret void
}

@gx = common global %struct.X zeroinitializer, align 16
@gy = common global %struct.X zeroinitializer, align 16

;; In this test, lvx and stvx instructions SHOULD be generated, as
;; the 16-byte alignment of the new load/store is acceptable.
;; CHECK-LABEL: g:
;; CHECK: lvx
;; CHECK: stvx
;; CHECK: blr
define void @g() {
entry:
  %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16
  %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
  %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
  %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
  store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16
  store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
  store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
  store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
  ret void
}
PowerPC: Do not use llc -march in tests. `llc -march` is problematic because it only switches the target architecture, but leaves the operating system unchanged. This occasionally leads to indeterministic tests because the OS from LLVM_DEFAULT_TARGET_TRIPLE is used. However we can simply always use `llc -mtriple` instead. This changes all the tests to do this to avoid people using -march when they copy and paste parts of tests. This patch: - Removes -march if the .ll file already has a matching `target triple` directive or -mtriple argument. - In all other cases changes -march=ppc32/-march=ppc64 to -mtriple=ppc32--/-mtriple=ppc64-- See also the discussion in https://reviews.llvm.org/D35287 llvm-svn: 309754 2017-08-02 06:20:41 +08:00			`; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s \| FileCheck %s`
Fix alignment checks in MergeConsecutiveStores. 1) check whether the alignment of the memory is sufficient for the merged store or load to be efficient. Not doing so can result in some ridiculously poor code generation, if merging creates a vector operation which must be aligned but isn't. 2) DON'T check that the alignment of each load/store is equal. If you're merging 2 4-byte stores, the first might have 8-byte alignment, but the second certainly will have 4-byte alignment. We do want to allow those to be merged. llvm-svn: 236850 2015-05-08 21:47:01 +08:00
			`;; This test ensures that MergeConsecutiveStores does not attempt to`
			`;; merge stores or loads when doing so would result in unaligned`
			`;; memory operations (unless the target supports those, e.g. X86).`

			`;; This issue happen in other situations for other targets, but PPC`
			`;; with Altivec extensions was chosen for the test because it does not`
			`;; support unaligned access with AltiVec instructions. If the 4`
			`;; load/stores get merged to an v4i32 vector type severely bad code`
			`;; gets generated: it painstakingly copies the values to a temporary`
			`;; location on the stack, with vector ops, in order to then use`
			`;; integer ops to load from the temporary stack location and store to`
			`;; the final location. Yuck!`

			`%struct.X = type { i32, i32, i32, i32 }`

			`@fx = common global %struct.X zeroinitializer, align 4`
			`@fy = common global %struct.X zeroinitializer, align 4`

			`;; In this test case, lvx and stvx instructions should NOT be`
			`;; generated, as the alignment is not sufficient for it to be`
			`;; worthwhile.`

			`;; CHECK-LABEL: f:`
[Power9] Code Cleanup - Remove needsAggressiveScheduling() As we already return true from needsAggressiveScheduling() for the most recent hardware it would be cleaner to just return true for all PowerPC hardware. Differential Revision: https://reviews.llvm.org/D48663 llvm-svn: 337488 2018-07-20 03:34:18 +08:00			`;; CHECK-DAG: lwzu`
			`;; CHECK-DAG: stwu`
			`;; CHECK-DAG: lwz`
			`;; CHECK-DAG: lwz`
			`;; CHECK-DAG: lwz`
			`;; CHECK-DAG: stw`
			`;; CHECK-DAG: stw`
			`;; CHECK-DAG: stw`
Fix alignment checks in MergeConsecutiveStores. 1) check whether the alignment of the memory is sufficient for the merged store or load to be efficient. Not doing so can result in some ridiculously poor code generation, if merging creates a vector operation which must be aligned but isn't. 2) DON'T check that the alignment of each load/store is equal. If you're merging 2 4-byte stores, the first might have 8-byte alignment, but the second certainly will have 4-byte alignment. We do want to allow those to be merged. llvm-svn: 236850 2015-05-08 21:47:01 +08:00			`;; CHECK-NEXT: blr`
			`define void @f() {`
			`entry:`
			`%0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4`
			`%1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4`
			`%2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4`
			`%3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4`
			`store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4`
			`store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4`
			`store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4`
			`store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4`
			`ret void`
			`}`

			`@gx = common global %struct.X zeroinitializer, align 16`
			`@gy = common global %struct.X zeroinitializer, align 16`

			`;; In this test, lvx and stvx instructions SHOULD be generated, as`
			`;; the 16-byte alignment of the new load/store is acceptable.`
			`;; CHECK-LABEL: g:`
			`;; CHECK: lvx`
			`;; CHECK: stvx`
			`;; CHECK: blr`
			`define void @g() {`
			`entry:`
			`%0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16`
			`%1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4`
			`%2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4`
			`%3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4`
			`store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16`
			`store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4`
			`store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4`
			`store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4`
			`ret void`
			`}`