; llvm-project/llvm/test/CodeGen/PowerPC/MergeConsecutiveStores.ll

; RUN: llc -verify-machineinstrs -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s

;; This test ensures that MergeConsecutiveStores does not attempt to
;; merge stores or loads when doing so would result in unaligned
;; memory operations (unless the target supports those, e.g. X86).

;; This issue can happen in other situations and on other targets, but PPC
;; with AltiVec extensions was chosen for the test because it does not
;; support unaligned access with AltiVec instructions. If the 4
;; loads/stores get merged into a v4i32 vector type, severely bad code
;; gets generated: it painstakingly copies the values to a temporary
;; location on the stack, with vector ops, in order to then use
;; integer ops to load from the temporary stack location and store to
;; the final location. Yuck!

;; Aggregate of four consecutive i32 fields; the functions below load and
;; store each field individually.
%struct.X = type { i32, i32, i32, i32 }

;; Two instances of %struct.X declared with only 4-byte alignment. @f copies
;; @fx into @fy field by field.
@fx = common global %struct.X zeroinitializer, align 4
@fy = common global %struct.X zeroinitializer, align 4

;; In this test case, lvx and stvx instructions should NOT be
;; generated, as the alignment is not sufficient for it to be
;; worthwhile.

;; CHECK-LABEL: f:
;; CHECK:      lwzu
;; CHECK-NEXT: lwz
;; CHECK-NEXT: lwz
;; CHECK-NEXT: lwz
;; CHECK-NEXT: stwu
;; CHECK-NEXT: stw
;; CHECK-NEXT: stw
;; CHECK-NEXT: stw
;; CHECK-NEXT: blr
define void @f() {
entry:
  ;; Read the four fields of @fx in ascending field order. Every access is
  ;; marked align 4, matching the 4-byte alignment declared on the global.
  %x.field0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
  %x.field1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
  %x.field2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
  %x.field3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
  ;; Write the loaded values to the matching fields of @fy, again with
  ;; align 4 on every store.
  store i32 %x.field0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
  store i32 %x.field1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
  store i32 %x.field2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
  store i32 %x.field3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
  ret void
}

;; Two instances of %struct.X declared with 16-byte alignment (same layout
;; as @fx/@fy, stricter alignment).
@gx = common global %struct.X zeroinitializer, align 16
@gy = common global %struct.X zeroinitializer, align 16

;; In this test, lvx and stvx instructions SHOULD be generated, as
;; the 16-byte alignment of the new load/store is acceptable.
;; CHECK-LABEL: g:
;; CHECK: lvx
;; CHECK: stvx
;; CHECK: blr
define void @g() {
entry:
  ;; Copy @gx into @gy one i32 field at a time. Both globals are declared
  ;; with 16-byte alignment, so the align 16 on the first load and the
  ;; first store is actually true of the object being addressed. The
  ;; remaining fields sit at 4-byte offsets and keep align 4.
  %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 0), align 16
  %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 1), align 4
  %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 2), align 4
  %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 3), align 4
  store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 0), align 16
  store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 1), align 4
  store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 2), align 4
  store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 3), align 4
  ret void
}
Adding -verify-machineinstrs option to PowerPC tests Currently we have a number of tests that fail with -verify-machineinstrs. To detect these cases earlier we add the option to the testcases with the exception of tests that will currently fail with this option. PR 27456 keeps track of these failures. No code review, as discussed with Hal Finkel. llvm-svn: 277624 2016-08-04 02:17:35 +08:00			`; RUN: llc -verify-machineinstrs -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s \| FileCheck %s`
Fix alignment checks in MergeConsecutiveStores. 1) check whether the alignment of the memory is sufficient for the merged store or load to be efficient. Not doing so can result in some ridiculously poor code generation, if merging creates a vector operation which must be aligned but isn't. 2) DON'T check that the alignment of each load/store is equal. If you're merging 2 4-byte stores, the first might have 8-byte alignment, but the second certainly will have 4-byte alignment. We do want to allow those to be merged. llvm-svn: 236850 2015-05-08 21:47:01 +08:00
			`;; This test ensures that MergeConsecutiveStores does not attempt to`
			`;; merge stores or loads when doing so would result in unaligned`
			`;; memory operations (unless the target supports those, e.g. X86).`

			`;; This issue can happen in other situations and on other targets, but PPC`
			`;; with AltiVec extensions was chosen for the test because it does not`
			`;; support unaligned access with AltiVec instructions. If the 4`
			`;; loads/stores get merged into a v4i32 vector type, severely bad code`
			`;; gets generated: it painstakingly copies the values to a temporary`
			`;; location on the stack, with vector ops, in order to then use`
			`;; integer ops to load from the temporary stack location and store to`
			`;; the final location. Yuck!`

			`%struct.X = type { i32, i32, i32, i32 }`

			`@fx = common global %struct.X zeroinitializer, align 4`
			`@fy = common global %struct.X zeroinitializer, align 4`

			`;; In this test case, lvx and stvx instructions should NOT be`
			`;; generated, as the alignment is not sufficient for it to be`
			`;; worthwhile.`

			`;; CHECK-LABEL: f:`
			`;; CHECK: lwzu`
			`;; CHECK-NEXT: lwz`
			`;; CHECK-NEXT: lwz`
			`;; CHECK-NEXT: lwz`
			`;; CHECK-NEXT: stwu`
			`;; CHECK-NEXT: stw`
			`;; CHECK-NEXT: stw`
			`;; CHECK-NEXT: stw`
			`;; CHECK-NEXT: blr`
			`define void @f() {`
			`entry:`
			`%0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4`
			`%1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4`
			`%2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4`
			`%3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4`
			`store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4`
			`store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4`
			`store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4`
			`store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4`
			`ret void`
			`}`

			`@gx = common global %struct.X zeroinitializer, align 16`
			`@gy = common global %struct.X zeroinitializer, align 16`

			`;; In this test, lvx and stvx instructions SHOULD be generated, as`
			`;; the 16-byte alignment of the new load/store is acceptable.`
			`;; CHECK-LABEL: g:`
			`;; CHECK: lvx`
			`;; CHECK: stvx`
			`;; CHECK: blr`
			`define void @g() {`
			`entry:`
			`%0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16`
			`%1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4`
			`%2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4`
			`%3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4`
			`store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16`
			`store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4`
			`store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4`
			`store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4`
			`ret void`
			`}`