llvm-project/llvm/test/CodeGen/PowerPC/float-vector-gather.ll

; NOTE: This test ensures that, for both Big and Little Endian cases, a set of
; NOTE: 4 floats is gathered into a v4f32 register using xxmrghw and xxmrgld
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck %s -check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \
; RUN: | FileCheck %s -check-prefix=CHECK-BE
; vector_gatherf: loads one float through each of the four pointer arguments
; and packs the loaded values, in argument order, into lanes 0-3 of the
; returned <4 x float>.
define dso_local <4 x float> @vector_gatherf(float* nocapture readonly %a,
float* nocapture readonly %b, float* nocapture readonly %c,
float* nocapture readonly %d) {
; C code from which this IR test case was generated:
; vector float test(float *a, float *b, float *c, float *d) {
;  return (vector float) { *a, *b, *c, *d };
; }
; (The C snippet names the function "test"; the symbol verified below is
; vector_gatherf.)
;
; Little-endian expectations: four lfiwzx loads (one each through r3..r6),
; two xxmrghw merges that pair the r6/r5 loads and the r4/r3 loads, and then a
; single xxmrgld combining both merge results, directly followed by blr.
; The loads and xxmrghw lines use the order-independent directive form; only
; the xxmrgld/blr pair is pinned to consecutive output lines.
; The xxmrgld destination is captured as REG but never referenced again.
; NOTE(review): r3..r6 presumably hold %a..%d under the PowerPC64 calling
; convention -- confirm against the ABI document.
; CHECK-LE-LABEL: vector_gatherf:
; CHECK-LE:       # %bb.0: # %entry
; CHECK-LE-DAG:    lfiwzx f[[REG0:[0-9]+]], 0, r6
; CHECK-LE-DAG:    lfiwzx f[[REG1:[0-9]+]], 0, r5
; CHECK-LE-DAG:    lfiwzx f[[REG2:[0-9]+]], 0, r4
; CHECK-LE-DAG:    lfiwzx f[[REG3:[0-9]+]], 0, r3
; CHECK-LE-DAG:    xxmrghw vs[[REG0]], vs[[REG0]], vs[[REG1]]
; CHECK-LE-DAG:    xxmrghw vs[[REG4:[0-9]+]], vs[[REG2]], vs[[REG3]]
; CHECK-LE-NEXT:   xxmrgld v[[REG:[0-9]+]], vs[[REG0]], vs[[REG4]]
; CHECK-LE-NEXT:   blr

; Big-endian expectations: the same instruction mix as the little-endian run,
; but with the register pairing mirrored: the r3/r4 loads feed the first
; xxmrghw and the r5/r6 loads feed the second.
; CHECK-BE-LABEL: vector_gatherf:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-DAG:    lfiwzx f[[REG0:[0-9]+]], 0, r3
; CHECK-BE-DAG:    lfiwzx f[[REG1:[0-9]+]], 0, r4
; CHECK-BE-DAG:    lfiwzx f[[REG2:[0-9]+]], 0, r5
; CHECK-BE-DAG:    lfiwzx f[[REG3:[0-9]+]], 0, r6
; CHECK-BE-DAG:    xxmrghw vs[[REG0]], vs[[REG0]], vs[[REG1]]
; CHECK-BE-DAG:    xxmrghw vs[[REG4:[0-9]+]], vs[[REG2]], vs[[REG3]]
; CHECK-BE-NEXT:   xxmrgld v[[REG:[0-9]+]], vs[[REG0]], vs[[REG4]]
; CHECK-BE-NEXT:   blr
entry:
  ; Lane 0 <- *a. The vector starts as undef; all four lanes are written below.
  %0 = load float, float* %a, align 4
  %vecinit = insertelement <4 x float> undef, float %0, i32 0
  ; Lane 1 <- *b.
  %1 = load float, float* %b, align 4
  %vecinit1 = insertelement <4 x float> %vecinit, float %1, i32 1
  ; Lane 2 <- *c.
  %2 = load float, float* %c, align 4
  %vecinit2 = insertelement <4 x float> %vecinit1, float %2, i32 2
  ; Lane 3 <- *d.
  %3 = load float, float* %d, align 4
  %vecinit3 = insertelement <4 x float> %vecinit2, float %3, i32 3
  ; Return the fully populated vector.
  ret <4 x float> %vecinit3
}
Test commit. 2020-03-19 21:34:48 +08:00			`; NOTE: This test ensures that, for both Big and Little Endian cases, a set of`
[PowerPC] Improve float vector gather codegen This patch aims to improve the code generation for float vector gather on POWER9. Patterns have been implemented to utilize instructions that deliver improved performance. Patch by: Kamau Bridgeman Differential Revision: https://reviews.llvm.org/D62908 2019-11-19 05:50:44 +08:00			`; NOTE: 4 floats is gathered into a v4f32 register using xxmrghw and xxmrgld`
[PowerPC] Test case for vector float gather on ppc64le and ppc64 Test case to verify that the expected code is generated for a vector float gather based on the patterns in tablegen for big and little endian cases. Patch by: Kamau Bridgeman Differential Revision: https://reviews.llvm.org/D69443 2019-11-19 03:08:22 +08:00			`; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \`
			`; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \`
			`; RUN: \| FileCheck %s -check-prefix=CHECK-LE`
			`; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \`
			`; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \`
			`; RUN: \| FileCheck %s -check-prefix=CHECK-BE`
			`define dso_local <4 x float> @vector_gatherf(float* nocapture readonly %a,`
			`float* nocapture readonly %b, float* nocapture readonly %c,`
			`float* nocapture readonly %d) {`
			`; C code from which this IR test case was generated:`
			`; vector float test(float *a, float *b, float *c, float *d) {`
			`; return (vector float) { *a, *b, *c, *d };`
			`; }`
			`; CHECK-LE-LABEL: vector_gatherf:`
			`; CHECK-LE: # %bb.0: # %entry`
[PowerPC] Improve float vector gather codegen This patch aims to improve the code generation for float vector gather on POWER9. Patterns have been implemented to utilize instructions that deliver improved performance. Patch by: Kamau Bridgeman Differential Revision: https://reviews.llvm.org/D62908 2019-11-19 05:50:44 +08:00			`; CHECK-LE-DAG: lfiwzx f[[REG0:[0-9]+]], 0, r6`
			`; CHECK-LE-DAG: lfiwzx f[[REG1:[0-9]+]], 0, r5`
			`; CHECK-LE-DAG: lfiwzx f[[REG2:[0-9]+]], 0, r4`
			`; CHECK-LE-DAG: lfiwzx f[[REG3:[0-9]+]], 0, r3`
			`; CHECK-LE-DAG: xxmrghw vs[[REG0]], vs[[REG0]], vs[[REG1]]`
			`; CHECK-LE-DAG: xxmrghw vs[[REG4:[0-9]+]], vs[[REG2]], vs[[REG3]]`
			`; CHECK-LE-NEXT: xxmrgld v[[REG:[0-9]+]], vs[[REG0]], vs[[REG4]]`
[PowerPC] Test case for vector float gather on ppc64le and ppc64 Test case to verify that the expected code is generated for a vector float gather based on the patterns in tablegen for big and little endian cases. Patch by: Kamau Bridgeman Differential Revision: https://reviews.llvm.org/D69443 2019-11-19 03:08:22 +08:00			`; CHECK-LE-NEXT: blr`

			`; CHECK-BE-LABEL: vector_gatherf:`
			`; CHECK-BE: # %bb.0: # %entry`
[PowerPC] Improve float vector gather codegen This patch aims to improve the code generation for float vector gather on POWER9. Patterns have been implemented to utilize instructions that deliver improved performance. Patch by: Kamau Bridgeman Differential Revision: https://reviews.llvm.org/D62908 2019-11-19 05:50:44 +08:00			`; CHECK-BE-DAG: lfiwzx f[[REG0:[0-9]+]], 0, r3`
			`; CHECK-BE-DAG: lfiwzx f[[REG1:[0-9]+]], 0, r4`
			`; CHECK-BE-DAG: lfiwzx f[[REG2:[0-9]+]], 0, r5`
			`; CHECK-BE-DAG: lfiwzx f[[REG3:[0-9]+]], 0, r6`
			`; CHECK-BE-DAG: xxmrghw vs[[REG0]], vs[[REG0]], vs[[REG1]]`
			`; CHECK-BE-DAG: xxmrghw vs[[REG4:[0-9]+]], vs[[REG2]], vs[[REG3]]`
			`; CHECK-BE-NEXT: xxmrgld v[[REG:[0-9]+]], vs[[REG0]], vs[[REG4]]`
[PowerPC] Test case for vector float gather on ppc64le and ppc64 Test case to verify that the expected code is generated for a vector float gather based on the patterns in tablegen for big and little endian cases. Patch by: Kamau Bridgeman Differential Revision: https://reviews.llvm.org/D69443 2019-11-19 03:08:22 +08:00			`; CHECK-BE-NEXT: blr`
			`entry:`
			`%0 = load float, float* %a, align 4`
			`%vecinit = insertelement <4 x float> undef, float %0, i32 0`
			`%1 = load float, float* %b, align 4`
			`%vecinit1 = insertelement <4 x float> %vecinit, float %1, i32 1`
			`%2 = load float, float* %c, align 4`
			`%vecinit2 = insertelement <4 x float> %vecinit1, float %2, i32 2`
			`%3 = load float, float* %d, align 4`
			`%vecinit3 = insertelement <4 x float> %vecinit2, float %3, i32 3`
			`ret <4 x float> %vecinit3`
			`}`