[PowerPC][Power10] Exploit store rightmost vector element instructions

Use the store rightmost vector element instructions to extract a vector
element and store it. On little endian, the rightmost vector element is the
zeroth vector element; with these patterns, that element can be extracted
and stored in one instruction for all vector types.

Differential Revision: https://reviews.llvm.org/D89195
This commit is contained in:
Kamau Bridgeman 2020-12-22 12:04:57 -05:00 committed by Albion Fung
parent 9a7895dc20
commit 8a58f21f5b
3 changed files with 277 additions and 118 deletions

View File

@ -2554,16 +2554,21 @@ let Predicates = [IsISA3_1, HasVSX] in {
(COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
}
// Select STXVR[B|H|W|D]X for a store of vector element 0 when IsISA3_1 holds.
// In each pattern $rS is the source vector (copied to VSRC) and $src is the
// xoaddr address operand the value is stored to.
// NOTE(review): these patterns carry no endianness predicate; element 0 is
// presumably the rightmost element only on little-endian targets - confirm.
let AddedComplexity = 400, Predicates = [IsISA3_1] in {
// Byte: truncating i8 store of the i32 extracted from a v16i8.
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
(STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
// Halfword: truncating i16 store of the i32 extracted from a v8i16.
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src),
(STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
// Word: plain store of the i32 extracted from a v4i32.
def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src),
(STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
// Doubleword: plain store of the i64 extracted from a v2i64.
def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src),
(STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
}
// Patterns active only when both IsISA3_1 and IsLittleEndian hold.
// Each one matches "extract element 0 of a vector, then store it" and selects
// a single STXVR[B|H|W|D]X instruction. $src is the source vector (copied to
// the VSRC register class) and $dst is the xoaddr address operand.
// NOTE(review): AddedComplexity = 400 presumably makes these patterns win
// over other store-of-extract patterns - confirm against the rest of the file.
let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
// Store element 0 of a VSX register to memory
// Byte: truncating i8 store of the i32 extracted from a v16i8.
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$src, 0)), xoaddr:$dst),
(STXVRBX (COPY_TO_REGCLASS v16i8:$src, VSRC), xoaddr:$dst)>;
// Halfword: truncating i16 store of the i32 extracted from a v8i16.
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$src, 0)), xoaddr:$dst),
(STXVRHX (COPY_TO_REGCLASS v8i16:$src, VSRC), xoaddr:$dst)>;
// Word, integer element: v4i32 -> STXVRWX.
def : Pat<(store (i32 (extractelt v4i32:$src, 0)), xoaddr:$dst),
(STXVRWX (COPY_TO_REGCLASS v4i32:$src, VSRC), xoaddr:$dst)>;
// Word, floating-point element: v4f32 uses the same STXVRWX instruction.
def : Pat<(store (f32 (extractelt v4f32:$src, 0)), xoaddr:$dst),
(STXVRWX (COPY_TO_REGCLASS v4f32:$src, VSRC), xoaddr:$dst)>;
// Doubleword, integer element: v2i64 -> STXVRDX.
def : Pat<(store (i64 (extractelt v2i64:$src, 0)), xoaddr:$dst),
(STXVRDX (COPY_TO_REGCLASS v2i64:$src, VSRC), xoaddr:$dst)>;
// Doubleword, floating-point element: v2f64 uses the same STXVRDX instruction.
def : Pat<(store (f64 (extractelt v2f64:$src, 0)), xoaddr:$dst),
(STXVRDX (COPY_TO_REGCLASS v2f64:$src, VSRC), xoaddr:$dst)>;
}
// Helper pattern class: matches the given DAG `pattern` typed as v4i32 and
// selects XXEVAL on operands $vA, $vB, $vC with the 8-bit immediate `imm`.
// The operand names $vA/$vB/$vC must be bound inside `pattern` by the user.
class xxevalPattern <dag pattern, bits<8> imm> :
Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}

View File

@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-O0
; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-O0
; These test cases aim to test the builtins for the Power10 VSX vector
; instructions introduced in ISA 3.1.
@ -22,14 +22,6 @@ define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
; CHECK-NEXT: srwi r3, r3, 31
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: xvtlsbb cr0, v2
; CHECK-O0-NEXT: mfocrf r3, 128
; CHECK-O0-NEXT: srwi r3, r3, 31
; CHECK-O0-NEXT: extsw r3, r3
; CHECK-O0-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1)
ret i32 %0
@ -43,24 +35,22 @@ define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
; CHECK-NEXT: rlwinm r3, r3, 3, 31, 31
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: xvtlsbb cr0, v2
; CHECK-O0-NEXT: mfocrf r3, 128
; CHECK-O0-NEXT: rlwinm r3, r3, 3, 31, 31
; CHECK-O0-NEXT: extsw r3, r3
; CHECK-O0-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0)
ret i32 %0
}
define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_sc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stxvrbx v2, r6, r5
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_sc:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrbx v2, r6, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_sc:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9
; CHECK-BE-NEXT: stxsibx v2, r6, r5
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_sc:
; CHECK-O0: # %bb.0: # %entry
@ -79,10 +69,16 @@ entry:
}
define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_uc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stxvrbx v2, r6, r5
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_uc:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrbx v2, r6, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_uc:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9
; CHECK-BE-NEXT: stxsibx v2, r6, r5
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_uc:
; CHECK-O0: # %bb.0: # %entry
@ -101,11 +97,18 @@ entry:
}
define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_ss:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r5, 1
; CHECK-NEXT: stxvrhx v2, r6, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_ss:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r5, 1
; CHECK-LE-NEXT: stxvrhx v2, r6, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_ss:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: sldi r3, r5, 1
; CHECK-BE-NEXT: stxsihx v2, r6, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_ss:
; CHECK-O0: # %bb.0: # %entry
@ -125,11 +128,18 @@ entry:
}
define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_us:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r5, 1
; CHECK-NEXT: stxvrhx v2, r6, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_us:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r5, 1
; CHECK-LE-NEXT: stxvrhx v2, r6, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_us:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: sldi r3, r5, 1
; CHECK-BE-NEXT: stxsihx v2, r6, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_us:
; CHECK-O0: # %bb.0: # %entry
@ -149,11 +159,18 @@ entry:
}
define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_si:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r5, 2
; CHECK-NEXT: stxvrwx v2, r6, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_si:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r5, 2
; CHECK-LE-NEXT: stxvrwx v2, r6, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_si:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: sldi r3, r5, 2
; CHECK-BE-NEXT: stfiwx f0, r6, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_si:
; CHECK-O0: # %bb.0: # %entry
@ -173,11 +190,18 @@ entry:
}
define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_ui:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r5, 2
; CHECK-NEXT: stxvrwx v2, r6, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_ui:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r5, 2
; CHECK-LE-NEXT: stxvrwx v2, r6, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_ui:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: sldi r3, r5, 2
; CHECK-BE-NEXT: stfiwx f0, r6, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_ui:
; CHECK-O0: # %bb.0: # %entry
@ -197,11 +221,17 @@ entry:
}
define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_sll:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r5, 3
; CHECK-NEXT: stxvrdx v2, r6, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_sll:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r5, 3
; CHECK-LE-NEXT: stxvrdx v2, r6, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_sll:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: sldi r3, r5, 3
; CHECK-BE-NEXT: stxsdx v2, r6, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_sll:
; CHECK-O0: # %bb.0: # %entry
@ -219,11 +249,17 @@ entry:
}
define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
; CHECK-LABEL: vec_xst_trunc_ull:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r5, 3
; CHECK-NEXT: stxvrdx v2, r6, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xst_trunc_ull:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r5, 3
; CHECK-LE-NEXT: stxvrdx v2, r6, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xst_trunc_ull:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: sldi r3, r5, 3
; CHECK-BE-NEXT: stxsdx v2, r6, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_ull:
; CHECK-O0: # %bb.0: # %entry
@ -245,11 +281,6 @@ define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvrbx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: lxvrbx v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
%0 = load i8, i8* %add.ptr, align 1
@ -264,12 +295,6 @@ define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture rea
; CHECK-NEXT: sldi r3, r3, 1
; CHECK-NEXT: lxvrhx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext_short:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 1
; CHECK-O0-NEXT: lxvrhx v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
%0 = load i16, i16* %add.ptr, align 2
@ -284,12 +309,6 @@ define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture read
; CHECK-NEXT: sldi r3, r3, 2
; CHECK-NEXT: lxvrwx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext_word:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 2
; CHECK-O0-NEXT: lxvrwx v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
%0 = load i32, i32* %add.ptr, align 4
@ -304,12 +323,6 @@ define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readon
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: lxvrdx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_zext_dw:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: sldi r3, r3, 3
; CHECK-O0-NEXT: lxvrdx v2, r4, r3
; CHECK-O0-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
%0 = load i64, i64* %add.ptr, align 8
@ -319,13 +332,21 @@ entry:
}
define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
; CHECK-LABEL: vec_xl_sext_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lbzx r3, r4, r3
; CHECK-NEXT: extsb r3, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xl_sext_b:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: lbzx r3, r4, r3
; CHECK-LE-NEXT: extsb r3, r3
; CHECK-LE-NEXT: sradi r4, r3, 63
; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xl_sext_b:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lbzx r3, r4, r3
; CHECK-BE-NEXT: extsb r3, r3
; CHECK-BE-NEXT: sradi r4, r3, 63
; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_b:
; CHECK-O0: # %bb.0: # %entry
@ -343,13 +364,21 @@ entry:
}
define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
; CHECK-LABEL: vec_xl_sext_h:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 1
; CHECK-NEXT: lhax r3, r4, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xl_sext_h:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r3, 1
; CHECK-LE-NEXT: lhax r3, r4, r3
; CHECK-LE-NEXT: sradi r4, r3, 63
; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xl_sext_h:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: sldi r3, r3, 1
; CHECK-BE-NEXT: lhax r3, r4, r3
; CHECK-BE-NEXT: sradi r4, r3, 63
; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_h:
; CHECK-O0: # %bb.0: # %entry
@ -367,13 +396,21 @@ entry:
}
define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
; CHECK-LABEL: vec_xl_sext_w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 2
; CHECK-NEXT: lwax r3, r4, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xl_sext_w:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r3, 2
; CHECK-LE-NEXT: lwax r3, r4, r3
; CHECK-LE-NEXT: sradi r4, r3, 63
; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xl_sext_w:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: sldi r3, r3, 2
; CHECK-BE-NEXT: lwax r3, r4, r3
; CHECK-BE-NEXT: sradi r4, r3, 63
; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_w:
; CHECK-O0: # %bb.0: # %entry
@ -391,13 +428,21 @@ entry:
}
define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
; CHECK-LABEL: vec_xl_sext_d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r3, r3, 3
; CHECK-NEXT: ldx r3, r4, r3
; CHECK-NEXT: sradi r4, r3, 63
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
; CHECK-LE-LABEL: vec_xl_sext_d:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: sldi r3, r3, 3
; CHECK-LE-NEXT: ldx r3, r4, r3
; CHECK-LE-NEXT: sradi r4, r3, 63
; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: vec_xl_sext_d:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: sldi r3, r3, 3
; CHECK-BE-NEXT: ldx r3, r4, r3
; CHECK-BE-NEXT: sradi r4, r3, 63
; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xl_sext_d:
; CHECK-O0: # %bb.0: # %entry

View File

@ -0,0 +1,109 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-BE
; test1: store element 0 of a <4 x i32> to %a.
; The little-endian run expects a single stxvrwx; the big-endian run expects
; an xxsldwi followed by stfiwx.
define void @test1(<4 x i32> %A, i32* %a) {
; CHECK-LE-LABEL: test1:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrwx v2, 0, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: stfiwx f0, 0, r5
; CHECK-BE-NEXT: blr
entry:
%vecext = extractelement <4 x i32> %A, i32 0
store i32 %vecext, i32* %a, align 4
ret void
}
; test2: store element 0 of a <4 x float> to %a.
; The little-endian run expects a single stxvrwx; the big-endian run expects
; an xxsldwi followed by stfiwx.
define void @test2(<4 x float> %A, float* %a) {
; CHECK-LE-LABEL: test2:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrwx v2, 0, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3
; CHECK-BE-NEXT: stfiwx f0, 0, r5
; CHECK-BE-NEXT: blr
entry:
%vecext = extractelement <4 x float> %A, i32 0
store float %vecext, float* %a, align 4
ret void
}
; test3: store element 0 of a <2 x double> to %a.
; The little-endian run expects a single stxvrdx; the big-endian run expects
; a single stxsd.
define void @test3(<2 x double> %A, double* %a) {
; CHECK-LE-LABEL: test3:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrdx v2, 0, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: stxsd v2, 0(r5)
; CHECK-BE-NEXT: blr
entry:
%vecext = extractelement <2 x double> %A, i32 0
store double %vecext, double* %a, align 8
ret void
}
; test4: store element 0 of a <2 x i64> to %a.
; The little-endian run expects a single stxvrdx; the big-endian run expects
; a single stxsd.
define void @test4(<2 x i64> %A, i64* %a) {
; CHECK-LE-LABEL: test4:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrdx v2, 0, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test4:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: stxsd v2, 0(r5)
; CHECK-BE-NEXT: blr
entry:
%vecext = extractelement <2 x i64> %A, i32 0
store i64 %vecext, i64* %a, align 8
ret void
}
; test5: store element 0 of an <8 x i16> to %a.
; The little-endian run expects a single stxvrhx; the big-endian run expects
; a vsldoi followed by stxsihx.
define void @test5(<8 x i16> %A, i16* %a) {
; CHECK-LE-LABEL: test5:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrhx v2, 0, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test5:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10
; CHECK-BE-NEXT: stxsihx v2, 0, r5
; CHECK-BE-NEXT: blr
entry:
%vecext = extractelement <8 x i16> %A, i32 0
store i16 %vecext, i16* %a, align 2
ret void
}
; test6: store element 0 of a <16 x i8> to %a.
; The little-endian run expects a single stxvrbx; the big-endian run expects
; a vsldoi followed by stxsibx.
define void @test6(<16 x i8> %A, i8* %a) {
; CHECK-LE-LABEL: test6:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: stxvrbx v2, 0, r5
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test6:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: vsldoi v2, v2, v2, 9
; CHECK-BE-NEXT: stxsibx v2, 0, r5
; CHECK-BE-NEXT: blr
entry:
%vecext = extractelement <16 x i8> %A, i32 0
store i8 %vecext, i8* %a, align 1
ret void
}