forked from OSchip/llvm-project
[PowerPC] Disable perfect shuffle by default
We are going to remove the old 'perfect shuffle' optimization because it introduces a performance penalty in hot loops that operate on vectors. For example, in the following loop, where both shuffles share the same mask: %v.1 = shufflevector ... <0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27> %v.2 = shufflevector ... <0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27> the generated instructions will be `vmrglw-vmrghw-vmrglw-vmrghw` instead of `vperm-vperm`. In some large loops, this causes a 20%+ performance penalty. The original attempt to resolve this was to pre-record the masks of every shufflevector operation in the DAG, but that is somewhat complex and adds unnecessary computation (scanning all nodes) during optimization. Here we disable the optimization by default. Some cases do become worse after this change; they will be fixed more carefully in future patches. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D121082
This commit is contained in:
parent
23e3cbe24a
commit
300e1293de
|
@ -129,7 +129,7 @@ static cl::opt<bool> EnableQuadwordAtomics(
|
|||
static cl::opt<bool>
|
||||
DisablePerfectShuffle("ppc-disable-perfect-shuffle",
|
||||
cl::desc("disable vector permute decomposition"),
|
||||
cl::init(false), cl::Hidden);
|
||||
cl::init(true), cl::Hidden);
|
||||
|
||||
STATISTIC(NumTailCalls, "Number of tail calls");
|
||||
STATISTIC(NumSiblingCalls, "Number of sibling calls");
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | grep vsldoi
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | not grep vor
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 -ppc-disable-perfect-shuffle=false | grep vsldoi
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 -ppc-disable-perfect-shuffle=false | not grep vor
|
||||
|
||||
; TODO: Fix this case when disabling perfect shuffle
|
||||
|
||||
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) {
|
||||
%tmp76 = shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>> [#uses=1]
|
||||
|
|
|
@ -1447,16 +1447,16 @@ entry:
|
|||
define <4 x float> @testSameVecEl0LE(<4 x float> %a) {
|
||||
; CHECK-64-LABEL: testSameVecEl0LE:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xxspltw 0, 34, 2
|
||||
; CHECK-64-NEXT: xxsldwi 0, 34, 0, 1
|
||||
; CHECK-64-NEXT: xxsldwi 34, 0, 0, 3
|
||||
; CHECK-64-NEXT: ld 3, L..C0(2) # %const.0
|
||||
; CHECK-64-NEXT: lxv 35, 0(3)
|
||||
; CHECK-64-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testSameVecEl0LE:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xxspltw 0, 34, 2
|
||||
; CHECK-32-NEXT: xxsldwi 0, 34, 0, 1
|
||||
; CHECK-32-NEXT: xxsldwi 34, 0, 0, 3
|
||||
; CHECK-32-NEXT: lwz 3, L..C0(2) # %const.0
|
||||
; CHECK-32-NEXT: lxv 35, 0(3)
|
||||
; CHECK-32-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-32-NEXT: blr
|
||||
entry:
|
||||
%vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
|
||||
|
@ -1465,16 +1465,16 @@ entry:
|
|||
define <4 x float> @testSameVecEl1LE(<4 x float> %a) {
|
||||
; CHECK-64-LABEL: testSameVecEl1LE:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xxswapd 0, 34
|
||||
; CHECK-64-NEXT: xxmrghw 1, 34, 0
|
||||
; CHECK-64-NEXT: xxmrghw 34, 1, 0
|
||||
; CHECK-64-NEXT: ld 3, L..C1(2) # %const.0
|
||||
; CHECK-64-NEXT: lxv 35, 0(3)
|
||||
; CHECK-64-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testSameVecEl1LE:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xxswapd 0, 34
|
||||
; CHECK-32-NEXT: xxmrghw 1, 34, 0
|
||||
; CHECK-32-NEXT: xxmrghw 34, 1, 0
|
||||
; CHECK-32-NEXT: lwz 3, L..C1(2) # %const.0
|
||||
; CHECK-32-NEXT: lxv 35, 0(3)
|
||||
; CHECK-32-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-32-NEXT: blr
|
||||
entry:
|
||||
%vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
|
||||
|
@ -1483,16 +1483,16 @@ entry:
|
|||
define <4 x float> @testSameVecEl3LE(<4 x float> %a) {
|
||||
; CHECK-64-LABEL: testSameVecEl3LE:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xxspltw 0, 34, 2
|
||||
; CHECK-64-NEXT: xxswapd 1, 34
|
||||
; CHECK-64-NEXT: xxsldwi 34, 1, 0, 2
|
||||
; CHECK-64-NEXT: ld 3, L..C2(2) # %const.0
|
||||
; CHECK-64-NEXT: lxv 35, 0(3)
|
||||
; CHECK-64-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: testSameVecEl3LE:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xxspltw 0, 34, 2
|
||||
; CHECK-32-NEXT: xxswapd 1, 34
|
||||
; CHECK-32-NEXT: xxsldwi 34, 1, 0, 2
|
||||
; CHECK-32-NEXT: lwz 3, L..C2(2) # %const.0
|
||||
; CHECK-32-NEXT: lxv 35, 0(3)
|
||||
; CHECK-32-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-32-NEXT: blr
|
||||
entry:
|
||||
%vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
|
||||
|
|
|
@ -30,12 +30,13 @@ define void @test_aix_splatimm(i32 %arg, i32 %arg1, i32 %arg2) {
|
|||
; CHECK-AIX-NEXT: lxvw4x 35, 0, 3
|
||||
; CHECK-AIX-NEXT: addi 3, 1, -16
|
||||
; CHECK-AIX-NEXT: lxvw4x 36, 0, 3
|
||||
; CHECK-AIX-NEXT: ld 3, L..C0(2) # %const.0
|
||||
; CHECK-AIX-NEXT: vmrghh 3, 2, 3
|
||||
; CHECK-AIX-NEXT: vsplth 5, 2, 0
|
||||
; CHECK-AIX-NEXT: vmrghh 2, 4, 2
|
||||
; CHECK-AIX-NEXT: xxmrghw 35, 35, 37
|
||||
; CHECK-AIX-NEXT: xxswapd 0, 35
|
||||
; CHECK-AIX-NEXT: xxsldwi 34, 0, 34, 2
|
||||
; CHECK-AIX-NEXT: vmrghh 4, 4, 2
|
||||
; CHECK-AIX-NEXT: vsplth 2, 2, 0
|
||||
; CHECK-AIX-NEXT: xxmrghw 34, 35, 34
|
||||
; CHECK-AIX-NEXT: lxvw4x 35, 0, 3
|
||||
; CHECK-AIX-NEXT: vperm 2, 2, 4, 3
|
||||
; CHECK-AIX-NEXT: vsplth 3, 2, 1
|
||||
; CHECK-AIX-NEXT: vsplth 2, 2, 4
|
||||
; CHECK-AIX-NEXT: stxvw4x 35, 0, 5
|
||||
|
|
|
@ -55,15 +55,15 @@ entry:
|
|||
define <2 x i64> @buildl(i64 %a) {
|
||||
; CHECK-LABEL: buildl:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lwz 5, L..C0(2) # %const.0
|
||||
; CHECK-NEXT: stw 4, -16(1)
|
||||
; CHECK-NEXT: stw 3, -32(1)
|
||||
; CHECK-NEXT: addi 3, 1, -16
|
||||
; CHECK-NEXT: addi 4, 1, -32
|
||||
; CHECK-NEXT: lxvw4x 0, 0, 3
|
||||
; CHECK-NEXT: lxvw4x 1, 0, 4
|
||||
; CHECK-NEXT: xxmrghw 34, 1, 0
|
||||
; CHECK-NEXT: xxswapd 0, 34
|
||||
; CHECK-NEXT: xxsldwi 34, 0, 34, 2
|
||||
; CHECK-NEXT: lxvw4x 35, 0, 3
|
||||
; CHECK-NEXT: lxvw4x 36, 0, 4
|
||||
; CHECK-NEXT: lxvw4x 34, 0, 5
|
||||
; CHECK-NEXT: vperm 2, 4, 3, 2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%splat.splatinsert = insertelement <2 x i64> undef, i64 %a, i32 0
|
||||
|
@ -90,7 +90,7 @@ entry:
|
|||
define <2 x double> @buildd() {
|
||||
; CHECK-LABEL: buildd:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lwz 3, L..C0(2) # @d
|
||||
; CHECK-NEXT: lwz 3, L..C1(2) # @d
|
||||
; CHECK-NEXT: lxvdsx 34, 0, 3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
|
|
|
@ -584,14 +584,16 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture
|
|||
;
|
||||
; CHECK-BE-LABEL: test_stores_exceed_vec_size:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xxspltw vs0, vs34, 0
|
||||
; CHECK-BE-NEXT: xxsldwi vs1, vs34, vs34, 1
|
||||
; CHECK-BE-NEXT: li r3, 16
|
||||
; CHECK-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
|
||||
; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
|
||||
; CHECK-BE-NEXT: li r4, 20
|
||||
; CHECK-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
|
||||
; CHECK-BE-NEXT: lxvw4x vs35, 0, r3
|
||||
; CHECK-BE-NEXT: li r3, 16
|
||||
; CHECK-BE-NEXT: stxsiwx vs34, r5, r3
|
||||
; CHECK-BE-NEXT: xxsldwi vs0, vs34, vs0, 2
|
||||
; CHECK-BE-NEXT: stfiwx f1, r5, r4
|
||||
; CHECK-BE-NEXT: stxvw4x vs0, 0, r5
|
||||
; CHECK-BE-NEXT: stfiwx f0, r5, r4
|
||||
; CHECK-BE-NEXT: vperm v3, v2, v2, v3
|
||||
; CHECK-BE-NEXT: stxvw4x vs35, 0, r5
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: test_stores_exceed_vec_size:
|
||||
|
@ -610,14 +612,16 @@ define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture
|
|||
;
|
||||
; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size:
|
||||
; CHECK-P9-BE: # %bb.0: # %entry
|
||||
; CHECK-P9-BE-NEXT: xxspltw vs0, vs34, 0
|
||||
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
|
||||
; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
|
||||
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
|
||||
; CHECK-P9-BE-NEXT: lxv vs35, 0(r3)
|
||||
; CHECK-P9-BE-NEXT: li r3, 16
|
||||
; CHECK-P9-BE-NEXT: stxsiwx vs34, r5, r3
|
||||
; CHECK-P9-BE-NEXT: li r3, 20
|
||||
; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs0, 2
|
||||
; CHECK-P9-BE-NEXT: stxv vs0, 0(r5)
|
||||
; CHECK-P9-BE-NEXT: xxsldwi vs0, vs34, vs34, 1
|
||||
; CHECK-P9-BE-NEXT: stfiwx f0, r5, r3
|
||||
; CHECK-P9-BE-NEXT: vperm v3, v2, v2, v3
|
||||
; CHECK-P9-BE-NEXT: stxv vs35, 0(r5)
|
||||
; CHECK-P9-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %a, i32 2
|
||||
|
|
|
@ -211,45 +211,45 @@ define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a
|
|||
; P9-AIX32-NEXT: lwz r5, 24(r4)
|
||||
; P9-AIX32-NEXT: lwz r4, 28(r4)
|
||||
; P9-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P9-AIX32-NEXT: lwz r4, L..C0(r2) # %const.0
|
||||
; P9-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P9-AIX32-NEXT: lxv vs0, -16(r1)
|
||||
; P9-AIX32-NEXT: lxv vs1, -32(r1)
|
||||
; P9-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P9-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P9-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P9-AIX32-NEXT: stxv vs0, 0(r3)
|
||||
; P9-AIX32-NEXT: lxv v3, -16(r1)
|
||||
; P9-AIX32-NEXT: lxv v4, -32(r1)
|
||||
; P9-AIX32-NEXT: lxv v2, 0(r4)
|
||||
; P9-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P9-AIX32-NEXT: stxv v2, 0(r3)
|
||||
; P9-AIX32-NEXT: blr
|
||||
;
|
||||
; P8-AIX32-LABEL: test4:
|
||||
; P8-AIX32: # %bb.0: # %entry
|
||||
; P8-AIX32-NEXT: lwz r5, 24(r4)
|
||||
; P8-AIX32-NEXT: lwz r4, 28(r4)
|
||||
; P8-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P8-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P8-AIX32-NEXT: lwz r5, L..C0(r2) # %const.0
|
||||
; P8-AIX32-NEXT: lwz r6, 28(r4)
|
||||
; P8-AIX32-NEXT: lwz r4, 24(r4)
|
||||
; P8-AIX32-NEXT: stw r6, -16(r1)
|
||||
; P8-AIX32-NEXT: stw r4, -32(r1)
|
||||
; P8-AIX32-NEXT: addi r4, r1, -16
|
||||
; P8-AIX32-NEXT: lxvw4x v2, 0, r5
|
||||
; P8-AIX32-NEXT: addi r5, r1, -32
|
||||
; P8-AIX32-NEXT: lxvw4x vs0, 0, r4
|
||||
; P8-AIX32-NEXT: lxvw4x vs1, 0, r5
|
||||
; P8-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P8-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P8-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
|
||||
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
|
||||
; P8-AIX32-NEXT: lxvw4x v4, 0, r5
|
||||
; P8-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
|
||||
; P8-AIX32-NEXT: blr
|
||||
;
|
||||
; P7-AIX32-LABEL: test4:
|
||||
; P7-AIX32: # %bb.0: # %entry
|
||||
; P7-AIX32-NEXT: lwz r6, 28(r4)
|
||||
; P7-AIX32-NEXT: lwz r4, 24(r4)
|
||||
; P7-AIX32-NEXT: addi r5, r1, -16
|
||||
; P7-AIX32-NEXT: stw r6, -16(r1)
|
||||
; P7-AIX32-NEXT: stw r4, -32(r1)
|
||||
; P7-AIX32-NEXT: addi r4, r1, -32
|
||||
; P7-AIX32-NEXT: lxvw4x vs0, 0, r5
|
||||
; P7-AIX32-NEXT: lxvw4x vs1, 0, r4
|
||||
; P7-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P7-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P7-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
|
||||
; P7-AIX32-NEXT: lwz r5, L..C0(r2) # %const.0
|
||||
; P7-AIX32-NEXT: lwz r6, 24(r4)
|
||||
; P7-AIX32-NEXT: lwz r4, 28(r4)
|
||||
; P7-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P7-AIX32-NEXT: stw r6, -32(r1)
|
||||
; P7-AIX32-NEXT: lxvw4x v2, 0, r5
|
||||
; P7-AIX32-NEXT: addi r4, r1, -16
|
||||
; P7-AIX32-NEXT: addi r5, r1, -32
|
||||
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
|
||||
; P7-AIX32-NEXT: lxvw4x v4, 0, r5
|
||||
; P7-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
|
||||
; P7-AIX32-NEXT: blr
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i64, i64* %a, i64 3
|
||||
|
@ -288,45 +288,45 @@ define void @test5(<2 x i64>* %a, i32* %in) {
|
|||
; P9-AIX32-NEXT: lwz r4, 0(r4)
|
||||
; P9-AIX32-NEXT: srawi r5, r4, 31
|
||||
; P9-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P9-AIX32-NEXT: lxv vs0, -16(r1)
|
||||
; P9-AIX32-NEXT: lwz r4, L..C1(r2) # %const.0
|
||||
; P9-AIX32-NEXT: lxv v3, -16(r1)
|
||||
; P9-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P9-AIX32-NEXT: lxv vs1, -32(r1)
|
||||
; P9-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P9-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P9-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P9-AIX32-NEXT: stxv vs0, 0(r3)
|
||||
; P9-AIX32-NEXT: lxv v4, -32(r1)
|
||||
; P9-AIX32-NEXT: lxv v2, 0(r4)
|
||||
; P9-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P9-AIX32-NEXT: stxv v2, 0(r3)
|
||||
; P9-AIX32-NEXT: blr
|
||||
;
|
||||
; P8-AIX32-LABEL: test5:
|
||||
; P8-AIX32: # %bb.0: # %entry
|
||||
; P8-AIX32-NEXT: lwz r5, L..C1(r2) # %const.0
|
||||
; P8-AIX32-NEXT: lwz r4, 0(r4)
|
||||
; P8-AIX32-NEXT: srawi r5, r4, 31
|
||||
; P8-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P8-AIX32-NEXT: srawi r4, r4, 31
|
||||
; P8-AIX32-NEXT: stw r4, -32(r1)
|
||||
; P8-AIX32-NEXT: lxvw4x v2, 0, r5
|
||||
; P8-AIX32-NEXT: addi r4, r1, -16
|
||||
; P8-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P8-AIX32-NEXT: addi r5, r1, -32
|
||||
; P8-AIX32-NEXT: lxvw4x vs0, 0, r4
|
||||
; P8-AIX32-NEXT: lxvw4x vs1, 0, r5
|
||||
; P8-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P8-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P8-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
|
||||
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
|
||||
; P8-AIX32-NEXT: lxvw4x v4, 0, r5
|
||||
; P8-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
|
||||
; P8-AIX32-NEXT: blr
|
||||
;
|
||||
; P7-AIX32-LABEL: test5:
|
||||
; P7-AIX32: # %bb.0: # %entry
|
||||
; P7-AIX32-NEXT: lwz r4, 0(r4)
|
||||
; P7-AIX32-NEXT: addi r5, r1, -16
|
||||
; P7-AIX32-NEXT: lwz r5, L..C1(r2) # %const.0
|
||||
; P7-AIX32-NEXT: srawi r6, r4, 31
|
||||
; P7-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P7-AIX32-NEXT: srawi r4, r4, 31
|
||||
; P7-AIX32-NEXT: stw r4, -32(r1)
|
||||
; P7-AIX32-NEXT: addi r4, r1, -32
|
||||
; P7-AIX32-NEXT: lxvw4x vs0, 0, r5
|
||||
; P7-AIX32-NEXT: lxvw4x vs1, 0, r4
|
||||
; P7-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P7-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P7-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
|
||||
; P7-AIX32-NEXT: addi r4, r1, -16
|
||||
; P7-AIX32-NEXT: stw r6, -32(r1)
|
||||
; P7-AIX32-NEXT: lxvw4x v2, 0, r5
|
||||
; P7-AIX32-NEXT: addi r5, r1, -32
|
||||
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
|
||||
; P7-AIX32-NEXT: lxvw4x v4, 0, r5
|
||||
; P7-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
|
||||
; P7-AIX32-NEXT: blr
|
||||
entry:
|
||||
%0 = load i32, i32* %in, align 4
|
||||
|
@ -365,45 +365,45 @@ define void @test6(<2 x i64>* %a, i32* %in) {
|
|||
; P9-AIX32-NEXT: lwz r4, 0(r4)
|
||||
; P9-AIX32-NEXT: li r5, 0
|
||||
; P9-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P9-AIX32-NEXT: lxv vs0, -32(r1)
|
||||
; P9-AIX32-NEXT: lxv v3, -32(r1)
|
||||
; P9-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P9-AIX32-NEXT: lxv vs1, -16(r1)
|
||||
; P9-AIX32-NEXT: xxmrghw v2, vs0, vs1
|
||||
; P9-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P9-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P9-AIX32-NEXT: stxv vs0, 0(r3)
|
||||
; P9-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0
|
||||
; P9-AIX32-NEXT: lxv v4, -16(r1)
|
||||
; P9-AIX32-NEXT: lxv v2, 0(r4)
|
||||
; P9-AIX32-NEXT: vperm v2, v3, v4, v2
|
||||
; P9-AIX32-NEXT: stxv v2, 0(r3)
|
||||
; P9-AIX32-NEXT: blr
|
||||
;
|
||||
; P8-AIX32-LABEL: test6:
|
||||
; P8-AIX32: # %bb.0: # %entry
|
||||
; P8-AIX32-NEXT: lwz r6, L..C2(r2) # %const.0
|
||||
; P8-AIX32-NEXT: lwz r4, 0(r4)
|
||||
; P8-AIX32-NEXT: li r5, 0
|
||||
; P8-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P8-AIX32-NEXT: addi r5, r1, -16
|
||||
; P8-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P8-AIX32-NEXT: addi r4, r1, -32
|
||||
; P8-AIX32-NEXT: lxvw4x vs0, 0, r4
|
||||
; P8-AIX32-NEXT: lxvw4x vs1, 0, r5
|
||||
; P8-AIX32-NEXT: xxmrghw v2, vs0, vs1
|
||||
; P8-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P8-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
|
||||
; P8-AIX32-NEXT: lxvw4x v2, 0, r6
|
||||
; P8-AIX32-NEXT: lxvw4x v3, 0, r4
|
||||
; P8-AIX32-NEXT: lxvw4x v4, 0, r5
|
||||
; P8-AIX32-NEXT: vperm v2, v3, v4, v2
|
||||
; P8-AIX32-NEXT: stxvw4x v2, 0, r3
|
||||
; P8-AIX32-NEXT: blr
|
||||
;
|
||||
; P7-AIX32-LABEL: test6:
|
||||
; P7-AIX32: # %bb.0: # %entry
|
||||
; P7-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0
|
||||
; P7-AIX32-NEXT: lwz r4, 0(r4)
|
||||
; P7-AIX32-NEXT: li r5, 0
|
||||
; P7-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P7-AIX32-NEXT: addi r5, r1, -16
|
||||
; P7-AIX32-NEXT: li r6, 0
|
||||
; P7-AIX32-NEXT: stw r6, -32(r1)
|
||||
; P7-AIX32-NEXT: stw r4, -16(r1)
|
||||
; P7-AIX32-NEXT: addi r4, r1, -32
|
||||
; P7-AIX32-NEXT: lxvw4x vs0, 0, r4
|
||||
; P7-AIX32-NEXT: lxvw4x vs1, 0, r5
|
||||
; P7-AIX32-NEXT: xxmrghw v2, vs0, vs1
|
||||
; P7-AIX32-NEXT: xxswapd vs0, v2
|
||||
; P7-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
|
||||
; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
|
||||
; P7-AIX32-NEXT: lxvw4x v2, 0, r5
|
||||
; P7-AIX32-NEXT: addi r5, r1, -16
|
||||
; P7-AIX32-NEXT: lxvw4x v3, 0, r4
|
||||
; P7-AIX32-NEXT: lxvw4x v4, 0, r5
|
||||
; P7-AIX32-NEXT: vperm v2, v3, v4, v2
|
||||
; P7-AIX32-NEXT: stxvw4x v2, 0, r3
|
||||
; P7-AIX32-NEXT: blr
|
||||
entry:
|
||||
%0 = load i32, i32* %in, align 4
|
||||
|
@ -832,32 +832,34 @@ define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
|
|||
;
|
||||
; P8-AIX32-LABEL: unadjusted_lxvdsx:
|
||||
; P8-AIX32: # %bb.0: # %entry
|
||||
; P8-AIX32-NEXT: lwz r4, 4(r3)
|
||||
; P8-AIX32-NEXT: stw r4, -32(r1)
|
||||
; P8-AIX32-NEXT: addi r4, r1, -16
|
||||
; P8-AIX32-NEXT: lwz r5, 4(r3)
|
||||
; P8-AIX32-NEXT: lwz r4, L..C3(r2) # %const.0
|
||||
; P8-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P8-AIX32-NEXT: lwz r3, 0(r3)
|
||||
; P8-AIX32-NEXT: lxvw4x v2, 0, r4
|
||||
; P8-AIX32-NEXT: addi r4, r1, -16
|
||||
; P8-AIX32-NEXT: stw r3, -16(r1)
|
||||
; P8-AIX32-NEXT: addi r3, r1, -32
|
||||
; P8-AIX32-NEXT: lxvw4x vs0, 0, r3
|
||||
; P8-AIX32-NEXT: lxvw4x vs1, 0, r4
|
||||
; P8-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P8-AIX32-NEXT: xxsldwi vs0, vs1, v2, 2
|
||||
; P8-AIX32-NEXT: xxmrgld v2, vs0, vs0
|
||||
; P8-AIX32-NEXT: lxvw4x v3, 0, r3
|
||||
; P8-AIX32-NEXT: lxvw4x v4, 0, r4
|
||||
; P8-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P8-AIX32-NEXT: xxmrghd v2, v2, v2
|
||||
; P8-AIX32-NEXT: blr
|
||||
;
|
||||
; P7-AIX32-LABEL: unadjusted_lxvdsx:
|
||||
; P7-AIX32: # %bb.0: # %entry
|
||||
; P7-AIX32-NEXT: lwz r5, 4(r3)
|
||||
; P7-AIX32-NEXT: addi r4, r1, -32
|
||||
; P7-AIX32-NEXT: lwz r4, L..C3(r2) # %const.0
|
||||
; P7-AIX32-NEXT: stw r5, -32(r1)
|
||||
; P7-AIX32-NEXT: lwz r3, 0(r3)
|
||||
; P7-AIX32-NEXT: lxvw4x v2, 0, r4
|
||||
; P7-AIX32-NEXT: addi r4, r1, -16
|
||||
; P7-AIX32-NEXT: stw r3, -16(r1)
|
||||
; P7-AIX32-NEXT: addi r3, r1, -16
|
||||
; P7-AIX32-NEXT: lxvw4x vs0, 0, r4
|
||||
; P7-AIX32-NEXT: lxvw4x vs1, 0, r3
|
||||
; P7-AIX32-NEXT: xxmrghw v2, vs1, vs0
|
||||
; P7-AIX32-NEXT: xxsldwi vs0, vs1, v2, 2
|
||||
; P7-AIX32-NEXT: xxmrgld v2, vs0, vs0
|
||||
; P7-AIX32-NEXT: addi r3, r1, -32
|
||||
; P7-AIX32-NEXT: lxvw4x v3, 0, r3
|
||||
; P7-AIX32-NEXT: lxvw4x v4, 0, r4
|
||||
; P7-AIX32-NEXT: vperm v2, v4, v3, v2
|
||||
; P7-AIX32-NEXT: xxmrghd v2, v2, v2
|
||||
; P7-AIX32-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i64* %s to <8 x i8>*
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -ppc-disable-perfect-shuffle=false < %s | FileCheck %s
|
||||
|
||||
; TODO: Fix this case when disabling perfect shuffle
|
||||
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
|
|
|
@ -1,19 +1,31 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple powerpc64 -mcpu=pwr10 < %s | FileCheck %s --check-prefix=BE
|
||||
; RUN: llc -mtriple powerpc64le -mcpu=pwr10 < %s | FileCheck %s --check-prefix=LE
|
||||
; RUN: llc -mtriple powerpc64le -mcpu=pwr10 -ppc-disable-perfect-shuffle=false < %s | FileCheck %s --check-prefix=LE
|
||||
; RUN: llc -mtriple powerpc64 -mcpu=pwr10 -ppc-disable-perfect-shuffle=false < %s | FileCheck %s --check-prefix=BE-ENABLE
|
||||
|
||||
; TODO: Fix the worse codegen when disabling perfect shuffle
|
||||
|
||||
define <4 x float> @shuffle1(<16 x i8> %v1, <16 x i8> %v2) {
|
||||
; BE-LABEL: shuffle1:
|
||||
; BE: # %bb.0:
|
||||
; BE-NEXT: xxmrglw 0, 34, 35
|
||||
; BE-NEXT: xxmrghw 1, 34, 35
|
||||
; BE-NEXT: xxmrghw 34, 1, 0
|
||||
; BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; BE-NEXT: lxv 36, 0(3)
|
||||
; BE-NEXT: vperm 2, 2, 3, 4
|
||||
; BE-NEXT: blr
|
||||
;
|
||||
; LE-LABEL: shuffle1:
|
||||
; LE: # %bb.0:
|
||||
; LE-NEXT: vpkudum 2, 3, 2
|
||||
; LE-NEXT: blr
|
||||
;
|
||||
; BE-ENABLE-LABEL: shuffle1:
|
||||
; BE-ENABLE: # %bb.0:
|
||||
; BE-ENABLE-NEXT: xxmrglw 0, 34, 35
|
||||
; BE-ENABLE-NEXT: xxmrghw 1, 34, 35
|
||||
; BE-ENABLE-NEXT: xxmrghw 34, 1, 0
|
||||
; BE-ENABLE-NEXT: blr
|
||||
%shuf = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
|
||||
%cast = bitcast <16 x i8> %shuf to <4 x float>
|
||||
ret <4 x float> %cast
|
||||
|
@ -30,6 +42,11 @@ define <4 x float> @shuffle2(<16 x i8> %v1, <16 x i8> %v2) {
|
|||
; LE-NEXT: plxv 36, .LCPI1_0@PCREL(0), 1
|
||||
; LE-NEXT: vperm 2, 3, 2, 4
|
||||
; LE-NEXT: blr
|
||||
;
|
||||
; BE-ENABLE-LABEL: shuffle2:
|
||||
; BE-ENABLE: # %bb.0:
|
||||
; BE-ENABLE-NEXT: vpkudum 2, 2, 3
|
||||
; BE-ENABLE-NEXT: blr
|
||||
%shuf = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
|
||||
%cast = bitcast <16 x i8> %shuf to <4 x float>
|
||||
ret <4 x float> %cast
|
||||
|
@ -38,12 +55,11 @@ define <4 x float> @shuffle2(<16 x i8> %v1, <16 x i8> %v2) {
|
|||
define <4 x float> @shuffle3(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x i8> %v4) {
|
||||
; BE-LABEL: shuffle3:
|
||||
; BE: # %bb.0:
|
||||
; BE-NEXT: xxmrglw 0, 34, 35
|
||||
; BE-NEXT: xxmrghw 1, 34, 35
|
||||
; BE-NEXT: xxmrghw 34, 1, 0
|
||||
; BE-NEXT: xxmrglw 0, 36, 37
|
||||
; BE-NEXT: xxmrghw 1, 36, 37
|
||||
; BE-NEXT: xxmrghw 35, 1, 0
|
||||
; BE-NEXT: addis 3, 2, .LCPI2_0@toc@ha
|
||||
; BE-NEXT: addi 3, 3, .LCPI2_0@toc@l
|
||||
; BE-NEXT: lxv 32, 0(3)
|
||||
; BE-NEXT: vperm 2, 2, 3, 0
|
||||
; BE-NEXT: vperm 3, 4, 5, 0
|
||||
; BE-NEXT: xvaddsp 34, 34, 35
|
||||
; BE-NEXT: blr
|
||||
;
|
||||
|
@ -53,6 +69,17 @@ define <4 x float> @shuffle3(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x
|
|||
; LE-NEXT: vpkudum 3, 5, 4
|
||||
; LE-NEXT: xvaddsp 34, 34, 35
|
||||
; LE-NEXT: blr
|
||||
;
|
||||
; BE-ENABLE-LABEL: shuffle3:
|
||||
; BE-ENABLE: # %bb.0:
|
||||
; BE-ENABLE-NEXT: xxmrglw 0, 34, 35
|
||||
; BE-ENABLE-NEXT: xxmrghw 1, 34, 35
|
||||
; BE-ENABLE-NEXT: xxmrghw 34, 1, 0
|
||||
; BE-ENABLE-NEXT: xxmrglw 0, 36, 37
|
||||
; BE-ENABLE-NEXT: xxmrghw 1, 36, 37
|
||||
; BE-ENABLE-NEXT: xxmrghw 35, 1, 0
|
||||
; BE-ENABLE-NEXT: xvaddsp 34, 34, 35
|
||||
; BE-ENABLE-NEXT: blr
|
||||
%shuf1 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
|
||||
%shuf2 = shufflevector <16 x i8> %v3, <16 x i8> %v4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
|
||||
%cast1 = bitcast <16 x i8> %shuf1 to <4 x float>
|
||||
|
@ -76,6 +103,13 @@ define <4 x float> @shuffle4(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x
|
|||
; LE-NEXT: vperm 3, 5, 4, 0
|
||||
; LE-NEXT: xvaddsp 34, 34, 35
|
||||
; LE-NEXT: blr
|
||||
;
|
||||
; BE-ENABLE-LABEL: shuffle4:
|
||||
; BE-ENABLE: # %bb.0:
|
||||
; BE-ENABLE-NEXT: vpkudum 2, 2, 3
|
||||
; BE-ENABLE-NEXT: vpkudum 3, 4, 5
|
||||
; BE-ENABLE-NEXT: xvaddsp 34, 34, 35
|
||||
; BE-ENABLE-NEXT: blr
|
||||
%shuf1 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
|
||||
%shuf2 = shufflevector <16 x i8> %v3, <16 x i8> %v4, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
|
||||
%cast1 = bitcast <16 x i8> %shuf1 to <4 x float>
|
||||
|
@ -87,22 +121,21 @@ define <4 x float> @shuffle4(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x
|
|||
define <4 x float> @shuffle5(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x i8> %v4) {
|
||||
; BE-LABEL: shuffle5:
|
||||
; BE: # %bb.0: # %entry
|
||||
; BE-NEXT: xxmrglw 0, 34, 35
|
||||
; BE-NEXT: xxmrghw 1, 34, 35
|
||||
; BE-NEXT: addis 3, 2, .LCPI4_0@toc@ha
|
||||
; BE-NEXT: addi 3, 3, .LCPI4_0@toc@l
|
||||
; BE-NEXT: lxv 32, 0(3)
|
||||
; BE-NEXT: li 3, 8
|
||||
; BE-NEXT: vextublx 3, 3, 2
|
||||
; BE-NEXT: xxmrghw 0, 1, 0
|
||||
; BE-NEXT: andi. 3, 3, 255
|
||||
; BE-NEXT: xxlor 1, 0, 0
|
||||
; BE-NEXT: vperm 3, 2, 3, 0
|
||||
; BE-NEXT: vmr 2, 3
|
||||
; BE-NEXT: beq 0, .LBB4_2
|
||||
; BE-NEXT: # %bb.1: # %exit
|
||||
; BE-NEXT: xvaddsp 34, 0, 1
|
||||
; BE-NEXT: xvaddsp 34, 35, 34
|
||||
; BE-NEXT: blr
|
||||
; BE-NEXT: .LBB4_2: # %second
|
||||
; BE-NEXT: xxmrglw 1, 36, 37
|
||||
; BE-NEXT: xxmrghw 2, 36, 37
|
||||
; BE-NEXT: xxmrghw 1, 2, 1
|
||||
; BE-NEXT: xvaddsp 34, 0, 1
|
||||
; BE-NEXT: vperm 2, 4, 5, 0
|
||||
; BE-NEXT: xvaddsp 34, 35, 34
|
||||
; BE-NEXT: blr
|
||||
;
|
||||
; LE-LABEL: shuffle5:
|
||||
|
@ -120,6 +153,26 @@ define <4 x float> @shuffle5(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x
|
|||
; LE-NEXT: vpkudum 2, 5, 4
|
||||
; LE-NEXT: xvaddsp 34, 35, 34
|
||||
; LE-NEXT: blr
|
||||
;
|
||||
; BE-ENABLE-LABEL: shuffle5:
|
||||
; BE-ENABLE: # %bb.0: # %entry
|
||||
; BE-ENABLE-NEXT: xxmrglw 0, 34, 35
|
||||
; BE-ENABLE-NEXT: xxmrghw 1, 34, 35
|
||||
; BE-ENABLE-NEXT: li 3, 8
|
||||
; BE-ENABLE-NEXT: vextublx 3, 3, 2
|
||||
; BE-ENABLE-NEXT: xxmrghw 0, 1, 0
|
||||
; BE-ENABLE-NEXT: andi. 3, 3, 255
|
||||
; BE-ENABLE-NEXT: xxlor 1, 0, 0
|
||||
; BE-ENABLE-NEXT: beq 0, .LBB4_2
|
||||
; BE-ENABLE-NEXT: # %bb.1: # %exit
|
||||
; BE-ENABLE-NEXT: xvaddsp 34, 0, 1
|
||||
; BE-ENABLE-NEXT: blr
|
||||
; BE-ENABLE-NEXT: .LBB4_2: # %second
|
||||
; BE-ENABLE-NEXT: xxmrglw 1, 36, 37
|
||||
; BE-ENABLE-NEXT: xxmrghw 2, 36, 37
|
||||
; BE-ENABLE-NEXT: xxmrghw 1, 2, 1
|
||||
; BE-ENABLE-NEXT: xvaddsp 34, 0, 1
|
||||
; BE-ENABLE-NEXT: blr
|
||||
entry:
|
||||
%shuf1 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
|
||||
%fetch = extractelement <16 x i8> %shuf1, i32 4
|
||||
|
|
|
@ -10,26 +10,27 @@ define dso_local fastcc void @BuildVectorICE() unnamed_addr {
|
|||
; 32BIT: # %bb.0: # %entry
|
||||
; 32BIT-NEXT: stwu 1, -64(1)
|
||||
; 32BIT-NEXT: .cfi_def_cfa_offset 64
|
||||
; 32BIT-NEXT: lxvw4x 34, 0, 3
|
||||
; 32BIT-NEXT: li 3, 0
|
||||
; 32BIT-NEXT: addi 4, 1, 16
|
||||
; 32BIT-NEXT: addi 5, 1, 32
|
||||
; 32BIT-NEXT: li 3, .LCPI0_0@l
|
||||
; 32BIT-NEXT: lis 4, .LCPI0_0@ha
|
||||
; 32BIT-NEXT: addi 5, 1, 16
|
||||
; 32BIT-NEXT: addi 6, 1, 48
|
||||
; 32BIT-NEXT: li 7, 0
|
||||
; 32BIT-NEXT: lxvw4x 34, 0, 3
|
||||
; 32BIT-NEXT: lxvw4x 35, 4, 3
|
||||
; 32BIT-NEXT: li 3, 0
|
||||
; 32BIT-NEXT: addi 4, 1, 32
|
||||
; 32BIT-NEXT: .p2align 4
|
||||
; 32BIT-NEXT: .LBB0_1: # %while.body
|
||||
; 32BIT-NEXT: #
|
||||
; 32BIT-NEXT: stw 7, 16(1)
|
||||
; 32BIT-NEXT: stw 3, 32(1)
|
||||
; 32BIT-NEXT: lxvw4x 0, 0, 4
|
||||
; 32BIT-NEXT: lxvw4x 1, 0, 5
|
||||
; 32BIT-NEXT: xxsldwi 0, 1, 0, 1
|
||||
; 32BIT-NEXT: xxspltw 1, 1, 0
|
||||
; 32BIT-NEXT: xxsldwi 35, 0, 1, 3
|
||||
; 32BIT-NEXT: vadduwm 3, 2, 3
|
||||
; 32BIT-NEXT: xxspltw 36, 35, 1
|
||||
; 32BIT-NEXT: vadduwm 3, 3, 4
|
||||
; 32BIT-NEXT: stxvw4x 35, 0, 6
|
||||
; 32BIT-NEXT: stw 7, 16(1)
|
||||
; 32BIT-NEXT: lxvw4x 36, 0, 4
|
||||
; 32BIT-NEXT: lxvw4x 37, 0, 5
|
||||
; 32BIT-NEXT: vperm 4, 5, 4, 3
|
||||
; 32BIT-NEXT: vadduwm 4, 2, 4
|
||||
; 32BIT-NEXT: xxspltw 37, 36, 1
|
||||
; 32BIT-NEXT: vadduwm 4, 4, 5
|
||||
; 32BIT-NEXT: stxvw4x 36, 0, 6
|
||||
; 32BIT-NEXT: lwz 7, 48(1)
|
||||
; 32BIT-NEXT: b .LBB0_1
|
||||
;
|
||||
|
|
|
@ -4,23 +4,26 @@
|
|||
define <4 x float> @bar(float* %p, float* %q) {
|
||||
; CHECK-LABEL: bar:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lxvw4x 0, 0, 3
|
||||
; CHECK-NEXT: lxvw4x 1, 0, 4
|
||||
; CHECK-NEXT: li 5, 16
|
||||
; CHECK-NEXT: lxvw4x 2, 3, 5
|
||||
; CHECK-NEXT: lxvw4x 3, 4, 5
|
||||
; CHECK-NEXT: lxvw4x 2, 0, 3
|
||||
; CHECK-NEXT: lxvw4x 3, 0, 4
|
||||
; CHECK-NEXT: lxvw4x 0, 3, 5
|
||||
; CHECK-NEXT: lxvw4x 1, 4, 5
|
||||
; CHECK-NEXT: li 5, 32
|
||||
; CHECK-NEXT: lxvw4x 4, 4, 5
|
||||
; CHECK-NEXT: xvsubsp 35, 3, 2
|
||||
; CHECK-NEXT: xvsubsp 34, 1, 0
|
||||
; CHECK-NEXT: lxvw4x 0, 3, 5
|
||||
; CHECK-NEXT: lxvw4x 1, 4, 5
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; CHECK-NEXT: lxvw4x 36, 0, 3
|
||||
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
; CHECK-NEXT: xvsubsp 0, 1, 0
|
||||
; CHECK-NEXT: lxvw4x 1, 3, 5
|
||||
; CHECK-NEXT: xvsubsp 2, 3, 2
|
||||
; CHECK-NEXT: xvsubsp 1, 4, 1
|
||||
; CHECK-NEXT: xxsldwi 0, 0, 0, 1
|
||||
; CHECK-NEXT: xxmrglw 34, 0, 2
|
||||
; CHECK-NEXT: xxsldwi 0, 0, 34, 3
|
||||
; CHECK-NEXT: xxmrghw 34, 1, 1
|
||||
; CHECK-NEXT: xxsldwi 0, 34, 0, 3
|
||||
; CHECK-NEXT: xxsldwi 34, 0, 0, 1
|
||||
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
; CHECK-NEXT: vperm 2, 3, 2, 4
|
||||
; CHECK-NEXT: lxvw4x 36, 0, 3
|
||||
; CHECK-NEXT: xxmrghw 35, 0, 0
|
||||
; CHECK-NEXT: vperm 2, 2, 3, 4
|
||||
; CHECK-NEXT: blr
|
||||
%1 = bitcast float* %p to <12 x float>*
|
||||
%2 = bitcast float* %q to <12 x float>*
|
||||
|
|
|
@ -35,10 +35,11 @@ define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
|
|||
;
|
||||
; P8BE-LABEL: s2v_test1:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: lfiwzx f0, 0, r3
|
||||
; P8BE-NEXT: xxsldwi vs1, v2, vs0, 1
|
||||
; P8BE-NEXT: xxmrghw v2, v2, vs0
|
||||
; P8BE-NEXT: xxsldwi v2, v2, vs1, 3
|
||||
; P8BE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
|
||||
; P8BE-NEXT: lxsiwzx v4, 0, r3
|
||||
; P8BE-NEXT: addi r4, r4, .LCPI0_0@toc@l
|
||||
; P8BE-NEXT: lxvw4x v3, 0, r4
|
||||
; P8BE-NEXT: vperm v2, v4, v2, v3
|
||||
; P8BE-NEXT: blr
|
||||
entry:
|
||||
%0 = load i32, i32* %int32, align 4
|
||||
|
@ -74,11 +75,12 @@ define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
|
|||
;
|
||||
; P8BE-LABEL: s2v_test2:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, 4
|
||||
; P8BE-NEXT: lfiwzx f0, 0, r3
|
||||
; P8BE-NEXT: xxsldwi vs1, v2, vs0, 1
|
||||
; P8BE-NEXT: xxmrghw v2, v2, vs0
|
||||
; P8BE-NEXT: xxsldwi v2, v2, vs1, 3
|
||||
; P8BE-NEXT: addi r4, r4, .LCPI1_0@toc@l
|
||||
; P8BE-NEXT: lxsiwzx v4, 0, r3
|
||||
; P8BE-NEXT: lxvw4x v3, 0, r4
|
||||
; P8BE-NEXT: vperm v2, v4, v2, v3
|
||||
; P8BE-NEXT: blr
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
|
||||
|
@ -117,11 +119,12 @@ define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32
|
|||
;
|
||||
; P8BE-LABEL: s2v_test3:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: sldi r4, r7, 2
|
||||
; P8BE-NEXT: lfiwzx f0, r3, r4
|
||||
; P8BE-NEXT: xxsldwi vs1, v2, vs0, 1
|
||||
; P8BE-NEXT: xxmrghw v2, v2, vs0
|
||||
; P8BE-NEXT: xxsldwi v2, v2, vs1, 3
|
||||
; P8BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
|
||||
; P8BE-NEXT: sldi r5, r7, 2
|
||||
; P8BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
|
||||
; P8BE-NEXT: lxsiwzx v3, r3, r5
|
||||
; P8BE-NEXT: lxvw4x v4, 0, r4
|
||||
; P8BE-NEXT: vperm v2, v3, v2, v4
|
||||
; P8BE-NEXT: blr
|
||||
entry:
|
||||
%idxprom = sext i32 %Idx to i64
|
||||
|
@ -159,11 +162,12 @@ define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
|
|||
;
|
||||
; P8BE-LABEL: s2v_test4:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, 4
|
||||
; P8BE-NEXT: lfiwzx f0, 0, r3
|
||||
; P8BE-NEXT: xxsldwi vs1, v2, vs0, 1
|
||||
; P8BE-NEXT: xxmrghw v2, v2, vs0
|
||||
; P8BE-NEXT: xxsldwi v2, v2, vs1, 3
|
||||
; P8BE-NEXT: addi r4, r4, .LCPI3_0@toc@l
|
||||
; P8BE-NEXT: lxsiwzx v4, 0, r3
|
||||
; P8BE-NEXT: lxvw4x v3, 0, r4
|
||||
; P8BE-NEXT: vperm v2, v4, v2, v3
|
||||
; P8BE-NEXT: blr
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
|
||||
|
@ -199,10 +203,11 @@ define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
|
|||
;
|
||||
; P8BE-LABEL: s2v_test5:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: lfiwzx f0, 0, r5
|
||||
; P8BE-NEXT: xxsldwi vs1, v2, vs0, 1
|
||||
; P8BE-NEXT: xxmrghw v2, v2, vs0
|
||||
; P8BE-NEXT: xxsldwi v2, v2, vs1, 3
|
||||
; P8BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
|
||||
; P8BE-NEXT: lxsiwzx v4, 0, r5
|
||||
; P8BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
|
||||
; P8BE-NEXT: lxvw4x v3, 0, r3
|
||||
; P8BE-NEXT: vperm v2, v4, v2, v3
|
||||
; P8BE-NEXT: blr
|
||||
entry:
|
||||
%0 = load i32, i32* %ptr1, align 4
|
||||
|
@ -237,10 +242,11 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
|
|||
;
|
||||
; P8BE-LABEL: s2v_test_f1:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: lfiwzx f0, 0, r3
|
||||
; P8BE-NEXT: xxsldwi vs1, v2, vs0, 1
|
||||
; P8BE-NEXT: xxmrghw v2, v2, vs0
|
||||
; P8BE-NEXT: xxsldwi v2, v2, vs1, 3
|
||||
; P8BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
|
||||
; P8BE-NEXT: lxsiwzx v4, 0, r3
|
||||
; P8BE-NEXT: addi r4, r4, .LCPI5_0@toc@l
|
||||
; P8BE-NEXT: lxvw4x v3, 0, r4
|
||||
; P8BE-NEXT: vperm v2, v4, v2, v3
|
||||
; P8BE-NEXT: blr
|
||||
entry:
|
||||
%0 = load float, float* %f64, align 4
|
||||
|
|
|
@ -55,21 +55,24 @@ define dso_local <4 x i32> @test(<4 x i32> %a, double %b) {
|
|||
; CHECK-BE-P7: # %bb.0: # %entry
|
||||
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
||||
; CHECK-BE-P7-NEXT: addis r4, r2, .LCPI0_0@toc@ha
|
||||
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
|
||||
; CHECK-BE-P7-NEXT: xxsldwi vs0, v2, v2, 3
|
||||
; CHECK-BE-P7-NEXT: stw r3, -32(r1)
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x vs1, 0, r3
|
||||
; CHECK-BE-P7-NEXT: xxsldwi v2, vs0, vs1, 1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r4, .LCPI0_0@toc@l
|
||||
; CHECK-BE-P7-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
|
||||
; CHECK-BE-P7-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: xscvdpsxws f0, f1
|
||||
; CHECK-BE-P8-NEXT: xxmrghw v3, v2, vs0
|
||||
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
|
||||
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
|
||||
; CHECK-BE-P8-NEXT: xscvdpsxws v3, f1
|
||||
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
|
||||
; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P9-LABEL: test:
|
||||
|
@ -118,21 +121,24 @@ define dso_local <4 x i32> @test2(<4 x i32> %a, float %b) {
|
|||
; CHECK-BE-P7: # %bb.0: # %entry
|
||||
; CHECK-BE-P7-NEXT: xscvdpsxws f0, f1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
||||
; CHECK-BE-P7-NEXT: addis r4, r2, .LCPI1_0@toc@ha
|
||||
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
|
||||
; CHECK-BE-P7-NEXT: xxsldwi vs0, v2, v2, 3
|
||||
; CHECK-BE-P7-NEXT: stw r3, -32(r1)
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x vs1, 0, r3
|
||||
; CHECK-BE-P7-NEXT: xxsldwi v2, vs0, vs1, 1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r4, .LCPI1_0@toc@l
|
||||
; CHECK-BE-P7-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
|
||||
; CHECK-BE-P7-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test2:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: xscvdpsxws f0, f1
|
||||
; CHECK-BE-P8-NEXT: xxmrghw v3, v2, vs0
|
||||
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
|
||||
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
|
||||
; CHECK-BE-P8-NEXT: xscvdpsxws v3, f1
|
||||
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
|
||||
; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P9-LABEL: test2:
|
||||
|
@ -181,21 +187,24 @@ define dso_local <4 x i32> @test3(<4 x i32> %a, double %b) {
|
|||
; CHECK-BE-P7: # %bb.0: # %entry
|
||||
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
||||
; CHECK-BE-P7-NEXT: addis r4, r2, .LCPI2_0@toc@ha
|
||||
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
|
||||
; CHECK-BE-P7-NEXT: xxsldwi vs0, v2, v2, 3
|
||||
; CHECK-BE-P7-NEXT: stw r3, -32(r1)
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x vs1, 0, r3
|
||||
; CHECK-BE-P7-NEXT: xxsldwi v2, vs0, vs1, 1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r4, .LCPI2_0@toc@l
|
||||
; CHECK-BE-P7-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
|
||||
; CHECK-BE-P7-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test3:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: xscvdpuxws f0, f1
|
||||
; CHECK-BE-P8-NEXT: xxmrghw v3, v2, vs0
|
||||
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
|
||||
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
|
||||
; CHECK-BE-P8-NEXT: xscvdpuxws v3, f1
|
||||
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
|
||||
; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P9-LABEL: test3:
|
||||
|
@ -244,21 +253,24 @@ define dso_local <4 x i32> @test4(<4 x i32> %a, float %b) {
|
|||
; CHECK-BE-P7: # %bb.0: # %entry
|
||||
; CHECK-BE-P7-NEXT: xscvdpuxws f0, f1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -4
|
||||
; CHECK-BE-P7-NEXT: addis r4, r2, .LCPI3_0@toc@ha
|
||||
; CHECK-BE-P7-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lwz r3, -4(r1)
|
||||
; CHECK-BE-P7-NEXT: xxsldwi vs0, v2, v2, 3
|
||||
; CHECK-BE-P7-NEXT: stw r3, -32(r1)
|
||||
; CHECK-BE-P7-NEXT: addi r3, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x vs1, 0, r3
|
||||
; CHECK-BE-P7-NEXT: xxsldwi v2, vs0, vs1, 1
|
||||
; CHECK-BE-P7-NEXT: addi r3, r4, .LCPI3_0@toc@l
|
||||
; CHECK-BE-P7-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v3, 0, r3
|
||||
; CHECK-BE-P7-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-P7-NEXT: vperm v2, v2, v4, v3
|
||||
; CHECK-BE-P7-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P8-LABEL: test4:
|
||||
; CHECK-BE-P8: # %bb.0: # %entry
|
||||
; CHECK-BE-P8-NEXT: xscvdpuxws f0, f1
|
||||
; CHECK-BE-P8-NEXT: xxmrghw v3, v2, vs0
|
||||
; CHECK-BE-P8-NEXT: xxsldwi vs0, v3, v2, 3
|
||||
; CHECK-BE-P8-NEXT: xxsldwi v2, vs0, vs0, 1
|
||||
; CHECK-BE-P8-NEXT: xscvdpuxws v3, f1
|
||||
; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
|
||||
; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
|
||||
; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
|
||||
; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
|
||||
; CHECK-BE-P8-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-P9-LABEL: test4:
|
||||
|
|
|
@ -190,10 +190,12 @@ define double @test10(<4 x i32> %a, <4 x i32> %b) {
|
|||
; CHECK-BE-LABEL: test10:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
|
||||
; CHECK-BE-NEXT: xxmrghw 0, 35, 35
|
||||
; CHECK-BE-NEXT: lfs 1, .LCPI9_0@toc@l(3)
|
||||
; CHECK-BE-NEXT: xxmrglw 0, 0, 34
|
||||
; CHECK-BE-NEXT: xsadddp 1, 0, 1
|
||||
; CHECK-BE-NEXT: addi 3, 3, .LCPI9_0@toc@l
|
||||
; CHECK-BE-NEXT: lxv 36, 0(3)
|
||||
; CHECK-BE-NEXT: addis 3, 2, .LCPI9_1@toc@ha
|
||||
; CHECK-BE-NEXT: lfs 0, .LCPI9_1@toc@l(3)
|
||||
; CHECK-BE-NEXT: vperm 2, 3, 2, 4
|
||||
; CHECK-BE-NEXT: xsadddp 1, 34, 0
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 2, i32 3, i32 7>
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | not grep vperm
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 -ppc-disable-perfect-shuffle=false | not grep vperm
|
||||
|
||||
; TODO: Fix this case when disabling perfect shuffle
|
||||
|
||||
define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) {
|
||||
%V1 = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mattr=+power8-vector < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck -check-prefix=CHECK-PWR7 %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi < %s | FileCheck -check-prefix=CHECK-PWR7 %s
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi < %s | FileCheck -check-prefix=CHECK-PWR7-AIX %s
|
||||
|
||||
define void @VPKUDUM_unary(<2 x i64>* %A) {
|
||||
; CHECK-LABEL: VPKUDUM_unary:
|
||||
|
@ -14,12 +14,22 @@ define void @VPKUDUM_unary(<2 x i64>* %A) {
|
|||
;
|
||||
; CHECK-PWR7-LABEL: VPKUDUM_unary:
|
||||
; CHECK-PWR7: # %bb.0: # %entry
|
||||
; CHECK-PWR7-NEXT: lxvw4x 0, 0, 3
|
||||
; CHECK-PWR7-NEXT: xxmrglw 1, 0, 0
|
||||
; CHECK-PWR7-NEXT: xxmrghw 0, 0, 0
|
||||
; CHECK-PWR7-NEXT: xxmrglw 0, 0, 1
|
||||
; CHECK-PWR7-NEXT: stxvw4x 0, 0, 3
|
||||
; CHECK-PWR7-NEXT: addis 4, 2, .LCPI0_0@toc@ha
|
||||
; CHECK-PWR7-NEXT: lxvw4x 34, 0, 3
|
||||
; CHECK-PWR7-NEXT: addi 4, 4, .LCPI0_0@toc@l
|
||||
; CHECK-PWR7-NEXT: lxvw4x 35, 0, 4
|
||||
; CHECK-PWR7-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-PWR7-NEXT: stxvw4x 34, 0, 3
|
||||
; CHECK-PWR7-NEXT: blr
|
||||
;
|
||||
; CHECK-PWR7-AIX-LABEL: VPKUDUM_unary:
|
||||
; CHECK-PWR7-AIX: # %bb.0: # %entry
|
||||
; CHECK-PWR7-AIX-NEXT: ld 4, L..C0(2) # %const.0
|
||||
; CHECK-PWR7-AIX-NEXT: lxvw4x 34, 0, 3
|
||||
; CHECK-PWR7-AIX-NEXT: lxvw4x 35, 0, 4
|
||||
; CHECK-PWR7-AIX-NEXT: vperm 2, 2, 2, 3
|
||||
; CHECK-PWR7-AIX-NEXT: stxvw4x 34, 0, 3
|
||||
; CHECK-PWR7-AIX-NEXT: blr
|
||||
entry:
|
||||
%tmp = load <2 x i64>, <2 x i64>* %A
|
||||
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
|
||||
|
@ -45,13 +55,24 @@ define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
|
|||
;
|
||||
; CHECK-PWR7-LABEL: VPKUDUM:
|
||||
; CHECK-PWR7: # %bb.0: # %entry
|
||||
; CHECK-PWR7-NEXT: lxvw4x 0, 0, 3
|
||||
; CHECK-PWR7-NEXT: lxvw4x 1, 0, 4
|
||||
; CHECK-PWR7-NEXT: xxmrglw 2, 0, 1
|
||||
; CHECK-PWR7-NEXT: xxmrghw 0, 0, 1
|
||||
; CHECK-PWR7-NEXT: xxmrglw 0, 0, 2
|
||||
; CHECK-PWR7-NEXT: stxvw4x 0, 0, 3
|
||||
; CHECK-PWR7-NEXT: addis 5, 2, .LCPI1_0@toc@ha
|
||||
; CHECK-PWR7-NEXT: lxvw4x 34, 0, 4
|
||||
; CHECK-PWR7-NEXT: lxvw4x 35, 0, 3
|
||||
; CHECK-PWR7-NEXT: addi 4, 5, .LCPI1_0@toc@l
|
||||
; CHECK-PWR7-NEXT: lxvw4x 36, 0, 4
|
||||
; CHECK-PWR7-NEXT: vperm 2, 3, 2, 4
|
||||
; CHECK-PWR7-NEXT: stxvw4x 34, 0, 3
|
||||
; CHECK-PWR7-NEXT: blr
|
||||
;
|
||||
; CHECK-PWR7-AIX-LABEL: VPKUDUM:
|
||||
; CHECK-PWR7-AIX: # %bb.0: # %entry
|
||||
; CHECK-PWR7-AIX-NEXT: ld 5, L..C1(2) # %const.0
|
||||
; CHECK-PWR7-AIX-NEXT: lxvw4x 34, 0, 4
|
||||
; CHECK-PWR7-AIX-NEXT: lxvw4x 35, 0, 3
|
||||
; CHECK-PWR7-AIX-NEXT: lxvw4x 36, 0, 5
|
||||
; CHECK-PWR7-AIX-NEXT: vperm 2, 3, 2, 4
|
||||
; CHECK-PWR7-AIX-NEXT: stxvw4x 34, 0, 3
|
||||
; CHECK-PWR7-AIX-NEXT: blr
|
||||
entry:
|
||||
%tmp = load <2 x i64>, <2 x i64>* %A
|
||||
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
|
||||
|
|
Loading…
Reference in New Issue