The reverted commit caused some miscompiles that did not show up on any of the bots.
Reverting it until we can investigate the cause of those failures.

llvm-svn: 288214
This commit is contained in:
Nemanja Ivanovic 2016-11-29 23:00:33 +00:00
parent 58ab22fe48
commit f57f150b1b
7 changed files with 65 additions and 106 deletions

View File

@ -10861,14 +10861,6 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
} }
MVT VecTy = N->getValueType(0).getSimpleVT(); MVT VecTy = N->getValueType(0).getSimpleVT();
// Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
// aligned and the type is a vector with elements up to 4 bytes
if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
&& VecTy.getScalarSizeInBits() <= 32 ) {
return SDValue();
}
SDValue LoadOps[] = { Chain, Base }; SDValue LoadOps[] = { Chain, Base };
SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
DAG.getVTList(MVT::v2f64, MVT::Other), DAG.getVTList(MVT::v2f64, MVT::Other),
@ -10933,13 +10925,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
SDValue Src = N->getOperand(SrcOpnd); SDValue Src = N->getOperand(SrcOpnd);
MVT VecTy = Src.getValueType().getSimpleVT(); MVT VecTy = Src.getValueType().getSimpleVT();
// Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
// aligned and the type is a vector with elements up to 4 bytes
if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
&& VecTy.getScalarSizeInBits() <= 32 ) {
return SDValue();
}
// All stores are done as v2f64 and possible bit cast. // All stores are done as v2f64 and possible bit cast.
if (VecTy != MVT::v2f64) { if (VecTy != MVT::v2f64) {
Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);

View File

@ -138,7 +138,7 @@ let Uses = [RM] in {
def LXVW4X : XX1Form<31, 780, def LXVW4X : XX1Form<31, 780,
(outs vsrc:$XT), (ins memrr:$src), (outs vsrc:$XT), (ins memrr:$src),
"lxvw4x $XT, $src", IIC_LdStLFD, "lxvw4x $XT, $src", IIC_LdStLFD,
[]>; [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
} // mayLoad } // mayLoad
// Store indexed instructions // Store indexed instructions
@ -160,7 +160,7 @@ let Uses = [RM] in {
def STXVW4X : XX1Form<31, 908, def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst), (outs), (ins vsrc:$XT, memrr:$dst),
"stxvw4x $XT, $dst", IIC_LdStSTFD, "stxvw4x $XT, $dst", IIC_LdStSTFD,
[]>; [(store v4i32:$XT, xoaddr:$dst)]>;
} }
} // mayStore } // mayStore
@ -1045,6 +1045,8 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// Stores. // Stores.
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>; (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
(STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
(STXVD2X $rS, xoaddr:$dst)>; (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
@ -1055,12 +1057,8 @@ let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
(STXVW4X $rS, xoaddr:$dst)>;
} }
// Permutes. // Permutes.
@ -1884,8 +1882,8 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
let Predicates = [IsLittleEndian, HasDirectMove] in { let Predicates = [IsLittleEndian, HasDirectMove] in {
// v16i8 scalar <-> vector conversions (LE) // v16i8 scalar <-> vector conversions (LE)

View File

@ -1,6 +1,5 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE \ ; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
; RUN: --implicit-check-not xxswapd
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE ; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
@ -9,15 +8,13 @@
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \ ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
; RUN: --implicit-check-not xxswapd
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | \ ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
; RUN: FileCheck %s -check-prefix=CHECK-LE-NOVSX --implicit-check-not xxswapd
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \ ; RUN: -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \
@ -29,7 +26,7 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \ ; RUN: -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-LE --implicit-check-not xxswapd ; RUN: FileCheck %s -check-prefix=CHECK-LE
@x = common global <1 x i128> zeroinitializer, align 16 @x = common global <1 x i128> zeroinitializer, align 16
@y = common global <1 x i128> zeroinitializer, align 16 @y = common global <1 x i128> zeroinitializer, align 16
@ -202,7 +199,8 @@ define <1 x i128> @call_v1i128_increment_by_one() nounwind {
ret <1 x i128> %ret ret <1 x i128> %ret
; CHECK-LE-LABEL: @call_v1i128_increment_by_one ; CHECK-LE-LABEL: @call_v1i128_increment_by_one
; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}} ; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE: xxswapd 34, [[PARAM]]
; CHECK-LE: bl v1i128_increment_by_one ; CHECK-LE: bl v1i128_increment_by_one
; CHECK-LE: blr ; CHECK-LE: blr
@ -231,8 +229,10 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind {
ret <1 x i128> %ret ret <1 x i128> %ret
; CHECK-LE-LABEL: @call_v1i128_increment_by_val ; CHECK-LE-LABEL: @call_v1i128_increment_by_val
; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}} ; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE: lvx 3, {{[0-9]+}}, {{[0-9]+}} ; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE-DAG: xxswapd 34, [[PARAM1]]
; CHECK-LE-DAG: xxswapd 35, [[PARAM2]]
; CHECK-LE: bl v1i128_increment_by_val ; CHECK-LE: bl v1i128_increment_by_val
; CHECK-LE: blr ; CHECK-LE: blr

View File

@ -13,12 +13,6 @@
; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck -check-prefix=NOOPTSWAP %s ; RUN: | FileCheck -check-prefix=NOOPTSWAP %s
; LH: 2016-11-17
; Updated align attribute from 16 to 8 to keep the swap instruction tests.
; Changes have been made on little-endian to use lvx and stvx
; instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
; aligned vectors with elements up to 4 bytes
; This test was generated from the following source: ; This test was generated from the following source:
; ;
; #define N 4096 ; #define N 4096
@ -35,10 +29,10 @@
; } ; }
; } ; }
@cb = common global [4096 x i32] zeroinitializer, align 8 @cb = common global [4096 x i32] zeroinitializer, align 16
@cc = common global [4096 x i32] zeroinitializer, align 8 @cc = common global [4096 x i32] zeroinitializer, align 16
@cd = common global [4096 x i32] zeroinitializer, align 8 @cd = common global [4096 x i32] zeroinitializer, align 16
@ca = common global [4096 x i32] zeroinitializer, align 8 @ca = common global [4096 x i32] zeroinitializer, align 16
define void @foo() { define void @foo() {
entry: entry:
@ -48,63 +42,63 @@ vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ] %index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ]
%0 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index %0 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index
%1 = bitcast i32* %0 to <4 x i32>* %1 = bitcast i32* %0 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %1, align 8 %wide.load = load <4 x i32>, <4 x i32>* %1, align 16
%2 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index %2 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index
%3 = bitcast i32* %2 to <4 x i32>* %3 = bitcast i32* %2 to <4 x i32>*
%wide.load13 = load <4 x i32>, <4 x i32>* %3, align 8 %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 16
%4 = add nsw <4 x i32> %wide.load13, %wide.load %4 = add nsw <4 x i32> %wide.load13, %wide.load
%5 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index %5 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index
%6 = bitcast i32* %5 to <4 x i32>* %6 = bitcast i32* %5 to <4 x i32>*
%wide.load14 = load <4 x i32>, <4 x i32>* %6, align 8 %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 16
%7 = mul nsw <4 x i32> %4, %wide.load14 %7 = mul nsw <4 x i32> %4, %wide.load14
%8 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index %8 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index
%9 = bitcast i32* %8 to <4 x i32>* %9 = bitcast i32* %8 to <4 x i32>*
store <4 x i32> %7, <4 x i32>* %9, align 8 store <4 x i32> %7, <4 x i32>* %9, align 16
%index.next = add nuw nsw i64 %index, 4 %index.next = add nuw nsw i64 %index, 4
%10 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next %10 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next
%11 = bitcast i32* %10 to <4 x i32>* %11 = bitcast i32* %10 to <4 x i32>*
%wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 8 %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 16
%12 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next %12 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next
%13 = bitcast i32* %12 to <4 x i32>* %13 = bitcast i32* %12 to <4 x i32>*
%wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 8 %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 16
%14 = add nsw <4 x i32> %wide.load13.1, %wide.load.1 %14 = add nsw <4 x i32> %wide.load13.1, %wide.load.1
%15 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next %15 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next
%16 = bitcast i32* %15 to <4 x i32>* %16 = bitcast i32* %15 to <4 x i32>*
%wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 8 %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 16
%17 = mul nsw <4 x i32> %14, %wide.load14.1 %17 = mul nsw <4 x i32> %14, %wide.load14.1
%18 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next %18 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next
%19 = bitcast i32* %18 to <4 x i32>* %19 = bitcast i32* %18 to <4 x i32>*
store <4 x i32> %17, <4 x i32>* %19, align 8 store <4 x i32> %17, <4 x i32>* %19, align 16
%index.next.1 = add nuw nsw i64 %index.next, 4 %index.next.1 = add nuw nsw i64 %index.next, 4
%20 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.1 %20 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.1
%21 = bitcast i32* %20 to <4 x i32>* %21 = bitcast i32* %20 to <4 x i32>*
%wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 8 %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 16
%22 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.1 %22 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.1
%23 = bitcast i32* %22 to <4 x i32>* %23 = bitcast i32* %22 to <4 x i32>*
%wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 8 %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 16
%24 = add nsw <4 x i32> %wide.load13.2, %wide.load.2 %24 = add nsw <4 x i32> %wide.load13.2, %wide.load.2
%25 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.1 %25 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.1
%26 = bitcast i32* %25 to <4 x i32>* %26 = bitcast i32* %25 to <4 x i32>*
%wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 8 %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 16
%27 = mul nsw <4 x i32> %24, %wide.load14.2 %27 = mul nsw <4 x i32> %24, %wide.load14.2
%28 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.1 %28 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.1
%29 = bitcast i32* %28 to <4 x i32>* %29 = bitcast i32* %28 to <4 x i32>*
store <4 x i32> %27, <4 x i32>* %29, align 8 store <4 x i32> %27, <4 x i32>* %29, align 16
%index.next.2 = add nuw nsw i64 %index.next.1, 4 %index.next.2 = add nuw nsw i64 %index.next.1, 4
%30 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.2 %30 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.2
%31 = bitcast i32* %30 to <4 x i32>* %31 = bitcast i32* %30 to <4 x i32>*
%wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 8 %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 16
%32 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.2 %32 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.2
%33 = bitcast i32* %32 to <4 x i32>* %33 = bitcast i32* %32 to <4 x i32>*
%wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 8 %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 16
%34 = add nsw <4 x i32> %wide.load13.3, %wide.load.3 %34 = add nsw <4 x i32> %wide.load13.3, %wide.load.3
%35 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.2 %35 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.2
%36 = bitcast i32* %35 to <4 x i32>* %36 = bitcast i32* %35 to <4 x i32>*
%wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 8 %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 16
%37 = mul nsw <4 x i32> %34, %wide.load14.3 %37 = mul nsw <4 x i32> %34, %wide.load14.3
%38 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.2 %38 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.2
%39 = bitcast i32* %38 to <4 x i32>* %39 = bitcast i32* %38 to <4 x i32>*
store <4 x i32> %37, <4 x i32>* %39, align 8 store <4 x i32> %37, <4 x i32>* %39, align 16
%index.next.3 = add nuw nsw i64 %index.next.2, 4 %index.next.3 = add nuw nsw i64 %index.next.2, 4
%40 = icmp eq i64 %index.next.3, 4096 %40 = icmp eq i64 %index.next.3, 4096
br i1 %40, label %for.end, label %vector.body br i1 %40, label %for.end, label %vector.body

View File

@ -2,13 +2,6 @@
; Test swap removal when a vector splat must be adjusted to make it legal. ; Test swap removal when a vector splat must be adjusted to make it legal.
; ;
; LH: 2016-11-17
; Updated align attribute from 16 to 8 to keep the swap instruction tests.
; Changes have been made on little-endian to use lvx and stvx
; instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
; aligned vectors with elements up to 4 bytes
; Test generated from following C code: ; Test generated from following C code:
; ;
; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
@ -35,37 +28,37 @@
; vir = (vector int){vi[1], vi[1], vi[1], vi[1]}; ; vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
; } ; }
@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 8 @vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 8 @vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 8 @vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@vcr = common global <16 x i8> zeroinitializer, align 8 @vcr = common global <16 x i8> zeroinitializer, align 16
@vsr = common global <8 x i16> zeroinitializer, align 8 @vsr = common global <8 x i16> zeroinitializer, align 16
@vir = common global <4 x i32> zeroinitializer, align 8 @vir = common global <4 x i32> zeroinitializer, align 16
; Function Attrs: nounwind ; Function Attrs: nounwind
define void @cfoo() { define void @cfoo() {
entry: entry:
%0 = load <16 x i8>, <16 x i8>* @vc, align 8 %0 = load <16 x i8>, <16 x i8>* @vc, align 16
%vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 8 store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 16
ret void ret void
} }
; Function Attrs: nounwind ; Function Attrs: nounwind
define void @sfoo() { define void @sfoo() {
entry: entry:
%0 = load <8 x i16>, <8 x i16>* @vs, align 8 %0 = load <8 x i16>, <8 x i16>* @vs, align 16
%vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6> %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 8 store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 16
ret void ret void
} }
; Function Attrs: nounwind ; Function Attrs: nounwind
define void @ifoo() { define void @ifoo() {
entry: entry:
%0 = load <4 x i32>, <4 x i32>* @vi, align 8 %0 = load <4 x i32>, <4 x i32>* @vi, align 16
%vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
store <4 x i32> %vecinit6, <4 x i32>* @vir, align 8 store <4 x i32> %vecinit6, <4 x i32>* @vir, align 16
ret void ret void
} }

View File

@ -14,10 +14,8 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
; RUN: grep lxvd2x < %t | count 3 ; RUN: grep lxvd2x < %t | count 6
; RUN: grep lvx < %t | count 3 ; RUN: grep stxvd2x < %t | count 6
; RUN: grep stxvd2x < %t | count 3
; RUN: grep stvx < %t | count 3
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \ ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t

View File

@ -1,17 +1,8 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr7 \ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s
; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 \ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | \ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
; RUN: FileCheck -check-prefix=CHECK-REG %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-LE %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s |\
; RUN: FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
; RUN: -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s |\
; RUN: FileCheck -check-prefix=CHECK-FISL %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mattr=+vsx < %s | \
; RUN: FileCheck -check-prefix=CHECK-LE %s
define double @test1(double %a, double %b) { define double @test1(double %a, double %b) {
entry: entry:
@ -654,8 +645,8 @@ define <4 x float> @test32(<4 x float>* %a) {
; CHECK-FISL: blr ; CHECK-FISL: blr
; CHECK-LE-LABEL: @test32 ; CHECK-LE-LABEL: @test32
; CHECK-LE: lvx 2, 0, 3 ; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
; CHECK-LE-NOT: xxswapd ; CHECK-LE: xxswapd 34, [[V1]]
; CHECK-LE: blr ; CHECK-LE: blr
} }
@ -672,8 +663,8 @@ define void @test33(<4 x float>* %a, <4 x float> %b) {
; CHECK-FISL: blr ; CHECK-FISL: blr
; CHECK-LE-LABEL: @test33 ; CHECK-LE-LABEL: @test33
; CHECK-LE-NOT: xxswapd ; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
; CHECK-LE: stvx 2, 0, 3 ; CHECK-LE: stxvd2x [[V1]], 0, 3
; CHECK-LE: blr ; CHECK-LE: blr
} }
@ -725,8 +716,8 @@ define <4 x i32> @test34(<4 x i32>* %a) {
; CHECK-FISL: blr ; CHECK-FISL: blr
; CHECK-LE-LABEL: @test34 ; CHECK-LE-LABEL: @test34
; CHECK-LE: lvx 2, 0, 3 ; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
; CHECK-LE-NOT: xxswapd ; CHECK-LE: xxswapd 34, [[V1]]
; CHECK-LE: blr ; CHECK-LE: blr
} }
@ -743,8 +734,8 @@ define void @test35(<4 x i32>* %a, <4 x i32> %b) {
; CHECK-FISL: blr ; CHECK-FISL: blr
; CHECK-LE-LABEL: @test35 ; CHECK-LE-LABEL: @test35
; CHECK-LE-NOT: xxswapd ; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
; CHECK-LE: stvx 2, 0, 3 ; CHECK-LE: stxvd2x [[V1]], 0, 3
; CHECK-LE: blr ; CHECK-LE: blr
} }
@ -1159,9 +1150,9 @@ define <2 x i32> @test80(i32 %v) {
; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3 ; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]] ; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
; CHECK-LE-DAG: lvx 3, 0, [[R2]] ; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
; CHECK-LE-DAG: xxspltw 34, [[V1]] ; CHECK-LE-DAG: xxspltw 34, [[V1]]
; CHECK-LE-NOT: xxswapd 35, [[V2]] ; CHECK-LE-DAG: xxswapd 35, [[V2]]
; CHECK-LE: vadduwm 2, 2, 3 ; CHECK-LE: vadduwm 2, 2, 3
; CHECK-LE: blr ; CHECK-LE: blr
} }