forked from OSchip/llvm-project
[PATCH] PowerPC: Expand load extend vector operations
This patch expands the SEXTLOAD, ZEXTLOAD, and EXTLOAD operations for vector types when altivec is enabled. llvm-svn: 167386
This commit is contained in:
parent
520a30fd05
commit
c4182d1890
|
@ -361,6 +361,16 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
|||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
|
||||
setOperationAction(ISD::CTTZ, VT, Expand);
|
||||
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
|
||||
|
||||
for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
|
||||
j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
|
||||
MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
|
||||
setTruncStoreAction(VT, InnerVT, Expand);
|
||||
}
|
||||
setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, Expand);
|
||||
}
|
||||
|
||||
for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE;
|
||||
|
|
|
@ -0,0 +1,155 @@
|
|||
; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
|
||||
|
||||
; Check vector extend load expansion with altivec enabled.
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Altivec does not provide a sext instruction, so it expands to
|
||||
; a set of vector stores (stvx), byte load/sign extend/store
|
||||
; (lbz/stb), and a final vector load (lvx) to load the result
|
||||
; extended vector.
|
||||
define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
|
||||
%b = trunc <16 x i8> %a to <16 x i4>
|
||||
%c = sext <16 x i4> %b to <16 x i8>
|
||||
ret <16 x i8> %c
|
||||
}
|
||||
; CHECK: v16si8_sext_in_reg:
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lbz
|
||||
; CHECK: stb
|
||||
; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
|
||||
; The zero extend uses a more clever logic: a vector splat
|
||||
; and a logical and to set higher bits to 0.
|
||||
define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) {
|
||||
%b = trunc <16 x i8> %a to <16 x i4>
|
||||
%c = zext <16 x i4> %b to <16 x i8>
|
||||
ret <16 x i8> %c
|
||||
}
|
||||
; CHECK: v16si8_zext_in_reg:
|
||||
; CHECK: vspltisb [[VMASK:[0-9]+]], 15
|
||||
; CHECK-NEXT: vand 2, 2, [[VMASK]]
|
||||
|
||||
; Same as v16si8_sext_in_reg, expands to load/store halfwords (lhz/sth).
|
||||
define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
|
||||
%b = trunc <8 x i16> %a to <8 x i8>
|
||||
%c = sext <8 x i8> %b to <8 x i16>
|
||||
ret <8 x i16> %c
|
||||
}
|
||||
; CHECK: v8si16_sext_in_reg:
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lhz
|
||||
; CHECK: sth
|
||||
; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
|
||||
; Same as v16si8_zext_in_reg, but instead of creating the mask
|
||||
; with a splat, loads it from memory.
|
||||
define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) {
|
||||
%b = trunc <8 x i16> %a to <8 x i8>
|
||||
%c = zext <8 x i8> %b to <8 x i16>
|
||||
ret <8 x i16> %c
|
||||
}
|
||||
; CHECK: v8si16_zext_in_reg:
|
||||
; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2)
|
||||
; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]]
|
||||
; CHECK-NEXT: vand 2, 2, [[VMASK]]
|
||||
|
||||
; Same as v16si8_sext_in_reg, expands to load halfword (lha) and
|
||||
; store words (stw).
|
||||
define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
|
||||
%b = trunc <4 x i32> %a to <4 x i16>
|
||||
%c = sext <4 x i16> %b to <4 x i32>
|
||||
ret <4 x i32> %c
|
||||
}
|
||||
; CHECK: v4si32_sext_in_reg:
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lha
|
||||
; CHECK: stw
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lha
|
||||
; CHECK: stw
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lha
|
||||
; CHECK: stw
|
||||
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: lha
|
||||
; CHECK: stw
|
||||
; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
|
||||
; Same as v16si8_zext_in_reg, but the mask is built with a splat and a shift.
|
||||
define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
|
||||
%b = trunc <4 x i32> %a to <4 x i16>
|
||||
%c = zext <4 x i16> %b to <4 x i32>
|
||||
ret <4 x i32> %c
|
||||
}
|
||||
; CHECK: v4si32_zext_in_reg:
|
||||
; CHECK: vspltisw [[VMASK:[0-9]+]], -16
|
||||
; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]]
|
||||
; CHECK-NEXT: vand 2, 2, [[VMASK]]
|
Loading…
Reference in New Issue