forked from OSchip/llvm-project
[Power9] Legalize and emit code for W vector extract and convert to QP
Implement patterns to extract an (un)signed word vector element and convert it to quad-precision. Differential Revision: https://reviews.llvm.org/D46536 llvm-svn: 333115
This commit is contained in:
parent
b1ba127aa8
commit
8b0da65bfb
|
@ -3152,21 +3152,38 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
|
||||
(f32 (DFLOADf32 ixaddr:$src))>;
|
||||
|
||||
// (Un)Signed DWord vector extract -> QP
|
||||
let Predicates = [IsBigEndian] in {
|
||||
def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
|
||||
(f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
|
||||
def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
|
||||
(f128 (XSCVSDQP
|
||||
(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
|
||||
(f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
|
||||
(f128 (XSCVUDQP
|
||||
(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
|
||||
}
|
||||
let Predicates = [IsBigEndian, HasP9Vector] in {
|
||||
|
||||
let Predicates = [IsLittleEndian] in {
|
||||
// (Un)Signed DWord vector extract -> QP
|
||||
def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
|
||||
(f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
|
||||
def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
|
||||
(f128 (XSCVSDQP
|
||||
(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
|
||||
(f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
|
||||
(f128 (XSCVUDQP
|
||||
(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
|
||||
|
||||
// (Un)Signed Word vector extract -> QP
|
||||
def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
|
||||
(f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
|
||||
foreach Idx = [0,2,3] in {
|
||||
def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
|
||||
(f128 (XSCVSDQP (EXTRACT_SUBREG
|
||||
(VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
|
||||
}
|
||||
foreach Idx = 0-3 in {
|
||||
def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
|
||||
(f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
|
||||
}
|
||||
|
||||
} // IsBigEndian, HasP9Vector
|
||||
|
||||
let Predicates = [IsLittleEndian, HasP9Vector] in {
|
||||
|
||||
// (Un)Signed DWord vector extract -> QP
|
||||
def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
|
||||
(f128 (XSCVSDQP
|
||||
(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
|
||||
|
@ -3177,7 +3194,22 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
|
||||
(f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
|
||||
}
|
||||
|
||||
// (Un)Signed Word vector extract -> QP
|
||||
foreach Idx = [[0,3],[1,2],[3,0]] in {
|
||||
def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
|
||||
(f128 (XSCVSDQP (EXTRACT_SUBREG
|
||||
(VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
|
||||
sub_64)))>;
|
||||
}
|
||||
def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
|
||||
(f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
|
||||
|
||||
foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
|
||||
def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
|
||||
(f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
|
||||
}
|
||||
} // IsLittleEndian, HasP9Vector
|
||||
|
||||
// Convert (Un)Signed DWord in memory -> QP
|
||||
def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
; RUN: -verify-machineinstrs -enable-ppc-quad-precision < %s | \
|
||||
; RUN: FileCheck %s -check-prefix=CHECK-BE
|
||||
|
||||
; Vector extract DWord and convert to quad precision.
|
||||
|
||||
@sdwVecMem = global <2 x i64> <i64 88, i64 99>, align 16
|
||||
@udwVecMem = global <2 x i64> <i64 88, i64 99>, align 16
|
||||
|
||||
|
@ -158,3 +160,182 @@ entry:
|
|||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Vector extract Word and convert to quad precision.
|
||||
|
||||
@swVecMem = global <4 x i32> <i32 88, i32 99, i32 100, i32 2>, align 16
|
||||
@uwVecMem = global <4 x i32> <i32 89, i32 89, i32 200, i32 3>, align 16
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @swVecConv2qp(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: swVecConv2qp:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vspltw 2, 2, 3
|
||||
; CHECK-NEXT: vextsw2d 2, 2
|
||||
; CHECK-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: swVecConv2qp:
|
||||
; CHECK-BE: vspltw 2, 2, 0
|
||||
; CHECK-BE-NEXT: vextsw2d 2, 2
|
||||
; CHECK-BE-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 0
|
||||
%conv = sitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @swVecConv2qp1(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: swVecConv2qp1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vspltw 2, 2, 2
|
||||
; CHECK-NEXT: vextsw2d 2, 2
|
||||
; CHECK-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: swVecConv2qp1:
|
||||
; CHECK-BE: vextsw2d 2, 2
|
||||
; CHECK-BE-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 1
|
||||
%conv = sitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @swVecConv2qp2(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: swVecConv2qp2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vextsw2d 2, 2
|
||||
; CHECK-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: swVecConv2qp2:
|
||||
; CHECK-BE: vspltw 2, 2, 2
|
||||
; CHECK-BE-NEXT: vextsw2d 2, 2
|
||||
; CHECK-BE-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 2
|
||||
%conv = sitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @swVecConv2qp3(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: swVecConv2qp3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vspltw 2, 2, 0
|
||||
; CHECK-NEXT: vextsw2d 2, 2
|
||||
; CHECK-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: swVecConv2qp3:
|
||||
; CHECK-BE: vspltw 2, 2, 3
|
||||
; CHECK-BE-NEXT: vextsw2d 2, 2
|
||||
; CHECK-BE-NEXT: xscvsdqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 3
|
||||
%conv = sitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @uwVecConv2qp(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: uwVecConv2qp:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxextractuw 34, 34, 12
|
||||
; CHECK-NEXT: xscvudqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: uwVecConv2qp:
|
||||
; CHECK-BE: xxextractuw 34, 34, 0
|
||||
; CHECK-BE-NEXT: xscvudqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 0
|
||||
%conv = uitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @uwVecConv2qp1(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: uwVecConv2qp1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxextractuw 34, 34, 8
|
||||
; CHECK-NEXT: xscvudqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: uwVecConv2qp1:
|
||||
; CHECK-BE: xxextractuw 34, 34, 4
|
||||
; CHECK-BE-NEXT: xscvudqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 1
|
||||
%conv = uitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @uwVecConv2qp2(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: uwVecConv2qp2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxextractuw 34, 34, 4
|
||||
; CHECK-NEXT: xscvudqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: uwVecConv2qp2:
|
||||
; CHECK-BE: xxextractuw 34, 34, 8
|
||||
; CHECK-BE-NEXT: xscvudqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 2
|
||||
%conv = uitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @uwVecConv2qp3(fp128* nocapture %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: uwVecConv2qp3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxextractuw 34, 34, 0
|
||||
; CHECK-NEXT: xscvudqp 2, 2
|
||||
; CHECK-NEXT: stxv 34, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
|
||||
; CHECK-BE-LABEL: uwVecConv2qp3:
|
||||
; CHECK-BE: xxextractuw 34, 34, 12
|
||||
; CHECK-BE-NEXT: xscvudqp 2, 2
|
||||
; CHECK-BE-NEXT: stxv 34, 0(3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %b, i32 3
|
||||
%conv = uitofp i32 %vecext to fp128
|
||||
store fp128 %conv, fp128* %a, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue