forked from OSchip/llvm-project
[PowerPC] [ISEL] select x-form instruction for unaligned offset
Differential Revision: https://reviews.llvm.org/D62173 llvm-svn: 361346
This commit is contained in:
parent
b372259ace
commit
9970665f60
|
@ -218,13 +218,6 @@ namespace {
|
|||
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
const SDLoc &dl);
|
||||
|
||||
/// SelectAddrImm - Returns true if the address N can be represented by
|
||||
/// a base register plus a signed 16-bit displacement [r+imm].
|
||||
bool SelectAddrImm(SDValue N, SDValue &Disp,
|
||||
SDValue &Base) {
|
||||
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
|
||||
}
|
||||
|
||||
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
|
||||
/// immediate field. Note that the operand at this point is already the
|
||||
/// result of a prior SelectAddressRegImm call.
|
||||
|
@ -238,26 +231,61 @@ namespace {
|
|||
return false;
|
||||
}
|
||||
|
||||
/// SelectAddrIdx - Given the specified addressed, check to see if it can be
|
||||
/// represented as an indexed [r+r] operation. Returns false if it can
|
||||
/// be represented by [r+imm], which are preferred.
|
||||
/// SelectAddrIdx - Given the specified address, check to see if it can be
|
||||
/// represented as an indexed [r+r] operation.
|
||||
/// This is for xform instructions whose associated displacement form is D.
|
||||
/// The last parameter \p 0 means associated D form has no requirement for 16
|
||||
/// bit signed displacement.
|
||||
/// Returns false if it can be represented by [r+imm], which are preferred.
|
||||
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
|
||||
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
|
||||
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
|
||||
}
|
||||
|
||||
/// SelectAddrIdxOnly - Given the specified addressed, force it to be
|
||||
/// SelectAddrIdxX4 - Given the specified address, check to see if it can be
|
||||
/// represented as an indexed [r+r] operation.
|
||||
/// This is for xform instructions whose associated displacement form is DS.
|
||||
/// The last parameter \p 4 means associated DS form 16 bit signed
|
||||
/// displacement must be a multiple of 4.
|
||||
/// Returns false if it can be represented by [r+imm], which are preferred.
|
||||
bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
|
||||
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
|
||||
}
|
||||
|
||||
/// SelectAddrIdxX16 - Given the specified address, check to see if it can be
|
||||
/// represented as an indexed [r+r] operation.
|
||||
/// This is for xform instructions whose associated displacement form is DQ.
|
||||
/// The last parameter \p 16 means associated DQ form 16 bit signed
|
||||
/// displacement must be a multiple of 16.
|
||||
/// Returns false if it can be represented by [r+imm], which are preferred.
|
||||
bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
|
||||
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
|
||||
}
|
||||
|
||||
/// SelectAddrIdxOnly - Given the specified address, force it to be
|
||||
/// represented as an indexed [r+r] operation.
|
||||
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
|
||||
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
|
||||
}
|
||||
|
||||
/// SelectAddrImm - Returns true if the address N can be represented by
|
||||
/// a base register plus a signed 16-bit displacement [r+imm].
|
||||
/// The last parameter \p 0 means D form has no requirement for 16 bit signed
|
||||
/// displacement.
|
||||
bool SelectAddrImm(SDValue N, SDValue &Disp,
|
||||
SDValue &Base) {
|
||||
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
|
||||
}
|
||||
|
||||
/// SelectAddrImmX4 - Returns true if the address N can be represented by
|
||||
/// a base register plus a signed 16-bit displacement that is a multiple of 4.
|
||||
/// Suitable for use by STD and friends.
|
||||
/// a base register plus a signed 16-bit displacement that is a multiple of
|
||||
/// 4 (last parameter). Suitable for use by STD and friends.
|
||||
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
|
||||
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
|
||||
}
|
||||
|
||||
/// SelectAddrImmX16 - Returns true if the address N can be represented by
|
||||
/// a base register plus a signed 16-bit displacement that is a multiple of
|
||||
/// 16 (last parameter). Suitable for use by STXV and friends.
|
||||
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
|
||||
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
|
||||
}
|
||||
|
|
|
@ -2219,14 +2219,18 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
|
|||
|
||||
/// SelectAddressRegReg - Given the specified address, check to see if it
|
||||
/// can be represented as an indexed [r+r] operation. Returns false if it
|
||||
/// can be more efficiently represented with [r+imm].
|
||||
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
|
||||
/// non-zero and N can be represented by a base register plus a signed 16-bit
|
||||
/// displacement, make a more precise judgement by checking (displacement % \p
|
||||
/// EncodingAlignment).
|
||||
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
|
||||
SDValue &Index,
|
||||
SelectionDAG &DAG) const {
|
||||
SDValue &Index, SelectionDAG &DAG,
|
||||
unsigned EncodingAlignment) const {
|
||||
int16_t imm = 0;
|
||||
if (N.getOpcode() == ISD::ADD) {
|
||||
if (isIntS16Immediate(N.getOperand(1), imm))
|
||||
return false; // r+i
|
||||
if (isIntS16Immediate(N.getOperand(1), imm) &&
|
||||
(!EncodingAlignment || !(imm % EncodingAlignment)))
|
||||
return false; // r+i
|
||||
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
|
||||
return false; // r+i
|
||||
|
||||
|
@ -2234,8 +2238,9 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
|
|||
Index = N.getOperand(1);
|
||||
return true;
|
||||
} else if (N.getOpcode() == ISD::OR) {
|
||||
if (isIntS16Immediate(N.getOperand(1), imm))
|
||||
return false; // r+i can fold it if we can.
|
||||
if (isIntS16Immediate(N.getOperand(1), imm) &&
|
||||
(!EncodingAlignment || !(imm % EncodingAlignment)))
|
||||
return false; // r+i can fold it if we can.
|
||||
|
||||
// If this is an or of disjoint bitfields, we can codegen this as an add
|
||||
// (for better address arithmetic) if the LHS and RHS of the OR are provably
|
||||
|
@ -2308,7 +2313,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
|
|||
// FIXME dl should come from parent load or store, not from address
|
||||
SDLoc dl(N);
|
||||
// If this can be more profitably realized as r+r, fail.
|
||||
if (SelectAddressRegReg(N, Disp, Base, DAG))
|
||||
if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
|
||||
return false;
|
||||
|
||||
if (N.getOpcode() == ISD::ADD) {
|
||||
|
|
|
@ -660,17 +660,21 @@ namespace llvm {
|
|||
SelectionDAG &DAG) const override;
|
||||
|
||||
/// SelectAddressRegReg - Given the specified address, check to see if it
|
||||
/// can be represented as an indexed [r+r] operation. Returns false if it
|
||||
/// can be more efficiently represented with [r+imm].
|
||||
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
|
||||
/// is non-zero, only accept displacement which is not suitable for [r+imm].
|
||||
/// Returns false if it can be represented by [r+imm], which are preferred.
|
||||
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
|
||||
SelectionDAG &DAG) const;
|
||||
SelectionDAG &DAG,
|
||||
unsigned EncodingAlignment = 0) const;
|
||||
|
||||
/// SelectAddressRegImm - Returns true if the address N can be represented
|
||||
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
|
||||
/// is not better represented as reg+reg. If Aligned is true, only accept
|
||||
/// displacements suitable for STD and friends, i.e. multiples of 4.
|
||||
/// is not better represented as reg+reg. If \p EncodingAlignment is
|
||||
/// non-zero, only accept displacements suitable for instruction encoding
|
||||
/// requirement, i.e. multiples of 4 for DS form.
|
||||
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
|
||||
SelectionDAG &DAG, unsigned Alignment) const;
|
||||
SelectionDAG &DAG,
|
||||
unsigned EncodingAlignment) const;
|
||||
|
||||
/// SelectAddressRegRegOnly - Given the specified address, force it to be
|
||||
/// represented as an indexed [r+r] operation.
|
||||
|
|
|
@ -927,7 +927,7 @@ def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
|
|||
PPC970_DGroup_Cracked;
|
||||
def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
|
||||
"lwax $rD, $src", IIC_LdStLHA,
|
||||
[(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
|
||||
[(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
|
||||
PPC970_DGroup_Cracked;
|
||||
// For fast-isel:
|
||||
let isCodeGenOnly = 1, mayLoad = 1 in {
|
||||
|
@ -1052,7 +1052,7 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
|
|||
|
||||
def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
|
||||
"ldx $rD, $src", IIC_LdStLD,
|
||||
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
|
||||
[(set i64:$rD, (load xaddrX4:$src))]>, isPPC64;
|
||||
def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
|
||||
"ldbrx $rD, $src", IIC_LdStLoad,
|
||||
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
|
||||
|
@ -1224,7 +1224,7 @@ def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
|
|||
[(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
|
||||
def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
|
||||
"stdx $rS, $dst", IIC_LdStSTD,
|
||||
[(store i64:$rS, xaddr:$dst)]>, isPPC64,
|
||||
[(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
|
||||
PPC970_DGroup_Cracked;
|
||||
def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
|
||||
"stdbrx $rS, $dst", IIC_LdStStore,
|
||||
|
@ -1441,10 +1441,10 @@ def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
|
|||
|
||||
// 64-bits atomic loads and stores
|
||||
def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
|
||||
def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
|
||||
def : Pat<(atomic_load_64 xaddrX4:$src), (LDX memrr:$src)>;
|
||||
|
||||
def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
|
||||
def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
|
||||
def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
|
||||
|
||||
let Predicates = [IsISA3_0] in {
|
||||
|
||||
|
|
|
@ -893,11 +893,24 @@ def pred : Operand<OtherVT> {
|
|||
}
|
||||
|
||||
// Define PowerPC specific addressing mode.
|
||||
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
|
||||
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
|
||||
|
||||
// d-form
|
||||
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
|
||||
// ds-form
|
||||
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
|
||||
// dq-form
|
||||
def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
|
||||
|
||||
// Below forms are all x-form addressing mode, use three different ones so we
|
||||
// can make an accurate check for x-form instructions in ISEL.
|
||||
// x-form addressing mode whose associated displacement form is D.
|
||||
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; // "stbx"
|
||||
// x-form addressing mode whose associated displacement form is DS.
|
||||
def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4", [], []>; // "stdx"
|
||||
// x-form addressing mode whose associated displacement form is DQ.
|
||||
def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16", [], []>; // "stxvx"
|
||||
|
||||
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
|
||||
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
|
||||
def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
|
||||
|
||||
// The address in a single register. This is used with the SjLj
|
||||
// pseudo-instructions.
|
||||
|
|
|
@ -2857,7 +2857,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
|
||||
// Load Vector Indexed
|
||||
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
|
||||
[(set v2f64:$XT, (load xaddr:$src))]>;
|
||||
[(set v2f64:$XT, (load xaddrX16:$src))]>;
|
||||
// Load Vector (Left-justified) with Length
|
||||
def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
|
||||
"lxvl $XT, $src, $rB", IIC_LdStLoad,
|
||||
|
@ -2905,7 +2905,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
|
||||
// Store Vector Indexed
|
||||
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
|
||||
[(store v2f64:$XT, xaddr:$dst)]>;
|
||||
[(store v2f64:$XT, xaddrX16:$dst)]>;
|
||||
|
||||
// Store Vector (Left-justified) with Length
|
||||
def STXVL : XX1Form_memOp<31, 397, (outs),
|
||||
|
@ -3293,26 +3293,26 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
|
||||
(v2i64 (XXPERMDIs
|
||||
(COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
|
||||
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
|
||||
(v2i64 (XXPERMDIs
|
||||
(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
|
||||
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
|
||||
|
||||
def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
|
||||
(v2f64 (XXPERMDIs
|
||||
(COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
|
||||
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
|
||||
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
|
||||
(v2f64 (XXPERMDIs
|
||||
(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
|
||||
(COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
|
||||
sub_64), xaddr:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
|
||||
sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
|
||||
sub_64), xaddr:$src)>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
|
||||
sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
|
||||
(DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
|
||||
sub_64), ixaddr:$src)>;
|
||||
|
@ -3328,23 +3328,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
let Predicates = [IsBigEndian, HasP9Vector] in {
|
||||
def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
|
||||
(v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
|
||||
(v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
|
||||
(v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
|
||||
|
||||
def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
|
||||
(v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
|
||||
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
|
||||
(v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
|
||||
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
|
||||
(v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
|
||||
sub_64), xaddr:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
|
||||
sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
|
||||
sub_64), xaddr:$src)>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
|
||||
sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
|
||||
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
|
||||
def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
|
||||
(DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
|
||||
sub_64), ixaddr:$src)>;
|
||||
|
@ -3491,12 +3491,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
} // IsLittleEndian, HasP9Vector
|
||||
|
||||
// Convert (Un)Signed DWord in memory -> QP
|
||||
def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
|
||||
(f128 (XSCVSDQP (LXSDX xaddr:$src)))>;
|
||||
def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
|
||||
(f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
|
||||
def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))),
|
||||
(f128 (XSCVSDQP (LXSD ixaddr:$src)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))),
|
||||
(f128 (XSCVUDQP (LXSDX xaddr:$src)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
|
||||
(f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
|
||||
def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))),
|
||||
(f128 (XSCVUDQP (LXSD ixaddr:$src)))>;
|
||||
|
||||
|
@ -3519,9 +3519,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
// Instructions for store(fptosi).
|
||||
// The 8-byte version is repeated here due to availability of D-Form STXSD.
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8),
|
||||
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
|
||||
(STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
|
||||
xaddr:$dst)>;
|
||||
xaddrX4:$dst)>;
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8),
|
||||
(STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
|
||||
|
@ -3536,8 +3536,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
|
||||
(STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8),
|
||||
(STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>;
|
||||
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
|
||||
(STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8),
|
||||
(STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
|
||||
|
@ -3550,9 +3550,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
|
||||
// Instructions for store(fptoui).
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8),
|
||||
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
|
||||
(STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
|
||||
xaddr:$dst)>;
|
||||
xaddrX4:$dst)>;
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8),
|
||||
(STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
|
||||
|
@ -3567,8 +3567,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
|
||||
(STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8),
|
||||
(STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>;
|
||||
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
|
||||
(STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
|
||||
def : Pat<(PPCstore_scal_int_from_vsr
|
||||
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8),
|
||||
(STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
|
||||
|
|
|
@ -4226,8 +4226,8 @@ define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %e
|
|||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: sldi r4, r4, 3
|
||||
; P9BE-NEXT: add r3, r3, r4
|
||||
; P9BE-NEXT: addi r3, r3, -8
|
||||
; P9BE-NEXT: lxvx v2, 0, r3
|
||||
; P9BE-NEXT: li r4, -8
|
||||
; P9BE-NEXT: lxvx v2, r3, r4
|
||||
; P9BE-NEXT: xxswapd v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
|
@ -4235,8 +4235,8 @@ define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %e
|
|||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: sldi r4, r4, 3
|
||||
; P9LE-NEXT: add r3, r3, r4
|
||||
; P9LE-NEXT: addi r3, r3, -8
|
||||
; P9LE-NEXT: lxvx v2, 0, r3
|
||||
; P9LE-NEXT: li r4, -8
|
||||
; P9LE-NEXT: lxvx v2, r3, r4
|
||||
; P9LE-NEXT: xxswapd v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
|
@ -5030,8 +5030,8 @@ define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32
|
|||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: sldi r4, r4, 3
|
||||
; P9BE-NEXT: add r3, r3, r4
|
||||
; P9BE-NEXT: addi r3, r3, -8
|
||||
; P9BE-NEXT: lxvx vs0, 0, r3
|
||||
; P9BE-NEXT: li r4, -8
|
||||
; P9BE-NEXT: lxvx vs0, r3, r4
|
||||
; P9BE-NEXT: xxswapd vs0, vs0
|
||||
; P9BE-NEXT: xvcvdpsxds v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
|
@ -5040,8 +5040,8 @@ define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32
|
|||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: sldi r4, r4, 3
|
||||
; P9LE-NEXT: add r3, r3, r4
|
||||
; P9LE-NEXT: addi r3, r3, -8
|
||||
; P9LE-NEXT: lxvx vs0, 0, r3
|
||||
; P9LE-NEXT: li r4, -8
|
||||
; P9LE-NEXT: lxvx vs0, r3, r4
|
||||
; P9LE-NEXT: xxswapd vs0, vs0
|
||||
; P9LE-NEXT: xvcvdpsxds v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
|
@ -5473,8 +5473,8 @@ define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %
|
|||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: sldi r4, r4, 3
|
||||
; P9BE-NEXT: add r3, r3, r4
|
||||
; P9BE-NEXT: addi r3, r3, -8
|
||||
; P9BE-NEXT: lxvx v2, 0, r3
|
||||
; P9BE-NEXT: li r4, -8
|
||||
; P9BE-NEXT: lxvx v2, r3, r4
|
||||
; P9BE-NEXT: xxswapd v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
|
@ -5482,8 +5482,8 @@ define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %
|
|||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: sldi r4, r4, 3
|
||||
; P9LE-NEXT: add r3, r3, r4
|
||||
; P9LE-NEXT: addi r3, r3, -8
|
||||
; P9LE-NEXT: lxvx v2, 0, r3
|
||||
; P9LE-NEXT: li r4, -8
|
||||
; P9LE-NEXT: lxvx v2, r3, r4
|
||||
; P9LE-NEXT: xxswapd v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
|
@ -6277,8 +6277,8 @@ define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32
|
|||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: sldi r4, r4, 3
|
||||
; P9BE-NEXT: add r3, r3, r4
|
||||
; P9BE-NEXT: addi r3, r3, -8
|
||||
; P9BE-NEXT: lxvx vs0, 0, r3
|
||||
; P9BE-NEXT: li r4, -8
|
||||
; P9BE-NEXT: lxvx vs0, r3, r4
|
||||
; P9BE-NEXT: xxswapd vs0, vs0
|
||||
; P9BE-NEXT: xvcvdpuxds v2, vs0
|
||||
; P9BE-NEXT: blr
|
||||
|
@ -6287,8 +6287,8 @@ define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32
|
|||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: sldi r4, r4, 3
|
||||
; P9LE-NEXT: add r3, r3, r4
|
||||
; P9LE-NEXT: addi r3, r3, -8
|
||||
; P9LE-NEXT: lxvx vs0, 0, r3
|
||||
; P9LE-NEXT: li r4, -8
|
||||
; P9LE-NEXT: lxvx vs0, r3, r4
|
||||
; P9LE-NEXT: xxswapd vs0, vs0
|
||||
; P9LE-NEXT: xvcvdpuxds v2, vs0
|
||||
; P9LE-NEXT: blr
|
||||
|
|
|
@ -23,9 +23,9 @@ entry:
|
|||
define i64 @test_xaddrX4(i8* %p) {
|
||||
; CHECK-LABEL: test_xaddrX4:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: li r4, 3
|
||||
; CHECK-NEXT: std r3, -8(r1)
|
||||
; CHECK-NEXT: addi r3, r3, 3
|
||||
; CHECK-NEXT: ld r3, 0(r3)
|
||||
; CHECK-NEXT: ldx r3, r3, r4
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%p.addr = alloca i8*, align 8
|
||||
|
@ -41,8 +41,8 @@ entry:
|
|||
define <2 x double> @test_xaddrX16(double* %arr) {
|
||||
; CHECK-LABEL: test_xaddrX16:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addi r3, r3, 40
|
||||
; CHECK-NEXT: lxvx vs34, 0, r3
|
||||
; CHECK-NEXT: li r4, 40
|
||||
; CHECK-NEXT: lxvx vs34, r3, r4
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%arrayidx1 = getelementptr inbounds double, double* %arr, i64 5
|
||||
|
@ -79,12 +79,13 @@ define i64 @test_xaddrX4_loop(i8* %p) {
|
|||
; CHECK-NEXT: li r3, 8
|
||||
; CHECK-NEXT: mtctr r3
|
||||
; CHECK-NEXT: li r3, 0
|
||||
; CHECK-NEXT: .p2align 5
|
||||
; CHECK-NEXT: li r5, 3
|
||||
; The number of instructions in the loop changed from 5 to 4, so its alignment changed from 5 to 4.
|
||||
; CHECK-NEXT: .p2align 4
|
||||
; CHECK-NEXT: .LBB4_1: # %for.body
|
||||
; CHECK: ldu r5, 8(r4)
|
||||
; CHECK-NEXT: addi r6, r4, 3
|
||||
; CHECK-NEXT: ld r6, 0(r6)
|
||||
; CHECK-NEXT: maddld r3, r6, r5, r3
|
||||
; CHECK: ldu r6, 8(r4)
|
||||
; CHECK-NEXT: ldx r7, r4, r5
|
||||
; CHECK-NEXT: maddld r3, r7, r6, r3
|
||||
; CHECK-NEXT: bdnz .LBB4_1
|
||||
; CHECK-NEXT: # %bb.2: # %for.end
|
||||
; CHECK-NEXT: blr
|
||||
|
|
Loading…
Reference in New Issue