forked from OSchip/llvm-project
[X86] Rename VROUNDYPS* and VROUNDYPD* instructions to VROUNDPSY* and VROUNDPDY*. Fix itinerary mistake on all memory forms of VROUNDPD.
This makes the Y position consistent with other instructions. This should have been NFC (no functional change), but while refactoring the multiclass I noticed that the VROUNDPD memory forms were using the register itinerary instead of the memory itinerary. llvm-svn: 328254
This commit is contained in:
parent
b9d3d30e22
commit
40d3b32e12
|
@ -793,8 +793,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
|
||||
{ X86::VPTESTYrr, X86::VPTESTYrm, 0 },
|
||||
{ X86::VRCPPSYr, X86::VRCPPSYm, 0 },
|
||||
{ X86::VROUNDYPDr, X86::VROUNDYPDm, 0 },
|
||||
{ X86::VROUNDYPSr, X86::VROUNDYPSm, 0 },
|
||||
{ X86::VROUNDPDYr, X86::VROUNDPDYm, 0 },
|
||||
{ X86::VROUNDPSYr, X86::VROUNDPSYm, 0 },
|
||||
{ X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
|
||||
{ X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
|
||||
{ X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
|
||||
|
|
|
@ -5863,49 +5863,35 @@ let Predicates = [UseAVX] in {
|
|||
// SSE4.1 - Round Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass sse41_fp_unop_p<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
|
||||
def SSE_ROUNDPS : OpndItins<
|
||||
IIC_SSE_ROUNDPS_REG, IIC_SSE_ROUNDPS_MEM
|
||||
>;
|
||||
|
||||
def SSE_ROUNDPD : OpndItins<
|
||||
IIC_SSE_ROUNDPD_REG, IIC_SSE_ROUNDPD_MEM
|
||||
>;
|
||||
|
||||
multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
|
||||
X86MemOperand x86memop, RegisterClass RC,
|
||||
ValueType VT32, ValueType VT64,
|
||||
PatFrag mem_frag32, PatFrag mem_frag64,
|
||||
SDNode OpNode> {
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
ValueType VT, PatFrag mem_frag, SDNode OpNode,
|
||||
OpndItins itins> {
|
||||
// Intrinsic operation, reg.
|
||||
// Vector intrinsic operation, reg
|
||||
def PSr : SS4AIi8<opcps, MRMSrcReg,
|
||||
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (VT32 (OpNode RC:$src1, imm:$src2)))],
|
||||
IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>;
|
||||
def r : SS4AIi8<opc, MRMSrcReg,
|
||||
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))],
|
||||
itins.rr>, Sched<[WriteFAdd]>;
|
||||
|
||||
// Vector intrinsic operation, mem
|
||||
def PSm : SS4AIi8<opcps, MRMSrcMem,
|
||||
(outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(VT32 (OpNode (mem_frag32 addr:$src1),imm:$src2)))],
|
||||
IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>;
|
||||
} // ExeDomain = SSEPackedSingle
|
||||
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
// Vector intrinsic operation, reg
|
||||
def PDr : SS4AIi8<opcpd, MRMSrcReg,
|
||||
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (VT64 (OpNode RC:$src1, imm:$src2)))],
|
||||
IIC_SSE_ROUNDPD_REG>, Sched<[WriteFAdd]>;
|
||||
|
||||
// Vector intrinsic operation, mem
|
||||
def PDm : SS4AIi8<opcpd, MRMSrcMem,
|
||||
(outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(VT64 (OpNode (mem_frag64 addr:$src1),imm:$src2)))],
|
||||
IIC_SSE_ROUNDPD_REG>, Sched<[WriteFAddLd]>;
|
||||
} // ExeDomain = SSEPackedDouble
|
||||
def m : SS4AIi8<opc, MRMSrcMem,
|
||||
(outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(VT (OpNode (mem_frag addr:$src1),imm:$src2)))],
|
||||
itins.rm>, Sched<[WriteFAddLd]>;
|
||||
}
|
||||
|
||||
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
|
||||
|
@ -6026,13 +6012,24 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
|
|||
|
||||
// FP round - roundss, roundps, roundsd, roundpd
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
// Intrinsic form
|
||||
defm VROUND : sse41_fp_unop_p<0x08, 0x09, "vround", f128mem, VR128, v4f32,
|
||||
v2f64, loadv4f32, loadv2f64, X86VRndScale>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDY : sse41_fp_unop_p<0x08, 0x09, "vround", f256mem, VR256, v8f32,
|
||||
v4f64, loadv8f32, loadv4f64, X86VRndScale>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
// Intrinsic form
|
||||
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
|
||||
loadv4f32, X86VRndScale, SSE_ROUNDPS>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
|
||||
loadv8f32, X86VRndScale, SSE_ROUNDPS>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
|
||||
loadv2f64, X86VRndScale, SSE_ROUNDPD>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
|
||||
loadv4f64, X86VRndScale, SSE_ROUNDPD>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
}
|
||||
let Predicates = [HasAVX, NoAVX512] in {
|
||||
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", v4f32, v2f64,
|
||||
|
@ -6087,30 +6084,34 @@ let Predicates = [HasAVX, NoVLX] in {
|
|||
(VROUNDPDr VR128:$src, (i32 0xB))>;
|
||||
|
||||
def : Pat<(v8f32 (ffloor VR256:$src)),
|
||||
(VROUNDYPSr VR256:$src, (i32 0x9))>;
|
||||
(VROUNDPSYr VR256:$src, (i32 0x9))>;
|
||||
def : Pat<(v8f32 (fnearbyint VR256:$src)),
|
||||
(VROUNDYPSr VR256:$src, (i32 0xC))>;
|
||||
(VROUNDPSYr VR256:$src, (i32 0xC))>;
|
||||
def : Pat<(v8f32 (fceil VR256:$src)),
|
||||
(VROUNDYPSr VR256:$src, (i32 0xA))>;
|
||||
(VROUNDPSYr VR256:$src, (i32 0xA))>;
|
||||
def : Pat<(v8f32 (frint VR256:$src)),
|
||||
(VROUNDYPSr VR256:$src, (i32 0x4))>;
|
||||
(VROUNDPSYr VR256:$src, (i32 0x4))>;
|
||||
def : Pat<(v8f32 (ftrunc VR256:$src)),
|
||||
(VROUNDYPSr VR256:$src, (i32 0xB))>;
|
||||
(VROUNDPSYr VR256:$src, (i32 0xB))>;
|
||||
|
||||
def : Pat<(v4f64 (ffloor VR256:$src)),
|
||||
(VROUNDYPDr VR256:$src, (i32 0x9))>;
|
||||
(VROUNDPDYr VR256:$src, (i32 0x9))>;
|
||||
def : Pat<(v4f64 (fnearbyint VR256:$src)),
|
||||
(VROUNDYPDr VR256:$src, (i32 0xC))>;
|
||||
(VROUNDPDYr VR256:$src, (i32 0xC))>;
|
||||
def : Pat<(v4f64 (fceil VR256:$src)),
|
||||
(VROUNDYPDr VR256:$src, (i32 0xA))>;
|
||||
(VROUNDPDYr VR256:$src, (i32 0xA))>;
|
||||
def : Pat<(v4f64 (frint VR256:$src)),
|
||||
(VROUNDYPDr VR256:$src, (i32 0x4))>;
|
||||
(VROUNDPDYr VR256:$src, (i32 0x4))>;
|
||||
def : Pat<(v4f64 (ftrunc VR256:$src)),
|
||||
(VROUNDYPDr VR256:$src, (i32 0xB))>;
|
||||
(VROUNDPDYr VR256:$src, (i32 0xB))>;
|
||||
}
|
||||
|
||||
defm ROUND : sse41_fp_unop_p<0x08, 0x09, "round", f128mem, VR128, v4f32, v2f64,
|
||||
memopv4f32, memopv2f64, X86VRndScale>;
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
|
||||
memopv4f32, X86VRndScale, SSE_ROUNDPS>;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
|
||||
memopv2f64, X86VRndScale, SSE_ROUNDPD>;
|
||||
|
||||
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round">;
|
||||
|
||||
|
|
|
@ -1742,8 +1742,8 @@ def: InstRW<[BWWriteResGroup58], (instregex "LD_F32m",
|
|||
"VROUNDPSr",
|
||||
"VROUNDSDr",
|
||||
"VROUNDSSr",
|
||||
"VROUNDYPDr",
|
||||
"VROUNDYPSr")>;
|
||||
"VROUNDPDYr",
|
||||
"VROUNDPSYr")>;
|
||||
|
||||
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
|
||||
let Latency = 6;
|
||||
|
@ -3014,8 +3014,8 @@ def: InstRW<[BWWriteResGroup135], (instregex "ADD_FI16m",
|
|||
"SUBR_FI32m",
|
||||
"SUB_FI16m",
|
||||
"SUB_FI32m",
|
||||
"VROUNDYPDm",
|
||||
"VROUNDYPSm")>;
|
||||
"VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
|
||||
def BWWriteResGroup136 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
|
||||
let Latency = 12;
|
||||
|
|
|
@ -555,8 +555,8 @@ def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm",
|
|||
"(V?)MOVUPSrm",
|
||||
"VPBROADCASTDrm",
|
||||
"VPBROADCASTQrm",
|
||||
"(V?)ROUND(Y?)PDr",
|
||||
"(V?)ROUND(Y?)PSr",
|
||||
"(V?)ROUNDPD(Y?)r",
|
||||
"(V?)ROUNDPS(Y?)r",
|
||||
"(V?)ROUNDSDr",
|
||||
"(V?)ROUNDSSr")>;
|
||||
|
||||
|
@ -2787,8 +2787,8 @@ def: InstRW<[HWWriteResGroup103], (instregex "ADD_FI16m",
|
|||
"SUBR_FI32m",
|
||||
"SUB_FI16m",
|
||||
"SUB_FI32m",
|
||||
"VROUNDYPDm",
|
||||
"VROUNDYPSm")>;
|
||||
"VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
|
||||
def HWWriteResGroup103_1 : SchedWriteRes<[HWPort1,HWPort23]> {
|
||||
let Latency = 12;
|
||||
|
|
|
@ -757,8 +757,8 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0",
|
|||
"(V?)ROUNDPSr",
|
||||
"(V?)ROUNDSDr",
|
||||
"(V?)ROUNDSSr",
|
||||
"VROUNDYPDr",
|
||||
"VROUNDYPSr",
|
||||
"VROUNDPDYr",
|
||||
"VROUNDPSYr",
|
||||
"VSUBPDYrr",
|
||||
"(V?)SUBPDrr",
|
||||
"VSUBPSYrr",
|
||||
|
@ -1853,8 +1853,8 @@ def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m",
|
|||
"VMAX(C?)PSYrm",
|
||||
"VMIN(C?)PDYrm",
|
||||
"VMIN(C?)PSYrm",
|
||||
"VROUNDYPDm",
|
||||
"VROUNDYPSm",
|
||||
"VROUNDPDYm",
|
||||
"VROUNDPSYm",
|
||||
"VSUBPDYrm",
|
||||
"VSUBPSYrm")>;
|
||||
|
||||
|
|
|
@ -2406,8 +2406,8 @@ def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPDr",
|
|||
"VROUNDPSr",
|
||||
"VROUNDSDr",
|
||||
"VROUNDSSr",
|
||||
"VROUNDYPDr",
|
||||
"VROUNDYPSr")>;
|
||||
"VROUNDPDYr",
|
||||
"VROUNDPSYr")>;
|
||||
|
||||
def SKLWriteResGroup105_2 : SchedWriteRes<[SKLPort01]> {
|
||||
let Latency = 10;
|
||||
|
@ -3231,8 +3231,8 @@ def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
|
|||
let NumMicroOps = 3;
|
||||
let ResourceCycles = [1,2];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPDm",
|
||||
"VROUNDYPSm")>;
|
||||
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
|
||||
def SKLWriteResGroup172_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
|
||||
let Latency = 17;
|
||||
|
|
|
@ -3876,8 +3876,8 @@ def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPDr",
|
|||
"VROUNDPSr",
|
||||
"VROUNDSDr",
|
||||
"VROUNDSSr",
|
||||
"VROUNDYPDr",
|
||||
"VROUNDYPSr")>;
|
||||
"VROUNDPDYr",
|
||||
"VROUNDPSYr")>;
|
||||
|
||||
def SKXWriteResGroup116_2 : SchedWriteRes<[SKXPort015]> {
|
||||
let Latency = 10;
|
||||
|
@ -5555,8 +5555,8 @@ def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i(k?)(z?)",
|
|||
"VRNDSCALEPDZrm(b?)i(k?)(z?)",
|
||||
"VRNDSCALEPSZ256rm(b?)i(k?)(z?)",
|
||||
"VRNDSCALEPSZrm(b?)i(k?)(z?)",
|
||||
"VROUNDYPDm",
|
||||
"VROUNDYPSm")>;
|
||||
"VROUNDPDYm",
|
||||
"VROUNDPSYm")>;
|
||||
|
||||
def SKXWriteResGroup192_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
|
||||
let Latency = 17;
|
||||
|
|
|
@ -599,7 +599,7 @@ def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
|
|||
}
|
||||
def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
|
||||
VCVTPS2DQYrr, VCVTTPS2DQYrr,
|
||||
VROUNDYPDr, VROUNDYPSr)>;
|
||||
VROUNDPDYr, VROUNDPSYr)>;
|
||||
|
||||
def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
|
||||
let Latency = 8;
|
||||
|
@ -607,7 +607,7 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
|
|||
}
|
||||
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
|
||||
VCVTPS2DQYrm, VCVTTPS2DQYrm,
|
||||
VROUNDYPDm, VROUNDYPSm)>;
|
||||
VROUNDPDYm, VROUNDPSYm)>;
|
||||
|
||||
def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
||||
let Latency = 2;
|
||||
|
|
|
@ -4132,8 +4132,8 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
|
|||
;
|
||||
; ZNVER1-LABEL: test_roundpd:
|
||||
; ZNVER1: # %bb.0:
|
||||
; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00]
|
||||
; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00]
|
||||
; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
|
||||
; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; ZNVER1-NEXT: retq # sched: [1:0.50]
|
||||
%1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
|
||||
|
@ -4196,8 +4196,8 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
|
|||
;
|
||||
; ZNVER1-LABEL: test_roundps:
|
||||
; ZNVER1: # %bb.0:
|
||||
; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [11:1.00]
|
||||
; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:1.00]
|
||||
; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
|
||||
; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; ZNVER1-NEXT: retq # sched: [1:0.50]
|
||||
%1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
|
||||
|
|
|
@ -169,10 +169,10 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
|
|||
{"VRNDSCALEPDZ128rmi", "VROUNDPDm", true},
|
||||
{"VRNDSCALEPSZ128rri", "VROUNDPSr", true},
|
||||
{"VRNDSCALEPSZ128rmi", "VROUNDPSm", true},
|
||||
{"VRNDSCALEPDZ256rri", "VROUNDYPDr", false},
|
||||
{"VRNDSCALEPDZ256rmi", "VROUNDYPDm", false},
|
||||
{"VRNDSCALEPSZ256rri", "VROUNDYPSr", false},
|
||||
{"VRNDSCALEPSZ256rmi", "VROUNDYPSm", false},
|
||||
{"VRNDSCALEPDZ256rri", "VROUNDPDYr", false},
|
||||
{"VRNDSCALEPDZ256rmi", "VROUNDPDYm", false},
|
||||
{"VRNDSCALEPSZ256rri", "VROUNDPSYr", false},
|
||||
{"VRNDSCALEPSZ256rmi", "VROUNDPSYm", false},
|
||||
{"VRNDSCALESDr", "VROUNDSDr", true},
|
||||
{"VRNDSCALESDm", "VROUNDSDm", true},
|
||||
{"VRNDSCALESSr", "VROUNDSSr", true},
|
||||
|
|
Loading…
Reference in New Issue