[X86] Rename VROUNDYPS* and VROUNDYPD* instructions to VROUNDPSY* and VROUNDPDY*. Fix itinerary mistake on all memory forms of VROUNDPD

This places the Y (256-bit) marker after the PS/PD suffix, making the naming consistent with other instructions (e.g. VSQRTPSYr, VRCPPSYr).

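This should have been NFC (no functional change), but while refactoring the multiclass I noticed that the VROUNDPD memory forms were using the register itinerary (IIC_SSE_ROUNDPD_REG) instead of the memory itinerary (IIC_SSE_ROUNDPD_MEM), so that is fixed here as well.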

llvm-svn: 328254
This commit is contained in:
Craig Topper 2018-03-22 21:55:20 +00:00
parent b9d3d30e22
commit 40d3b32e12
10 changed files with 90 additions and 89 deletions

View File

@ -793,8 +793,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
{ X86::VPTESTYrr, X86::VPTESTYrm, 0 },
{ X86::VRCPPSYr, X86::VRCPPSYm, 0 },
{ X86::VROUNDYPDr, X86::VROUNDYPDm, 0 },
{ X86::VROUNDYPSr, X86::VROUNDYPSm, 0 },
{ X86::VROUNDPDYr, X86::VROUNDPDYm, 0 },
{ X86::VROUNDPSYr, X86::VROUNDPSYm, 0 },
{ X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
{ X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
{ X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },

View File

@ -5863,49 +5863,35 @@ let Predicates = [UseAVX] in {
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
multiclass sse41_fp_unop_p<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
def SSE_ROUNDPS : OpndItins<
IIC_SSE_ROUNDPS_REG, IIC_SSE_ROUNDPS_MEM
>;
def SSE_ROUNDPD : OpndItins<
IIC_SSE_ROUNDPD_REG, IIC_SSE_ROUNDPD_MEM
>;
multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
ValueType VT32, ValueType VT64,
PatFrag mem_frag32, PatFrag mem_frag64,
SDNode OpNode> {
let ExeDomain = SSEPackedSingle in {
ValueType VT, PatFrag mem_frag, SDNode OpNode,
OpndItins itins> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (VT32 (OpNode RC:$src1, imm:$src2)))],
IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>;
def r : SS4AIi8<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))],
itins.rr>, Sched<[WriteFAdd]>;
// Vector intrinsic operation, mem
def PSm : SS4AIi8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(VT32 (OpNode (mem_frag32 addr:$src1),imm:$src2)))],
IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedSingle
let ExeDomain = SSEPackedDouble in {
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (VT64 (OpNode RC:$src1, imm:$src2)))],
IIC_SSE_ROUNDPD_REG>, Sched<[WriteFAdd]>;
// Vector intrinsic operation, mem
def PDm : SS4AIi8<opcpd, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(VT64 (OpNode (mem_frag64 addr:$src1),imm:$src2)))],
IIC_SSE_ROUNDPD_REG>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedDouble
def m : SS4AIi8<opc, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(VT (OpNode (mem_frag addr:$src1),imm:$src2)))],
itins.rm>, Sched<[WriteFAddLd]>;
}
multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
@ -6026,13 +6012,24 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX, NoVLX] in {
// Intrinsic form
defm VROUND : sse41_fp_unop_p<0x08, 0x09, "vround", f128mem, VR128, v4f32,
v2f64, loadv4f32, loadv2f64, X86VRndScale>,
VEX, VEX_WIG;
defm VROUNDY : sse41_fp_unop_p<0x08, 0x09, "vround", f256mem, VR256, v8f32,
v4f64, loadv8f32, loadv4f64, X86VRndScale>,
VEX, VEX_L, VEX_WIG;
let ExeDomain = SSEPackedSingle in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
loadv4f32, X86VRndScale, SSE_ROUNDPS>,
VEX, VEX_WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
loadv8f32, X86VRndScale, SSE_ROUNDPS>,
VEX, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
loadv2f64, X86VRndScale, SSE_ROUNDPD>,
VEX, VEX_WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
loadv4f64, X86VRndScale, SSE_ROUNDPD>,
VEX, VEX_L, VEX_WIG;
}
}
let Predicates = [HasAVX, NoAVX512] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", v4f32, v2f64,
@ -6087,30 +6084,34 @@ let Predicates = [HasAVX, NoVLX] in {
(VROUNDPDr VR128:$src, (i32 0xB))>;
def : Pat<(v8f32 (ffloor VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x9))>;
(VROUNDPSYr VR256:$src, (i32 0x9))>;
def : Pat<(v8f32 (fnearbyint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0xC))>;
(VROUNDPSYr VR256:$src, (i32 0xC))>;
def : Pat<(v8f32 (fceil VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0xA))>;
(VROUNDPSYr VR256:$src, (i32 0xA))>;
def : Pat<(v8f32 (frint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x4))>;
(VROUNDPSYr VR256:$src, (i32 0x4))>;
def : Pat<(v8f32 (ftrunc VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0xB))>;
(VROUNDPSYr VR256:$src, (i32 0xB))>;
def : Pat<(v4f64 (ffloor VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x9))>;
(VROUNDPDYr VR256:$src, (i32 0x9))>;
def : Pat<(v4f64 (fnearbyint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0xC))>;
(VROUNDPDYr VR256:$src, (i32 0xC))>;
def : Pat<(v4f64 (fceil VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0xA))>;
(VROUNDPDYr VR256:$src, (i32 0xA))>;
def : Pat<(v4f64 (frint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x4))>;
(VROUNDPDYr VR256:$src, (i32 0x4))>;
def : Pat<(v4f64 (ftrunc VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0xB))>;
(VROUNDPDYr VR256:$src, (i32 0xB))>;
}
defm ROUND : sse41_fp_unop_p<0x08, 0x09, "round", f128mem, VR128, v4f32, v2f64,
memopv4f32, memopv2f64, X86VRndScale>;
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
memopv4f32, X86VRndScale, SSE_ROUNDPS>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
memopv2f64, X86VRndScale, SSE_ROUNDPD>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round">;

View File

@ -1742,8 +1742,8 @@ def: InstRW<[BWWriteResGroup58], (instregex "LD_F32m",
"VROUNDPSr",
"VROUNDSDr",
"VROUNDSSr",
"VROUNDYPDr",
"VROUNDYPSr")>;
"VROUNDPDYr",
"VROUNDPSYr")>;
def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 6;
@ -3014,8 +3014,8 @@ def: InstRW<[BWWriteResGroup135], (instregex "ADD_FI16m",
"SUBR_FI32m",
"SUB_FI16m",
"SUB_FI32m",
"VROUNDYPDm",
"VROUNDYPSm")>;
"VROUNDPDYm",
"VROUNDPSYm")>;
def BWWriteResGroup136 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
let Latency = 12;

View File

@ -555,8 +555,8 @@ def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm",
"(V?)MOVUPSrm",
"VPBROADCASTDrm",
"VPBROADCASTQrm",
"(V?)ROUND(Y?)PDr",
"(V?)ROUND(Y?)PSr",
"(V?)ROUNDPD(Y?)r",
"(V?)ROUNDPS(Y?)r",
"(V?)ROUNDSDr",
"(V?)ROUNDSSr")>;
@ -2787,8 +2787,8 @@ def: InstRW<[HWWriteResGroup103], (instregex "ADD_FI16m",
"SUBR_FI32m",
"SUB_FI16m",
"SUB_FI32m",
"VROUNDYPDm",
"VROUNDYPSm")>;
"VROUNDPDYm",
"VROUNDPSYm")>;
def HWWriteResGroup103_1 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 12;

View File

@ -757,8 +757,8 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0",
"(V?)ROUNDPSr",
"(V?)ROUNDSDr",
"(V?)ROUNDSSr",
"VROUNDYPDr",
"VROUNDYPSr",
"VROUNDPDYr",
"VROUNDPSYr",
"VSUBPDYrr",
"(V?)SUBPDrr",
"VSUBPSYrr",
@ -1853,8 +1853,8 @@ def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m",
"VMAX(C?)PSYrm",
"VMIN(C?)PDYrm",
"VMIN(C?)PSYrm",
"VROUNDYPDm",
"VROUNDYPSm",
"VROUNDPDYm",
"VROUNDPSYm",
"VSUBPDYrm",
"VSUBPSYrm")>;

View File

@ -2406,8 +2406,8 @@ def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPDr",
"VROUNDPSr",
"VROUNDSDr",
"VROUNDSSr",
"VROUNDYPDr",
"VROUNDYPSr")>;
"VROUNDPDYr",
"VROUNDPSYr")>;
def SKLWriteResGroup105_2 : SchedWriteRes<[SKLPort01]> {
let Latency = 10;
@ -3231,8 +3231,8 @@ def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPDm",
"VROUNDYPSm")>;
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
"VROUNDPSYm")>;
def SKLWriteResGroup172_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 17;

View File

@ -3876,8 +3876,8 @@ def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPDr",
"VROUNDPSr",
"VROUNDSDr",
"VROUNDSSr",
"VROUNDYPDr",
"VROUNDYPSr")>;
"VROUNDPDYr",
"VROUNDPSYr")>;
def SKXWriteResGroup116_2 : SchedWriteRes<[SKXPort015]> {
let Latency = 10;
@ -5555,8 +5555,8 @@ def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i(k?)(z?)",
"VRNDSCALEPDZrm(b?)i(k?)(z?)",
"VRNDSCALEPSZ256rm(b?)i(k?)(z?)",
"VRNDSCALEPSZrm(b?)i(k?)(z?)",
"VROUNDYPDm",
"VROUNDYPSm")>;
"VROUNDPDYm",
"VROUNDPSYm")>;
def SKXWriteResGroup192_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 17;

View File

@ -599,7 +599,7 @@ def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
}
def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
VCVTPS2DQYrr, VCVTTPS2DQYrr,
VROUNDYPDr, VROUNDYPSr)>;
VROUNDPDYr, VROUNDPSYr)>;
def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
let Latency = 8;
@ -607,7 +607,7 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
}
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm,
VROUNDYPDm, VROUNDYPSm)>;
VROUNDPDYm, VROUNDPSYm)>;
def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
let Latency = 2;

View File

@ -4132,8 +4132,8 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
;
; ZNVER1-LABEL: test_roundpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00]
; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
@ -4196,8 +4196,8 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
;
; ZNVER1-LABEL: test_roundps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [11:1.00]
; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:1.00]
; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)

View File

@ -169,10 +169,10 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
{"VRNDSCALEPDZ128rmi", "VROUNDPDm", true},
{"VRNDSCALEPSZ128rri", "VROUNDPSr", true},
{"VRNDSCALEPSZ128rmi", "VROUNDPSm", true},
{"VRNDSCALEPDZ256rri", "VROUNDYPDr", false},
{"VRNDSCALEPDZ256rmi", "VROUNDYPDm", false},
{"VRNDSCALEPSZ256rri", "VROUNDYPSr", false},
{"VRNDSCALEPSZ256rmi", "VROUNDYPSm", false},
{"VRNDSCALEPDZ256rri", "VROUNDPDYr", false},
{"VRNDSCALEPDZ256rmi", "VROUNDPDYm", false},
{"VRNDSCALEPSZ256rri", "VROUNDPSYr", false},
{"VRNDSCALEPSZ256rmi", "VROUNDPSYm", false},
{"VRNDSCALESDr", "VROUNDSDr", true},
{"VRNDSCALESDm", "VROUNDSDm", true},
{"VRNDSCALESSr", "VROUNDSSr", true},