diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index f6453a21392c..f4c4b0aa9c79 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -21,10 +21,6 @@ def BroadwellModel : SchedMachineModel { // Based on the LSD (loop-stream detector) queue size and benchmarking data. let LoopMicroOpBufferSize = 50; - - // This flag is set to allow the scheduler to assign a default model to - // unrecognized opcodes. - let CompleteModel = 0; } let SchedModel = BroadwellModel in { @@ -120,6 +116,9 @@ def : WriteRes; // These can often bypass execution ports completely. def : WriteRes; +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + // Branches don't produce values, so they have no latency, but they still // consume resources. Indirect branches can fold loads. defm : BWWriteResPair; diff --git a/llvm/test/CodeGen/X86/aes-schedule.ll b/llvm/test/CodeGen/X86/aes-schedule.ll index e4768eafeaa5..be3b049b9d54 100644 --- a/llvm/test/CodeGen/X86/aes-schedule.ll +++ b/llvm/test/CodeGen/X86/aes-schedule.ll @@ -261,8 +261,8 @@ define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) { ; ; BROADWELL-LABEL: test_aesimc: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00] ; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [19:2.00] +; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00] ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index a31109cf0071..6f63af9c25bc 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -1069,8 +1069,8 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; ; BROADWELL-LABEL: test_cvtdq2pd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00] +; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -4035,8 +4035,8 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_rcpps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [17:2.00] ; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [17:2.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -4099,8 +4099,8 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; ; BROADWELL-LABEL: test_roundpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [12:2.00] +; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -4163,8 +4163,8 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_roundps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [12:2.00] +; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -4227,8 +4227,8 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_rsqrtps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [17:2.00] ; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [17:2.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -4408,8 +4408,8 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; ; BROADWELL-LABEL: test_sqrtpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [40:2.00] ; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [34:2.00] +; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [40:2.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -4472,8 +4472,8 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_sqrtps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [27:2.00] ; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00] +; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [27:2.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index d8dc3bb84f5f..df6b1918c6ab 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -1761,8 +1761,8 @@ define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { ; ; BROADWELL-LABEL: test_pbroadcastb: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -1810,8 +1810,8 @@ define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { ; ; BROADWELL-LABEL: test_pbroadcastb_ymm: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] +; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -2051,8 +2051,8 @@ define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { ; ; BROADWELL-LABEL: test_pbroadcastw: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -2100,8 +2100,8 @@ define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { ; ; BROADWELL-LABEL: test_pbroadcastw_ymm: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] +; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 580723b1c409..42159fea8f56 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -54,10 +54,10 @@ define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; ; BROADWELL-LABEL: test_cvtpd2pi: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [9:1.00] -; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] -; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [9:1.00] +; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpd2pi: @@ -138,9 +138,9 @@ define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize { ; ; BROADWELL-LABEL: test_cvtpi2pd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [9:1.00] +; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpi2pd: @@ -388,10 +388,10 @@ define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; ; BROADWELL-LABEL: test_cvttpd2pi: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [9:1.00] -; BROADWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] -; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] +; BROADWELL-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [9:1.00] +; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] +; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttpd2pi: diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index 7b04f663b0a2..6072e927cde1 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -784,8 +784,8 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; ; BROADWELL-LABEL: test_cvtss2si: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -864,8 +864,8 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; ; BROADWELL-LABEL: test_cvtss2siq: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -944,8 +944,8 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; ; BROADWELL-LABEL: test_cvttss2si: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -1021,8 +1021,8 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; ; BROADWELL-LABEL: test_cvttss2siq: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 092445ec49fe..32c44c67c0a3 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -705,8 +705,8 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; ; BROADWELL-LABEL: test_cvtdq2pd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -1179,8 +1179,8 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; ; BROADWELL-LABEL: test_cvtsd2si: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -1259,8 +1259,8 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; ; BROADWELL-LABEL: test_cvtsd2siq: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -1829,8 +1829,8 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; ; BROADWELL-LABEL: test_cvttsd2si: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -1906,8 +1906,8 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; ; BROADWELL-LABEL: test_cvttsd2siq: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index c02eae3eaee5..a2073f7ffb02 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -2992,8 +2992,8 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; ; BROADWELL-LABEL: test_roundpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00] +; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -3064,8 +3064,8 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; ; BROADWELL-LABEL: test_roundps: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00] +; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; @@ -3137,9 +3137,9 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; ; BROADWELL-LABEL: test_roundsd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] -; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] +; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundsd: @@ -3210,9 +3210,9 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; ; BROADWELL-LABEL: test_roundss: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] -; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] +; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundss: