llvm-project/llvm/test/CodeGen/X86/recip-fastmath.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1413 lines
57 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
; we should generate the estimate sequence.
; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
; for details about the accuracy, speed, and implementation
; differences of x86 reciprocal estimates.
define float @f32_no_estimate(float %x) #0 {
; SSE-LABEL: f32_no_estimate:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX-LABEL: f32_no_estimate:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
define float @f32_one_step(float %x) #1 {
; SSE-LABEL: f32_one_step:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm0, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: mulss %xmm2, %xmm1
; SSE-NEXT: addss %xmm2, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: f32_one_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-RECIP-NEXT: vsubss %xmm0, %xmm2, %xmm0
; AVX-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: f32_one_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_one_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: f32_one_step:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
define float @f32_one_step_variables(float %x, float %y) #1 {
; SSE-LABEL: f32_one_step_variables:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm1, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulss %xmm2, %xmm3
; SSE-NEXT: mulss %xmm3, %xmm1
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: addss %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: f32_one_step_variables:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; AVX-RECIP-NEXT: vmulss %xmm2, %xmm0, %xmm3
; AVX-RECIP-NEXT: vmulss %xmm3, %xmm1, %xmm1
; AVX-RECIP-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-RECIP-NEXT: vmulss %xmm0, %xmm2, %xmm0
; AVX-RECIP-NEXT: vaddss %xmm0, %xmm3, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: f32_one_step_variables:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; FMA-RECIP-NEXT: vmulss %xmm2, %xmm0, %xmm3
; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
; FMA-RECIP-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
; FMA-RECIP-NEXT: vmovaps %xmm2, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_one_step_variables:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; BDVER2-NEXT: vmulss %xmm2, %xmm0, %xmm3
; BDVER2-NEXT: vfnmaddss %xmm0, %xmm3, %xmm1, %xmm0
; BDVER2-NEXT: vfmaddss %xmm3, %xmm0, %xmm2, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step_variables:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; BTVER2-NEXT: vmulss %xmm2, %xmm0, %xmm3
; BTVER2-NEXT: vmulss %xmm3, %xmm1, %xmm1
; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0
; BTVER2-NEXT: vmulss %xmm0, %xmm2, %xmm0
; BTVER2-NEXT: vaddss %xmm0, %xmm3, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step_variables:
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; SANDY-NEXT: vmulss %xmm2, %xmm0, %xmm3
; SANDY-NEXT: vmulss %xmm3, %xmm1, %xmm1
; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0
; SANDY-NEXT: vmulss %xmm0, %xmm2, %xmm0
; SANDY-NEXT: vaddss %xmm0, %xmm3, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step_variables:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; HASWELL-NEXT: vmulss %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
; HASWELL-NEXT: vmovaps %xmm2, %xmm0
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step_variables:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm0, %xmm3
; HASWELL-NO-FMA-NEXT: vmulss %xmm3, %xmm1, %xmm1
; HASWELL-NO-FMA-NEXT: vsubss %xmm1, %xmm0, %xmm0
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm2, %xmm0
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm3, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
; AVX512-LABEL: f32_one_step_variables:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcpss %xmm1, %xmm1, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm3
; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%div = fdiv fast float %x, %y
ret float %div
}
define float @f32_two_step(float %x) #2 {
; SSE-LABEL: f32_two_step:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulss %xmm2, %xmm3
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: subss %xmm3, %xmm4
; SSE-NEXT: mulss %xmm2, %xmm4
; SSE-NEXT: addss %xmm2, %xmm4
; SSE-NEXT: mulss %xmm4, %xmm0
; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: mulss %xmm4, %xmm1
; SSE-NEXT: addss %xmm4, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: f32_two_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulss %xmm1, %xmm0, %xmm2
; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX-RECIP-NEXT: vsubss %xmm2, %xmm3, %xmm2
; AVX-RECIP-NEXT: vmulss %xmm2, %xmm1, %xmm2
; AVX-RECIP-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX-RECIP-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-RECIP-NEXT: vsubss %xmm0, %xmm3, %xmm0
; AVX-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: f32_two_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_two_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3
; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1
; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_two_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_two_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_two_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; HASWELL-NEXT: vmovaps %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2
; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2
; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: f32_two_step:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512-NEXT: vmovaps %xmm1, %xmm3
; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; AVX512-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; SSE-LABEL: v4f32_no_estimate:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: divps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v4f32_no_estimate:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v4f32_no_estimate:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_no_estimate:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_no_estimate:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: v4f32_no_estimate:
; AVX512: # %bb.0:
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; SSE-LABEL: v4f32_one_step:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: subps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm2, %xmm1
; SSE-NEXT: addps %xmm2, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v4f32_one_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v4f32_one_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_one_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpps %xmm0, %xmm1
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0
; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vrcpps %xmm0, %xmm1
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpps %xmm0, %xmm1
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v4f32_one_step:
; KNL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; KNL-NEXT: vrcpps %xmm0, %xmm1
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; KNL-NEXT: retq
;
; SKX-LABEL: v4f32_one_step:
; SKX: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SKX-NEXT: vrcpps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; SKX-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
define <4 x float> @v4f32_one_step_variables(<4 x float> %x, <4 x float> %y) #1 {
; SSE-LABEL: v4f32_one_step_variables:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm1, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
; SSE-NEXT: mulps %xmm3, %xmm1
; SSE-NEXT: subps %xmm1, %xmm0
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v4f32_one_step_variables:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm1, %xmm2
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm0, %xmm3
; AVX-RECIP-NEXT: vmulps %xmm3, %xmm1, %xmm1
; AVX-RECIP-NEXT: vsubps %xmm1, %xmm0, %xmm0
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm2, %xmm0
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm3, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v4f32_one_step_variables:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %xmm1, %xmm2
; FMA-RECIP-NEXT: vmulps %xmm2, %xmm0, %xmm3
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
; FMA-RECIP-NEXT: vfmadd213ps {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
; FMA-RECIP-NEXT: vmovaps %xmm2, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_one_step_variables:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm1, %xmm2
; BDVER2-NEXT: vmulps %xmm2, %xmm0, %xmm3
; BDVER2-NEXT: vfnmaddps %xmm0, %xmm3, %xmm1, %xmm0
; BDVER2-NEXT: vfmaddps %xmm3, %xmm0, %xmm2, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step_variables:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %xmm1, %xmm2
; BTVER2-NEXT: vmulps %xmm2, %xmm0, %xmm3
; BTVER2-NEXT: vmulps %xmm3, %xmm1, %xmm1
; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0
; BTVER2-NEXT: vmulps %xmm0, %xmm2, %xmm0
; BTVER2-NEXT: vaddps %xmm0, %xmm3, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step_variables:
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %xmm1, %xmm2
; SANDY-NEXT: vmulps %xmm2, %xmm0, %xmm3
; SANDY-NEXT: vmulps %xmm3, %xmm1, %xmm1
; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0
; SANDY-NEXT: vmulps %xmm0, %xmm2, %xmm0
; SANDY-NEXT: vaddps %xmm0, %xmm3, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step_variables:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vrcpps %xmm1, %xmm2
; HASWELL-NEXT: vmulps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
; HASWELL-NEXT: vmovaps %xmm2, %xmm0
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step_variables:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm1, %xmm2
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm0, %xmm3
; HASWELL-NO-FMA-NEXT: vmulps %xmm3, %xmm1, %xmm1
; HASWELL-NO-FMA-NEXT: vsubps %xmm1, %xmm0, %xmm0
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm2, %xmm0
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm3, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
; AVX512-LABEL: v4f32_one_step_variables:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcpps %xmm1, %xmm2
; AVX512-NEXT: vmulps %xmm2, %xmm0, %xmm3
; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm3
; AVX512-NEXT: vmovaps %xmm2, %xmm0
; AVX512-NEXT: retq
%div = fdiv fast <4 x float> %x, %y
ret <4 x float> %div
}
define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; SSE-LABEL: v4f32_two_step:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: subps %xmm3, %xmm4
; SSE-NEXT: mulps %xmm2, %xmm4
; SSE-NEXT: addps %xmm2, %xmm4
; SSE-NEXT: mulps %xmm4, %xmm0
; SSE-NEXT: subps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm4, %xmm1
; SSE-NEXT: addps %xmm4, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v4f32_two_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm3, %xmm0
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v4f32_two_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_two_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpps %xmm0, %xmm1
; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3
; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vrcpps %xmm0, %xmm1
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_two_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpps %xmm0, %xmm1
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: v4f32_two_step:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcpps %xmm0, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vmovaps %xmm1, %xmm3
; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; SSE-LABEL: v8f32_no_estimate:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: divps %xmm0, %xmm3
; SSE-NEXT: divps %xmm1, %xmm2
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v8f32_no_estimate:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v8f32_no_estimate:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_no_estimate:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_no_estimate:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: v8f32_no_estimate:
; AVX512: # %bb.0:
; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; SSE-LABEL: v8f32_one_step:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm4
; SSE-NEXT: mulps %xmm4, %xmm0
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: subps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm4, %xmm3
; SSE-NEXT: addps %xmm4, %xmm3
; SSE-NEXT: rcpps %xmm1, %xmm0
; SSE-NEXT: mulps %xmm0, %xmm1
; SSE-NEXT: subps %xmm1, %xmm2
; SSE-NEXT: mulps %xmm0, %xmm2
; SSE-NEXT: addps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v8f32_one_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v8f32_one_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_one_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpps %ymm0, %ymm1
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0
; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vrcpps %ymm0, %ymm1
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_one_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpps %ymm0, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v8f32_one_step:
; KNL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; KNL-NEXT: vrcpps %ymm0, %ymm1
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: v8f32_one_step:
; SKX: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SKX-NEXT: vrcpps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
; SKX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SSE-LABEL: v8f32_two_step:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: rcpps %xmm0, %xmm3
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: mulps %xmm3, %xmm4
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: subps %xmm4, %xmm5
; SSE-NEXT: mulps %xmm3, %xmm5
; SSE-NEXT: addps %xmm3, %xmm5
; SSE-NEXT: mulps %xmm5, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: subps %xmm0, %xmm3
; SSE-NEXT: mulps %xmm5, %xmm3
; SSE-NEXT: addps %xmm5, %xmm3
; SSE-NEXT: rcpps %xmm2, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm4
; SSE-NEXT: mulps %xmm0, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: subps %xmm4, %xmm5
; SSE-NEXT: mulps %xmm0, %xmm5
; SSE-NEXT: addps %xmm0, %xmm5
; SSE-NEXT: mulps %xmm5, %xmm2
; SSE-NEXT: subps %xmm2, %xmm1
; SSE-NEXT: mulps %xmm5, %xmm1
; SSE-NEXT: addps %xmm5, %xmm1
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v8f32_two_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v8f32_two_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %ymm1, %ymm3
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_two_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpps %ymm0, %ymm1
; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3
; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vrcpps %ymm0, %ymm1
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_two_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpps %ymm0, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: v8f32_two_step:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcpps %ymm0, %ymm1
; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vmovaps %ymm1, %ymm3
; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
; SSE-LABEL: v16f32_no_estimate:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm4, %xmm5
; SSE-NEXT: divps %xmm0, %xmm5
; SSE-NEXT: movaps %xmm4, %xmm6
; SSE-NEXT: divps %xmm1, %xmm6
; SSE-NEXT: movaps %xmm4, %xmm7
; SSE-NEXT: divps %xmm2, %xmm7
; SSE-NEXT: divps %xmm3, %xmm4
; SSE-NEXT: movaps %xmm5, %xmm0
; SSE-NEXT: movaps %xmm6, %xmm1
; SSE-NEXT: movaps %xmm7, %xmm2
; SSE-NEXT: movaps %xmm4, %xmm3
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v16f32_no_estimate:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v16f32_no_estimate:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
; FMA-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_no_estimate:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0
; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_no_estimate:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0
; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_no_estimate:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_no_estimate:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0
; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vdivps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: v16f32_no_estimate:
; AVX512: # %bb.0:
; AVX512-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
; SSE-LABEL: v16f32_one_step:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: rcpps %xmm0, %xmm6
; SSE-NEXT: mulps %xmm6, %xmm5
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: subps %xmm5, %xmm0
; SSE-NEXT: mulps %xmm6, %xmm0
; SSE-NEXT: addps %xmm6, %xmm0
; SSE-NEXT: rcpps %xmm1, %xmm6
; SSE-NEXT: mulps %xmm6, %xmm1
; SSE-NEXT: movaps %xmm3, %xmm5
; SSE-NEXT: subps %xmm1, %xmm5
; SSE-NEXT: mulps %xmm6, %xmm5
; SSE-NEXT: addps %xmm6, %xmm5
; SSE-NEXT: rcpps %xmm2, %xmm1
; SSE-NEXT: mulps %xmm1, %xmm2
; SSE-NEXT: movaps %xmm3, %xmm6
; SSE-NEXT: subps %xmm2, %xmm6
; SSE-NEXT: mulps %xmm1, %xmm6
; SSE-NEXT: addps %xmm1, %xmm6
; SSE-NEXT: rcpps %xmm4, %xmm1
; SSE-NEXT: mulps %xmm1, %xmm4
; SSE-NEXT: subps %xmm4, %xmm3
; SSE-NEXT: mulps %xmm1, %xmm3
; SSE-NEXT: addps %xmm1, %xmm3
; SSE-NEXT: movaps %xmm5, %xmm1
; SSE-NEXT: movaps %xmm6, %xmm2
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v16f32_one_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm1
; AVX-RECIP-NEXT: vsubps %ymm1, %ymm3, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
; AVX-RECIP-NEXT: vaddps %ymm1, %ymm2, %ymm1
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v16f32_one_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_one_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpps %ymm0, %ymm2
; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BDVER2-NEXT: vrcpps %ymm1, %ymm4
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1
; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_one_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vrcpps %ymm0, %ymm2
; BTVER2-NEXT: vrcpps %ymm1, %ymm4
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
; BTVER2-NEXT: vmulps %ymm4, %ymm1, %ymm1
; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1
; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
; BTVER2-NEXT: vmulps %ymm1, %ymm4, %ymm1
; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
; BTVER2-NEXT: vaddps %ymm1, %ymm4, %ymm1
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_one_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpps %ymm0, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpps %ymm0, %ymm2
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vrcpps %ymm1, %ymm4
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1
; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: v16f32_one_step:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem
; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1
; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
; SSE-LABEL: v16f32_two_step:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: rcpps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm6
; SSE-NEXT: mulps %xmm0, %xmm6
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SSE-NEXT: movaps %xmm3, %xmm7
; SSE-NEXT: subps %xmm6, %xmm7
; SSE-NEXT: mulps %xmm0, %xmm7
; SSE-NEXT: addps %xmm0, %xmm7
; SSE-NEXT: mulps %xmm7, %xmm1
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: subps %xmm1, %xmm0
; SSE-NEXT: mulps %xmm7, %xmm0
; SSE-NEXT: addps %xmm7, %xmm0
; SSE-NEXT: rcpps %xmm5, %xmm1
; SSE-NEXT: movaps %xmm5, %xmm6
; SSE-NEXT: mulps %xmm1, %xmm6
; SSE-NEXT: movaps %xmm3, %xmm7
; SSE-NEXT: subps %xmm6, %xmm7
; SSE-NEXT: mulps %xmm1, %xmm7
; SSE-NEXT: addps %xmm1, %xmm7
; SSE-NEXT: mulps %xmm7, %xmm5
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: subps %xmm5, %xmm1
; SSE-NEXT: mulps %xmm7, %xmm1
; SSE-NEXT: addps %xmm7, %xmm1
; SSE-NEXT: rcpps %xmm2, %xmm5
; SSE-NEXT: movaps %xmm2, %xmm6
; SSE-NEXT: mulps %xmm5, %xmm6
; SSE-NEXT: movaps %xmm3, %xmm7
; SSE-NEXT: subps %xmm6, %xmm7
; SSE-NEXT: mulps %xmm5, %xmm7
; SSE-NEXT: addps %xmm5, %xmm7
; SSE-NEXT: mulps %xmm7, %xmm2
; SSE-NEXT: movaps %xmm3, %xmm5
; SSE-NEXT: subps %xmm2, %xmm5
; SSE-NEXT: mulps %xmm7, %xmm5
; SSE-NEXT: addps %xmm7, %xmm5
; SSE-NEXT: rcpps %xmm4, %xmm2
; SSE-NEXT: movaps %xmm4, %xmm6
; SSE-NEXT: mulps %xmm2, %xmm6
; SSE-NEXT: movaps %xmm3, %xmm7
; SSE-NEXT: subps %xmm6, %xmm7
; SSE-NEXT: mulps %xmm2, %xmm7
; SSE-NEXT: addps %xmm2, %xmm7
; SSE-NEXT: mulps %xmm7, %xmm4
; SSE-NEXT: subps %xmm4, %xmm3
; SSE-NEXT: mulps %xmm7, %xmm3
; SSE-NEXT: addps %xmm7, %xmm3
; SSE-NEXT: movaps %xmm5, %xmm2
; SSE-NEXT: retq
;
; AVX-RECIP-LABEL: v16f32_two_step:
; AVX-RECIP: # %bb.0:
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm3
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm4, %ymm0
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm1
; AVX-RECIP-NEXT: vsubps %ymm1, %ymm4, %ymm1
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
; AVX-RECIP-NEXT: vaddps %ymm1, %ymm2, %ymm1
; AVX-RECIP-NEXT: retq
;
; FMA-RECIP-LABEL: v16f32_two_step:
; FMA-RECIP: # %bb.0:
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_two_step:
; BDVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BDVER2-NEXT: vrcpps %ymm0, %ymm2
; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
; BDVER2-NEXT: vrcpps %ymm1, %ymm2
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_two_step:
; BTVER2: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; BTVER2-NEXT: vrcpps %ymm0, %ymm2
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0
; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
; BTVER2-NEXT: vrcpps %ymm1, %ymm2
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1
; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_two_step:
; SANDY: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; SANDY-NEXT: vrcpps %ymm0, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3
; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_two_step:
; HASWELL: # %bb.0:
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; HASWELL-NEXT: vrcpps %ymm0, %ymm2
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NEXT: vmovaps %ymm2, %ymm4
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
; HASWELL-NEXT: vrcpps %ymm1, %ymm2
; HASWELL-NEXT: vmovaps %ymm2, %ymm4
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3
; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3
; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3
; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3
; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3
; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1
; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implmentation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. Differential Revision: https://reviews.llvm.org/D57244 llvm-svn: 353043
2019-02-04 20:51:26 +08:00
; AVX512-LABEL: v16f32_two_step:
; AVX512: # %bb.0:
; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-NEXT: vmovaps %zmm1, %zmm3
; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2
; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1
; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2
; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3
; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!divf,!vec-divf" }
attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf,vec-divf" }
attributes #2 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf:2,vec-divf:2" }