forked from OSchip/llvm-project
[X86] Loosen memory folding requirements for cvtdq2pd and cvtps2pd instructions.
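According to the instruction set specification, the cvtdq2pd and cvtps2pd instructions do not require their memory operand to be aligned to 16 bytes (their memory form reads only a 64-bit operand). This patch removes that alignment requirement (TB_ALIGN_16) from their entries in the memory folding table, so unaligned loads can be folded into these instructions. Differential Revision: https://reviews.llvm.org/D23919 llvm-svn: 280402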
This commit is contained in:
parent
8e5b54021e
commit
cde38b6a99
|
@ -477,12 +477,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
|
||||
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
|
||||
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
|
||||
{ X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_ALIGN_16 },
|
||||
{ X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, 0 },
|
||||
{ X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_ALIGN_16 },
|
||||
{ X86::CVTPS2PDrr, X86::CVTPS2PDrm, 0 },
|
||||
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
|
||||
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
|
||||
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-pc-linux -mattr=+sse4.2 < %s | FileCheck %s --check-prefix=X86-64
|
||||
; RUN: llc -mtriple=i386-pc-linux -mattr=+sse4.2 < %s | FileCheck %s --check-prefix=I386
|
||||
|
||||
; Check that unaligned loads merge with cvtdq2pd and cvtps2pd.
|
||||
|
||||
define <2 x double> @peephole_cvtps2pd(<4 x float>* %a0) {
|
||||
; X86-64-LABEL: peephole_cvtps2pd:
|
||||
; X86-64: # BB#0:
|
||||
; X86-64-NEXT: cvtps2pd (%rdi), %xmm0
|
||||
; X86-64-NEXT: retq
|
||||
;
|
||||
; I386-LABEL: peephole_cvtps2pd:
|
||||
; I386: # BB#0:
|
||||
; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; I386-NEXT: cvtps2pd (%eax), %xmm0
|
||||
; I386-NEXT: retl
|
||||
%1 = load <4 x float>, <4 x float>* %a0, align 1
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%3 = fpext <2 x float> %2 to <2 x double>
|
||||
ret <2 x double> %3
|
||||
}
|
||||
|
||||
; Loads a <4 x i32> through %a0, keeps elements 0 and 1, and converts them as
; signed integers to <2 x double>. The assertions below expect the load to be
; folded into the memory operand of cvtdq2pd for both RUN configurations.
define <2 x double> @peephole_cvtdq2pd(<4 x i32>* %a0) {
|
||||
; X86-64-LABEL: peephole_cvtdq2pd:
|
||||
; X86-64: # BB#0:
|
||||
; X86-64-NEXT: cvtdq2pd (%rdi), %xmm0
|
||||
; X86-64-NEXT: retq
|
||||
;
|
||||
; I386-LABEL: peephole_cvtdq2pd:
|
||||
; I386: # BB#0:
|
||||
; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; I386-NEXT: cvtdq2pd (%eax), %xmm0
|
||||
; I386-NEXT: retl
|
||||
; The load is given align 1 on purpose: the unaligned case is what this test
; exercises.
%1 = load <4 x i32>, <4 x i32>* %a0, align 1
|
||||
; Select the two low elements (indices 0 and 1) of the loaded vector.
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%3 = sitofp <2 x i32> %2 to <2 x double>
|
||||
ret <2 x double> %3
|
||||
}
|
Loading…
Reference in New Issue