forked from OSchip/llvm-project
[x86] Allow folding unaligned memory operands into pcmp[ei]str* instructions.
These have special permission according to the x86 manual to read unaligned memory, and this folding is done by ICC and GCC as well. This corrects one of the issues identified in PR37246. llvm-svn: 330896
This commit is contained in:
parent
8cc8c0a87c
commit
eb631ef51e
|
@ -631,10 +631,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
|||
{ X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
|
||||
{ X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
|
||||
{ X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
|
||||
{ X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
|
||||
{ X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
|
||||
{ X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
|
||||
{ X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
|
||||
{ X86::PCMPESTRIrr, X86::PCMPESTRIrm, 0 },
|
||||
{ X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, 0 },
|
||||
{ X86::PCMPISTRIrr, X86::PCMPISTRIrm, 0 },
|
||||
{ X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, 0 },
|
||||
{ X86::PHMINPOSUWrr, X86::PHMINPOSUWrm, TB_ALIGN_16 },
|
||||
{ X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
|
||||
{ X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },
|
||||
|
|
|
@ -437,7 +437,7 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1
|
|||
; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; VCHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x62,0x00,0x07]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%1 = load <16 x i8>, <16 x i8>* %a1
|
||||
%1 = load <16 x i8>, <16 x i8>* %a1, align 1
|
||||
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
|
|
@ -123,19 +123,18 @@ define i1 @pcmpestri_mem_eq_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rh
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movdqu (%esi), %xmm0
|
||||
; X32-NEXT: movdqu (%ecx), %xmm1
|
||||
; X32-NEXT: pcmpestri $24, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpestri $24, (%ecx), %xmm0
|
||||
; X32-NEXT: setae %al
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpestri_mem_eq_i8:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movq %rdx, %r8
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rdx), %xmm1
|
||||
; X64-NEXT: movl %esi, %eax
|
||||
; X64-NEXT: movl %ecx, %edx
|
||||
; X64-NEXT: pcmpestri $24, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpestri $24, (%r8), %xmm0
|
||||
; X64-NEXT: setae %al
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -157,19 +156,18 @@ define i32 @pcmpestri_mem_idx_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movdqu (%esi), %xmm0
|
||||
; X32-NEXT: movdqu (%ecx), %xmm1
|
||||
; X32-NEXT: pcmpestri $24, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpestri $24, (%ecx), %xmm0
|
||||
; X32-NEXT: movl %ecx, %eax
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpestri_mem_idx_i8:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movq %rdx, %r8
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rdx), %xmm1
|
||||
; X64-NEXT: movl %esi, %eax
|
||||
; X64-NEXT: movl %ecx, %edx
|
||||
; X64-NEXT: pcmpestri $24, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpestri $24, (%r8), %xmm0
|
||||
; X64-NEXT: movl %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -380,19 +378,18 @@ define i1 @pcmpestri_mem_eq_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movdqu (%esi), %xmm0
|
||||
; X32-NEXT: movdqu (%ecx), %xmm1
|
||||
; X32-NEXT: pcmpestri $25, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpestri $25, (%ecx), %xmm0
|
||||
; X32-NEXT: setae %al
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpestri_mem_eq_i16:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movq %rdx, %r8
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rdx), %xmm1
|
||||
; X64-NEXT: movl %esi, %eax
|
||||
; X64-NEXT: movl %ecx, %edx
|
||||
; X64-NEXT: pcmpestri $25, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpestri $25, (%r8), %xmm0
|
||||
; X64-NEXT: setae %al
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -416,19 +413,18 @@ define i32 @pcmpestri_mem_idx_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i3
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movdqu (%esi), %xmm0
|
||||
; X32-NEXT: movdqu (%ecx), %xmm1
|
||||
; X32-NEXT: pcmpestri $25, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpestri $25, (%ecx), %xmm0
|
||||
; X32-NEXT: movl %ecx, %eax
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpestri_mem_idx_i16:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movq %rdx, %r8
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rdx), %xmm1
|
||||
; X64-NEXT: movl %esi, %eax
|
||||
; X64-NEXT: movl %ecx, %edx
|
||||
; X64-NEXT: pcmpestri $25, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpestri $25, (%r8), %xmm0
|
||||
; X64-NEXT: movl %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -622,16 +618,14 @@ define i1 @pcmpistri_mem_eq_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movdqu (%ecx), %xmm0
|
||||
; X32-NEXT: movdqu (%eax), %xmm1
|
||||
; X32-NEXT: pcmpistri $24, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpistri $24, (%eax), %xmm0
|
||||
; X32-NEXT: setae %al
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpistri_mem_eq_i8:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rsi), %xmm1
|
||||
; X64-NEXT: pcmpistri $24, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpistri $24, (%rsi), %xmm0
|
||||
; X64-NEXT: setae %al
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -650,16 +644,14 @@ define i32 @pcmpistri_mem_idx_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movdqu (%ecx), %xmm0
|
||||
; X32-NEXT: movdqu (%eax), %xmm1
|
||||
; X32-NEXT: pcmpistri $24, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpistri $24, (%eax), %xmm0
|
||||
; X32-NEXT: movl %ecx, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpistri_mem_idx_i8:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rsi), %xmm1
|
||||
; X64-NEXT: pcmpistri $24, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpistri $24, (%rsi), %xmm0
|
||||
; X64-NEXT: movl %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -849,16 +841,14 @@ define i1 @pcmpistri_mem_eq_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movdqu (%ecx), %xmm0
|
||||
; X32-NEXT: movdqu (%eax), %xmm1
|
||||
; X32-NEXT: pcmpistri $25, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpistri $25, (%eax), %xmm0
|
||||
; X32-NEXT: setae %al
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpistri_mem_eq_i16:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rsi), %xmm1
|
||||
; X64-NEXT: pcmpistri $25, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpistri $25, (%rsi), %xmm0
|
||||
; X64-NEXT: setae %al
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
@ -879,16 +869,14 @@ define i32 @pcmpistri_mem_idx_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
|
|||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movdqu (%ecx), %xmm0
|
||||
; X32-NEXT: movdqu (%eax), %xmm1
|
||||
; X32-NEXT: pcmpistri $25, %xmm1, %xmm0
|
||||
; X32-NEXT: pcmpistri $25, (%eax), %xmm0
|
||||
; X32-NEXT: movl %ecx, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: pcmpistri_mem_idx_i16:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movdqu (%rdi), %xmm0
|
||||
; X64-NEXT: movdqu (%rsi), %xmm1
|
||||
; X64-NEXT: pcmpistri $25, %xmm1, %xmm0
|
||||
; X64-NEXT: pcmpistri $25, (%rsi), %xmm0
|
||||
; X64-NEXT: movl %ecx, %eax
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue