forked from OSchip/llvm-project
[AArch64][x86] add tests for trunc disguised as vector ops (PR39016); NFC
These correspond to the IR transform from: D52439 llvm-svn: 344353
This commit is contained in:
parent
0a3bb81974
commit
f5b1892348
|
@ -0,0 +1,118 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64_be-- < %s | FileCheck %s --check-prefix=BE
|
||||
; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s --check-prefix=LE
|
||||
|
||||
; Inserts %x into lane 0 of an undef <2 x i64>, bitcasts to <4 x i32>, and
; extracts lane 0. The little-endian run expects the low 32 bits of %x to be
; read straight back (s0); the big-endian run expects an extra rev64 first.
define i32 @trunc_i64_to_i32_le(i64 %x) {
|
||||
; BE-LABEL: trunc_i64_to_i32_le:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov d0, x0
|
||||
; BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; BE-NEXT: fmov w0, s0
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i32_le:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov d0, x0
|
||||
; LE-NEXT: fmov w0, s0
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <4 x i32>
|
||||
%ext = extractelement <4 x i32> %bc, i32 0
|
||||
ret i32 %ext
|
||||
}
|
||||
|
||||
; Same insert/bitcast pattern as the lane-0 i64-to-i32 test, but extracts
; lane 1 of the <4 x i32>. Both runs expect a lane move from v0.s[1]; the
; big-endian run expects a rev64 before it.
; NOTE(review): the _be suffix suggests lane 1 is the truncating lane on
; big-endian targets - confirm against the IR transform this test mirrors.
define i32 @trunc_i64_to_i32_be(i64 %x) {
|
||||
; BE-LABEL: trunc_i64_to_i32_be:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov d0, x0
|
||||
; BE-NEXT: rev64 v0.4s, v0.4s
|
||||
; BE-NEXT: mov w0, v0.s[1]
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i32_be:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov d0, x0
|
||||
; LE-NEXT: mov w0, v0.s[1]
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <4 x i32>
|
||||
%ext = extractelement <4 x i32> %bc, i32 1
|
||||
ret i32 %ext
|
||||
}
|
||||
|
||||
; Inserts %x into lane 0 of an undef <2 x i64>, bitcasts to <8 x i16>, and
; extracts lane 0. Both runs expect a umov from v0.h[0]; the big-endian run
; expects a rev64 on the halfword lanes before it.
define i16 @trunc_i64_to_i16_le(i64 %x) {
|
||||
; BE-LABEL: trunc_i64_to_i16_le:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov d0, x0
|
||||
; BE-NEXT: rev64 v0.8h, v0.8h
|
||||
; BE-NEXT: umov w0, v0.h[0]
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i16_le:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov d0, x0
|
||||
; LE-NEXT: umov w0, v0.h[0]
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <8 x i16>
|
||||
%ext = extractelement <8 x i16> %bc, i32 0
|
||||
ret i16 %ext
|
||||
}
|
||||
|
||||
; Same insert/bitcast pattern as the lane-0 i64-to-i16 test, but extracts
; lane 3 of the <8 x i16> (the last halfword of the inserted i64). Both runs
; expect a umov from v0.h[3]; the big-endian run expects a rev64 before it.
; NOTE(review): the _be suffix suggests lane 3 is the truncating lane on
; big-endian targets - confirm against the IR transform this test mirrors.
define i16 @trunc_i64_to_i16_be(i64 %x) {
|
||||
; BE-LABEL: trunc_i64_to_i16_be:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov d0, x0
|
||||
; BE-NEXT: rev64 v0.8h, v0.8h
|
||||
; BE-NEXT: umov w0, v0.h[3]
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i64_to_i16_be:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov d0, x0
|
||||
; LE-NEXT: umov w0, v0.h[3]
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <8 x i16>
|
||||
%ext = extractelement <8 x i16> %bc, i32 3
|
||||
ret i16 %ext
|
||||
}
|
||||
|
||||
; Inserts %x into lane 0 of an undef <4 x i32>, bitcasts to <16 x i8>, and
; extracts lane 0. Both runs expect a umov from v0.b[0]; the big-endian run
; expects a rev32 on the byte lanes before it.
define i8 @trunc_i32_to_i8_le(i32 %x) {
|
||||
; BE-LABEL: trunc_i32_to_i8_le:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov s0, w0
|
||||
; BE-NEXT: rev32 v0.16b, v0.16b
|
||||
; BE-NEXT: umov w0, v0.b[0]
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i32_to_i8_le:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov s0, w0
|
||||
; LE-NEXT: umov w0, v0.b[0]
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||
%bc = bitcast <4 x i32> %ins to <16 x i8>
|
||||
%ext = extractelement <16 x i8> %bc, i32 0
|
||||
ret i8 %ext
|
||||
}
|
||||
|
||||
; Same insert/bitcast pattern as the lane-0 i32-to-i8 test, but extracts
; lane 3 of the <16 x i8> (the last byte of the inserted i32). Both runs
; expect a umov from v0.b[3]; the big-endian run expects a rev32 before it.
; NOTE(review): the _be suffix suggests lane 3 is the truncating lane on
; big-endian targets - confirm against the IR transform this test mirrors.
define i8 @trunc_i32_to_i8_be(i32 %x) {
|
||||
; BE-LABEL: trunc_i32_to_i8_be:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: fmov s0, w0
|
||||
; BE-NEXT: rev32 v0.16b, v0.16b
|
||||
; BE-NEXT: umov w0, v0.b[3]
|
||||
; BE-NEXT: ret
|
||||
;
|
||||
; LE-LABEL: trunc_i32_to_i8_be:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: fmov s0, w0
|
||||
; LE-NEXT: umov w0, v0.b[3]
|
||||
; LE-NEXT: ret
|
||||
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||
%bc = bitcast <4 x i32> %ins to <16 x i8>
|
||||
%ext = extractelement <16 x i8> %bc, i32 3
|
||||
ret i8 %ext
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ define i8 @extractelt_bitcast(i32 %x) nounwind {
|
|||
}
|
||||
|
||||
; TODO: This should have folded to avoid vector ops, but the transform
|
||||
; is guarded by 'hasOneUse'. That limitation apparently makes some AMDGPU
|
||||
; codegen better.
|
||||
|
||||
define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind {
|
||||
|
@ -60,3 +60,56 @@ define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind {
|
|||
ret i8 %ext
|
||||
}
|
||||
|
||||
; Inserts %x into lane 0 of an undef <2 x i64>, bitcasts to <4 x i32>, and
; extracts lane 0. The 32-bit run expects a single load from the stack; the
; 64-bit run still expects a round trip through %xmm0 (movq then movd).
define i32 @trunc_i64_to_i32_le(i64 %x) {
|
||||
; X86-LABEL: trunc_i64_to_i32_le:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: trunc_i64_to_i32_le:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %xmm0
|
||||
; X64-NEXT: movd %xmm0, %eax
|
||||
; X64-NEXT: retq
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <4 x i32>
|
||||
%ext = extractelement <4 x i32> %bc, i32 0
|
||||
ret i32 %ext
|
||||
}
|
||||
|
||||
; Inserts %x into lane 0 of an undef <2 x i64>, bitcasts to <8 x i16>, and
; extracts lane 0. The 32-bit run expects a single load from the stack; the
; 64-bit run still expects a round trip through %xmm0 (movq then movd).
define i16 @trunc_i64_to_i16_le(i64 %x) {
|
||||
; X86-LABEL: trunc_i64_to_i16_le:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: trunc_i64_to_i16_le:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %xmm0
|
||||
; X64-NEXT: movd %xmm0, %eax
|
||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NEXT: retq
|
||||
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <8 x i16>
|
||||
%ext = extractelement <8 x i16> %bc, i32 0
|
||||
ret i16 %ext
|
||||
}
|
||||
|
||||
; Inserts %x into lane 0 of an undef <4 x i32>, bitcasts to <16 x i8>, and
; extracts lane 0. Neither run expects vector instructions here: the 32-bit
; run loads one byte from the stack and the 64-bit run copies %edi to %eax.
define i8 @trunc_i32_to_i8_le(i32 %x) {
|
||||
; X86-LABEL: trunc_i32_to_i8_le:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: trunc_i32_to_i8_le:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X64-NEXT: retq
|
||||
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||
%bc = bitcast <4 x i32> %ins to <16 x i8>
|
||||
%ext = extractelement <16 x i8> %bc, i32 0
|
||||
ret i8 %ext
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue