forked from OSchip/llvm-project
99 lines
3.2 KiB
LLVM
99 lines
3.2 KiB
LLVM
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
|
;
|
|
; Test that only one vperm of the vector compare is needed for both extracts.
|
|
|
|
define void @fun() {
|
|
; CHECK-LABEL: fun:
|
|
; CHECK: vperm
|
|
; CHECK-NOT: vperm
|
|
bb:
|
|
%tmp = load <4 x i8>, <4 x i8>* undef
|
|
%tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
|
|
%tmp2 = extractelement <4 x i1> %tmp1, i32 0
|
|
br i1 %tmp2, label %bb1, label %bb2
|
|
|
|
bb1:
|
|
unreachable
|
|
|
|
bb2:
|
|
%tmp3 = extractelement <4 x i1> %tmp1, i32 1
|
|
br i1 %tmp3, label %bb3, label %bb4
|
|
|
|
bb3:
|
|
unreachable
|
|
|
|
bb4:
|
|
unreachable
|
|
}
|
|
|
|
; Test that a zero index in the permute vector is used instead of VGBM, with
|
|
; a zero index into the other source operand.
|
|
define <4 x i8> @fun1(<2 x i8> %arg) {
|
|
; CHECK-LABEL:.LCPI1_0:
|
|
; CHECK-NEXT: .byte 1 # 0x1
|
|
; CHECK-NEXT: .byte 18 # 0x12
|
|
; CHECK-NEXT: .byte 0 # 0x0
|
|
; CHECK-NEXT: .byte 18 # 0x12
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .text
|
|
; CHECK-NEXT: .globl fun1
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: .type fun1,@function
|
|
; CHECK-NEXT: fun1: # @fun1
|
|
; CHECK-NEXT: .cfi_startproc
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: larl %r1, .LCPI1_0
|
|
; CHECK-NEXT: vl %v0, 0(%r1), 3
|
|
; CHECK-NEXT: vperm %v24, %v24, %v0, %v0
|
|
; CHECK-NEXT: br %r14
|
|
%res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
|
|
<4 x i32> <i32 1, i32 2, i32 0, i32 3>
|
|
ret <4 x i8> %res
|
|
}
|
|
|
|
; Same, but with the first byte indexing into an element of the zero vector.
|
|
define <4 x i8> @fun2(<2 x i8> %arg) {
|
|
; CHECK-LABEL:.LCPI2_0:
|
|
; CHECK-NEXT: .byte 0 # 0x0
|
|
; CHECK-NEXT: .byte 17 # 0x11
|
|
; CHECK-NEXT: .byte 17 # 0x11
|
|
; CHECK-NEXT: .byte 0 # 0x0
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .space 1
|
|
; CHECK-NEXT: .text
|
|
; CHECK-NEXT: .globl fun2
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: .type fun2,@function
|
|
; CHECK-NEXT:fun2: # @fun2
|
|
; CHECK-NEXT: .cfi_startproc
|
|
; CHECK-NEXT:# %bb.0:
|
|
; CHECK-NEXT: larl %r1, .LCPI2_0
|
|
; CHECK-NEXT: vl %v0, 0(%r1), 3
|
|
; CHECK-NEXT: vperm %v24, %v0, %v24, %v0
|
|
; CHECK-NEXT: br %r14
|
|
%res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
|
|
<4 x i32> <i32 3, i32 1, i32 1, i32 2>
|
|
ret <4 x i8> %res
|
|
}
|