forked from OSchip/llvm-project
[Hexagon] Replace incorrect pattern for vpackl HWI32 -> HVI8
V6_vdealb4w is not correct for vector pairs; use V6_vpackeh followed by V6_vpackeb instead.
This commit is contained in:
parent
ba12e77ec1
commit
5f4abb7fab
|
@ -417,7 +417,8 @@ let Predicates = [UseHVX] in {
|
|||
def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
|
||||
def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
|
||||
def: Pat<(VecI8 (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
|
||||
def: Pat<(VecI8 (vpackl HWI32:$Vs)), (V6_vdealb4w (HiVec $Vs), (LoVec $Vs))>;
|
||||
def: Pat<(VecI8 (vpackl HWI32:$Vs)),
|
||||
(V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
|
||||
def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;
|
||||
|
||||
def: Pat<(VecI16 (vunpack HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
|
||||
|
|
|
@ -2,10 +2,11 @@
|
|||
|
||||
; This has a v32i8 = truncate v32i32 (64b mode), which was legalized to
|
||||
; v64i8 = vpackl v32i32, for which there were no selection patterns provided.
|
||||
; Check that we generate vdeale for this.
|
||||
; Check that we generate vpackeh->vpackeb for this.
|
||||
|
||||
; CHECK-LABEL: fred:
|
||||
; CHECK: vdeale(v1.b,v0.b)
|
||||
; CHECK: v[[V0:[0-9]+]].h = vpacke(v1.w,v0.w)
|
||||
; CHECK: = vpacke({{.*}},v[[V0]].h)
|
||||
define void @fred(<32 x i8>* %a0, <32 x i32> %a1) #0 {
|
||||
%v0 = trunc <32 x i32> %a1 to <32 x i8>
|
||||
store <32 x i8> %v0, <32 x i8>* %a0, align 32
|
||||
|
|
|
@ -49,8 +49,9 @@ define void @f2(<64 x i16>* %a0, <64 x i8>* %a1) #0 {
|
|||
; CHECK-DAG: v[[V0:[0-9]+]] = vmem(r0+#0)
|
||||
; CHECK-DAG: v[[V1:[0-9]+]] = vmem(r0+#1)
|
||||
; CHECK-DAG: q[[Q0:[0-3]]] = vsetq
|
||||
; CHECK: v[[V2:[0-9]+]].b = vdeale(v[[V1]].b,v[[V0]].b)
|
||||
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
|
||||
; CHECK: v[[V2:[0-9]+]].h = vpacke(v[[V1]].w,v[[V0]].w)
|
||||
; CHECK: v[[V3:[0-9]+]].b = vpacke({{.*}},v[[V2]].h)
|
||||
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V3]]
|
||||
define void @f3(<64 x i32>* %a0, <64 x i8>* %a1) #0 {
|
||||
%v0 = load <64 x i32>, <64 x i32>* %a0, align 128
|
||||
%v1 = trunc <64 x i32> %v0 to <64 x i8>
|
||||
|
|
Loading…
Reference in New Issue