[SLP][Test] Fix and precommit tests for D98714

This commit is contained in:
Anton Afanasyev 2021-03-19 11:09:58 +03:00
parent 44a4000181
commit 00a0595b25
4 changed files with 173 additions and 88 deletions

View File

@ -2,34 +2,55 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: @bswap_v2i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX8: call <2 x i16> @llvm.bswap.v2i16(
; Test input for the <2 x i16> byte-swap case: each lane of %arg is extracted,
; passed through the scalar @llvm.bswap.i16 intrinsic, and re-inserted into a
; <2 x i16> built up from poison.
; Expected output per the checks below: with the GFX7 prefix the two scalar
; calls are left as they are; with the GFX8 prefix they are replaced by one
; call to @llvm.bswap.v2i16 whose lanes are extracted and re-inserted.
define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
; GFX7-LABEL: @bswap_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[T:%.*]] = extractelement <2 x i16> [[ARG:%.*]], i64 0
; GFX7-NEXT: [[T1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[T]])
; GFX7-NEXT: [[T2:%.*]] = insertelement <2 x i16> poison, i16 [[T1]], i64 0
; GFX7-NEXT: [[T3:%.*]] = extractelement <2 x i16> [[ARG]], i64 1
; GFX7-NEXT: [[T4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[T3]])
; GFX7-NEXT: [[T5:%.*]] = insertelement <2 x i16> [[T2]], i16 [[T4]], i64 1
; GFX7-NEXT: ret <2 x i16> [[T5]]
;
; GFX8-LABEL: @bswap_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x i16> [[T2]], i16 [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x i16> [[T5]]
;
bb:
%tmp = extractelement <2 x i16> %arg, i64 0
%tmp1 = tail call i16 @llvm.bswap.i16(i16 %tmp)
%tmp2 = insertelement <2 x i16> poison, i16 %tmp1, i64 0
%tmp3 = extractelement <2 x i16> %arg, i64 1
%tmp4 = tail call i16 @llvm.bswap.i16(i16 %tmp3)
%tmp5 = insertelement <2 x i16> %tmp2, i16 %tmp4, i64 1
ret <2 x i16> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x i16> %arg, i64 0
%t1 = tail call i16 @llvm.bswap.i16(i16 %t)
%t2 = insertelement <2 x i16> poison, i16 %t1, i64 0
%t3 = extractelement <2 x i16> %arg, i64 1
%t4 = tail call i16 @llvm.bswap.i16(i16 %t3)
%t5 = insertelement <2 x i16> %t2, i16 %t4, i64 1
ret <2 x i16> %t5
}
; GCN-LABEL: @bswap_v2i32(
; GCN: call i32 @llvm.bswap.i32
; GCN: call i32 @llvm.bswap.i32
; Test input for the <2 x i32> byte-swap case: each lane of %arg is extracted,
; passed through the scalar @llvm.bswap.i32 intrinsic, and re-inserted into a
; <2 x i32> built up from poison.
; Expected output per the checks below: the GCN prefix (enabled by every
; visible RUN line) expects the two scalar calls to be left unchanged.
define <2 x i32> @bswap_v2i32(<2 x i32> %arg) {
; GCN-LABEL: @bswap_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[T:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i64 0
; GCN-NEXT: [[T1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T]])
; GCN-NEXT: [[T2:%.*]] = insertelement <2 x i32> poison, i32 [[T1]], i64 0
; GCN-NEXT: [[T3:%.*]] = extractelement <2 x i32> [[ARG]], i64 1
; GCN-NEXT: [[T4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T3]])
; GCN-NEXT: [[T5:%.*]] = insertelement <2 x i32> [[T2]], i32 [[T4]], i64 1
; GCN-NEXT: ret <2 x i32> [[T5]]
;
bb:
%tmp = extractelement <2 x i32> %arg, i64 0
%tmp1 = tail call i32 @llvm.bswap.i32(i32 %tmp)
%tmp2 = insertelement <2 x i32> poison, i32 %tmp1, i64 0
%tmp3 = extractelement <2 x i32> %arg, i64 1
%tmp4 = tail call i32 @llvm.bswap.i32(i32 %tmp3)
%tmp5 = insertelement <2 x i32> %tmp2, i32 %tmp4, i64 1
ret <2 x i32> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x i32> %arg, i64 0
%t1 = tail call i32 @llvm.bswap.i32(i32 %t)
%t2 = insertelement <2 x i32> poison, i32 %t1, i64 0
%t3 = extractelement <2 x i32> %arg, i64 1
%t4 = tail call i32 @llvm.bswap.i32(i32 %t3)
%t5 = insertelement <2 x i32> %t2, i32 %t4, i64 1
ret <2 x i32> %t5
}
declare i16 @llvm.bswap.i16(i16) #0

View File

@ -2,34 +2,55 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: @bswap_v2i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX8: call <2 x i16> @llvm.bswap.v2i16(
; Test input for the <2 x i16> byte-swap case, undef-based variant: each lane
; of %arg is extracted, passed through the scalar @llvm.bswap.i16 intrinsic,
; and re-inserted into a <2 x i16> built up from undef.
; Expected output per the checks below: with the GFX7 prefix the two scalar
; calls are left as they are; with the GFX8 prefix they are replaced by one
; call to @llvm.bswap.v2i16 whose lanes are extracted and re-inserted.
define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
; GFX7-LABEL: @bswap_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[T:%.*]] = extractelement <2 x i16> [[ARG:%.*]], i64 0
; GFX7-NEXT: [[T1:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[T]])
; GFX7-NEXT: [[T2:%.*]] = insertelement <2 x i16> undef, i16 [[T1]], i64 0
; GFX7-NEXT: [[T3:%.*]] = extractelement <2 x i16> [[ARG]], i64 1
; GFX7-NEXT: [[T4:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[T3]])
; GFX7-NEXT: [[T5:%.*]] = insertelement <2 x i16> [[T2]], i16 [[T4]], i64 1
; GFX7-NEXT: ret <2 x i16> [[T5]]
;
; GFX8-LABEL: @bswap_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x i16> undef, i16 [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x i16> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x i16> [[T2]], i16 [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x i16> [[T5]]
;
bb:
%tmp = extractelement <2 x i16> %arg, i64 0
%tmp1 = tail call i16 @llvm.bswap.i16(i16 %tmp)
%tmp2 = insertelement <2 x i16> undef, i16 %tmp1, i64 0
%tmp3 = extractelement <2 x i16> %arg, i64 1
%tmp4 = tail call i16 @llvm.bswap.i16(i16 %tmp3)
%tmp5 = insertelement <2 x i16> %tmp2, i16 %tmp4, i64 1
ret <2 x i16> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x i16> %arg, i64 0
%t1 = tail call i16 @llvm.bswap.i16(i16 %t)
%t2 = insertelement <2 x i16> undef, i16 %t1, i64 0
%t3 = extractelement <2 x i16> %arg, i64 1
%t4 = tail call i16 @llvm.bswap.i16(i16 %t3)
%t5 = insertelement <2 x i16> %t2, i16 %t4, i64 1
ret <2 x i16> %t5
}
; GCN-LABEL: @bswap_v2i32(
; GCN: call i32 @llvm.bswap.i32
; GCN: call i32 @llvm.bswap.i32
; Test input for the <2 x i32> byte-swap case, undef-based variant: each lane
; of %arg is extracted, passed through the scalar @llvm.bswap.i32 intrinsic,
; and re-inserted into a <2 x i32> built up from undef.
; Expected output per the checks below: the GCN prefix (enabled by every
; visible RUN line) expects the two scalar calls to be left unchanged.
define <2 x i32> @bswap_v2i32(<2 x i32> %arg) {
; GCN-LABEL: @bswap_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[T:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i64 0
; GCN-NEXT: [[T1:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T]])
; GCN-NEXT: [[T2:%.*]] = insertelement <2 x i32> undef, i32 [[T1]], i64 0
; GCN-NEXT: [[T3:%.*]] = extractelement <2 x i32> [[ARG]], i64 1
; GCN-NEXT: [[T4:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T3]])
; GCN-NEXT: [[T5:%.*]] = insertelement <2 x i32> [[T2]], i32 [[T4]], i64 1
; GCN-NEXT: ret <2 x i32> [[T5]]
;
bb:
%tmp = extractelement <2 x i32> %arg, i64 0
%tmp1 = tail call i32 @llvm.bswap.i32(i32 %tmp)
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i64 0
%tmp3 = extractelement <2 x i32> %arg, i64 1
%tmp4 = tail call i32 @llvm.bswap.i32(i32 %tmp3)
%tmp5 = insertelement <2 x i32> %tmp2, i32 %tmp4, i64 1
ret <2 x i32> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x i32> %arg, i64 0
%t1 = tail call i32 @llvm.bswap.i32(i32 %t)
%t2 = insertelement <2 x i32> undef, i32 %t1, i64 0
%t3 = extractelement <2 x i32> %arg, i64 1
%t4 = tail call i32 @llvm.bswap.i32(i32 %t3)
%t5 = insertelement <2 x i32> %t2, i32 %t4, i64 1
ret <2 x i32> %t5
}
declare i16 @llvm.bswap.i16(i16) #0

View File

@ -2,34 +2,55 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: @round_v2f16(
; GFX7: call half @llvm.round.f16(
; GFX7: call half @llvm.round.f16(
; GFX8: call <2 x half> @llvm.round.v2f16(
; Test input for the <2 x half> rounding case: each lane of %arg is extracted,
; passed through a scalar round intrinsic, and re-inserted into a <2 x half>
; built up from poison.
; Expected output per the checks below: with the GFX7 prefix the two scalar
; calls are left as they are; with the GFX8 prefix they are replaced by one
; call to @llvm.round.v2f16 whose lanes are extracted and re-inserted.
; NOTE(review): the IR below calls @llvm.round.half, while the checks expect
; @llvm.round.f16; presumably the tool prints the intrinsic under its
; canonical mangled name - confirm.
define <2 x half> @round_v2f16(<2 x half> %arg) {
; GFX7-LABEL: @round_v2f16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[T:%.*]] = extractelement <2 x half> [[ARG:%.*]], i64 0
; GFX7-NEXT: [[T1:%.*]] = tail call half @llvm.round.f16(half [[T]])
; GFX7-NEXT: [[T2:%.*]] = insertelement <2 x half> poison, half [[T1]], i64 0
; GFX7-NEXT: [[T3:%.*]] = extractelement <2 x half> [[ARG]], i64 1
; GFX7-NEXT: [[T4:%.*]] = tail call half @llvm.round.f16(half [[T3]])
; GFX7-NEXT: [[T5:%.*]] = insertelement <2 x half> [[T2]], half [[T4]], i64 1
; GFX7-NEXT: ret <2 x half> [[T5]]
;
; GFX8-LABEL: @round_v2f16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x half> @llvm.round.v2f16(<2 x half> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x half> poison, half [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x half> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x half> [[T2]], half [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x half> [[T5]]
;
bb:
%tmp = extractelement <2 x half> %arg, i64 0
%tmp1 = tail call half @llvm.round.half(half %tmp)
%tmp2 = insertelement <2 x half> poison, half %tmp1, i64 0
%tmp3 = extractelement <2 x half> %arg, i64 1
%tmp4 = tail call half @llvm.round.half(half %tmp3)
%tmp5 = insertelement <2 x half> %tmp2, half %tmp4, i64 1
ret <2 x half> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x half> %arg, i64 0
%t1 = tail call half @llvm.round.half(half %t)
%t2 = insertelement <2 x half> poison, half %t1, i64 0
%t3 = extractelement <2 x half> %arg, i64 1
%t4 = tail call half @llvm.round.half(half %t3)
%t5 = insertelement <2 x half> %t2, half %t4, i64 1
ret <2 x half> %t5
}
; GCN-LABEL: @round_v2f32(
; GCN: call float @llvm.round.f32(
; GCN: call float @llvm.round.f32(
; Test input for the <2 x float> rounding case: each lane of %arg is
; extracted, passed through the scalar @llvm.round.f32 intrinsic, and
; re-inserted into a <2 x float> built up from poison.
; Expected output per the checks below: the GCN prefix (enabled by every
; visible RUN line) expects the two scalar calls to be left unchanged.
define <2 x float> @round_v2f32(<2 x float> %arg) {
; GCN-LABEL: @round_v2f32(
; GCN-NEXT: bb:
; GCN-NEXT: [[T:%.*]] = extractelement <2 x float> [[ARG:%.*]], i64 0
; GCN-NEXT: [[T1:%.*]] = tail call float @llvm.round.f32(float [[T]])
; GCN-NEXT: [[T2:%.*]] = insertelement <2 x float> poison, float [[T1]], i64 0
; GCN-NEXT: [[T3:%.*]] = extractelement <2 x float> [[ARG]], i64 1
; GCN-NEXT: [[T4:%.*]] = tail call float @llvm.round.f32(float [[T3]])
; GCN-NEXT: [[T5:%.*]] = insertelement <2 x float> [[T2]], float [[T4]], i64 1
; GCN-NEXT: ret <2 x float> [[T5]]
;
bb:
%tmp = extractelement <2 x float> %arg, i64 0
%tmp1 = tail call float @llvm.round.f32(float %tmp)
%tmp2 = insertelement <2 x float> poison, float %tmp1, i64 0
%tmp3 = extractelement <2 x float> %arg, i64 1
%tmp4 = tail call float @llvm.round.f32(float %tmp3)
%tmp5 = insertelement <2 x float> %tmp2, float %tmp4, i64 1
ret <2 x float> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x float> %arg, i64 0
%t1 = tail call float @llvm.round.f32(float %t)
%t2 = insertelement <2 x float> poison, float %t1, i64 0
%t3 = extractelement <2 x float> %arg, i64 1
%t4 = tail call float @llvm.round.f32(float %t3)
%t5 = insertelement <2 x float> %t2, float %t4, i64 1
ret <2 x float> %t5
}
declare half @llvm.round.half(half) #0

View File

@ -2,34 +2,56 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: @round_v2f16(
; GFX7: call half @llvm.round.f16(
; GFX7: call half @llvm.round.f16(
; GFX8: call <2 x half> @llvm.round.v2f16(
; Test input for the <2 x half> rounding case, undef-based variant: each lane
; of %arg is extracted, passed through a scalar round intrinsic, and
; re-inserted into a <2 x half> built up from undef.
; Expected output per the checks below: with the GFX7 prefix the two scalar
; calls are left as they are; with the GFX8 prefix they are replaced by one
; call to @llvm.round.v2f16 whose lanes are extracted and re-inserted.
; NOTE(review): the IR below calls @llvm.round.half, while the checks expect
; @llvm.round.f16; presumably the tool prints the intrinsic under its
; canonical mangled name - confirm.
define <2 x half> @round_v2f16(<2 x half> %arg) {
; GFX7-LABEL: @round_v2f16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[T:%.*]] = extractelement <2 x half> [[ARG:%.*]], i64 0
; GFX7-NEXT: [[T1:%.*]] = tail call half @llvm.round.f16(half [[T]])
; GFX7-NEXT: [[T2:%.*]] = insertelement <2 x half> undef, half [[T1]], i64 0
; GFX7-NEXT: [[T3:%.*]] = extractelement <2 x half> [[ARG]], i64 1
; GFX7-NEXT: [[T4:%.*]] = tail call half @llvm.round.f16(half [[T3]])
; GFX7-NEXT: [[T5:%.*]] = insertelement <2 x half> [[T2]], half [[T4]], i64 1
; GFX7-NEXT: ret <2 x half> [[T5]]
;
; GFX8-LABEL: @round_v2f16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x half> @llvm.round.v2f16(<2 x half> [[ARG:%.*]])
; GFX8-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[TMP0]], i32 0
; GFX8-NEXT: [[T2:%.*]] = insertelement <2 x half> undef, half [[TMP1]], i64 0
; GFX8-NEXT: [[TMP2:%.*]] = extractelement <2 x half> [[TMP0]], i32 1
; GFX8-NEXT: [[T5:%.*]] = insertelement <2 x half> [[T2]], half [[TMP2]], i64 1
; GFX8-NEXT: ret <2 x half> [[T5]]
;
bb:
%tmp = extractelement <2 x half> %arg, i64 0
%tmp1 = tail call half @llvm.round.half(half %tmp)
%tmp2 = insertelement <2 x half> undef, half %tmp1, i64 0
%tmp3 = extractelement <2 x half> %arg, i64 1
%tmp4 = tail call half @llvm.round.half(half %tmp3)
%tmp5 = insertelement <2 x half> %tmp2, half %tmp4, i64 1
ret <2 x half> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x half> %arg, i64 0
%t1 = tail call half @llvm.round.half(half %t)
%t2 = insertelement <2 x half> undef, half %t1, i64 0
%t3 = extractelement <2 x half> %arg, i64 1
%t4 = tail call half @llvm.round.half(half %t3)
%t5 = insertelement <2 x half> %t2, half %t4, i64 1
ret <2 x half> %t5
}
; GCN-LABEL: @round_v2f32(
; GCN: call float @llvm.round.f32(
; GCN: call float @llvm.round.f32(
; Test input for the <2 x float> rounding case, undef-based variant: each lane
; of %arg is extracted, passed through the scalar @llvm.round.f32 intrinsic,
; and re-inserted into a <2 x float> built up from undef.
; Expected output per the checks below: the GCN prefix (enabled by every
; visible RUN line) expects the two scalar calls to be left unchanged.
define <2 x float> @round_v2f32(<2 x float> %arg) {
; GCN-LABEL: @round_v2f32(
; GCN-NEXT: bb:
; GCN-NEXT: [[T:%.*]] = extractelement <2 x float> [[ARG:%.*]], i64 0
; GCN-NEXT: [[T1:%.*]] = tail call float @llvm.round.f32(float [[T]])
; GCN-NEXT: [[T2:%.*]] = insertelement <2 x float> undef, float [[T1]], i64 0
; GCN-NEXT: [[T3:%.*]] = extractelement <2 x float> [[ARG]], i64 1
; GCN-NEXT: [[T4:%.*]] = tail call float @llvm.round.f32(float [[T3]])
; GCN-NEXT: [[T5:%.*]] = insertelement <2 x float> [[T2]], float [[T4]], i64 1
; GCN-NEXT: ret <2 x float> [[T5]]
;
bb:
%tmp = extractelement <2 x float> %arg, i64 0
%tmp1 = tail call float @llvm.round.f32(float %tmp)
%tmp2 = insertelement <2 x float> undef, float %tmp1, i64 0
%tmp3 = extractelement <2 x float> %arg, i64 1
%tmp4 = tail call float @llvm.round.f32(float %tmp3)
%tmp5 = insertelement <2 x float> %tmp2, float %tmp4, i64 1
ret <2 x float> %tmp5
; NOTE(review): the %tmp..%tmp5 sequence above and the %t..%t5 sequence below
; are the same instructions under two naming schemes, which looks like a diff
; shown without +/- markers. The check variables ([[T]], [[T1]], ...) line up
; with the %t names, so presumably only the copy below belongs in the file -
; confirm before running the test.
%t = extractelement <2 x float> %arg, i64 0
%t1 = tail call float @llvm.round.f32(float %t)
%t2 = insertelement <2 x float> undef, float %t1, i64 0
%t3 = extractelement <2 x float> %arg, i64 1
%t4 = tail call float @llvm.round.f32(float %t3)
%t5 = insertelement <2 x float> %t2, float %t4, i64 1
ret <2 x float> %t5
}
declare half @llvm.round.half(half) #0