[SLPVectorizer][X86] Add avx512 sitofp/uitofp tests

llvm-svn: 283751
Simon Pilgrim 2016-10-10 14:14:31 +00:00
parent e9abe64816
commit 2c0733c678
2 changed files with 147 additions and 56 deletions
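Each test in the two files below has the same shape: adjacent scalar loads from a global source array, per-lane sitofp/uitofp conversions, and adjacent scalar stores to a global destination, which the SLP vectorizer can collapse into a single vector load/convert/store when the target's cost model says the vector conversion is cheap. A minimal self-contained sketch of the before/after pattern (illustrative IR with made-up @src/@dst globals and function names, not taken from the tests):

@src = global [2 x i64] zeroinitializer, align 64
@dst = global [2 x double] zeroinitializer, align 64

; Before SLP: adjacent scalar loads, per-lane converts, adjacent stores.
define void @scalar_uitofp_2i64_2f64() {
  %ld0 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @src, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @src, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([2 x double], [2 x double]* @dst, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([2 x double], [2 x double]* @dst, i32 0, i64 1), align 8
  ret void
}

; After SLP, when the vector conversion is cheap (e.g. AVX512): one vector convert.
define void @vector_uitofp_2i64_2f64() {
  %v = load <2 x i64>, <2 x i64>* bitcast ([2 x i64]* @src to <2 x i64>*), align 64
  %f = uitofp <2 x i64> %v to <2 x double>
  store <2 x double> %f, <2 x double>* bitcast ([2 x double]* @dst to <2 x double>*), align 64
  ret void
}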

test/Transforms/SLPVectorizer/X86/sitofp.ll

@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
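The RUN-line change above splits the old catch-all AVX prefix: corei7-avx, bdver1, and core-avx2 now share AVX256, skylake-avx512 gets AVX512, and AVX stays common to all four, so FileCheck can carry shared and width-specific assertions side by side. Those assertion blocks are autogenerated (see the NOTE line) and are regenerated after a RUN-line change rather than edited by hand. A typical invocation, written here as .ll comments (the opt path is illustrative; check the script's --help for exact flag spellings in your checkout):

; Rebuild the per-prefix CHECK/SSE/AVX256/AVX512 blocks for every RUN line:
;   utils/update_test_checks.py --opt-binary=build/bin/opt \
;       test/Transforms/SLPVectorizer/X86/uitofp.ll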

test/Transforms/SLPVectorizer/X86/uitofp.ll

@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -18,14 +20,29 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;
define void @uitofp_2i64_2f64() #0 {
-; CHECK-LABEL: @uitofp_2i64_2f64(
-; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
-; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
-; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT: ret void
+; SSE-LABEL: @uitofp_2i64_2f64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
+; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
+; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: ret void
+;
+; AVX256-LABEL: @uitofp_2i64_2f64(
+; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX256-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
+; AVX256-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
+; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX256-NEXT: ret void
+;
+; AVX512-LABEL: @uitofp_2i64_2f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
+; AVX512-NEXT: [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
+; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX512-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
@@ -37,20 +54,41 @@ define void @uitofp_2i64_2f64() #0 {
}

define void @uitofp_4i64_4f64() #0 {
-; CHECK-LABEL: @uitofp_4i64_4f64(
-; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
-; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
-; CHECK-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double
-; CHECK-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double
-; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
-; CHECK-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
-; CHECK-NEXT: ret void
+; SSE-LABEL: @uitofp_4i64_4f64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
+; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
+; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
+; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double
+; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double
+; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
+; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE-NEXT: ret void
+;
+; AVX256-LABEL: @uitofp_4i64_4f64(
+; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX256-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
+; AVX256-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; AVX256-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double
+; AVX256-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double
+; AVX256-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double
+; AVX256-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double
+; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX256-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
+; AVX256-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; AVX256-NEXT: ret void
+;
+; AVX512-LABEL: @uitofp_4i64_4f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
+; AVX512-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
+; AVX512-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
@@ -68,14 +106,29 @@ define void @uitofp_4i64_4f64() #0 {
}

define void @uitofp_2i32_2f64() #0 {
-; CHECK-LABEL: @uitofp_2i32_2f64(
-; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
-; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
-; CHECK-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
-; CHECK-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
-; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT: ret void
+; SSE-LABEL: @uitofp_2i32_2f64(
+; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
+; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
+; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
+; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: ret void
+;
+; AVX256-LABEL: @uitofp_2i32_2f64(
+; AVX256-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
+; AVX256-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
+; AVX256-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
+; AVX256-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
+; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX256-NEXT: ret void
+;
+; AVX512-LABEL: @uitofp_2i32_2f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
+; AVX512-NEXT: [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
+; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX512-NEXT: ret void
;
%ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
%ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
@@ -180,14 +233,29 @@ define void @uitofp_4i16_4f64() #0 {
}

define void @uitofp_2i8_2f64() #0 {
-; CHECK-LABEL: @uitofp_2i8_2f64(
-; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-; CHECK-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
-; CHECK-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
-; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; CHECK-NEXT: ret void
+; SSE-LABEL: @uitofp_2i8_2f64(
+; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
+; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
+; SSE-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
+; SSE-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
+; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: ret void
+;
+; AVX256-LABEL: @uitofp_2i8_2f64(
+; AVX256-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
+; AVX256-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
+; AVX256-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
+; AVX256-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
+; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
+; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; AVX256-NEXT: ret void
+;
+; AVX512-LABEL: @uitofp_2i8_2f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; AVX512-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
+; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; AVX512-NEXT: ret void
;
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
@@ -259,20 +327,41 @@ define void @uitofp_2i64_2f32() #0 {
}

define void @uitofp_4i64_4f32() #0 {
-; CHECK-LABEL: @uitofp_4i64_4f32(
-; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
-; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
-; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
-; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
-; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
-; CHECK-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
-; CHECK-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
-; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
-; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
-; CHECK-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
-; CHECK-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
-; CHECK-NEXT: ret void
+; SSE-LABEL: @uitofp_4i64_4f32(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
+; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
+; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
+; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
+; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
+; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
+; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
+; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE-NEXT: ret void
+;
+; AVX256-LABEL: @uitofp_4i64_4f32(
+; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
+; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
+; AVX256-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
+; AVX256-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
+; AVX256-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
+; AVX256-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
+; AVX256-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
+; AVX256-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
+; AVX256-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
+; AVX256-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; AVX256-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
+; AVX256-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; AVX256-NEXT: ret void
+;
+; AVX512-LABEL: @uitofp_4i64_4f32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
+; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
+; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
+; AVX512-NEXT: ret void
;
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
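Why the AVX512 runs vectorize where the SSE and AVX256 runs stay scalar: skylake-avx512 implies AVX512DQ and AVX512VL, which add packed 64-bit integer to floating-point conversion instructions (vcvtqq2pd, vcvtuqq2pd, and the f32 variants) at 128-, 256-, and 512-bit widths, so the X86 cost model treats a vector uitofp/sitofp over i64 lanes as cheap. Earlier targets must expand an unsigned i64 conversion into a multi-instruction scalar sequence, which keeps those versions scalar under SLP's cost model. A self-contained sketch of the form AVX512 makes profitable (mirroring the AVX512 checks above; the function name is made up, @src64/@dst64 as declared in the test file):

@src64 = external global [8 x i64]
@dst64 = external global [8 x double]

define void @uitofp_4i64_4f64_vectorized() {
  ; Expected to lower to a single vcvtuqq2pd on skylake-avx512 (AVX512DQ+VL).
  %v = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
  %f = uitofp <4 x i64> %v to <4 x double>
  store <4 x double> %f, <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
  ret void
}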