diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir index 5e577d778210..62c5ee2b2c47 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul.mlir @@ -5,7 +5,7 @@ // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \ // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \ -// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ // Activate to dump assembly // R_UN: -dump-object-file -object-filename=/tmp/a.o \ diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir index de4e51bd8c0e..4bd0a64cb4b2 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir @@ -7,7 +7,7 @@ // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \ // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \ -// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ // Activate to dump assembly // R_UN: -dump-object-file -object-filename=/tmp/a.o \ diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir index 95fc57506c43..e32d898e6c6d 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir @@ -8,7 +8,7 @@ // R_UN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \ // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \ -// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ // Activate to dump assembly // R_UN: -dump-object-file -object-filename=/tmp/a.o \ diff --git a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir index abfb14739e25..32c977fbbe44 100644 --- a/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir +++ b/mlir/integration_test/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir @@ -5,7 +5,7 @@ // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \ // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \ -// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -mlir-disable-threading | \ +// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -mlir-disable-threading | \ // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ // Activate to dump assembly // R_UN: -dump-object-file -object-filename=/tmp/a.o \ diff --git a/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir index a982baa646c3..a7c06c7c8391 100644 --- a/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir +++ b/mlir/integration_test/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm | \ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm -convert-std-to-llvm | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir index b6146137f4e1..3d259661d081 100644 --- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir index b8f4045495ff..15d043f11e77 100644 --- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir index fb4b1029c790..a6c053a5b014 100644 --- a/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir +++ b/mlir/integration_test/Dialect/Vector/CPU/test-vector-distribute.mlir @@ -1,11 +1,11 @@ // RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \ -// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \ // RUN: FileCheck %s // RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \ -// RUN: -convert-scf-to-std -convert-vector-to-llvm | mlir-cpu-runner -e main \ +// RUN: -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e main \ // RUN: -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp b/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp index 8a5790352263..1d95f73327fd 100644 --- a/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp +++ b/mlir/lib/Conversion/ArmSVEToLLVM/ArmSVEToLLVM.cpp @@ -58,13 +58,56 @@ convertScalableVectorTypeToLLVM(ScalableVectorType svType, return sVectorType; } +template +class ForwardOperands : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(OpTy op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + if (ValueRange(operands).getTypes() == op->getOperands().getTypes()) + return rewriter.notifyMatchFailure(op, "operand types already match"); + + rewriter.updateRootInPlace(op, [&]() { op->setOperands(operands); }); + return success(); + } +}; + +class ReturnOpTypeConversion : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ReturnOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + rewriter.updateRootInPlace(op, [&]() { op->setOperands(operands); }); + return success(); + } +}; + +static Optional addUnrealizedCast(OpBuilder &builder, + ScalableVectorType svType, + ValueRange inputs, Location loc) { + if (inputs.size() != 1 || + !inputs[0].getType().isa()) + return Value(); + return builder.create(loc, svType, inputs) + .getResult(0); +} + /// Populate the given list with patterns that convert from ArmSVE to LLVM. void mlir::populateArmSVEToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns) { converter.addConversion([&converter](ScalableVectorType svType) { return convertScalableVectorTypeToLLVM(svType, converter); }); + converter.addSourceMaterialization(addUnrealizedCast); + // clang-format off + patterns.insert, + ForwardOperands, + ForwardOperands>(converter, + &converter.getContext()); patterns.insert getI64SubArray(ArrayAttr arrayAttr, return res; } +static Value createCastToIndexLike(ConversionPatternRewriter &rewriter, + Location loc, Type targetType, Value value) { + if (targetType == value.getType()) + return value; + + bool targetIsIndex = targetType.isIndex(); + bool valueIsIndex = value.getType().isIndex(); + if (targetIsIndex ^ valueIsIndex) + return rewriter.create(loc, targetType, value); + + auto targetIntegerType = targetType.dyn_cast(); + auto valueIntegerType = value.getType().dyn_cast(); + assert(targetIntegerType && valueIntegerType && + "unexpected cast between types other than integers and index"); + assert(targetIntegerType.getSignedness() == valueIntegerType.getSignedness()); + + if (targetIntegerType.getWidth() > valueIntegerType.getWidth()) + return rewriter.create(loc, targetIntegerType, value); + return rewriter.create(loc, targetIntegerType, value); +} + // Helper that returns a vector comparison that constructs a mask: // mask = [0,1,..,n-1] + [o,o,..,o] < [b,b,..,b] // @@ -131,12 +152,12 @@ static Value buildVectorComparison(ConversionPatternRewriter &rewriter, } // Add in an offset if requested. if (off) { - Value o = rewriter.create(loc, idxType, *off); + Value o = createCastToIndexLike(rewriter, loc, idxType, *off); Value ov = rewriter.create(loc, indices.getType(), o); indices = rewriter.create(loc, ov, indices); } // Construct the vector comparison. - Value bound = rewriter.create(loc, idxType, b); + Value bound = createCastToIndexLike(rewriter, loc, idxType, b); Value bounds = rewriter.create(loc, indices.getType(), bound); return rewriter.create(loc, CmpIPredicate::slt, indices, bounds); } @@ -216,10 +237,8 @@ replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter, LLVMTypeConverter &typeConverter, Location loc, TransferReadOp xferOp, ArrayRef operands, Value dataPtr, Value mask) { - auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); }; VectorType fillType = xferOp.getVectorType(); Value fill = rewriter.create(loc, fillType, xferOp.padding()); - fill = rewriter.create(loc, toLLVMTy(fillType), fill); Type vecTy = typeConverter.convertType(xferOp.getVectorType()); if (!vecTy) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index af6ce6a0a68c..910524041bed 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -22,6 +22,7 @@ #include "mlir/Dialect/LLVMIR/LLVMArmNeonDialect.h" #include "mlir/Dialect/LLVMIR/LLVMArmSVEDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -70,10 +71,12 @@ void LowerVectorToLLVMPass::runOnOperation() { populateVectorToLLVMConversionPatterns( converter, patterns, reassociateFPReductions, enableIndexOptimizations); populateVectorToLLVMMatrixConversionPatterns(converter, patterns); - populateStdToLLVMConversionPatterns(converter, patterns); // Architecture specific augmentations. LLVMConversionTarget target(getContext()); + target.addLegalOp(); + target.addLegalDialect(); + target.addLegalOp(); if (enableArmNeon) { target.addLegalDialect(); target.addIllegalDialect(); @@ -82,6 +85,23 @@ void LowerVectorToLLVMPass::runOnOperation() { if (enableArmSVE) { target.addLegalDialect(); target.addIllegalDialect(); + auto hasScalableVectorType = [](TypeRange types) { + for (Type type : types) + if (type.isa()) + return true; + return false; + }; + // Remove any ArmSVE-specific types from function signatures and results. + populateFuncOpTypeConversionPattern(patterns, &getContext(), converter); + target.addDynamicallyLegalOp([hasScalableVectorType](FuncOp op) { + return !hasScalableVectorType(op.getType().getInputs()) && + !hasScalableVectorType(op.getType().getResults()); + }); + target.addDynamicallyLegalOp( + [hasScalableVectorType](Operation *op) { + return !hasScalableVectorType(op->getOperandTypes()) && + !hasScalableVectorType(op->getResultTypes()); + }); populateArmSVEToLLVMConversionPatterns(converter, patterns); } if (enableAVX512) { diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index adf7ff7b74f5..8a3d2ce61055 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1274,10 +1274,40 @@ static LogicalResult verifyCast(DialectCastOp op, Type llvmType, Type type, return op.emitOpError("invalid cast between index and non-integer type"); } + if (type.isa()) { + auto llvmIntegerType = llvmType.dyn_cast(); + if (!llvmIntegerType) + return op->emitOpError("invalid cast between integer and non-integer"); + if (llvmIntegerType.getWidth() != type.getIntOrFloatBitWidth()) + return op.emitOpError("invalid cast changing integer width"); + return success(); + } + // Vectors are compatible if they are 1D non-scalable, and their element types - // are compatible. - if (auto vectorType = type.dyn_cast()) - return op.emitOpError("vector types should not be casted"); + // are compatible. nD vectors are compatible with (n-1)D arrays containing 1D + // vector. + if (auto vectorType = type.dyn_cast()) { + if (vectorType == llvmType && !isElement) + return op.emitOpError("vector types should not be casted"); + + if (vectorType.getRank() == 1) { + auto llvmVectorType = llvmType.dyn_cast(); + if (!llvmVectorType || llvmVectorType.getRank() != 1) + return op.emitOpError("invalid cast for vector types"); + + return verifyCast(op, llvmVectorType.getElementType(), + vectorType.getElementType(), /*isElement=*/true); + } + + auto arrayType = llvmType.dyn_cast(); + if (!arrayType || + arrayType.getNumElements() != vectorType.getShape().front()) + return op.emitOpError("invalid cast for vector, expected array"); + return verifyCast(op, arrayType.getElementType(), + VectorType::get(vectorType.getShape().drop_front(), + vectorType.getElementType()), + /*isElement=*/true); + } if (auto memrefType = type.dyn_cast()) { // Bare pointer convention: statically-shaped memref is compatible with an diff --git a/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir index 5f218c9f421a..f05b37644fc7 100644 --- a/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir +++ b/mlir/test/Conversion/ArmSVEToLLVM/convert-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-vector-to-llvm="enable-arm-sve" | mlir-opt | FileCheck %s +// RUN: mlir-opt %s -convert-vector-to-llvm="enable-arm-sve" -convert-std-to-llvm | mlir-opt | FileCheck %s func @arm_sve_sdot(%a: !arm_sve.vector<16xi8>, %b: !arm_sve.vector<16xi8>, diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir index 85e19da84013..249f8c09e599 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir @@ -1,45 +1,41 @@ // RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=1' | FileCheck %s --check-prefix=CMP32 // RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=0' | FileCheck %s --check-prefix=CMP64 -// CMP32-LABEL: llvm.func @genbool_var_1d( -// CMP32-SAME: %[[A:.*]]: i64) -// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : vector<11xi32> -// CMP32: %[[T1:.*]] = llvm.trunc %[[A]] : i64 to i32 -// CMP32: %[[T2:.*]] = llvm.mlir.undef : vector<11xi32> -// CMP32: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : i32] : vector<11xi32> -// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi32>, vector<11xi32> -// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : vector<11xi32> -// CMP32: llvm.return %[[T6]] : vector<11xi1> +// CMP32-LABEL: @genbool_var_1d( +// CMP32-SAME: %[[ARG:.*]]: index) +// CMP32: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : index to i64 +// CMP32: %[[T0:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32> +// CMP32: %[[T1:.*]] = trunci %[[A]] : i64 to i32 +// CMP32: %[[T2:.*]] = splat %[[T1]] : vector<11xi32> +// CMP32: %[[T3:.*]] = cmpi slt, %[[T0]], %[[T2]] : vector<11xi32> +// CMP32: return %[[T3]] : vector<11xi1> -// CMP64-LABEL: llvm.func @genbool_var_1d( -// CMP64-SAME: %[[A:.*]]: i64) -// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : vector<11xi64> -// CMP64: %[[T1:.*]] = llvm.mlir.undef : vector<11xi64> -// CMP64: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<11xi64> -// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi64>, vector<11xi64> -// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : vector<11xi64> -// CMP64: llvm.return %[[T5]] : vector<11xi1> +// CMP64-LABEL: @genbool_var_1d( +// CMP64-SAME: %[[ARG:.*]]: index) +// CMP64: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : index to i64 +// CMP64: %[[T0:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64> +// CMP64: %[[T1:.*]] = splat %[[A]] : vector<11xi64> +// CMP64: %[[T2:.*]] = cmpi slt, %[[T0]], %[[T1]] : vector<11xi64> +// CMP64: return %[[T2]] : vector<11xi1> func @genbool_var_1d(%arg0: index) -> vector<11xi1> { %0 = vector.create_mask %arg0 : vector<11xi1> return %0 : vector<11xi1> } -// CMP32-LABEL: llvm.func @transfer_read_1d -// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : vector<16xi32> -// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi32> -// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi32> +// CMP32-LABEL: @transfer_read_1d +// CMP32: %[[C:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32> +// CMP32: %[[A:.*]] = addi %{{.*}}, %[[C]] : vector<16xi32> +// CMP32: %[[M:.*]] = cmpi slt, %[[A]], %{{.*}} : vector<16xi32> // CMP32: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} -// CMP32: llvm.return %[[L]] : vector<16xf32> +// CMP32: return %[[L]] : vector<16xf32> -// CMP64-LABEL: llvm.func @transfer_read_1d -// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : vector<16xi64> -// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi64> -// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi64> +// CMP64-LABEL: @transfer_read_1d +// CMP64: %[[C:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64> +// CMP64: %[[A:.*]] = addi %{{.*}}, %[[C]] : vector<16xi64> +// CMP64: %[[M:.*]] = cmpi slt, %[[A]], %{{.*}} : vector<16xi64> // CMP64: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}} -// CMP64: llvm.return %[[L]] : vector<16xf32> +// CMP64: return %[[L]] : vector<16xf32> func @transfer_read_1d(%A : memref, %i: index) -> vector<16xf32> { %d = constant -1.0: f32 diff --git a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir index 71d091413b2e..b2df9745070e 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir @@ -2,19 +2,19 @@ // RUN: mlir-opt %s -convert-vector-to-llvm='reassociate-fp-reductions' | FileCheck %s --check-prefix=REASSOC // -// CHECK-LABEL: llvm.func @reduce_add_f32( +// CHECK-LABEL: @reduce_add_f32( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) // CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32 -// CHECK: llvm.return %[[V]] : f32 +// CHECK: return %[[V]] : f32 // -// REASSOC-LABEL: llvm.func @reduce_add_f32( +// REASSOC-LABEL: @reduce_add_f32( // REASSOC-SAME: %[[A:.*]]: vector<16xf32>) // REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) // REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32 -// REASSOC: llvm.return %[[V]] : f32 +// REASSOC: return %[[V]] : f32 // func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 { %0 = vector.reduction "add", %arg0 : vector<16xf32> into f32 @@ -22,19 +22,19 @@ func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 { } // -// CHECK-LABEL: llvm.func @reduce_mul_f32( +// CHECK-LABEL: @reduce_mul_f32( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]]) // CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32 -// CHECK: llvm.return %[[V]] : f32 +// CHECK: return %[[V]] : f32 // -// REASSOC-LABEL: llvm.func @reduce_mul_f32( +// REASSOC-LABEL: @reduce_mul_f32( // REASSOC-SAME: %[[A:.*]]: vector<16xf32>) // REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]]) // REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32 -// REASSOC: llvm.return %[[V]] : f32 +// REASSOC: return %[[V]] : f32 // func @reduce_mul_f32(%arg0: vector<16xf32>) -> f32 { %0 = vector.reduction "mul", %arg0 : vector<16xf32> into f32 diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 1881c84ce1c9..facc91cf03d0 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -6,7 +6,7 @@ func @bitcast_f32_to_i32_vector(%input: vector<16xf32>) -> vector<16xi32> { return %0 : vector<16xi32> } -// CHECK-LABEL: llvm.func @bitcast_f32_to_i32_vector( +// CHECK-LABEL: @bitcast_f32_to_i32_vector // CHECK-SAME: %[[input:.*]]: vector<16xf32> // CHECK: llvm.bitcast %[[input]] : vector<16xf32> to vector<16xi32> @@ -17,7 +17,7 @@ func @bitcast_i8_to_f32_vector(%input: vector<64xi8>) -> vector<16xf32> { return %0 : vector<16xf32> } -// CHECK-LABEL: llvm.func @bitcast_i8_to_f32_vector( +// CHECK-LABEL: @bitcast_i8_to_f32_vector // CHECK-SAME: %[[input:.*]]: vector<64xi8> // CHECK: llvm.bitcast %[[input]] : vector<64xi8> to vector<16xf32> @@ -27,13 +27,10 @@ func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2xf32> return %0 : vector<2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec1d_from_scalar( +// CHECK-LABEL: @broadcast_vec1d_from_scalar // CHECK-SAME: %[[A:.*]]: f32) -// CHECK: %[[T0:.*]] = llvm.mlir.undef : vector<2xf32> -// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T2:.*]] = llvm.insertelement %[[A]], %[[T0]][%[[T1]] : i32] : vector<2xf32> -// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T0]] [0 : i32, 0 : i32] : vector<2xf32>, vector<2xf32> -// CHECK: llvm.return %[[T3]] : vector<2xf32> +// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2xf32> +// CHECK: return %[[T0]] : vector<2xf32> // ----- @@ -41,16 +38,10 @@ func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32> return %0 : vector<2x3xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec2d_from_scalar( +// CHECK-LABEL: @broadcast_vec2d_from_scalar( // CHECK-SAME: %[[A:.*]]: f32) -// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T1:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<3xf32> -// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<2 x vector<3xf32>> -// CHECK: llvm.return %[[T6]] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2x3xf32> +// CHECK: return %[[T0]] : vector<2x3xf32> // ----- @@ -58,20 +49,10 @@ func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32> return %0 : vector<2x3x4xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec3d_from_scalar( +// CHECK-LABEL: @broadcast_vec3d_from_scalar( // CHECK-SAME: %[[A:.*]]: f32) -// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x array<3 x vector<4xf32>>> -// CHECK: %[[T1:.*]] = llvm.mlir.undef : vector<4xf32> -// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<4xf32> -// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<4xf32>, vector<4xf32> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0, 0] : !llvm.array<2 x array<3 x vector<4xf32>>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0, 1] : !llvm.array<2 x array<3 x vector<4xf32>>> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][0, 2] : !llvm.array<2 x array<3 x vector<4xf32>>> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][1, 0] : !llvm.array<2 x array<3 x vector<4xf32>>> -// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T4]], %[[T8]][1, 1] : !llvm.array<2 x array<3 x vector<4xf32>>> -// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T4]], %[[T9]][1, 2] : !llvm.array<2 x array<3 x vector<4xf32>>> -// CHECK: llvm.return %[[T10]] : !llvm.array<2 x array<3 x vector<4xf32>>> +// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2x3x4xf32> +// CHECK: return %[[T0]] : vector<2x3x4xf32> // ----- @@ -79,9 +60,9 @@ func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32> return %0 : vector<2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec1d_from_vec1d( +// CHECK-LABEL: @broadcast_vec1d_from_vec1d( // CHECK-SAME: %[[A:.*]]: vector<2xf32>) -// CHECK: llvm.return %[[A]] : vector<2xf32> +// CHECK: return %[[A]] : vector<2xf32> // ----- @@ -89,13 +70,15 @@ func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32> return %0 : vector<3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec2d_from_vec1d( +// CHECK-LABEL: @broadcast_vec2d_from_vec1d( // CHECK-SAME: %[[A:.*]]: vector<2xf32>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<3 x vector<2xf32>> -// CHECK: llvm.return %[[T3]] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T0:.*]] = constant dense<0.000000e+00> : vector<3x2xf32> +// CHECK: %[[T1:.*]] = llvm.mlir.cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T5:.*]] = llvm.mlir.cast %[[T4]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32> +// CHECK: return %[[T5]] : vector<3x2xf32> // ----- @@ -103,18 +86,24 @@ func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec1d( +// CHECK-LABEL: @broadcast_vec3d_from_vec1d( // CHECK-SAME: %[[A:.*]]: vector<2xf32>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T1]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: llvm.return %[[T8]] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T0:.*]] = constant dense<0.000000e+00> : vector<3x2xf32> +// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32> + +// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[A]], %[[T4]][2] : !llvm.array<3 x vector<2xf32>> + +// CHECK: %[[T6:.*]] = llvm.mlir.cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T5]], %[[T6]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T5]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T5]], %[[T8]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T5]], %[[T9]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> + +// CHECK: %[[T11:.*]] = llvm.mlir.cast %[[T10]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> +// CHECK: return %[[T11]] : vector<4x3x2xf32> // ----- @@ -122,14 +111,21 @@ func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec2d( -// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vector<2xf32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: llvm.return %[[T4]] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK-LABEL: @broadcast_vec3d_from_vec2d( +// CHECK-SAME: %[[A:.*]]: vector<3x2xf32>) +// CHECK: %[[T0:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32> +// CHECK: %[[T1:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[T0]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T4:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T3]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T6:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T5]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T8:.*]] = llvm.mlir.cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T10:.*]] = llvm.mlir.cast %[[T9]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> +// CHECK: return %[[T10]] : vector<4x3x2xf32> + // ----- @@ -137,15 +133,12 @@ func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> { %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32> return %0 : vector<4xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch( +// CHECK-LABEL: @broadcast_stretch( // CHECK-SAME: %[[A:.*]]: vector<1xf32>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : vector<1xf32> -// CHECK: %[[T2:.*]] = llvm.mlir.undef : vector<4xf32> -// CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%3 : i32] : vector<4xf32> -// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<4xf32>, vector<4xf32> -// CHECK: llvm.return %[[T5]] : vector<4xf32> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T1]] : i64] : vector<1xf32> +// CHECK: %[[T3:.*]] = splat %[[T2]] : vector<4xf32> +// CHECK: return %[[T3]] : vector<4xf32> // ----- @@ -153,14 +146,17 @@ func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> { %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32> return %0 : vector<3x4xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch_at_start( -// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vector<4xf32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x4xf32>) : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vector<4xf32>> -// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][1] : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T1]], %[[T3]][2] : !llvm.array<3 x vector<4xf32>> -// CHECK: llvm.return %[[T4]] : !llvm.array<3 x vector<4xf32>> +// CHECK-LABEL: @broadcast_stretch_at_start( +// CHECK-SAME: %[[A:.*]]: vector<1x4xf32>) +// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<3x4xf32> +// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>> +// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<1 x vector<4xf32>> +// CHECK: %[[T4:.*]] = llvm.mlir.cast %[[T1]] : vector<3x4xf32> to !llvm.array<3 x vector<4xf32>> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T3]], %[[T4]][0] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T3]], %[[T5]][1] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T3]], %[[T6]][2] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[T8:.*]] = llvm.mlir.cast %[[T7]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32> +// CHECK: return %[[T8]] : vector<3x4xf32> // ----- @@ -168,42 +164,36 @@ func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> { %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32> return %0 : vector<4x3xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch_at_end( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<1xf32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3xf32>) : !llvm.array<4 x vector<3xf32>> -// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<4 x vector<1xf32>> -// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]][%[[T2]] : i64] : vector<1xf32> -// CHECK: %[[T4:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T5:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T6:.*]] = llvm.insertelement %[[T3]], %[[T4]][%[[T5]] : i32] : vector<3xf32> -// CHECK: %[[T7:.*]] = llvm.shufflevector %[[T6]], %[[T4]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<4 x vector<3xf32>> -// CHECK: %[[T9:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<4 x vector<1xf32>> -// CHECK: %[[T10:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T11:.*]] = llvm.extractelement %[[T9]][%[[T10]] : i64] : vector<1xf32> -// CHECK: %[[T12:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : vector<3xf32> -// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<4 x vector<3xf32>> -// CHECK: %[[T17:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<1xf32>> -// CHECK: %[[T18:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T19:.*]] = llvm.extractelement %[[T17]][%[[T18]] : i64] : vector<1xf32> -// CHECK: %[[T20:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T21:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T22:.*]] = llvm.insertelement %[[T19]], %[[T20]][%[[T21]] : i32] : vector<3xf32> -// CHECK: %[[T23:.*]] = llvm.shufflevector %[[T22]], %[[T20]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T16]][2] : !llvm.array<4 x vector<3xf32>> -// CHECK: %[[T25:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<1xf32>> -// CHECK: %[[T26:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T27:.*]] = llvm.extractelement %[[T25]][%[[T26]] : i64] : vector<1xf32> -// CHECK: %[[T28:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T29:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T30:.*]] = llvm.insertelement %[[T27]], %[[T28]][%[[T29]] : i32] : vector<3xf32> -// CHECK: %[[T31:.*]] = llvm.shufflevector %[[T30]], %[[T28]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T32:.*]] = llvm.insertvalue %[[T31]], %[[T24]][3] : !llvm.array<4 x vector<3xf32>> -// CHECK: llvm.return %[[T32]] : !llvm.array<4 x vector<3xf32>> +// CHECK-LABEL: @broadcast_stretch_at_end( +// CHECK-SAME: %[[A:.*]]: vector<4x1xf32>) +// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<4x3xf32> +// CHECK: %[[T2:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[T5:.*]] = llvm.extractelement %[[T3]]{{\[}}%[[T4]] : i64] : vector<1xf32> +// CHECK: %[[T6:.*]] = splat %[[T5]] : vector<3xf32> +// CHECK: %[[T7:.*]] = llvm.mlir.cast %[[T1]] : vector<4x3xf32> to !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T9:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T10:.*]] = llvm.extractvalue %[[T9]][1] : !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T11:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[T12:.*]] = llvm.extractelement %[[T10]]{{\[}}%[[T11]] : i64] : vector<1xf32> +// CHECK: %[[T13:.*]] = splat %[[T12]] : vector<3xf32> +// CHECK: %[[T14:.*]] = llvm.insertvalue %[[T13]], %[[T8]][1] : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T15:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T16:.*]] = llvm.extractvalue %[[T15]][2] : !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T17:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[T18:.*]] = llvm.extractelement %[[T16]]{{\[}}%[[T17]] : i64] : vector<1xf32> +// CHECK: %[[T19:.*]] = splat %[[T18]] : vector<3xf32> +// CHECK: %[[T20:.*]] = llvm.insertvalue %[[T19]], %[[T14]][2] : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T21:.*]] = llvm.mlir.cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T22:.*]] = llvm.extractvalue %[[T21]][3] : !llvm.array<4 x vector<1xf32>> +// CHECK: %[[T23:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[T24:.*]] = llvm.extractelement %[[T22]]{{\[}}%[[T23]] : i64] : vector<1xf32> +// CHECK: %[[T25:.*]] = splat %[[T24]] : vector<3xf32> +// CHECK: %[[T26:.*]] = llvm.insertvalue %[[T25]], %[[T20]][3] : !llvm.array<4 x vector<3xf32>> +// CHECK: %[[T27:.*]] = llvm.mlir.cast %[[T26]] : !llvm.array<4 x vector<3xf32>> to vector<4x3xf32> +// CHECK: return %[[T27]] : vector<4x3xf32> // ----- @@ -211,31 +201,41 @@ func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch_in_middle( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x array<1 x vector<2xf32>>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][0, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T2]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T2]], %[[T4]][1] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T2]], %[[T5]][2] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T0]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T8:.*]] = llvm.extractvalue %[[A]][1, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> -// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T8]], %[[T10]][1] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T12:.*]] = llvm.insertvalue %[[T8]], %[[T11]][2] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T14:.*]] = llvm.extractvalue %[[A]][2, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T14]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T14]], %[[T16]][1] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T14]], %[[T17]][2] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T19:.*]] = llvm.insertvalue %[[T18]], %[[T13]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: %[[T20:.*]] = llvm.extractvalue %[[A]][3, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> -// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T20]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T20]], %[[T22]][1] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T20]], %[[T23]][2] : !llvm.array<3 x vector<2xf32>> -// CHECK: %[[T25:.*]] = llvm.insertvalue %[[T24]], %[[T19]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> -// CHECK: llvm.return %[[T25]] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK-LABEL: @broadcast_stretch_in_middle( +// CHECK-SAME: %[[A:.*]]: vector<4x1x2xf32>) -> vector<4x3x2xf32> { +// CHECK: %[[T1:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32> +// CHECK: %[[T2:.*]] = constant dense<0.000000e+00> : vector<3x2xf32> +// CHECK: %[[T3:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T4:.*]] = llvm.extractvalue %[[T3]][0, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T5:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T9:.*]] = llvm.mlir.cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T9]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T11:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T12:.*]] = llvm.extractvalue %[[T11]][1, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T13:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T14:.*]] = llvm.insertvalue %[[T12]], %[[T13]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T15:.*]] = llvm.insertvalue %[[T12]], %[[T14]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T12]], %[[T15]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T16]], %[[T10]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T18:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T19:.*]] = llvm.extractvalue %[[T18]][2, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T20:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T21:.*]] = llvm.insertvalue %[[T19]], %[[T20]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T19]], %[[T21]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T19]], %[[T22]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T17]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T25:.*]] = llvm.mlir.cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T26:.*]] = llvm.extractvalue %[[T25]][3, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> +// CHECK: %[[T27:.*]] = llvm.mlir.cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T28:.*]] = llvm.insertvalue %[[T26]], %[[T27]][0] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T29:.*]] = llvm.insertvalue %[[T26]], %[[T28]][1] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T30:.*]] = llvm.insertvalue %[[T26]], %[[T29]][2] : !llvm.array<3 x vector<2xf32>> +// CHECK: %[[T31:.*]] = llvm.insertvalue %[[T30]], %[[T24]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> +// CHECK: %[[T32:.*]] = llvm.mlir.cast %[[T31]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> +// CHECK: return %[[T32]] : vector<4x3x2xf32> // ----- @@ -243,27 +243,23 @@ func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32 %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } -// CHECK-LABEL: llvm.func @outerproduct( -// CHECK-SAME: %[[A:.*]]: vector<2xf32>, -// CHECK-SAME: %[[B:.*]]: vector<3xf32>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>) -// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : vector<2xf32> -// CHECK: %[[T3:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : i32] : vector<3xf32> -// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T7:.*]] = llvm.fmul %[[T6]], %[[B]] : vector<3xf32> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]][%9 : i64] : vector<2xf32> -// CHECK: %[[T11:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T12:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : i32] : vector<3xf32> -// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T13]], %[[T11]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T15:.*]] = llvm.fmul %[[T14]], %[[B]] : vector<3xf32> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<2 x vector<3xf32>> -// CHECK: llvm.return %[[T16]] : !llvm.array<2 x vector<3xf32>> +// CHECK-LABEL: @outerproduct( +// CHECK-SAME: %[[A:.*]]: vector<2xf32>, +// CHECK-SAME: %[[B:.*]]: vector<3xf32>) +// CHECK: %[[T2:.*]] = constant dense<0.000000e+00> : vector<2x3xf32> +// CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[T4:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T3]] : i64] : vector<2xf32> +// CHECK: %[[T5:.*]] = splat %[[T4]] : vector<3xf32> +// CHECK: %[[T6:.*]] = mulf %[[T5]], %[[B]] : vector<3xf32> +// CHECK: %[[T7:.*]] = llvm.mlir.cast %[[T2]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T9]] : i64] : vector<2xf32> +// CHECK: %[[T11:.*]] = splat %[[T10]] : vector<3xf32> +// CHECK: %[[T12:.*]] = mulf %[[T11]], %[[B]] : vector<3xf32> +// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T8]][1] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T14:.*]] = llvm.mlir.cast %[[T13]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> +// CHECK: return %[[T14]] : vector<2x3xf32> // ----- @@ -271,30 +267,28 @@ func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } -// CHECK-LABEL: llvm.func @outerproduct_add( -// CHECK-SAME: %[[A:.*]]: vector<2xf32>, -// CHECK-SAME: %[[B:.*]]: vector<3xf32>, -// CHECK-SAME: %[[C:.*]]: !llvm.array<2 x vector<3xf32>>) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>) -// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : i64] : vector<2xf32> -// CHECK: %[[T3:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : i32] : vector<3xf32> -// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T8:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T7]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T0]][0] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T10:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : i64] : vector<2xf32> -// CHECK: %[[T12:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : i32] : vector<3xf32> -// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T15]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T9]][1] : !llvm.array<2 x vector<3xf32>> -// CHECK: llvm.return %[[T18]] : !llvm.array<2 x vector<3xf32>> +// CHECK-LABEL: @outerproduct_add( +// CHECK-SAME: %[[A:.*]]: vector<2xf32>, +// CHECK-SAME: %[[B:.*]]: vector<3xf32>, +// CHECK-SAME: %[[C:.*]]: vector<2x3xf32>) -> vector<2x3xf32> +// CHECK: %[[T3:.*]] = constant dense<0.000000e+00> : vector<2x3xf32> +// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[T5:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T4]] : i64] : vector<2xf32> +// CHECK: %[[T6:.*]] = splat %[[T5]] : vector<3xf32> +// CHECK: %[[T7:.*]] = llvm.mlir.cast %[[C]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T9:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> +// CHECK: %[[T10:.*]] = llvm.mlir.cast %[[T3]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32> +// CHECK: %[[T14:.*]] = splat %[[T13]] : vector<3xf32> +// CHECK: %[[T15:.*]] = llvm.mlir.cast %[[C]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T16:.*]] = llvm.extractvalue %[[T15]][1] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> +// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T11]][1] : !llvm.array<2 x vector<3xf32>> +// CHECK: %[[T19:.*]] = llvm.mlir.cast %[[T18]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> +// CHECK: return %[[T19]] : vector<2x3xf32> // ----- @@ -302,11 +296,11 @@ func @shuffle_1D_direct(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<2x %1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xf32>, vector<2xf32> return %1 : vector<2xf32> } -// CHECK-LABEL: llvm.func @shuffle_1D_direct( +// CHECK-LABEL: @shuffle_1D_direct( // CHECK-SAME: %[[A:.*]]: vector<2xf32>, // CHECK-SAME: %[[B:.*]]: vector<2xf32>) // CHECK: %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, 1] : vector<2xf32>, vector<2xf32> -// CHECK: llvm.return %[[s]] : vector<2xf32> +// CHECK: return %[[s]] : vector<2xf32> // ----- @@ -314,7 +308,7 @@ func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> { %1 = vector.shuffle %arg0, %arg1 [4, 3, 2, 1, 0] : vector<2xf32>, vector<3xf32> return %1 : vector<5xf32> } -// CHECK-LABEL: llvm.func @shuffle_1D( +// CHECK-LABEL: @shuffle_1D( // CHECK-SAME: %[[A:.*]]: vector<2xf32>, // CHECK-SAME: %[[B:.*]]: vector<3xf32>) // CHECK: %[[u0:.*]] = llvm.mlir.undef : vector<5xf32> @@ -338,7 +332,7 @@ func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> { // CHECK: %[[e5:.*]] = llvm.extractelement %[[A]][%[[c0]] : i64] : vector<2xf32> // CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64 // CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : i64] : vector<5xf32> -// CHECK: llvm.return %[[i5]] : vector<5xf32> +// CHECK: return %[[i5]] : vector<5xf32> // ----- @@ -346,17 +340,20 @@ func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf32> { %1 = vector.shuffle %a, %b[1, 0, 2] : vector<1x4xf32>, vector<2x4xf32> return %1 : vector<3x4xf32> } -// CHECK-LABEL: llvm.func @shuffle_2D( -// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vector<4xf32>>, -// CHECK-SAME: %[[B:.*]]: !llvm.array<2 x vector<4xf32>>) +// CHECK-LABEL: @shuffle_2D( +// CHECK-SAME: %[[A:.*]]: vector<1x4xf32>, +// CHECK-SAME: %[[B:.*]]: vector<2x4xf32>) +// CHECK: %[[VAL_0:.*]] = llvm.mlir.cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>> +// CHECK: %[[VAL_1:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> // CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[e1:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[e1:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[e2:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vector<4xf32>> +// CHECK: %[[e2:.*]] = llvm.extractvalue %[[VAL_0]][0] : !llvm.array<1 x vector<4xf32>> // CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[e3:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[e3:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm.array<3 x vector<4xf32>> -// CHECK: llvm.return %[[i3]] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[VAL_3:.*]] = llvm.mlir.cast %[[i3]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32> +// CHECK: return %[[VAL_3]] : vector<3x4xf32> // ----- @@ -365,11 +362,11 @@ func @extract_element(%arg0: vector<16xf32>) -> f32 { %1 = vector.extractelement %arg0[%0 : i32]: vector<16xf32> return %1 : f32 } -// CHECK-LABEL: llvm.func @extract_element( +// CHECK-LABEL: @extract_element( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) -// CHECK: %[[c:.*]] = llvm.mlir.constant(15 : i32) : i32 +// CHECK: %[[c:.*]] = constant 15 : i32 // CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : vector<16xf32> -// CHECK: llvm.return %[[x]] : f32 +// CHECK: return %[[x]] : f32 // ----- @@ -377,10 +374,10 @@ func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { %0 = vector.extract %arg0[15]: vector<16xf32> return %0 : f32 } -// CHECK-LABEL: llvm.func @extract_element_from_vec_1d +// CHECK-LABEL: @extract_element_from_vec_1d // CHECK: llvm.mlir.constant(15 : i64) : i64 // CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> -// CHECK: llvm.return {{.*}} : f32 +// CHECK: return {{.*}} : f32 // ----- @@ -388,9 +385,9 @@ func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> %0 = vector.extract %arg0[0]: vector<4x3x16xf32> return %0 : vector<3x16xf32> } -// CHECK-LABEL: llvm.func @extract_vec_2d_from_vec_3d +// CHECK-LABEL: @extract_vec_2d_from_vec_3d // CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<16xf32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<3 x vector<16xf32>> +// CHECK: return {{.*}} : vector<3x16xf32> // ----- @@ -398,9 +395,9 @@ func @extract_vec_1d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<16xf32> { %0 = vector.extract %arg0[0, 0]: vector<4x3x16xf32> return %0 : vector<16xf32> } -// CHECK-LABEL: llvm.func @extract_vec_1d_from_vec_3d +// CHECK-LABEL: @extract_vec_1d_from_vec_3d // CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>> -// CHECK: llvm.return {{.*}} : vector<16xf32> +// CHECK: return {{.*}} : vector<16xf32> // ----- @@ -408,11 +405,11 @@ func @extract_element_from_vec_3d(%arg0: vector<4x3x16xf32>) -> f32 { %0 = vector.extract %arg0[0, 0, 0]: vector<4x3x16xf32> return %0 : f32 } -// CHECK-LABEL: llvm.func @extract_element_from_vec_3d +// CHECK-LABEL: @extract_element_from_vec_3d // CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>> // CHECK: llvm.mlir.constant(0 : i64) : i64 // CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> -// CHECK: llvm.return {{.*}} : f32 +// CHECK: return {{.*}} : f32 // ----- @@ -421,12 +418,12 @@ func @insert_element(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { %1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<4xf32> return %1 : vector<4xf32> } -// CHECK-LABEL: llvm.func @insert_element( +// CHECK-LABEL: @insert_element( // CHECK-SAME: %[[A:.*]]: f32, // CHECK-SAME: %[[B:.*]]: vector<4xf32>) -// CHECK: %[[c:.*]] = llvm.mlir.constant(3 : i32) : i32 +// CHECK: %[[c:.*]] = constant 3 : i32 // CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[c]] : i32] : vector<4xf32> -// CHECK: llvm.return %[[x]] : vector<4xf32> +// CHECK: return %[[x]] : vector<4xf32> // ----- @@ -434,10 +431,10 @@ func @insert_element_into_vec_1d(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf %0 = vector.insert %arg0, %arg1[3] : f32 into vector<4xf32> return %0 : vector<4xf32> } -// CHECK-LABEL: llvm.func @insert_element_into_vec_1d +// CHECK-LABEL: @insert_element_into_vec_1d // CHECK: llvm.mlir.constant(3 : i64) : i64 // CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> -// CHECK: llvm.return {{.*}} : vector<4xf32> +// CHECK: return {{.*}} : vector<4xf32> // ----- @@ -445,9 +442,9 @@ func @insert_vec_2d_into_vec_3d(%arg0: vector<8x16xf32>, %arg1: vector<4x8x16xf3 %0 = vector.insert %arg0, %arg1[3] : vector<8x16xf32> into vector<4x8x16xf32> return %0 : vector<4x8x16xf32> } -// CHECK-LABEL: llvm.func @insert_vec_2d_into_vec_3d +// CHECK-LABEL: @insert_vec_2d_into_vec_3d // CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<16xf32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>> +// CHECK: return {{.*}} : vector<4x8x16xf32> // ----- @@ -455,9 +452,9 @@ func @insert_vec_1d_into_vec_3d(%arg0: vector<16xf32>, %arg1: vector<4x8x16xf32> %0 = vector.insert %arg0, %arg1[3, 7] : vector<16xf32> into vector<4x8x16xf32> return %0 : vector<4x8x16xf32> } -// CHECK-LABEL: llvm.func @insert_vec_1d_into_vec_3d +// CHECK-LABEL: @insert_vec_1d_into_vec_3d // CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>> +// CHECK: return {{.*}} : vector<4x8x16xf32> // ----- @@ -465,12 +462,12 @@ func @insert_element_into_vec_3d(%arg0: f32, %arg1: vector<4x8x16xf32>) -> vecto %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x16xf32> return %0 : vector<4x8x16xf32> } -// CHECK-LABEL: llvm.func @insert_element_into_vec_3d +// CHECK-LABEL: @insert_element_into_vec_3d // CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> // CHECK: llvm.mlir.constant(15 : i64) : i64 // CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<16xf32> // CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> -// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vector<16xf32>>> +// CHECK: return {{.*}} : vector<4x8x16xf32> // ----- @@ -478,7 +475,7 @@ func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref> { %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref> return %0 : memref> } -// CHECK-LABEL: llvm.func @vector_type_cast +// CHECK-LABEL: @vector_type_cast // CHECK: llvm.mlir.undef : !llvm.struct<(ptr>>>, ptr>>>, i64)> // CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr>>> @@ -495,7 +492,7 @@ func @vector_type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref %0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref, 3> return %0 : memref, 3> } -// CHECK-LABEL: llvm.func @vector_type_cast_non_zero_addrspace +// CHECK-LABEL: @vector_type_cast_non_zero_addrspace // CHECK: llvm.mlir.undef : !llvm.struct<(ptr>>, 3>, ptr>>, 3>, i64)> // CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr>>, 3> @@ -515,9 +512,9 @@ func @vector_print_scalar_i1(%arg0: i1) { // // Type "boolean" always uses zero extension. // -// CHECK-LABEL: llvm.func @vector_print_scalar_i1( +// CHECK-LABEL: @vector_print_scalar_i1( // CHECK-SAME: %[[A:.*]]: i1) -// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i1 to i64 +// CHECK: %[[S:.*]] = zexti %[[A]] : i1 to i64 // CHECK: llvm.call @printI64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -527,9 +524,9 @@ func @vector_print_scalar_i4(%arg0: i4) { vector.print %arg0 : i4 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_i4( +// CHECK-LABEL: @vector_print_scalar_i4( // CHECK-SAME: %[[A:.*]]: i4) -// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i4 to i64 +// CHECK: %[[S:.*]] = sexti %[[A]] : i4 to i64 // CHECK: llvm.call @printI64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -539,9 +536,10 @@ func @vector_print_scalar_si4(%arg0: si4) { vector.print %arg0 : si4 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_si4( -// CHECK-SAME: %[[A:.*]]: i4) -// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i4 to i64 +// CHECK-LABEL: @vector_print_scalar_si4( +// CHECK-SAME: %[[A:.*]]: si4) +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : si4 to i4 +// CHECK: %[[S:.*]] = sexti %[[C]] : i4 to i64 // CHECK: llvm.call @printI64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -551,9 +549,10 @@ func @vector_print_scalar_ui4(%arg0: ui4) { vector.print %arg0 : ui4 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_ui4( -// CHECK-SAME: %[[A:.*]]: i4) -// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i4 to i64 +// CHECK-LABEL: @vector_print_scalar_ui4( +// CHECK-SAME: %[[A:.*]]: ui4) +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui4 to i4 +// CHECK: %[[S:.*]] = zexti %[[C]] : i4 to i64 // CHECK: llvm.call @printU64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -563,9 +562,9 @@ func @vector_print_scalar_i32(%arg0: i32) { vector.print %arg0 : i32 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_i32( +// CHECK-LABEL: @vector_print_scalar_i32( // CHECK-SAME: %[[A:.*]]: i32) -// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i32 to i64 +// CHECK: %[[S:.*]] = sexti %[[A]] : i32 to i64 // CHECK: llvm.call @printI64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -575,9 +574,10 @@ func @vector_print_scalar_ui32(%arg0: ui32) { vector.print %arg0 : ui32 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_ui32( -// CHECK-SAME: %[[A:.*]]: i32) -// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i32 to i64 +// CHECK-LABEL: @vector_print_scalar_ui32( +// CHECK-SAME: %[[A:.*]]: ui32) +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui32 to i32 +// CHECK: %[[S:.*]] = zexti %[[C]] : i32 to i64 // CHECK: llvm.call @printU64(%[[S]]) : (i64) -> () // ----- @@ -586,9 +586,9 @@ func @vector_print_scalar_i40(%arg0: i40) { vector.print %arg0 : i40 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_i40( +// CHECK-LABEL: @vector_print_scalar_i40( // CHECK-SAME: %[[A:.*]]: i40) -// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i40 to i64 +// CHECK: %[[S:.*]] = sexti %[[A]] : i40 to i64 // CHECK: llvm.call @printI64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -598,9 +598,10 @@ func @vector_print_scalar_si40(%arg0: si40) { vector.print %arg0 : si40 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_si40( -// CHECK-SAME: %[[A:.*]]: i40) -// CHECK: %[[S:.*]] = llvm.sext %[[A]] : i40 to i64 +// CHECK-LABEL: @vector_print_scalar_si40( +// CHECK-SAME: %[[A:.*]]: si40) +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : si40 to i40 +// CHECK: %[[S:.*]] = sexti %[[C]] : i40 to i64 // CHECK: llvm.call @printI64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -610,9 +611,10 @@ func @vector_print_scalar_ui40(%arg0: ui40) { vector.print %arg0 : ui40 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_ui40( -// CHECK-SAME: %[[A:.*]]: i40) -// CHECK: %[[S:.*]] = llvm.zext %[[A]] : i40 to i64 +// CHECK-LABEL: @vector_print_scalar_ui40( +// CHECK-SAME: %[[A:.*]]: ui40) +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui40 to i40 +// CHECK: %[[S:.*]] = zexti %[[C]] : i40 to i64 // CHECK: llvm.call @printU64(%[[S]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -622,7 +624,7 @@ func @vector_print_scalar_i64(%arg0: i64) { vector.print %arg0 : i64 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_i64( +// CHECK-LABEL: @vector_print_scalar_i64( // CHECK-SAME: %[[A:.*]]: i64) // CHECK: llvm.call @printI64(%[[A]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -633,9 +635,10 @@ func @vector_print_scalar_ui64(%arg0: ui64) { vector.print %arg0 : ui64 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_ui64( -// CHECK-SAME: %[[A:.*]]: i64) -// CHECK: llvm.call @printU64(%[[A]]) : (i64) -> () +// CHECK-LABEL: @vector_print_scalar_ui64( +// CHECK-SAME: %[[A:.*]]: ui64) +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : ui64 to i64 +// CHECK: llvm.call @printU64(%[[C]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () // ----- @@ -644,9 +647,10 @@ func @vector_print_scalar_index(%arg0: index) { vector.print %arg0 : index return } -// CHECK-LABEL: llvm.func @vector_print_scalar_index( -// CHECK-SAME: %[[A:.*]]: i64) -// CHECK: llvm.call @printU64(%[[A]]) : (i64) -> () +// CHECK-LABEL: @vector_print_scalar_index( +// CHECK-SAME: %[[A:.*]]: index) +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[A]] : index to i64 +// CHECK: llvm.call @printU64(%[[C]]) : (i64) -> () // CHECK: llvm.call @printNewline() : () -> () // ----- @@ -655,7 +659,7 @@ func @vector_print_scalar_f32(%arg0: f32) { vector.print %arg0 : f32 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_f32( +// CHECK-LABEL: @vector_print_scalar_f32( // CHECK-SAME: %[[A:.*]]: f32) // CHECK: llvm.call @printF32(%[[A]]) : (f32) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -666,7 +670,7 @@ func @vector_print_scalar_f64(%arg0: f64) { vector.print %arg0 : f64 return } -// CHECK-LABEL: llvm.func @vector_print_scalar_f64( +// CHECK-LABEL: @vector_print_scalar_f64( // CHECK-SAME: %[[A:.*]]: f64) // CHECK: llvm.call @printF64(%[[A]]) : (f64) -> () // CHECK: llvm.call @printNewline() : () -> () @@ -677,10 +681,11 @@ func @vector_print_vector(%arg0: vector<2x2xf32>) { vector.print %arg0 : vector<2x2xf32> return } -// CHECK-LABEL: llvm.func @vector_print_vector( -// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vector<2xf32>>) +// CHECK-LABEL: @vector_print_vector( +// CHECK-SAME: %[[A:.*]]: vector<2x2xf32>) +// CHECK: %[[VAL_1:.*]] = llvm.mlir.cast %[[A]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>> // CHECK: llvm.call @printOpen() : () -> () -// CHECK: %[[x0:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[x0:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<2xf32>> // CHECK: llvm.call @printOpen() : () -> () // CHECK: %[[x1:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[x2:.*]] = llvm.extractelement %[[x0]][%[[x1]] : i64] : vector<2xf32> @@ -691,7 +696,7 @@ func @vector_print_vector(%arg0: vector<2x2xf32>) { // CHECK: llvm.call @printF32(%[[x4]]) : (f32) -> () // CHECK: llvm.call @printClose() : () -> () // CHECK: llvm.call @printComma() : () -> () -// CHECK: %[[x5:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[x5:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<2xf32>> // CHECK: llvm.call @printOpen() : () -> () // CHECK: %[[x6:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[x7:.*]] = llvm.extractelement %[[x5]][%[[x6]] : i64] : vector<2xf32> @@ -710,10 +715,10 @@ func @extract_strided_slice1(%arg0: vector<4xf32>) -> vector<2xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32> return %0 : vector<2xf32> } -// CHECK-LABEL: llvm.func @extract_strided_slice1( +// CHECK-LABEL: @extract_strided_slice1( // CHECK-SAME: %[[A:.*]]: vector<4xf32>) // CHECK: %[[T0:.*]] = llvm.shufflevector %[[A]], %[[A]] [2, 3] : vector<4xf32>, vector<4xf32> -// CHECK: llvm.return %[[T0]] : vector<2xf32> +// CHECK: return %[[T0]] : vector<2xf32> // ----- @@ -721,14 +726,16 @@ func @extract_strided_slice2(%arg0: vector<4x8xf32>) -> vector<2x8xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32> return %0 : vector<2x8xf32> } -// CHECK-LABEL: llvm.func @extract_strided_slice2( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<8xf32>>) +// CHECK-LABEL: @extract_strided_slice2( +// CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>) +// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>> // CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<8xf32>> // CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>> // CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vector<8xf32>> // CHECK: %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>> // CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vector<8xf32>> -// CHECK: llvm.return %[[T4]] : !llvm.array<2 x vector<8xf32>> +// CHECK: %[[T5:.*]] = llvm.mlir.cast %[[T4]] : !llvm.array<2 x vector<8xf32>> to vector<2x8xf32> +// CHECK: return %[[T5]] // ----- @@ -736,16 +743,21 @@ func @extract_strided_slice3(%arg0: vector<4x8xf32>) -> vector<2x2xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32> return %0 : vector<2x2xf32> } -// CHECK-LABEL: llvm.func @extract_strided_slice3( -// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vector<8xf32>>) -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm.array<2 x vector<2xf32>> +// CHECK-LABEL: @extract_strided_slice3( +// CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>) +// CHECK: %[[VAL_1:.*]] = constant 0.000000e+00 : f32 +// CHECK: %[[VAL_2:.*]] = splat %[[VAL_1]] : vector<2x2xf32> +// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>> // CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>> // CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2, 3] : vector<8xf32>, vector<8xf32> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[VAL_6:.*]] = llvm.mlir.cast %[[VAL_2]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[VAL_6]][0] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>> // CHECK: %[[T5:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>> // CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T5]] [2, 3] : vector<8xf32>, vector<8xf32> // CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T4]][1] : !llvm.array<2 x vector<2xf32>> -// CHECK: llvm.return %[[T7]] : !llvm.array<2 x vector<2xf32>> +// CHECK: %[[VAL_12:.*]] = llvm.mlir.cast %[[T7]] : !llvm.array<2 x vector<2xf32>> to vector<2x2xf32> +// CHECK: return %[[VAL_12]] : vector<2x2xf32> // ----- @@ -753,9 +765,9 @@ func @insert_strided_slice1(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vecto %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32> return %0 : vector<4x4x4xf32> } -// CHECK-LABEL: llvm.func @insert_strided_slice1 +// CHECK-LABEL: @insert_strided_slice1 // CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>> -// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>> // ----- @@ -763,35 +775,46 @@ func @insert_strided_slice2(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector< %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32> return %0 : vector<4x4xf32> } -// CHECK-LABEL: llvm.func @insert_strided_slice2 +// CHECK-LABEL: @insert_strided_slice2 // // Subvector vector<2xf32> @0 into vector<4xf32> @2 // CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<2xf32>> +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : vector<4x4xf32> to !llvm.array<4 x vector<4xf32>> // CHECK-NEXT: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vector<4xf32>> // Element @0 -> element @2 -// CHECK-NEXT: llvm.mlir.constant(0 : index) : i64 +// CHECK-NEXT: constant 0 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32> -// CHECK-NEXT: llvm.mlir.constant(2 : index) : i64 +// CHECK-NEXT: constant 2 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> // Element @1 -> element @3 -// CHECK-NEXT: llvm.mlir.constant(1 : index) : i64 +// CHECK-NEXT: constant 1 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32> -// CHECK-NEXT: llvm.mlir.constant(3 : index) : i64 +// CHECK-NEXT: constant 3 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : vector<4x4xf32> to !llvm.array<4 x vector<4xf32>> // CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x vector<4xf32>> // // Subvector vector<2xf32> @1 into vector<4xf32> @3 // CHECK: llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<2xf32>> +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : vector<4x4xf32> to !llvm.array<4 x vector<4xf32>> // CHECK-NEXT: llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vector<4xf32>> // Element @0 -> element @2 -// CHECK-NEXT: llvm.mlir.constant(0 : index) : i64 +// CHECK-NEXT: constant 0 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32> -// CHECK-NEXT: llvm.mlir.constant(2 : index) : i64 +// CHECK-NEXT: constant 2 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> // Element @1 -> element @3 -// CHECK-NEXT: llvm.mlir.constant(1 : index) : i64 +// CHECK-NEXT: constant 1 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<2xf32> -// CHECK-NEXT: llvm.mlir.constant(3 : index) : i64 +// CHECK-NEXT: constant 3 : index +// CHECK-NEXT: llvm.mlir.cast %{{.*}} : index to i64 // CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> // CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x vector<4xf32>> @@ -802,50 +825,73 @@ func @insert_strided_slice3(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) - vector<2x4xf32> into vector<16x4x8xf32> return %0 : vector<16x4x8xf32> } -// CHECK-LABEL: llvm.func @insert_strided_slice3( -// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vector<4xf32>>, -// CHECK-SAME: %[[B:.*]]: !llvm.array<16 x array<4 x vector<8xf32>>>) -// CHECK: %[[s0:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<16 x array<4 x vector<8xf32>>> -// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vector<4xf32>> -// CHECK: %[[s2:.*]] = llvm.extractvalue %[[B]][0, 0] : !llvm.array<16 x array<4 x vector<8xf32>>> -// CHECK: %[[s3:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[s4:.*]] = llvm.extractelement %[[s1]][%[[s3]] : i64] : vector<4xf32> -// CHECK: %[[s5:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s6:.*]] = llvm.insertelement %[[s4]], %[[s2]][%[[s5]] : i64] : vector<8xf32> -// CHECK: %[[s7:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[s8:.*]] = llvm.extractelement %[[s1]][%[[s7]] : i64] : vector<4xf32> -// CHECK: %[[s9:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s10:.*]] = llvm.insertelement %[[s8]], %[[s6]][%[[s9]] : i64] : vector<8xf32> -// CHECK: %[[s11:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s12:.*]] = llvm.extractelement %[[s1]][%[[s11]] : i64] : vector<4xf32> -// CHECK: %[[s13:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[s14:.*]] = llvm.insertelement %[[s12]], %[[s10]][%[[s13]] : i64] : vector<8xf32> -// CHECK: %[[s15:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s16:.*]] = llvm.extractelement %[[s1]][%[[s15]] : i64] : vector<4xf32> -// CHECK: %[[s17:.*]] = llvm.mlir.constant(5 : index) : i64 -// CHECK: %[[s18:.*]] = llvm.insertelement %[[s16]], %[[s14]][%[[s17]] : i64] : vector<8xf32> -// CHECK: %[[s19:.*]] = llvm.insertvalue %[[s18]], %[[s0]][0] : !llvm.array<4 x vector<8xf32>> -// CHECK: %[[s20:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vector<4xf32>> -// CHECK: %[[s21:.*]] = llvm.extractvalue %[[B]][0, 1] : !llvm.array<16 x array<4 x vector<8xf32>>> -// CHECK: %[[s22:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[s23:.*]] = llvm.extractelement %[[s20]][%[[s22]] : i64] : vector<4xf32> -// CHECK: %[[s24:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s25:.*]] = llvm.insertelement %[[s23]], %[[s21]][%[[s24]] : i64] : vector<8xf32> -// CHECK: %[[s26:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[s27:.*]] = llvm.extractelement %[[s20]][%[[s26]] : i64] : vector<4xf32> -// CHECK: %[[s28:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s29:.*]] = llvm.insertelement %[[s27]], %[[s25]][%[[s28]] : i64] : vector<8xf32> -// CHECK: %[[s30:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[s31:.*]] = llvm.extractelement %[[s20]][%[[s30]] : i64] : vector<4xf32> -// CHECK: %[[s32:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[s33:.*]] = llvm.insertelement %[[s31]], %[[s29]][%[[s32]] : i64] : vector<8xf32> -// CHECK: %[[s34:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[s35:.*]] = llvm.extractelement %[[s20]][%[[s34]] : i64] : vector<4xf32> -// CHECK: %[[s36:.*]] = llvm.mlir.constant(5 : index) : i64 -// CHECK: %[[s37:.*]] = llvm.insertelement %[[s35]], %[[s33]][%[[s36]] : i64] : vector<8xf32> -// CHECK: %[[s38:.*]] = llvm.insertvalue %[[s37]], %[[s19]][1] : !llvm.array<4 x vector<8xf32>> -// CHECK: %[[s39:.*]] = llvm.insertvalue %[[s38]], %[[B]][0] : !llvm.array<16 x array<4 x vector<8xf32>>> -// CHECK: llvm.return %[[s39]] : !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK-LABEL: @insert_strided_slice3( +// CHECK-SAME: %[[A:.*]]: vector<2x4xf32>, +// CHECK-SAME: %[[B:.*]]: vector<16x4x8xf32>) +// CHECK: %[[s2:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s3:.*]] = llvm.extractvalue %[[s2]][0] : !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s4:.*]] = llvm.mlir.cast %[[A]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> +// CHECK: %[[s5:.*]] = llvm.extractvalue %[[s4]][0] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[s6:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s7:.*]] = llvm.extractvalue %[[s6]][0, 0] : !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s8:.*]] = constant 0 : index +// CHECK: %[[s9:.*]] = llvm.mlir.cast %[[s8]] : index to i64 +// CHECK: %[[s10:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s9]] : i64] : vector<4xf32> +// CHECK: %[[s11:.*]] = constant 2 : index +// CHECK: %[[s12:.*]] = llvm.mlir.cast %[[s11]] : index to i64 +// CHECK: %[[s13:.*]] = llvm.insertelement %[[s10]], %[[s7]]{{\[}}%[[s12]] : i64] : vector<8xf32> +// CHECK: %[[s14:.*]] = constant 1 : index +// CHECK: %[[s15:.*]] = llvm.mlir.cast %[[s14]] : index to i64 +// CHECK: %[[s16:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s15]] : i64] : vector<4xf32> +// CHECK: %[[s17:.*]] = constant 3 : index +// CHECK: %[[s18:.*]] = llvm.mlir.cast %[[s17]] : index to i64 +// CHECK: %[[s19:.*]] = llvm.insertelement %[[s16]], %[[s13]]{{\[}}%[[s18]] : i64] : vector<8xf32> +// CHECK: %[[s20:.*]] = constant 2 : index +// CHECK: %[[s21:.*]] = llvm.mlir.cast %[[s20]] : index to i64 +// CHECK: %[[s22:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s21]] : i64] : vector<4xf32> +// CHECK: %[[s23:.*]] = constant 4 : index +// CHECK: %[[s24:.*]] = llvm.mlir.cast %[[s23]] : index to i64 +// CHECK: %[[s25:.*]] = llvm.insertelement %[[s22]], %[[s19]]{{\[}}%[[s24]] : i64] : vector<8xf32> +// CHECK: %[[s26:.*]] = constant 3 : index +// CHECK: %[[s27:.*]] = llvm.mlir.cast %[[s26]] : index to i64 +// CHECK: %[[s28:.*]] = llvm.extractelement %[[s5]]{{\[}}%[[s27]] : i64] : vector<4xf32> +// CHECK: %[[s29:.*]] = constant 5 : index +// CHECK: %[[s30:.*]] = llvm.mlir.cast %[[s29]] : index to i64 +// CHECK: %[[s31:.*]] = llvm.insertelement %[[s28]], %[[s25]]{{\[}}%[[s30]] : i64] : vector<8xf32> +// CHECK: %[[s32:.*]] = llvm.insertvalue %[[s31]], %[[s3]][0] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[s33:.*]] = llvm.mlir.cast %[[A]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> +// CHECK: %[[s34:.*]] = llvm.extractvalue %[[s33]][1] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[s35:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s36:.*]] = llvm.extractvalue %[[s35]][0, 1] : !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s37:.*]] = constant 0 : index +// CHECK: %[[s38:.*]] = llvm.mlir.cast %[[s37]] : index to i64 +// CHECK: %[[s39:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s38]] : i64] : vector<4xf32> +// CHECK: %[[s40:.*]] = constant 2 : index +// CHECK: %[[s41:.*]] = llvm.mlir.cast %[[s40]] : index to i64 +// CHECK: %[[s42:.*]] = llvm.insertelement %[[s39]], %[[s36]]{{\[}}%[[s41]] : i64] : vector<8xf32> +// CHECK: %[[s43:.*]] = constant 1 : index +// CHECK: %[[s44:.*]] = llvm.mlir.cast %[[s43]] : index to i64 +// CHECK: %[[s45:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s44]] : i64] : vector<4xf32> +// CHECK: %[[s46:.*]] = constant 3 : index +// CHECK: %[[s47:.*]] = llvm.mlir.cast %[[s46]] : index to i64 +// CHECK: %[[s48:.*]] = llvm.insertelement %[[s45]], %[[s42]]{{\[}}%[[s47]] : i64] : vector<8xf32> +// CHECK: %[[s49:.*]] = constant 2 : index +// CHECK: %[[s50:.*]] = llvm.mlir.cast %[[s49]] : index to i64 +// CHECK: %[[s51:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s50]] : i64] : vector<4xf32> +// CHECK: %[[s52:.*]] = constant 4 : index +// CHECK: %[[s53:.*]] = llvm.mlir.cast %[[s52]] : index to i64 +// CHECK: %[[s54:.*]] = llvm.insertelement %[[s51]], %[[s48]]{{\[}}%[[s53]] : i64] : vector<8xf32> +// CHECK: %[[s55:.*]] = constant 3 : index +// CHECK: %[[s56:.*]] = llvm.mlir.cast %[[s55]] : index to i64 +// CHECK: %[[s57:.*]] = llvm.extractelement %[[s34]]{{\[}}%[[s56]] : i64] : vector<4xf32> +// CHECK: %[[s58:.*]] = constant 5 : index +// CHECK: %[[s59:.*]] = llvm.mlir.cast %[[s58]] : index to i64 +// CHECK: %[[s60:.*]] = llvm.insertelement %[[s57]], %[[s54]]{{\[}}%[[s59]] : i64] : vector<8xf32> +// CHECK: %[[s61:.*]] = llvm.insertvalue %[[s60]], %[[s32]][1] : !llvm.array<4 x vector<8xf32>> +// CHECK: %[[s62:.*]] = llvm.mlir.cast %[[B]] : vector<16x4x8xf32> to !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s63:.*]] = llvm.insertvalue %[[s61]], %[[s62]][0] : !llvm.array<16 x array<4 x vector<8xf32>>> +// CHECK: %[[s64:.*]] = llvm.mlir.cast %[[s63]] : !llvm.array<16 x array<4 x vector<8xf32>>> to vector<16x4x8xf32> +// CHECK: return %[[s64]] : vector<16x4x8xf32> // ----- @@ -855,33 +901,43 @@ func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> { %1 = vector.tuple_get %0, 3 : tuple, vector<2x1xf32>, vector<1x2xf32>, vector<1x1xf32>> return %1 : vector<1x1xf32> } -// CHECK-LABEL: llvm.func @extract_strides( -// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vector<3xf32>>) -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vector<1xf32>> +// CHECK-LABEL: @extract_strides( +// CHECK-SAME: %[[ARG:.*]]: vector<3x3xf32>) +// CHECK: %[[VAL_1:.*]] = constant 0.000000e+00 : f32 +// CHECK: %[[VAL_2:.*]] = splat %[[VAL_1]] : vector<1x1xf32> +// CHECK: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : vector<3x3xf32> to !llvm.array<3 x vector<3xf32>> // CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<3 x vector<3xf32>> // CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2] : vector<3xf32>, vector<3xf32> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm.array<1 x vector<1xf32>> -// CHECK: llvm.return %[[T4]] : !llvm.array<1 x vector<1xf32>> +// CHECK: %[[VAL_6:.*]] = llvm.mlir.cast %[[VAL_2]] : vector<1x1xf32> to !llvm.array<1 x vector<1xf32>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[VAL_6]][0] : !llvm.array<1 x vector<1xf32>> +// CHECK: %[[VAL_8:.*]] = llvm.mlir.cast %[[T4]] : !llvm.array<1 x vector<1xf32>> to vector<1x1xf32> +// CHECK: return %[[VAL_8]] : vector<1x1xf32> -// CHECK-LABEL: llvm.func @vector_fma( -// CHECK-SAME: %[[A:.*]]: vector<8xf32>, %[[B:.*]]: !llvm.array<2 x vector<4xf32>>) -// CHECK-SAME: -> !llvm.struct<(vector<8xf32>, array<2 x vector<4xf32>>)> { // ----- func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>) -> (vector<8xf32>, vector<2x4xf32>) { - // CHECK: "llvm.intr.fmuladd"(%[[A]], %[[A]], %[[A]]) : - // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> + // CHECK-LABEL: @vector_fma + // CHECK-SAME: %[[A:.*]]: vector<8xf32> + // CHECK-SAME: %[[B:.*]]: vector<2x4xf32> + // CHECK: "llvm.intr.fmuladd" + // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %0 = vector.fma %a, %a, %a : vector<8xf32> - // CHECK: %[[b00:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b01:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b02:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b00:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b01:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b02:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[B0:.*]] = "llvm.intr.fmuladd"(%[[b00]], %[[b01]], %[[b02]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b10:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b11:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b12:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b10:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b11:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[BL:.*]] = llvm.mlir.cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> + // CHECK: %[[b12:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[B1:.*]] = "llvm.intr.fmuladd"(%[[b10]], %[[b11]], %[[b12]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>> @@ -896,12 +952,12 @@ func @reduce_f16(%arg0: vector<16xf16>) -> f16 { %0 = vector.reduction "add", %arg0 : vector<16xf16> into f16 return %0 : f16 } -// CHECK-LABEL: llvm.func @reduce_f16( +// CHECK-LABEL: @reduce_f16( // CHECK-SAME: %[[A:.*]]: vector<16xf16>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : f16 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) // CHECK-SAME: {reassoc = false} : (f16, vector<16xf16>) -> f16 -// CHECK: llvm.return %[[V]] : f16 +// CHECK: return %[[V]] : f16 // ----- @@ -909,12 +965,12 @@ func @reduce_f32(%arg0: vector<16xf32>) -> f32 { %0 = vector.reduction "add", %arg0 : vector<16xf32> into f32 return %0 : f32 } -// CHECK-LABEL: llvm.func @reduce_f32( +// CHECK-LABEL: @reduce_f32( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) // CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32 -// CHECK: llvm.return %[[V]] : f32 +// CHECK: return %[[V]] : f32 // ----- @@ -922,12 +978,12 @@ func @reduce_f64(%arg0: vector<16xf64>) -> f64 { %0 = vector.reduction "add", %arg0 : vector<16xf64> into f64 return %0 : f64 } -// CHECK-LABEL: llvm.func @reduce_f64( +// CHECK-LABEL: @reduce_f64( // CHECK-SAME: %[[A:.*]]: vector<16xf64>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64 // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]]) // CHECK-SAME: {reassoc = false} : (f64, vector<16xf64>) -> f64 -// CHECK: llvm.return %[[V]] : f64 +// CHECK: return %[[V]] : f64 // ----- @@ -935,10 +991,10 @@ func @reduce_i8(%arg0: vector<16xi8>) -> i8 { %0 = vector.reduction "add", %arg0 : vector<16xi8> into i8 return %0 : i8 } -// CHECK-LABEL: llvm.func @reduce_i8( +// CHECK-LABEL: @reduce_i8( // CHECK-SAME: %[[A:.*]]: vector<16xi8>) // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) -// CHECK: llvm.return %[[V]] : i8 +// CHECK: return %[[V]] : i8 // ----- @@ -946,10 +1002,10 @@ func @reduce_i32(%arg0: vector<16xi32>) -> i32 { %0 = vector.reduction "add", %arg0 : vector<16xi32> into i32 return %0 : i32 } -// CHECK-LABEL: llvm.func @reduce_i32( +// CHECK-LABEL: @reduce_i32( // CHECK-SAME: %[[A:.*]]: vector<16xi32>) // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) -// CHECK: llvm.return %[[V]] : i32 +// CHECK: return %[[V]] : i32 // ----- @@ -957,10 +1013,10 @@ func @reduce_i64(%arg0: vector<16xi64>) -> i64 { %0 = vector.reduction "add", %arg0 : vector<16xi64> into i64 return %0 : i64 } -// CHECK-LABEL: llvm.func @reduce_i64( +// CHECK-LABEL: @reduce_i64( // CHECK-SAME: %[[A:.*]]: vector<16xi64>) // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) -// CHECK: llvm.return %[[V]] : i64 +// CHECK: return %[[V]] : i64 // 4x16 16x3 4x3 @@ -972,7 +1028,7 @@ func @matrix_ops(%A: vector<64xf64>, %B: vector<48xf64>) -> vector<12xf64> { (vector<64xf64>, vector<48xf64>) -> vector<12xf64> return %C: vector<12xf64> } -// CHECK-LABEL: llvm.func @matrix_ops +// CHECK-LABEL: @matrix_ops // CHECK: llvm.intr.matrix.multiply %{{.*}}, %{{.*}} { // CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32 // CHECK-SAME: } : (vector<64xf64>, vector<48xf64>) -> vector<12xf64> @@ -990,53 +1046,35 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xf32> +// CHECK: %[[c7:.*]] = constant 7.0 // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : // CHECK-SAME: (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : // CHECK-SAME: !llvm.ptr to !llvm.ptr> -// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : -// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK: %[[C0:.*]] = constant 0 : index +// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[C0]] : memref // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. -// CHECK: %[[linearIndex:.*]] = llvm.mlir.constant(dense +// CHECK: %[[linearIndex:.*]] = constant dense // CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi32>) : vector<17xi32> +// CHECK-SAME: vector<17xi32> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -// CHECK: %[[otrunc:.*]] = llvm.trunc %[[BASE]] : i64 to i32 -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : vector<17xi32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[otrunc]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: i32] : vector<17xi32> -// CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: vector<17xi32>, vector<17xi32> -// CHECK: %[[offsetVec4:.*]] = llvm.add %[[offsetVec3]], %[[linearIndex]] : -// CHECK-SAME: vector<17xi32> +// CHECK: %[[otrunc:.*]] = index_cast %[[BASE]] : index to i32 +// CHECK: %[[offsetVec:.*]] = splat %[[otrunc]] : vector<17xi32> +// CHECK: %[[offsetVec2:.*]] = addi %[[offsetVec]], %[[linearIndex]] : vector<17xi32> // // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] -// CHECK: %[[dtrunc:.*]] = llvm.trunc %[[DIM]] : i64 to i32 -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : vector<17xi32> -// CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dtrunc]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: i32] : vector<17xi32> -// CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: vector<17xi32>, vector<17xi32> -// CHECK: %[[mask:.*]] = llvm.icmp "slt" %[[offsetVec4]], %[[dimVec3]] : -// CHECK-SAME: vector<17xi32> +// CHECK: %[[dtrunc:.*]] = index_cast %[[DIM]] : index to i32 +// CHECK: %[[dimVec:.*]] = splat %[[dtrunc]] : vector<17xi32> +// CHECK: %[[mask:.*]] = cmpi slt, %[[offsetVec2]], %[[dimVec]] : vector<17xi32> // // 5. Rewrite as a masked read. -// CHECK: %[[PASS_THROUGH:.*]] = llvm.mlir.constant(dense<7.000000e+00> : -// CHECK-SAME: vector<17xf32>) : vector<17xf32> +// CHECK: %[[PASS_THROUGH:.*]] = splat %[[c7]] : vector<17xf32> // CHECK: %[[loaded:.*]] = llvm.intr.masked.load %[[vecPtr]], %[[mask]], // CHECK-SAME: %[[PASS_THROUGH]] {alignment = 4 : i32} : // CHECK-SAME: (!llvm.ptr>, vector<17xi1>, vector<17xf32>) -> vector<17xf32> @@ -1049,24 +1087,18 @@ func @transfer_read_1d(%A : memref, %base: index) -> vector<17xf32> { // CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. -// CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant(dense +// CHECK: %[[linearIndex_b:.*]] = constant dense // CHECK-SAME: <[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi32>) : vector<17xi32> +// CHECK-SAME: vector<17xi32> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: vector<17xi32>, vector<17xi32> -// CHECK: llvm.add +// CHECK: splat %{{.*}} : vector<17xi32> +// CHECK: addi // // 4. Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] -// CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: vector<17xi32>, vector<17xi32> -// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : vector<17xi32> +// CHECK: splat %{{.*}} : vector<17xi32> +// CHECK: %[[mask_b:.*]] = cmpi slt, {{.*}} : vector<17xi32> // // 5. Rewrite as a masked write. // CHECK: llvm.intr.masked.store %[[loaded]], %[[vecPtr_b]], %[[mask_b]] @@ -1083,34 +1115,18 @@ func @transfer_read_2d_to_1d(%A : memref, %base0: index, %base1: index) return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_2d_to_1d -// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: i64, %[[BASE_1:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> -// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] : -// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: index, %[[BASE_1:[a-zA-Z0-9]*]]: index) -> vector<17xf32> +// CHECK: %[[c1:.*]] = constant 1 : index +// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[c1]] : memref // // Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -// CHECK: %[[trunc:.*]] = llvm.trunc %[[BASE_1]] : i64 to i32 -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : vector<17xi32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[trunc]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: i32] : vector<17xi32> -// CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: vector<17xi32>, vector<17xi32> +// CHECK: %[[trunc:.*]] = index_cast %[[BASE_1]] : index to i32 +// CHECK: %[[offsetVec:.*]] = splat %[[trunc]] : vector<17xi32> // // Let dim the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] -// CHECK: %[[dimtrunc:.*]] = llvm.trunc %[[DIM]] : i64 to i32 -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : vector<17xi32> -// CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[dimtrunc]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: i32] : vector<17xi32> -// CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, -// CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: vector<17xi32>, vector<17xi32> +// CHECK: %[[dimtrunc:.*]] = index_cast %[[DIM]] : index to i32 +// CHECK: splat %[[dimtrunc]] : vector<17xi32> // ----- @@ -1125,7 +1141,7 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) - return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d_non_zero_addrspace -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xf32> // // 1. Check address space for GEP is correct. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : @@ -1134,8 +1150,8 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref, %base: index) - // CHECK-SAME: !llvm.ptr to !llvm.ptr> // // 2. Check address space of the memref is correct. -// CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : -// CHECK-SAME: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK: %[[c0:.*]] = constant 0 : index +// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[c0]] : memref // // 3. Check address apce for GEP is correct. // CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} : @@ -1152,7 +1168,7 @@ func @transfer_read_1d_not_masked(%A : memref, %base: index) -> vector<17 return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d_not_masked -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<17xf32> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<17xf32> // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : @@ -1172,7 +1188,7 @@ func @transfer_read_1d_cast(%A : memref, %base: index) -> vector<12xi8> { return %v: vector<12xi8> } // CHECK-LABEL: func @transfer_read_1d_cast -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: i64) -> vector<12xi8> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: index) -> vector<12xi8> // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : @@ -1190,8 +1206,8 @@ func @genbool_1d() -> vector<8xi1> { return %0 : vector<8xi1> } // CHECK-LABEL: func @genbool_1d -// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>) : vector<8xi1> -// CHECK: llvm.return %[[C1]] : vector<8xi1> +// CHECK: %[[VAL_0:.*]] = constant dense<[true, true, true, true, false, false, false, false]> : vector<8xi1> +// CHECK: return %[[VAL_0]] : vector<8xi1> // ----- @@ -1201,11 +1217,13 @@ func @genbool_2d() -> vector<4x4xi1> { } // CHECK-LABEL: func @genbool_2d -// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, false, false]> : vector<4xi1>) : vector<4xi1> -// CHECK: %[[C2:.*]] = llvm.mlir.constant(dense : vector<4x4xi1>) : !llvm.array<4 x vector<4xi1>> -// CHECK: %[[T0:.*]] = llvm.insertvalue %[[C1]], %[[C2]][0] : !llvm.array<4 x vector<4xi1>> -// CHECK: %[[T1:.*]] = llvm.insertvalue %[[C1]], %[[T0]][1] : !llvm.array<4 x vector<4xi1>> -// CHECK: llvm.return %[[T1]] : !llvm.array<4 x vector<4xi1>> +// CHECK: %[[VAL_0:.*]] = constant dense<[true, true, false, false]> : vector<4xi1> +// CHECK: %[[VAL_1:.*]] = constant dense : vector<4x4xi1> +// CHECK: %[[VAL_2:.*]] = llvm.mlir.cast %[[VAL_1]] : vector<4x4xi1> to !llvm.array<4 x vector<4xi1>> +// CHECK: %[[VAL_3:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_2]][0] : !llvm.array<4 x vector<4xi1>> +// CHECK: %[[VAL_4:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_3]][1] : !llvm.array<4 x vector<4xi1>> +// CHECK: %[[VAL_5:.*]] = llvm.mlir.cast %[[VAL_4]] : !llvm.array<4 x vector<4xi1>> to vector<4x4xi1> +// CHECK: return %[[VAL_5]] : vector<4x4xi1> // ----- @@ -1220,7 +1238,7 @@ func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> { // CHECK: %[[T:.*]] = llvm.intr.matrix.transpose %[[A]] // CHECK-SAME: {columns = 4 : i32, rows = 4 : i32} : // CHECK-SAME: vector<16xf32> into vector<16xf32> -// CHECK: llvm.return %[[T]] : vector<16xf32> +// CHECK: return %[[T]] : vector<16xf32> // ----- @@ -1231,11 +1249,12 @@ func @masked_load_op(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector<1 } // CHECK-LABEL: func @masked_load_op -// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[CO:.*]] = constant 0 : index +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr to !llvm.ptr> // CHECK: %[[L:.*]] = llvm.intr.masked.load %[[B]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr>, vector<16xi1>, vector<16xf32>) -> vector<16xf32> -// CHECK: llvm.return %[[L]] : vector<16xf32> +// CHECK: return %[[L]] : vector<16xf32> // ----- @@ -1246,11 +1265,11 @@ func @masked_store_op(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector< } // CHECK-LABEL: func @masked_store_op -// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[CO:.*]] = constant 0 : index +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[B:.*]] = llvm.bitcast %[[P]] : !llvm.ptr to !llvm.ptr> // CHECK: llvm.intr.masked.store %{{.*}}, %[[B]], %{{.*}} {alignment = 4 : i32} : vector<16xf32>, vector<16xi1> into !llvm.ptr> -// CHECK: llvm.return // ----- @@ -1262,7 +1281,7 @@ func @gather_op(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1>, // CHECK-LABEL: func @gather_op // CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr> // CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32> -// CHECK: llvm.return %[[G]] : vector<3xf32> +// CHECK: return %[[G]] : vector<3xf32> // ----- @@ -1274,7 +1293,6 @@ func @scatter_op(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1> // CHECK-LABEL: func @scatter_op // CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr> // CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<3xf32>, vector<3xi1> into !llvm.vec<3 x ptr> -// CHECK: llvm.return // ----- @@ -1285,10 +1303,11 @@ func @expand_load_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vector<1 } // CHECK-LABEL: func @expand_load_op -// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[CO:.*]] = constant 0 : index +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[E:.*]] = "llvm.intr.masked.expandload"(%[[P]], %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<11xi1>, vector<11xf32>) -> vector<11xf32> -// CHECK: llvm.return %[[E]] : vector<11xf32> +// CHECK: return %[[E]] : vector<11xf32> // ----- @@ -1299,7 +1318,7 @@ func @compress_store_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vecto } // CHECK-LABEL: func @compress_store_op -// CHECK: %[[C:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[CO:.*]] = constant 0 : index +// CHECK: %[[C:.*]] = llvm.mlir.cast %[[CO]] : index to i64 // CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %[[P]], %{{.*}}) : (vector<11xf32>, !llvm.ptr, vector<11xi1>) -> () -// CHECK: llvm.return diff --git a/mlir/test/Dialect/LLVMIR/dialect-cast.mlir b/mlir/test/Dialect/LLVMIR/dialect-cast.mlir index b72141dc4142..e3e2d96b6c28 100644 --- a/mlir/test/Dialect/LLVMIR/dialect-cast.mlir +++ b/mlir/test/Dialect/LLVMIR/dialect-cast.mlir @@ -3,12 +3,13 @@ // These are the supported cases, just make sure they don't trigger errors, op // syntax is tested elsewhere. -func @mlir_dialect_cast(%0: index, %1: i32, %2: bf16, %3: f16, %4: f32, %5: f64, +func @mlir_dialect_cast(%0: index, %1: vector<2x2x2xf32>, %6: vector<42xf32>, %7: memref<42xf32>, %8: memref, %9: memref, %10: memref<*xf32>) { llvm.mlir.cast %0 : index to i64 llvm.mlir.cast %0 : index to i32 + llvm.mlir.cast %1 : vector<2x2x2xf32> to !llvm.array<2 x array<2 x vector<2xf32>>> llvm.mlir.cast %7 : memref<42xf32> to !llvm.ptr llvm.mlir.cast %7 : memref<42xf32> to !llvm.struct<(ptr, ptr, i64, array<1xi64>, array<1xi64>)> llvm.mlir.cast %8 : memref to !llvm.struct<(ptr, ptr, i64, array<1xi64>, array<1xi64>)> @@ -65,19 +66,33 @@ func @mlir_dialect_cast_f64(%0 : f64) { // ----- func @mlir_dialect_cast_integer_non_integer(%0 : i16) { - // expected-error@+1 {{unsupported cast}} + // expected-error@+1 {{invalid cast between integer and non-integer}} llvm.mlir.cast %0 : i16 to f16 } // ----- func @mlir_dialect_cast_scalable_vector(%0 : vector<2xf32>) { - // expected-error@+1 {{vector types should not be casted}} + // expected-error@+1 {{invalid cast for vector types}} llvm.mlir.cast %0 : vector<2xf32> to !llvm.vec } // ----- +func @mlir_dialect_cast_vector_to_self(%0 : vector<2xf32>) { + // expected-error@+1 {{vector types should not be casted}} + llvm.mlir.cast %0 : vector<2xf32> to vector<2xf32> +} + +// ----- + +func @mlir_dialect_cast_nd_vector(%0 : vector<2x2xf32>) { + // expected-error@+1 {{invalid cast for vector, expected array}} + llvm.mlir.cast %0 : vector<2x2xf32> to !llvm.struct<()> +} + +// ----- + func @mlir_dialect_cast_dynamic_memref_bare_ptr(%0 : memref) { // expected-error@+1 {{unexpected bare pointer for dynamically shaped memref}} llvm.mlir.cast %0 : memref to !llvm.ptr diff --git a/mlir/test/Target/vector-to-llvm-ir.mlir b/mlir/test/Target/vector-to-llvm-ir.mlir index 4ede6ca2a5df..271d97a2a170 100644 --- a/mlir/test/Target/vector-to-llvm-ir.mlir +++ b/mlir/test/Target/vector-to-llvm-ir.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-vector-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s +// RUN: mlir-opt %s -convert-vector-to-llvm -convert-std-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1>