[mlir] make vector to llvm conversion truly partial

Historically, the Vector to LLVM dialect conversion subsumed the Standard to
LLVM dialect conversion patterns. This was necessary because the conversion
infrastructure did not have sufficient support for reconciling type
conversions. This support is now available. Only keep the patterns related to
the Vector dialect in the Vector to LLVM conversion and require type cast
operations to be inserted if necessary. These casts will be removed by
subsequent conversions if possible. Update integration tests to also run the
Standard to LLVM conversion.
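
As a minimal sketch of the new behavior (mirroring the updated create-mask test
expectations below, with index optimizations disabled), running only
-convert-vector-to-llvm now leaves Standard ops and an llvm.mlir.cast in place:

  func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
    %0 = llvm.mlir.cast %arg0 : index to i64
    %1 = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>
    %2 = splat %0 : vector<11xi64>
    %3 = cmpi slt, %1, %2 : vector<11xi64>
    return %3 : vector<11xi1>
  }

A subsequent -convert-std-to-llvm pass removes the cast and lowers the
remaining Standard ops to the LLVM dialect.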

There is a significant amount of test churn, which is due to (a) unnecessarily
strict tests in VectorToLLVM and (b) many patterns targeting Standard dialect
ops instead of LLVM dialect ops, so the tests were actually exercising a
Vector->Standard->LLVM conversion. This churn is a good illustration of the
reason to make the conversion partial: the tests now only check the code in the
Vector to LLVM conversion and will not be broken by unrelated changes in the
Standard to LLVM conversion.

Arguably, it may be possible to extract Vector to Standard patterns into a
separate pass, but given the ongoing splitting of the Standard dialect, such a
pass would be short-lived and would require further refactoring.

Depends On D95626

Reviewed By: nicolasvasilache, aartbik

Differential Revision: https://reviews.llvm.org/D95685
Author: Alex Zinenko
Date:   2021-02-04 11:16:48 +01:00
Commit: ba87f99168
Parent: aa56b30014

18 changed files with 634 additions and 492 deletions


@@ -5,7 +5,7 @@
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \


@@ -7,7 +7,7 @@
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \


@@ -8,7 +8,7 @@
// R_UN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \


@@ -5,7 +5,7 @@
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -mlir-disable-threading | \
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -mlir-disable-threading | \
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \


@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm | \
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s


@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s


@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s


@@ -1,11 +1,11 @@
// RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \
// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \
// RUN: -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
// RUN: FileCheck %s
// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \
// RUN: -convert-scf-to-std -convert-vector-to-llvm | mlir-cpu-runner -e main \
// RUN: -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e main \
// RUN: -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
// RUN: FileCheck %s


@@ -58,13 +58,56 @@ convertScalableVectorTypeToLLVM(ScalableVectorType svType,
return sVectorType;
}
template <typename OpTy>
class ForwardOperands : public OpConversionPattern<OpTy> {
using OpConversionPattern<OpTy>::OpConversionPattern;
LogicalResult
matchAndRewrite(OpTy op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const final {
if (ValueRange(operands).getTypes() == op->getOperands().getTypes())
return rewriter.notifyMatchFailure(op, "operand types already match");
rewriter.updateRootInPlace(op, [&]() { op->setOperands(operands); });
return success();
}
};
class ReturnOpTypeConversion : public OpConversionPattern<ReturnOp> {
public:
using OpConversionPattern<ReturnOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(ReturnOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const final {
rewriter.updateRootInPlace(op, [&]() { op->setOperands(operands); });
return success();
}
};
static Optional<Value> addUnrealizedCast(OpBuilder &builder,
ScalableVectorType svType,
ValueRange inputs, Location loc) {
if (inputs.size() != 1 ||
!inputs[0].getType().isa<LLVM::LLVMScalableVectorType>())
return Value();
return builder.create<UnrealizedConversionCastOp>(loc, svType, inputs)
.getResult(0);
}
/// Populate the given list with patterns that convert from ArmSVE to LLVM.
void mlir::populateArmSVEToLLVMConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
converter.addConversion([&converter](ScalableVectorType svType) {
return convertScalableVectorTypeToLLVM(svType, converter);
});
converter.addSourceMaterialization(addUnrealizedCast);
// clang-format off
patterns.insert<ForwardOperands<CallOp>,
ForwardOperands<CallIndirectOp>,
ForwardOperands<ReturnOp>>(converter,
&converter.getContext());
patterns.insert<SdotOpLowering,
SmmlaOpLowering,
UdotOpLowering,


@@ -102,6 +102,27 @@ static SmallVector<int64_t, 4> getI64SubArray(ArrayAttr arrayAttr,
return res;
}
static Value createCastToIndexLike(ConversionPatternRewriter &rewriter,
Location loc, Type targetType, Value value) {
if (targetType == value.getType())
return value;
bool targetIsIndex = targetType.isIndex();
bool valueIsIndex = value.getType().isIndex();
if (targetIsIndex ^ valueIsIndex)
return rewriter.create<IndexCastOp>(loc, targetType, value);
auto targetIntegerType = targetType.dyn_cast<IntegerType>();
auto valueIntegerType = value.getType().dyn_cast<IntegerType>();
assert(targetIntegerType && valueIntegerType &&
"unexpected cast between types other than integers and index");
assert(targetIntegerType.getSignedness() == valueIntegerType.getSignedness());
if (targetIntegerType.getWidth() > valueIntegerType.getWidth())
return rewriter.create<SignExtendIOp>(loc, targetIntegerType, value);
return rewriter.create<TruncateIOp>(loc, targetIntegerType, value);
}
// Helper that returns a vector comparison that constructs a mask:
// mask = [0,1,..,n-1] + [o,o,..,o] < [b,b,..,b]
//
@@ -131,12 +152,12 @@ static Value buildVectorComparison(ConversionPatternRewriter &rewriter,
}
// Add in an offset if requested.
if (off) {
Value o = rewriter.create<IndexCastOp>(loc, idxType, *off);
Value o = createCastToIndexLike(rewriter, loc, idxType, *off);
Value ov = rewriter.create<SplatOp>(loc, indices.getType(), o);
indices = rewriter.create<AddIOp>(loc, ov, indices);
}
// Construct the vector comparison.
Value bound = rewriter.create<IndexCastOp>(loc, idxType, b);
Value bound = createCastToIndexLike(rewriter, loc, idxType, b);
Value bounds = rewriter.create<SplatOp>(loc, indices.getType(), bound);
return rewriter.create<CmpIOp>(loc, CmpIPredicate::slt, indices, bounds);
}
@@ -216,10 +237,8 @@ replaceTransferOpWithMasked(ConversionPatternRewriter &rewriter,
LLVMTypeConverter &typeConverter, Location loc,
TransferReadOp xferOp, ArrayRef<Value> operands,
Value dataPtr, Value mask) {
auto toLLVMTy = [&](Type t) { return typeConverter.convertType(t); };
VectorType fillType = xferOp.getVectorType();
Value fill = rewriter.create<SplatOp>(loc, fillType, xferOp.padding());
fill = rewriter.create<LLVM::DialectCastOp>(loc, toLLVMTy(fillType), fill);
Type vecTy = typeConverter.convertType(xferOp.getVectorType());
if (!vecTy)


@@ -22,6 +22,7 @@
#include "mlir/Dialect/LLVMIR/LLVMArmNeonDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMArmSVEDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -70,10 +71,12 @@ void LowerVectorToLLVMPass::runOnOperation() {
populateVectorToLLVMConversionPatterns(
converter, patterns, reassociateFPReductions, enableIndexOptimizations);
populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
populateStdToLLVMConversionPatterns(converter, patterns);
// Architecture specific augmentations.
LLVMConversionTarget target(getContext());
target.addLegalOp<LLVM::DialectCastOp>();
target.addLegalDialect<StandardOpsDialect>();
target.addLegalOp<UnrealizedConversionCastOp>();
if (enableArmNeon) {
target.addLegalDialect<LLVM::LLVMArmNeonDialect>();
target.addIllegalDialect<arm_neon::ArmNeonDialect>();
@@ -82,6 +85,23 @@ void LowerVectorToLLVMPass::runOnOperation() {
if (enableArmSVE) {
target.addLegalDialect<LLVM::LLVMArmSVEDialect>();
target.addIllegalDialect<arm_sve::ArmSVEDialect>();
auto hasScalableVectorType = [](TypeRange types) {
for (Type type : types)
if (type.isa<arm_sve::ScalableVectorType>())
return true;
return false;
};
// Remove any ArmSVE-specific types from function signatures and results.
populateFuncOpTypeConversionPattern(patterns, &getContext(), converter);
target.addDynamicallyLegalOp<FuncOp>([hasScalableVectorType](FuncOp op) {
return !hasScalableVectorType(op.getType().getInputs()) &&
!hasScalableVectorType(op.getType().getResults());
});
target.addDynamicallyLegalOp<CallOp, CallIndirectOp, ReturnOp>(
[hasScalableVectorType](Operation *op) {
return !hasScalableVectorType(op->getOperandTypes()) &&
!hasScalableVectorType(op->getResultTypes());
});
populateArmSVEToLLVMConversionPatterns(converter, patterns);
}
if (enableAVX512) {


@@ -1274,10 +1274,40 @@ static LogicalResult verifyCast(DialectCastOp op, Type llvmType, Type type,
return op.emitOpError("invalid cast between index and non-integer type");
}
if (type.isa<IntegerType>()) {
auto llvmIntegerType = llvmType.dyn_cast<IntegerType>();
if (!llvmIntegerType)
return op->emitOpError("invalid cast between integer and non-integer");
if (llvmIntegerType.getWidth() != type.getIntOrFloatBitWidth())
return op.emitOpError("invalid cast changing integer width");
return success();
}
// Vectors are compatible if they are 1D non-scalable, and their element types
// are compatible.
if (auto vectorType = type.dyn_cast<VectorType>())
return op.emitOpError("vector types should not be casted");
// are compatible. nD vectors are compatible with (n-1)D arrays containing 1D
// vector.
if (auto vectorType = type.dyn_cast<VectorType>()) {
if (vectorType == llvmType && !isElement)
return op.emitOpError("vector types should not be casted");
if (vectorType.getRank() == 1) {
auto llvmVectorType = llvmType.dyn_cast<VectorType>();
if (!llvmVectorType || llvmVectorType.getRank() != 1)
return op.emitOpError("invalid cast for vector types");
return verifyCast(op, llvmVectorType.getElementType(),
vectorType.getElementType(), /*isElement=*/true);
}
auto arrayType = llvmType.dyn_cast<LLVM::LLVMArrayType>();
if (!arrayType ||
arrayType.getNumElements() != vectorType.getShape().front())
return op.emitOpError("invalid cast for vector, expected array");
return verifyCast(op, arrayType.getElementType(),
VectorType::get(vectorType.getShape().drop_front(),
vectorType.getElementType()),
/*isElement=*/true);
}
if (auto memrefType = type.dyn_cast<MemRefType>()) {
// Bare pointer convention: statically-shaped memref is compatible with an


@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -convert-vector-to-llvm="enable-arm-sve" | mlir-opt | FileCheck %s
// RUN: mlir-opt %s -convert-vector-to-llvm="enable-arm-sve" -convert-std-to-llvm | mlir-opt | FileCheck %s
func @arm_sve_sdot(%a: !arm_sve.vector<16xi8>,
%b: !arm_sve.vector<16xi8>,


@@ -1,45 +1,41 @@
// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=1' | FileCheck %s --check-prefix=CMP32
// RUN: mlir-opt %s --convert-vector-to-llvm='enable-index-optimizations=0' | FileCheck %s --check-prefix=CMP64
// CMP32-LABEL: llvm.func @genbool_var_1d(
// CMP32-SAME: %[[A:.*]]: i64)
// CMP32: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>) : vector<11xi32>
// CMP32: %[[T1:.*]] = llvm.trunc %[[A]] : i64 to i32
// CMP32: %[[T2:.*]] = llvm.mlir.undef : vector<11xi32>
// CMP32: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : i32
// CMP32: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%[[T3]] : i32] : vector<11xi32>
// CMP32: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi32>, vector<11xi32>
// CMP32: %[[T6:.*]] = llvm.icmp "slt" %[[T0]], %[[T5]] : vector<11xi32>
// CMP32: llvm.return %[[T6]] : vector<11xi1>
// CMP32-LABEL: @genbool_var_1d(
// CMP32-SAME: %[[ARG:.*]]: index)
// CMP32: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : index to i64
// CMP32: %[[T0:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>
// CMP32: %[[T1:.*]] = trunci %[[A]] : i64 to i32
// CMP32: %[[T2:.*]] = splat %[[T1]] : vector<11xi32>
// CMP32: %[[T3:.*]] = cmpi slt, %[[T0]], %[[T2]] : vector<11xi32>
// CMP32: return %[[T3]] : vector<11xi1>
// CMP64-LABEL: llvm.func @genbool_var_1d(
// CMP64-SAME: %[[A:.*]]: i64)
// CMP64: %[[T0:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>) : vector<11xi64>
// CMP64: %[[T1:.*]] = llvm.mlir.undef : vector<11xi64>
// CMP64: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : i32
// CMP64: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : i32] : vector<11xi64>
// CMP64: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T1]] [0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : vector<11xi64>, vector<11xi64>
// CMP64: %[[T5:.*]] = llvm.icmp "slt" %[[T0]], %[[T4]] : vector<11xi64>
// CMP64: llvm.return %[[T5]] : vector<11xi1>
// CMP64-LABEL: @genbool_var_1d(
// CMP64-SAME: %[[ARG:.*]]: index)
// CMP64: %[[A:.*]] = llvm.mlir.cast %[[ARG]] : index to i64
// CMP64: %[[T0:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi64>
// CMP64: %[[T1:.*]] = splat %[[A]] : vector<11xi64>
// CMP64: %[[T2:.*]] = cmpi slt, %[[T0]], %[[T1]] : vector<11xi64>
// CMP64: return %[[T2]] : vector<11xi1>
func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
%0 = vector.create_mask %arg0 : vector<11xi1>
return %0 : vector<11xi1>
}
// CMP32-LABEL: llvm.func @transfer_read_1d
// CMP32: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>) : vector<16xi32>
// CMP32: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi32>
// CMP32: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi32>
// CMP32-LABEL: @transfer_read_1d
// CMP32: %[[C:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
// CMP32: %[[A:.*]] = addi %{{.*}}, %[[C]] : vector<16xi32>
// CMP32: %[[M:.*]] = cmpi slt, %[[A]], %{{.*}} : vector<16xi32>
// CMP32: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}}
// CMP32: llvm.return %[[L]] : vector<16xf32>
// CMP32: return %[[L]] : vector<16xf32>
// CMP64-LABEL: llvm.func @transfer_read_1d
// CMP64: %[[C:.*]] = llvm.mlir.constant(dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>) : vector<16xi64>
// CMP64: %[[A:.*]] = llvm.add %{{.*}}, %[[C]] : vector<16xi64>
// CMP64: %[[M:.*]] = llvm.icmp "slt" %[[A]], %{{.*}} : vector<16xi64>
// CMP64-LABEL: @transfer_read_1d
// CMP64: %[[C:.*]] = constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi64>
// CMP64: %[[A:.*]] = addi %{{.*}}, %[[C]] : vector<16xi64>
// CMP64: %[[M:.*]] = cmpi slt, %[[A]], %{{.*}} : vector<16xi64>
// CMP64: %[[L:.*]] = llvm.intr.masked.load %{{.*}}, %[[M]], %{{.*}}
// CMP64: llvm.return %[[L]] : vector<16xf32>
// CMP64: return %[[L]] : vector<16xf32>
func @transfer_read_1d(%A : memref<?xf32>, %i: index) -> vector<16xf32> {
%d = constant -1.0: f32


@@ -2,19 +2,19 @@
// RUN: mlir-opt %s -convert-vector-to-llvm='reassociate-fp-reductions' | FileCheck %s --check-prefix=REASSOC
//
// CHECK-LABEL: llvm.func @reduce_add_f32(
// CHECK-LABEL: @reduce_add_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32
// CHECK: llvm.return %[[V]] : f32
// CHECK: return %[[V]] : f32
//
// REASSOC-LABEL: llvm.func @reduce_add_f32(
// REASSOC-LABEL: @reduce_add_f32(
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
// REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32
// REASSOC: llvm.return %[[V]] : f32
// REASSOC: return %[[V]] : f32
//
func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
%0 = vector.reduction "add", %arg0 : vector<16xf32> into f32
@@ -22,19 +22,19 @@ func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
}
//
// CHECK-LABEL: llvm.func @reduce_mul_f32(
// CHECK-LABEL: @reduce_mul_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// CHECK-SAME: {reassoc = false} : (f32, vector<16xf32>) -> f32
// CHECK: llvm.return %[[V]] : f32
// CHECK: return %[[V]] : f32
//
// REASSOC-LABEL: llvm.func @reduce_mul_f32(
// REASSOC-LABEL: @reduce_mul_f32(
// REASSOC-SAME: %[[A:.*]]: vector<16xf32>)
// REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
// REASSOC: %[[V:.*]] = "llvm.intr.vector.reduce.fmul"(%[[C]], %[[A]])
// REASSOC-SAME: {reassoc = true} : (f32, vector<16xf32>) -> f32
// REASSOC: llvm.return %[[V]] : f32
// REASSOC: return %[[V]] : f32
//
func @reduce_mul_f32(%arg0: vector<16xf32>) -> f32 {
%0 = vector.reduction "mul", %arg0 : vector<16xf32> into f32

File diff suppressed because it is too large


@@ -3,12 +3,13 @@
// These are the supported cases, just make sure they don't trigger errors, op
// syntax is tested elsewhere.
func @mlir_dialect_cast(%0: index, %1: i32, %2: bf16, %3: f16, %4: f32, %5: f64,
func @mlir_dialect_cast(%0: index, %1: vector<2x2x2xf32>,
%6: vector<42xf32>, %7: memref<42xf32>,
%8: memref<?xf32>, %9: memref<f32>,
%10: memref<*xf32>) {
llvm.mlir.cast %0 : index to i64
llvm.mlir.cast %0 : index to i32
llvm.mlir.cast %1 : vector<2x2x2xf32> to !llvm.array<2 x array<2 x vector<2xf32>>>
llvm.mlir.cast %7 : memref<42xf32> to !llvm.ptr<f32>
llvm.mlir.cast %7 : memref<42xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1xi64>, array<1xi64>)>
llvm.mlir.cast %8 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1xi64>, array<1xi64>)>
@@ -65,19 +66,33 @@ func @mlir_dialect_cast_f64(%0 : f64) {
// -----
func @mlir_dialect_cast_integer_non_integer(%0 : i16) {
// expected-error@+1 {{unsupported cast}}
// expected-error@+1 {{invalid cast between integer and non-integer}}
llvm.mlir.cast %0 : i16 to f16
}
// -----
func @mlir_dialect_cast_scalable_vector(%0 : vector<2xf32>) {
// expected-error@+1 {{vector types should not be casted}}
// expected-error@+1 {{invalid cast for vector types}}
llvm.mlir.cast %0 : vector<2xf32> to !llvm.vec<?x2xf32>
}
// -----
func @mlir_dialect_cast_vector_to_self(%0 : vector<2xf32>) {
// expected-error@+1 {{vector types should not be casted}}
llvm.mlir.cast %0 : vector<2xf32> to vector<2xf32>
}
// -----
func @mlir_dialect_cast_nd_vector(%0 : vector<2x2xf32>) {
// expected-error@+1 {{invalid cast for vector, expected array}}
llvm.mlir.cast %0 : vector<2x2xf32> to !llvm.struct<()>
}
// -----
func @mlir_dialect_cast_dynamic_memref_bare_ptr(%0 : memref<?xf32>) {
// expected-error@+1 {{unexpected bare pointer for dynamically shaped memref}}
llvm.mlir.cast %0 : memref<?xf32> to !llvm.ptr<f32>


@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -convert-vector-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s
// RUN: mlir-opt %s -convert-vector-to-llvm -convert-std-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s
func @genbool_1d() -> vector<8xi1> {
%0 = vector.constant_mask [4] : vector<8xi1>