From 914ce66413e9de560a4546e87cacbbecad4d63bb Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@rt-rk.com>
Date: Thu, 24 Oct 2019 10:15:07 +0200
Subject: [PATCH] [MIPS GlobalISel] MSA vector generic and builtin fadd, fsub,
 fmul, fdiv

Select vector G_FADD, G_FSUB, G_FMUL and G_FDIV for MIPS32 with MSA. We
only have to set the register bank of the vector operands to fprb;
selectImpl does the rest. __builtin_msa_fadd_<format>,
__builtin_msa_fsub_<format>, __builtin_msa_fmul_<format> and
__builtin_msa_fdiv_<format> are transformed into G_FADD, G_FSUB, G_FMUL
and G_FDIV, respectively, in legalizeIntrinsic and are then selected in
the same way.

Differential Revision: https://reviews.llvm.org/D69340
---
 llvm/lib/Target/Mips/MipsLegalizerInfo.cpp    |  23 +-
 llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp |   8 +-
 ...oating_point_vec_arithmetic_operations.mir | 257 ++++++++++++++++++
 ...oating_point_vec_arithmetic_operations.mir | 241 ++++++++++++++++
 ...oint_vec_arithmetic_operations_builtin.mir | 253 +++++++++++++++++
 ...loating_point_vec_arithmetic_operations.ll | 145 ++++++++++
 ...point_vec_arithmetic_operations_builtin.ll | 146 ++++++++++
 ...oating_point_vec_arithmetic_operations.mir | 249 +++++++++++++++++
 8 files changed, 1319 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations.ll
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations_builtin.ll
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index e44f5e7d1ce9..f820da3dbc7d 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -188,9 +188,18 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalFor({s32, s64});
 
-  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
+  getActionDefinitionsBuilder({G_FABS, G_FSQRT})
       .legalFor({s32, s64});
 
+  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
+      .legalIf([=, &ST](const LegalityQuery &Query) {
+        if (CheckTyN(0, Query, {s32, s64}))
+          return true;
+        if (ST.hasMSA() && CheckTyN(0, Query, {v4s32, v2s64})) // .w/.d only
+          return true;
+        return false;
+      });
+
   getActionDefinitionsBuilder(G_FCMP)
       .legalFor({{s32, s32}, {s32, s64}})
       .minScalar(0, s32);
@@ -404,6 +413,18 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
   case Intrinsic::mips_mod_u_w:
   case Intrinsic::mips_mod_u_d:
     return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UREM, MIRBuilder, ST);
+  case Intrinsic::mips_fadd_w:
+  case Intrinsic::mips_fadd_d:
+    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FADD, MIRBuilder, ST);
+  case Intrinsic::mips_fsub_w:
+  case Intrinsic::mips_fsub_d:
+    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FSUB, MIRBuilder, ST);
+  case Intrinsic::mips_fmul_w:
+  case Intrinsic::mips_fmul_d:
+    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FMUL, MIRBuilder, ST);
+  case Intrinsic::mips_fdiv_w:
+  case Intrinsic::mips_fdiv_d:
+    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FDIV, MIRBuilder, ST);
   default:
     break;
   }
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index c8700c86373d..8af360a33e3c 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -539,13 +539,17 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
                                           &Mips::ValueMappings[Mips::GPRIdx]});
     MappingID = CustomMappingID;
     break;
+  case G_FABS:
+  case G_FSQRT:
+    OperandsMapping = getFprbMapping(Op0Size);
+    break;
   case G_FADD:
   case G_FSUB:
   case G_FMUL:
   case G_FDIV:
-  case G_FABS:
-  case G_FSQRT:
     OperandsMapping = getFprbMapping(Op0Size);
+    if (Op0Size == 128)
+      OperandsMapping = getMSAMapping(MF);
     break;
   case G_FCONSTANT:
     OperandsMapping = getOperandsMapping({getFprbMapping(Op0Size), nullptr});
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir
new file mode 100644
index 000000000000..4b49d6f46b53
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir
@@ -0,0 +1,257 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
+--- |
+
+  define void @fadd_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fadd_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fsub_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fsub_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fmul_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fmul_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fdiv_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fdiv_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+...
+---
+name:            fadd_v4f32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FADD_W:%[0-9]+]]:msa128w = FADD_W [[LD_W]], [[LD_W1]]
+    ; P5600: ST_W [[FADD_W]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<4 x s32>) = G_FADD %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fadd_v2f64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FADD_D:%[0-9]+]]:msa128d = FADD_D [[LD_D]], [[LD_D1]]
+    ; P5600: ST_D [[FADD_D]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<2 x s64>) = G_FADD %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v4f32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FSUB_W:%[0-9]+]]:msa128w = FSUB_W [[LD_W]], [[LD_W1]]
+    ; P5600: ST_W [[FSUB_W]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<4 x s32>) = G_FSUB %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v2f64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FSUB_D:%[0-9]+]]:msa128d = FSUB_D [[LD_D]], [[LD_D1]]
+    ; P5600: ST_D [[FSUB_D]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<2 x s64>) = G_FSUB %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v4f32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FMUL_W:%[0-9]+]]:msa128w = FMUL_W [[LD_W]], [[LD_W1]]
+    ; P5600: ST_W [[FMUL_W]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<4 x s32>) = G_FMUL %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v2f64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FMUL_D:%[0-9]+]]:msa128d = FMUL_D [[LD_D]], [[LD_D1]]
+    ; P5600: ST_D [[FMUL_D]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<2 x s64>) = G_FMUL %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v4f32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FDIV_W:%[0-9]+]]:msa128w = FDIV_W [[LD_W]], [[LD_W1]]
+    ; P5600: ST_W [[FDIV_W]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<4 x s32>) = G_FDIV %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v2f64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2
+    ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a)
+    ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b)
+    ; P5600: [[FDIV_D:%[0-9]+]]:msa128d = FDIV_D [[LD_D]], [[LD_D1]]
+    ; P5600: ST_D [[FDIV_D]], [[COPY2]], 0 :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %2:gprb(p0) = COPY $a2
+    %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:fprb(<2 x s64>) = G_FDIV %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir
new file mode 100644
index 000000000000..e26310b1a81b
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir
@@ -0,0 +1,241 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
+--- |
+
+  define void @fadd_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fadd_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fsub_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fsub_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fmul_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fmul_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fdiv_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fdiv_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+...
+---
+name:            fadd_v4f32
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FADD %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fadd_v2f64
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FADD %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v4f32
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FSUB:%[0-9]+]]:_(<4 x s32>) = G_FSUB [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FSUB %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v2f64
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FSUB:%[0-9]+]]:_(<2 x s64>) = G_FSUB [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FSUB %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v4f32
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FMUL %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v2f64
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FMUL:%[0-9]+]]:_(<2 x s64>) = G_FMUL [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FMUL %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v4f32
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FDIV:%[0-9]+]]:_(<4 x s32>) = G_FDIV [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FDIV %3, %4
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v2f64
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FDIV:%[0-9]+]]:_(<2 x s64>) = G_FDIV [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FDIV %3, %4
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir
new file mode 100644
index 000000000000..b874df19e13c
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir
@@ -0,0 +1,253 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
+--- |
+
+  declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>)
+  define void @fadd_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+
+  declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>)
+  define void @fadd_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  declare <4 x float> @llvm.mips.fsub.w(<4 x float>, <4 x float>)
+  define void @fsub_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+
+  declare <2 x double> @llvm.mips.fsub.d(<2 x double>, <2 x double>)
+  define void @fsub_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  declare <4 x float> @llvm.mips.fmul.w(<4 x float>, <4 x float>)
+  define void @fmul_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+
+  declare <2 x double> @llvm.mips.fmul.d(<2 x double>, <2 x double>)
+  define void @fmul_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  declare <4 x float> @llvm.mips.fdiv.w(<4 x float>, <4 x float>)
+  define void @fdiv_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+
+  declare <2 x double> @llvm.mips.fdiv.d(<2 x double>, <2 x double>)
+  define void @fdiv_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+...
+---
+name:            fadd_v4f32_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v4f32_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fadd.w), %3(<4 x s32>), %4(<4 x s32>)
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fadd_v2f64_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v2f64_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fadd.d), %3(<2 x s64>), %4(<2 x s64>)
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v4f32_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v4f32_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FSUB:%[0-9]+]]:_(<4 x s32>) = G_FSUB [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fsub.w), %3(<4 x s32>), %4(<4 x s32>)
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v2f64_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v2f64_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FSUB:%[0-9]+]]:_(<2 x s64>) = G_FSUB [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fsub.d), %3(<2 x s64>), %4(<2 x s64>)
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v4f32_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v4f32_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fmul.w), %3(<4 x s32>), %4(<4 x s32>)
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v2f64_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v2f64_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FMUL:%[0-9]+]]:_(<2 x s64>) = G_FMUL [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fmul.d), %3(<2 x s64>), %4(<2 x s64>)
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v4f32_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v4f32_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FDIV:%[0-9]+]]:_(<4 x s32>) = G_FDIV [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fdiv.w), %3(<4 x s32>), %4(<4 x s32>)
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v2f64_builtin
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v2f64_builtin
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FDIV:%[0-9]+]]:_(<2 x s64>) = G_FDIV [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fdiv.d), %3(<2 x s64>), %4(<2 x s64>)
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations.ll
new file mode 100644
index 000000000000..6a7486fca5b4
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600
+
+define void @fadd_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fadd_v4f32:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fadd.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a + *%b on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %add = fadd <4 x float> %0, %1 ; generic IR fadd; the checks above expect a single MSA fadd.w between ld.w and st.w
+  store <4 x float> %add, <4 x float>* %c, align 16
+  ret void
+}
+
+
+define void @fadd_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fadd_v2f64:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fadd.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a + *%b on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %add = fadd <2 x double> %0, %1 ; generic IR fadd; the checks above expect a single MSA fadd.d between ld.d and st.d
+  store <2 x double> %add, <2 x double>* %c, align 16
+  ret void
+}
+
+
+define void @fsub_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fsub_v4f32:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fsub.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a - *%b on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %sub = fsub <4 x float> %0, %1 ; generic IR fsub; the checks above expect a single MSA fsub.w between ld.w and st.w
+  store <4 x float> %sub, <4 x float>* %c, align 16
+  ret void
+}
+
+
+define void @fsub_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fsub_v2f64:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fsub.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a - *%b on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %sub = fsub <2 x double> %0, %1 ; generic IR fsub; the checks above expect a single MSA fsub.d between ld.d and st.d
+  store <2 x double> %sub, <2 x double>* %c, align 16
+  ret void
+}
+
+
+define void @fmul_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fmul_v4f32:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fmul.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a * *%b on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %mul = fmul <4 x float> %0, %1 ; generic IR fmul; the checks above expect a single MSA fmul.w between ld.w and st.w
+  store <4 x float> %mul, <4 x float>* %c, align 16
+  ret void
+}
+
+
+define void @fmul_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fmul_v2f64:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fmul.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a * *%b on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %mul = fmul <2 x double> %0, %1 ; generic IR fmul; the checks above expect a single MSA fmul.d between ld.d and st.d
+  store <2 x double> %mul, <2 x double>* %c, align 16
+  ret void
+}
+
+
+define void @fdiv_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fdiv_v4f32:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fdiv.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a / *%b on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %div = fdiv <4 x float> %0, %1 ; generic IR fdiv; the checks above expect a single MSA fdiv.w between ld.w and st.w
+  store <4 x float> %div, <4 x float>* %c, align 16
+  ret void
+}
+
+
+define void @fdiv_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fdiv_v2f64:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fdiv.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = *%a / *%b on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %div = fdiv <2 x double> %0, %1 ; generic IR fdiv; the checks above expect a single MSA fdiv.d between ld.d and st.d
+  store <2 x double> %div, <2 x double>* %c, align 16
+  ret void
+}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations_builtin.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations_builtin.ll
new file mode 100644
index 000000000000..ad7a710be148
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/floating_point_vec_arithmetic_operations_builtin.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600
+
+declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>)
+define void @fadd_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fadd_v4f32_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fadd.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fadd.w(*%a, *%b) on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %1) ; MSA builtin intrinsic call; the checks above expect a single fadd.w and no call
+  store <4 x float> %2, <4 x float>* %c, align 16
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>)
+define void @fadd_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fadd_v2f64_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fadd.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fadd.d(*%a, *%b) on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %1) ; MSA builtin intrinsic call; the checks above expect a single fadd.d and no call
+  store <2 x double> %2, <2 x double>* %c, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fsub.w(<4 x float>, <4 x float>)
+define void @fsub_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fsub_v4f32_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fsub.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fsub.w(*%a, *%b) on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %2 = tail call <4 x float> @llvm.mips.fsub.w(<4 x float> %0, <4 x float> %1) ; MSA builtin intrinsic call; the checks above expect a single fsub.w and no call
+  store <4 x float> %2, <4 x float>* %c, align 16
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fsub.d(<2 x double>, <2 x double>)
+define void @fsub_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fsub_v2f64_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fsub.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fsub.d(*%a, *%b) on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %2 = tail call <2 x double> @llvm.mips.fsub.d(<2 x double> %0, <2 x double> %1) ; MSA builtin intrinsic call; the checks above expect a single fsub.d and no call
+  store <2 x double> %2, <2 x double>* %c, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fmul.w(<4 x float>, <4 x float>)
+define void @fmul_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fmul_v4f32_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fmul.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fmul.w(*%a, *%b) on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %2 = tail call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1) ; MSA builtin intrinsic call; the checks above expect a single fmul.w and no call
+  store <4 x float> %2, <4 x float>* %c, align 16
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fmul.d(<2 x double>, <2 x double>)
+define void @fmul_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fmul_v2f64_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fmul.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fmul.d(*%a, *%b) on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %2 = tail call <2 x double> @llvm.mips.fmul.d(<2 x double> %0, <2 x double> %1) ; MSA builtin intrinsic call; the checks above expect a single fmul.d and no call
+  store <2 x double> %2, <2 x double>* %c, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.mips.fdiv.w(<4 x float>, <4 x float>)
+define void @fdiv_v4f32_builtin(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) {
+; P5600-LABEL: fdiv_v4f32_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.w $w0, 0($4)
+; P5600-NEXT:    ld.w $w1, 0($5)
+; P5600-NEXT:    fdiv.w $w0, $w0, $w1
+; P5600-NEXT:    st.w $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fdiv.w(*%a, *%b) on one <4 x float> vector
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %1 = load <4 x float>, <4 x float>* %b, align 16
+  %2 = tail call <4 x float> @llvm.mips.fdiv.w(<4 x float> %0, <4 x float> %1) ; MSA builtin intrinsic call; the checks above expect a single fdiv.w and no call
+  store <4 x float> %2, <4 x float>* %c, align 16
+  ret void
+}
+
+declare <2 x double> @llvm.mips.fdiv.d(<2 x double>, <2 x double>)
+define void @fdiv_v2f64_builtin(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) {
+; P5600-LABEL: fdiv_v2f64_builtin:
+; P5600:       # %bb.0: # %entry
+; P5600-NEXT:    ld.d $w0, 0($4)
+; P5600-NEXT:    ld.d $w1, 0($5)
+; P5600-NEXT:    fdiv.d $w0, $w0, $w1
+; P5600-NEXT:    st.d $w0, 0($6)
+; P5600-NEXT:    jr $ra
+; P5600-NEXT:    nop
+entry: ; *%c = llvm.mips.fdiv.d(*%a, *%b) on one <2 x double> vector
+  %0 = load <2 x double>, <2 x double>* %a, align 16
+  %1 = load <2 x double>, <2 x double>* %b, align 16
+  %2 = tail call <2 x double> @llvm.mips.fdiv.d(<2 x double> %0, <2 x double> %1) ; MSA builtin intrinsic call; the checks above expect a single fdiv.d and no call
+  store <2 x double> %2, <2 x double>* %c, align 16
+  ret void
+}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir
new file mode 100644
index 000000000000..6cdadb0f4877
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir
@@ -0,0 +1,249 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
+--- |
+
+  define void @fadd_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void } ; empty IR stubs: the MIR bodies below refer to %a/%b/%c only through their %ir.a/%ir.b/%ir.c memory operands
+  define void @fadd_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fsub_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fsub_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fmul_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fmul_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+  define void @fdiv_v4f32(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c) { entry: ret void }
+  define void @fdiv_v2f64(<2 x double>* %a, <2 x double>* %b, <2 x double>* %c) { entry: ret void }
+
+...
+---
+name:            fadd_v4f32
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FADD:%[0-9]+]]:fprb(<4 x s32>) = G_FADD [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FADD %3, %4 ; <4 x s32> G_FADD: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fadd_v2f64
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fadd_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FADD:%[0-9]+]]:fprb(<2 x s64>) = G_FADD [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FADD %3, %4 ; <2 x s64> G_FADD: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v4f32
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FSUB:%[0-9]+]]:fprb(<4 x s32>) = G_FSUB [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FSUB %3, %4 ; <4 x s32> G_FSUB: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fsub_v2f64
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fsub_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FSUB:%[0-9]+]]:fprb(<2 x s64>) = G_FSUB [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FSUB %3, %4 ; <2 x s64> G_FSUB: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v4f32
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FMUL:%[0-9]+]]:fprb(<4 x s32>) = G_FMUL [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FMUL %3, %4 ; <4 x s32> G_FMUL: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fmul_v2f64
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fmul_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FMUL:%[0-9]+]]:fprb(<2 x s64>) = G_FMUL [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FMUL %3, %4 ; <2 x s64> G_FMUL: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v4f32
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v4f32
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FDIV:%[0-9]+]]:fprb(<4 x s32>) = G_FDIV [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<4 x s32>) = G_FDIV %3, %4 ; <4 x s32> G_FDIV: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...
+---
+name:            fdiv_v2f64
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2
+
+    ; P5600-LABEL: name: fdiv_v2f64
+    ; P5600: liveins: $a0, $a1, $a2
+    ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2
+    ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
+    ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b)
+    ; P5600: [[FDIV:%[0-9]+]]:fprb(<2 x s64>) = G_FDIV [[LOAD]], [[LOAD1]]
+    ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c)
+    ; P5600: RetRA
+    %0:_(p0) = COPY $a0 ; input has no banks assigned (":_"); the checks above expect gprb for the three pointer copies
+    %1:_(p0) = COPY $a1
+    %2:_(p0) = COPY $a2
+    %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
+    %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b)
+    %5:_(<2 x s64>) = G_FDIV %3, %4 ; <2 x s64> G_FDIV: the checks above expect fprb for its result and for both loaded operands
+    G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c)
+    RetRA
+
+...