[AArch64][FIX] f16 indexed patterns encoding restrictions.

2020-04-22 12:34:36 +01:00 · 2020-04-22 12:34:36 +01:00 · 4eca1c06a4
parent c3730ad8fc
commit 4eca1c06a4
2 changed files with 9 additions and 9 deletions
--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
@@ -121,7 +121,7 @@ float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+// CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
 // COMMONIR:      ret <4 x half> [[FMLA]]
 float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
  return vfma_laneq_f16(a, b, c, 7);
@@ -239,7 +239,7 @@ float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
 // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
 // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
 // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+// CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
 // COMMONIR:      ret <4 x half> [[FMLA]]
 float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
  return vfms_laneq_f16(a, b, c, 7);
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8068,29 +8068,29 @@ multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
  let Predicates = [HasNEON, HasFullFP16] in {
  // Patterns for f16: DUPLANE, DUP scalar and vector_extract.
  def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
-                           (AArch64duplane16 (v8f16 V128:$Rm),
+                           (AArch64duplane16 (v8f16 V128_lo:$Rm),
                                           VectorIndexH:$idx))),
            (!cast<Instruction>(INST # "v8i16_indexed")
-                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexH:$idx)>;
+                V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>;
  def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
                           (AArch64dup (f16 FPR16Op:$Rm)))),
            (!cast<Instruction>(INST # "v8i16_indexed") V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR16Op:$Rm, hsub), (i64 0))>;

  def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn),
-                           (AArch64duplane16 (v8f16 V128:$Rm),
-                                           VectorIndexS:$idx))),
+                           (AArch64duplane16 (v8f16 V128_lo:$Rm),
+                                           VectorIndexH:$idx))),
            (!cast<Instruction>(INST # "v4i16_indexed")
-                V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+                V64:$Rd, V64:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>;
  def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn),
                           (AArch64dup (f16 FPR16Op:$Rm)))),
            (!cast<Instruction>(INST # "v4i16_indexed") V64:$Rd, V64:$Rn,
                (SUBREG_TO_REG (i32 0), FPR16Op:$Rm, hsub), (i64 0))>;

  def : Pat<(f16 (OpNode (f16 FPR16:$Rd), (f16 FPR16:$Rn),
-                         (vector_extract (v8f16 V128:$Rm), VectorIndexH:$idx))),
+                         (vector_extract (v8f16 V128_lo:$Rm), VectorIndexH:$idx))),
            (!cast<Instruction>(INST # "v1i16_indexed") FPR16:$Rd, FPR16:$Rn,
-                V128:$Rm, VectorIndexH:$idx)>;
+                V128_lo:$Rm, VectorIndexH:$idx)>;
  } // Predicates = [HasNEON, HasFullFP16]

  // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.