[SystemZ] Improve handling of vector alignments.

Make the DataLayout string always hold a vector alignment of 8 bytes, regardless of the vector ABI. This makes the DataLayout depend only on the target triple, which is what is generally expected (e.g. by assertions). On older architectures, where vectors use their natural alignment (16 bytes), the front end maintains its existing behavior and produces an overalignment compared to the DataLayout. Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D131158
2022-08-04 12:16:44 +02:00 · 2022-08-04 12:16:44 +02:00 · de0e3117d4
parent e09c750498
commit de0e3117d4
6 changed files with 132 additions and 75 deletions
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@ -51,13 +51,13 @@ public:
      // All vector types are default aligned on an 8-byte boundary, even if the
      // vector facility is not available. That is different from Linux.
      MaxVectorAlign = 64;
-      // Compared to Linux/ELF, the data layout differs only in some details:
-      // - name mangling is GOFF
-      // - 128 bit vector types are 64 bit aligned
+      // Compared to Linux/ELF, the data layout differs only in that name
+      // mangling is GOFF.
      resetDataLayout(
          "E-m:l-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64");
    } else
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
+      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
+                      "-v128:64-a:8:16-n32:64");
    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
    HasStrictFP = true;
  }
@ -171,12 +171,14 @@ public:
    }
    HasVector &= !SoftFloat;

-    // If we use the vector ABI, vector types are 64-bit aligned.
-    if (HasVector && !getTriple().isOSzOS()) {
+    // If we use the vector ABI, vector types are 64-bit aligned. The
+    // DataLayout string is always set to this alignment as it is not a
+    // requirement that it follows the alignment emitted by the front end. It
+    // is generally assumed that the DataLayout should reflect only the
+    // target triple and not any specific feature.
+    if (HasVector && !getTriple().isOSzOS())
      MaxVectorAlign = 64;
-      resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
-                      "-v128:64-a:8:16-n32:64");
-    }
+
    return true;
  }

--- a/clang/test/CodeGen/SystemZ/align-systemz-02.c
+++ b/clang/test/CodeGen/SystemZ/align-systemz-02.c
@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=VECIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=VECASM
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-llvm \
+// RUN:    | FileCheck %s -check-prefix=SCALIR
+// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-obj -S \
+// RUN:    | FileCheck %s -check-prefix=SCALASM
+
+typedef __attribute__((vector_size(16))) signed int vec_sint;
+
+volatile vec_sint GlobVsi;
+
+struct S {
+  int A;
+  vec_sint Vsi;
+} GlobS;
+
+void fun() {
+  GlobS.Vsi = GlobVsi;
+}
+
+// VECIR: %struct.S = type { i32, <4 x i32> }
+// VECIR: @GlobVsi = global <4 x i32> zeroinitializer, align 8
+// VECIR: @GlobS = global %struct.S zeroinitializer, align 8
+// VECIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 8
+// VECIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 1), align 8
+
+// VECASM:      lgrl %r1, GlobVsi@GOT
+// VECASM-NEXT: vl   %v0, 0(%r1), 3
+// VECASM-NEXT: lgrl %r1, GlobS@GOT
+// VECASM-NEXT: vst  %v0, 8(%r1), 3
+//
+// VECASM:   .globl  GlobVsi
+// VECASM:   .p2align        3
+// VECASM: GlobVsi:
+// VECASM:   .space  16
+// VECASM:   .globl  GlobS
+// VECASM:   .p2align        3
+// VECASM: GlobS:
+// VECASM:   .space  24
+
+// SCALIR: %struct.S = type { i32, [12 x i8], <4 x i32> }
+// SCALIR: @GlobVsi = global <4 x i32> zeroinitializer, align 16
+// SCALIR: @GlobS = global %struct.S zeroinitializer, align 16
+// SCALIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 16
+// SCALIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 2), align 16
+
+// SCALASM:      lgrl    %r1, GlobVsi@GOT
+// SCALASM-NEXT: l       %r0, 0(%r1)
+// SCALASM-NEXT: l       %r2, 4(%r1)
+// SCALASM-NEXT: l       %r3, 8(%r1)
+// SCALASM-NEXT: l       %r4, 12(%r1)
+// SCALASM-NEXT: lgrl    %r1, GlobS@GOT
+// SCALASM-NEXT: st      %r4, 28(%r1)
+// SCALASM-NEXT: st      %r3, 24(%r1)
+// SCALASM-NEXT: st      %r2, 20(%r1)
+// SCALASM-NEXT: st      %r0, 16(%r1)
+//
+// SCALASM:   .globl  GlobVsi
+// SCALASM:   .p2align        4
+// SCALASM: GlobVsi:
+// SCALASM:   .space  16
+// SCALASM:   .globl  GlobS
+// SCALASM:   .p2align        4
+// SCALASM: GlobS:
+// SCALASM:   .space  32
+
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@ -223,7 +223,7 @@
 // RUN: FileCheck %s -check-prefix=SYSTEMZ
 // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -target-feature +soft-float -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=SYSTEMZ
-// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
+// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"

 // RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=SYSTEMZ-VECTOR
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@ -42,37 +42,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
  initializeSystemZTDCPassPass(PR);
 }

-// Determine whether we use the vector ABI.
-static bool UsesVectorABI(StringRef CPU, StringRef FS) {
-  // We use the vector ABI whenever the vector facility is avaiable.
-  // This is the case by default if CPU is z13 or later, and can be
-  // overridden via "[+-]vector" feature string elements.
-  bool VectorABI = true;
-  bool SoftFloat = false;
-  if (CPU.empty() || CPU == "generic" ||
-      CPU == "z10" || CPU == "z196" || CPU == "zEC12" ||
-      CPU == "arch8" || CPU == "arch9" || CPU == "arch10")
-    VectorABI = false;
-
-  SmallVector<StringRef, 3> Features;
-  FS.split(Features, ',', -1, false /* KeepEmpty */);
-  for (auto &Feature : Features) {
-    if (Feature == "vector" || Feature == "+vector")
-      VectorABI = true;
-    if (Feature == "-vector")
-      VectorABI = false;
-    if (Feature == "soft-float" || Feature == "+soft-float")
-      SoftFloat = true;
-    if (Feature == "-soft-float")
-      SoftFloat = false;
-  }
-
-  return VectorABI && !SoftFloat;
-}
-
-static std::string computeDataLayout(const Triple &TT, StringRef CPU,
-                                     StringRef FS) {
-  bool VectorABI = UsesVectorABI(CPU, FS);
+static std::string computeDataLayout(const Triple &TT) {
  std::string Ret;

  // Big endian.
@ -92,10 +62,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
  // 128-bit floats are aligned only to 64 bits.
  Ret += "-f128:64";

-  // When using the vector ABI on Linux, 128-bit vectors are also aligned to 64
-  // bits. On z/OS, vector types are always aligned to 64 bits.
-  if (VectorABI || TT.isOSzOS())
-    Ret += "-v128:64";
+  // The DataLayout string always holds a vector alignment of 64 bits, see
+  // comment in clang/lib/Basic/Targets/SystemZ.h.
+  Ret += "-v128:64";

  // We prefer 16 bits of aligned for all globals; see above.
  Ret += "-a:8:16";
@ -174,7 +143,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
                                           Optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool JIT)
    : LLVMTargetMachine(
-          T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
+          T, computeDataLayout(TT), TT, CPU, FS, Options,
          getEffectiveRelocModel(RM),
          getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
          OL),
--- a/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
+++ b/llvm/test/CodeGen/SystemZ/function-attributes-01.ll
@ -38,11 +38,11 @@ entry:
 attributes #3 = { "target-cpu"="z14" "target-features"="+vector" "use-soft-float"="false" }
 define <2 x double> @fun3(<2 x double>* %A) #3 {
 ; CHECK-LABEL:     fun3:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
 ; SOFT-FLOAT:      lg %r0, 0(%r2)
 ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
 ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
 ; NO-VECTOR:       ld %f0, 0(%r2)
 ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
 ; CHECK-NEXT:      br %r14
@ -111,11 +111,11 @@ entry:
 attributes #7 = { "target-cpu"="zEC12" "target-features"="+vector" "use-soft-float"="false" }
 define <2 x double> @fun7(<2 x double>* %A) #7 {
 ; CHECK-LABEL:     fun7:
-; DEFAULT:         vl %v24, 0(%r2), 4
+; DEFAULT:         vl %v24, 0(%r2), 3
 ; SOFT-FLOAT:      lg %r0, 0(%r2)
 ; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
 ; SOFT-FLOAT-NEXT: lgr %r2, %r0
-; NO-SOFT-FL:      vl %v24, 0(%r2), 4
+; NO-SOFT-FL:      vl %v24, 0(%r2), 3
 ; NO-VECTOR:       ld %f0, 0(%r2)
 ; NO-VECTOR-NEXT:  ld %f2, 8(%r2)
 ; CHECK-NEXT:      br %r14
--- a/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-abi-align.ll
@ -1,55 +1,73 @@
-; Verify that we use the vector ABI datalayout if and only if
-; the vector facility is present.
+; Verify that a struct as generated by the frontend is correctly accessed in
+; both cases of enabling/disabling the vector facility.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s

 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s

 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s

 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=+soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
 ; RUN:   -mattr=soft-float,-soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
 ; RUN:   -mattr=-soft-float,soft-float | \
-; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN:   FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s

-%struct.S = type { i8, <2 x i64> }
+%struct.S_vx = type { i8, <2 x i64> }
+%struct.S_novx = type { i8, [15 x i8], <2 x i64> }

-define void @test(%struct.S* %s) nounwind {
-; CHECK-VECTOR-LABEL: @test
+define void @fun_vx(%struct.S_vx* %s) nounwind {
+; CHECK-LABEL: @fun_vx
+;
 ; CHECK-VECTOR: vl %v0, 8(%r2)
-; CHECK-NOVECTOR-LABEL: @test
+; CHECK-VECTOR: vst %v0, 8(%r2), 3
+;
 ; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
-; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
-  %ptr = getelementptr %struct.S, %struct.S* %s, i64 0, i32 1
+; CHECK-NOVECTOR-DAG: agsi 8(%r2), 1
+  %ptr = getelementptr %struct.S_vx, %struct.S_vx* %s, i64 0, i32 1
+  %vec = load <2 x i64>, <2 x i64>* %ptr
+  %add = add <2 x i64> %vec, <i64 1, i64 1>
+  store <2 x i64> %add, <2 x i64>* %ptr
+  ret void
+}
+
+define void @fun_novx(%struct.S_novx* %s) nounwind {
+; CHECK-LABEL: @fun_novx
+;
+; CHECK-VECTOR: vl  %v0, 16(%r2), 3
+; CHECK-VECTOR: vst %v0, 16(%r2), 3
+;
+; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
+; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
+  %ptr = getelementptr %struct.S_novx, %struct.S_novx* %s, i64 0, i32 2
  %vec = load <2 x i64>, <2 x i64>* %ptr
  %add = add <2 x i64> %vec, <i64 1, i64 1>
  store <2 x i64> %add, <2 x i64>* %ptr