[SystemZ] Improve handling of vector alignments.

Make the DataLayout string always specify a vector alignment of 8 bytes,
regardless of the vector ABI. This makes the DataLayout depend only on the
target triple, which is what is generally expected (and checked by assertions).

On older architectures where vectors use the natural alignment (16 bytes),
the front end will maintain the same behavior and produce an overalignment
compared to the datalayout.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D131158
This commit is contained in:
Jonas Paulsson 2022-08-04 12:16:44 +02:00
parent e09c750498
commit de0e3117d4
6 changed files with 132 additions and 75 deletions

View File

@ -51,13 +51,13 @@ public:
// All vector types are default aligned on an 8-byte boundary, even if the
// vector facility is not available. That is different from Linux.
MaxVectorAlign = 64;
// Compared to Linux/ELF, the data layout differs only in some details:
// - name mangling is GOFF
// - 128 bit vector types are 64 bit aligned
// Compared to Linux/ELF, the data layout differs only in that name
// mangling is GOFF.
resetDataLayout(
"E-m:l-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64");
} else
resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
"-v128:64-a:8:16-n32:64");
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
HasStrictFP = true;
}
@ -171,12 +171,14 @@ public:
}
HasVector &= !SoftFloat;
// If we use the vector ABI, vector types are 64-bit aligned.
if (HasVector && !getTriple().isOSzOS()) {
// If we use the vector ABI, vector types are 64-bit aligned. The
// DataLayout string is always set to this alignment, as it is not
// required to follow the alignment emitted by the front end. It is
// generally assumed that the DataLayout should reflect only the
// target triple and not any specific feature.
if (HasVector && !getTriple().isOSzOS())
MaxVectorAlign = 64;
resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
"-v128:64-a:8:16-n32:64");
}
return true;
}

View File

@ -0,0 +1,68 @@
// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-llvm \
// RUN: | FileCheck %s -check-prefix=VECIR
// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature +vector -emit-obj -S \
// RUN: | FileCheck %s -check-prefix=VECASM
// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-llvm \
// RUN: | FileCheck %s -check-prefix=SCALIR
// RUN: %clang_cc1 -triple s390x-linux-gnu %s -o - -target-feature -vector -emit-obj -S \
// RUN: | FileCheck %s -check-prefix=SCALASM
// 16-byte vector of four signed ints; this is the type whose alignment the
// expectations further down inspect.
typedef __attribute__((vector_size(16))) signed int vec_sint;
// Declared volatile; the expected IR below shows the read of this global as a
// volatile load.
volatile vec_sint GlobVsi;
// An int member placed ahead of the vector member, so the offset of Vsi (and
// any padding before it) depends on the alignment given to the vector type.
struct S {
int A;
vec_sint Vsi;
} GlobS;
// Copies the global vector into the struct member: one vector load from
// GlobVsi followed by one vector store into GlobS.Vsi.
void fun() {
GlobS.Vsi = GlobVsi;
}
// VECIR: %struct.S = type { i32, <4 x i32> }
// VECIR: @GlobVsi = global <4 x i32> zeroinitializer, align 8
// VECIR: @GlobS = global %struct.S zeroinitializer, align 8
// VECIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 8
// VECIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 1), align 8
// VECASM: lgrl %r1, GlobVsi@GOT
// VECASM-NEXT: vl %v0, 0(%r1), 3
// VECASM-NEXT: lgrl %r1, GlobS@GOT
// VECASM-NEXT: vst %v0, 8(%r1), 3
//
// VECASM: .globl GlobVsi
// VECASM: .p2align 3
// VECASM: GlobVsi:
// VECASM: .space 16
// VECASM: .globl GlobS
// VECASM: .p2align 3
// VECASM: GlobS:
// VECASM: .space 24
// SCALIR: %struct.S = type { i32, [12 x i8], <4 x i32> }
// SCALIR: @GlobVsi = global <4 x i32> zeroinitializer, align 16
// SCALIR: @GlobS = global %struct.S zeroinitializer, align 16
// SCALIR: %0 = load volatile <4 x i32>, ptr @GlobVsi, align 16
// SCALIR: store <4 x i32> %0, ptr getelementptr inbounds (%struct.S, ptr @GlobS, i32 0, i32 2), align 16
// SCALASM: lgrl %r1, GlobVsi@GOT
// SCALASM-NEXT: l %r0, 0(%r1)
// SCALASM-NEXT: l %r2, 4(%r1)
// SCALASM-NEXT: l %r3, 8(%r1)
// SCALASM-NEXT: l %r4, 12(%r1)
// SCALASM-NEXT: lgrl %r1, GlobS@GOT
// SCALASM-NEXT: st %r4, 28(%r1)
// SCALASM-NEXT: st %r3, 24(%r1)
// SCALASM-NEXT: st %r2, 20(%r1)
// SCALASM-NEXT: st %r0, 16(%r1)
//
// SCALASM: .globl GlobVsi
// SCALASM: .p2align 4
// SCALASM: GlobVsi:
// SCALASM: .space 16
// SCALASM: .globl GlobS
// SCALASM: .p2align 4
// SCALASM: GlobS:
// SCALASM: .space 32

View File

@ -223,7 +223,7 @@
// RUN: FileCheck %s -check-prefix=SYSTEMZ
// RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -target-feature +soft-float -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SYSTEMZ
// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
// SYSTEMZ: target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
// RUN: %clang_cc1 -triple s390x-unknown -target-cpu z13 -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SYSTEMZ-VECTOR

View File

@ -42,37 +42,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
initializeSystemZTDCPassPass(PR);
}
// Determine whether we use the vector ABI.
static bool UsesVectorABI(StringRef CPU, StringRef FS) {
  // The vector ABI is used whenever the vector facility is available. That
  // is the default for z13 and later; the CPUs listed here (and an empty or
  // "generic" CPU) default to no vector facility.
  const bool IsPreVectorCPU =
      CPU.empty() || CPU == "generic" ||
      CPU == "z10" || CPU == "z196" || CPU == "zEC12" ||
      CPU == "arch8" || CPU == "arch9" || CPU == "arch10";
  bool HasVector = !IsPreVectorCPU;
  bool UsesSoftFloat = false;

  // Walk the comma-separated feature string in order, so that a later
  // "[+-]vector" or "[+-]soft-float" element overrides an earlier one.
  SmallVector<StringRef, 3> Elements;
  FS.split(Elements, ',', -1, false /* KeepEmpty */);
  for (auto &Element : Elements) {
    if (Element == "-vector")
      HasVector = false;
    else if (Element == "vector" || Element == "+vector")
      HasVector = true;
    else if (Element == "-soft-float")
      UsesSoftFloat = false;
    else if (Element == "soft-float" || Element == "+soft-float")
      UsesSoftFloat = true;
  }

  // Soft-float rules out the vector ABI even if the vector feature is set.
  return HasVector && !UsesSoftFloat;
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
StringRef FS) {
bool VectorABI = UsesVectorABI(CPU, FS);
static std::string computeDataLayout(const Triple &TT) {
std::string Ret;
// Big endian.
@ -92,10 +62,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// 128-bit floats are aligned only to 64 bits.
Ret += "-f128:64";
// When using the vector ABI on Linux, 128-bit vectors are also aligned to 64
// bits. On z/OS, vector types are always aligned to 64 bits.
if (VectorABI || TT.isOSzOS())
Ret += "-v128:64";
// The DataLayout string always holds a vector alignment of 64 bits, see
// comment in clang/lib/Basic/Targets/SystemZ.h.
Ret += "-v128:64";
// We prefer 16 bits of alignment for all globals; see above.
Ret += "-a:8:16";
@ -174,7 +143,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(
T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
OL),

View File

@ -38,11 +38,11 @@ entry:
attributes #3 = { "target-cpu"="z14" "target-features"="+vector" "use-soft-float"="false" }
define <2 x double> @fun3(<2 x double>* %A) #3 {
; CHECK-LABEL: fun3:
; DEFAULT: vl %v24, 0(%r2), 4
; DEFAULT: vl %v24, 0(%r2), 3
; SOFT-FLOAT: lg %r0, 0(%r2)
; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
; SOFT-FLOAT-NEXT: lgr %r2, %r0
; NO-SOFT-FL: vl %v24, 0(%r2), 4
; NO-SOFT-FL: vl %v24, 0(%r2), 3
; NO-VECTOR: ld %f0, 0(%r2)
; NO-VECTOR-NEXT: ld %f2, 8(%r2)
; CHECK-NEXT: br %r14
@ -111,11 +111,11 @@ entry:
attributes #7 = { "target-cpu"="zEC12" "target-features"="+vector" "use-soft-float"="false" }
define <2 x double> @fun7(<2 x double>* %A) #7 {
; CHECK-LABEL: fun7:
; DEFAULT: vl %v24, 0(%r2), 4
; DEFAULT: vl %v24, 0(%r2), 3
; SOFT-FLOAT: lg %r0, 0(%r2)
; SOFT-FLOAT-NEXT: lg %r3, 8(%r2)
; SOFT-FLOAT-NEXT: lgr %r2, %r0
; NO-SOFT-FL: vl %v24, 0(%r2), 4
; NO-SOFT-FL: vl %v24, 0(%r2), 3
; NO-VECTOR: ld %f0, 0(%r2)
; NO-VECTOR-NEXT: ld %f2, 8(%r2)
; CHECK-NEXT: br %r14

View File

@ -1,55 +1,73 @@
; Verify that we use the vector ABI datalayout if and only if
; the vector facility is present.
; Verify that a struct as generated by the frontend is correctly accessed in
; both cases of enabling/disabling the vector facility.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=+soft-float | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: -mattr=soft-float,-soft-float | \
; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-VECTOR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: -mattr=-soft-float,soft-float | \
; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
; RUN: FileCheck -check-prefixes=CHECK,CHECK-NOVECTOR %s
%struct.S = type { i8, <2 x i64> }
%struct.S_vx = type { i8, <2 x i64> }
%struct.S_novx = type { i8, [15 x i8], <2 x i64> }
define void @test(%struct.S* %s) nounwind {
; CHECK-VECTOR-LABEL: @test
define void @fun_vx(%struct.S_vx* %s) nounwind {
; CHECK-LABEL: @fun_vx
;
; CHECK-VECTOR: vl %v0, 8(%r2)
; CHECK-NOVECTOR-LABEL: @test
; CHECK-VECTOR: vst %v0, 8(%r2), 3
;
; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
%ptr = getelementptr %struct.S, %struct.S* %s, i64 0, i32 1
; CHECK-NOVECTOR-DAG: agsi 8(%r2), 1
%ptr = getelementptr %struct.S_vx, %struct.S_vx* %s, i64 0, i32 1
%vec = load <2 x i64>, <2 x i64>* %ptr
%add = add <2 x i64> %vec, <i64 1, i64 1>
store <2 x i64> %add, <2 x i64>* %ptr
ret void
}
define void @fun_novx(%struct.S_novx* %s) nounwind {
; CHECK-LABEL: @fun_novx
;
; CHECK-VECTOR: vl %v0, 16(%r2), 3
; CHECK-VECTOR: vst %v0, 16(%r2), 3
;
; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
%ptr = getelementptr %struct.S_novx, %struct.S_novx* %s, i64 0, i32 2
%vec = load <2 x i64>, <2 x i64>* %ptr
%add = add <2 x i64> %vec, <i64 1, i64 1>
store <2 x i64> %add, <2 x i64>* %ptr