Fix argument ABI for overaligned structs on ppc64le

When a struct with an alignment of 16 bytes (or higher) is passed by
value on ppc64le, it needs to be passed as an array of `i128` rather
than an array of `i64`. This forces the use of an even starting register.

For the case of a 16 byte struct with alignment 16 it is important
that `[1 x i128]` is used instead of `i128` -- apparently, the
latter will get treated similarly to `[2 x i64]`, not exhibiting
the correct ABI. Add a `force_array` flag to `Uniform` to support
this.

The relevant clang code can be found here:
https://github.com/llvm/llvm-project/blob/fe2119a7b0/clang/lib/CodeGen/Targets/PPC.cpp#L878-L884
https://github.com/llvm/llvm-project/blob/fe2119a7b0/clang/lib/CodeGen/Targets/PPC.cpp#L780-L784

I think the corresponding psABI wording is this:

> Fixed size aggregates and unions passed by value are mapped to as
> many doublewords of the parameter save area as the value uses in
> memory. Aggregates and unions are aligned according to their
> alignment requirements. This may result in doublewords being
> skipped for alignment.

In particular the last sentence.

Fixes https://github.com/rust-lang/rust/issues/122767.
This commit is contained in:
Nikita Popov 2024-03-20 14:38:32 +01:00
parent a2c72ce594
commit 009280c5e3
15 changed files with 152 additions and 33 deletions

View File

@ -150,7 +150,7 @@ impl LlvmType for CastTarget {
// Simplify to a single unit or an array if there's no prefix.
// This produces the same layout, but using a simpler type.
if self.prefix.iter().all(|x| x.is_none()) {
if rest_count == 1 {
if rest_count == 1 && !self.rest.force_array {
return rest_ll_unit;
}

View File

@ -31,7 +31,7 @@ where
RegKind::Vector => size.bits() == 64 || size.bits() == 128,
};
valid_unit.then_some(Uniform { unit, total: size })
valid_unit.then_some(Uniform { unit, total: size, force_array: false })
})
}
@ -60,7 +60,7 @@ where
let size = ret.layout.size;
let bits = size.bits();
if bits <= 128 {
ret.cast_to(Uniform { unit: Reg::i64(), total: size });
ret.cast_to(Uniform { unit: Reg::i64(), total: size, force_array: false });
return;
}
ret.make_indirect();
@ -100,9 +100,9 @@ where
};
if size.bits() <= 128 {
if align.bits() == 128 {
arg.cast_to(Uniform { unit: Reg::i128(), total: size });
arg.cast_to(Uniform { unit: Reg::i128(), total: size, force_array: false });
} else {
arg.cast_to(Uniform { unit: Reg::i64(), total: size });
arg.cast_to(Uniform { unit: Reg::i64(), total: size, force_array: false });
}
return;
}

View File

@ -21,7 +21,7 @@ where
RegKind::Vector => size.bits() == 64 || size.bits() == 128,
};
valid_unit.then_some(Uniform { unit, total: size })
valid_unit.then_some(Uniform { unit, total: size, force_array: false })
})
}
@ -49,7 +49,7 @@ where
let size = ret.layout.size;
let bits = size.bits();
if bits <= 32 {
ret.cast_to(Uniform { unit: Reg::i32(), total: size });
ret.cast_to(Uniform { unit: Reg::i32(), total: size, force_array: false });
return;
}
ret.make_indirect();
@ -78,7 +78,11 @@ where
let align = arg.layout.align.abi.bytes();
let total = arg.layout.size;
arg.cast_to(Uniform { unit: if align <= 4 { Reg::i32() } else { Reg::i64() }, total });
arg.cast_to(Uniform {
unit: if align <= 4 { Reg::i32() } else { Reg::i64() },
total,
force_array: false,
});
}
pub fn compute_abi_info<'a, Ty, C>(cx: &C, fn_abi: &mut FnAbi<'a, Ty>)

View File

@ -18,7 +18,7 @@ fn classify_ret<Ty>(arg: &mut ArgAbi<'_, Ty>) {
if total.bits() > 64 {
arg.make_indirect();
} else if total.bits() > 32 {
arg.cast_to(Uniform { unit: Reg::i32(), total });
arg.cast_to(Uniform { unit: Reg::i32(), total, force_array: false });
} else {
arg.cast_to(Reg::i32());
}
@ -38,7 +38,7 @@ fn classify_arg<Ty>(arg: &mut ArgAbi<'_, Ty>) {
if arg.layout.is_aggregate() {
let total = arg.layout.size;
if total.bits() > 32 {
arg.cast_to(Uniform { unit: Reg::i32(), total });
arg.cast_to(Uniform { unit: Reg::i32(), total, force_array: false });
} else {
arg.cast_to(Reg::i32());
}

View File

@ -195,7 +195,11 @@ where
if total.bits() <= xlen {
arg.cast_to(xlen_reg);
} else {
arg.cast_to(Uniform { unit: xlen_reg, total: Size::from_bits(xlen * 2) });
arg.cast_to(Uniform {
unit: xlen_reg,
total: Size::from_bits(xlen * 2),
force_array: false,
});
}
return false;
}
@ -281,6 +285,7 @@ fn classify_arg<'a, Ty, C>(
arg.cast_to(Uniform {
unit: if align_regs { double_xlen_reg } else { xlen_reg },
total: Size::from_bits(xlen * 2),
force_array: false,
});
}
if align_regs && is_vararg {

View File

@ -27,7 +27,10 @@ where
if arg.layout.is_aggregate() {
let pad_i32 = !offset.is_aligned(align);
arg.cast_to_and_pad_i32(Uniform { unit: Reg::i32(), total: size }, pad_i32);
arg.cast_to_and_pad_i32(
Uniform { unit: Reg::i32(), total: size, force_array: false },
pad_i32,
);
} else {
arg.extend_integer_width_to(32);
}

View File

@ -68,7 +68,7 @@ where
}
// Cast to a uniform int structure
ret.cast_to(Uniform { unit: Reg::i64(), total: size });
ret.cast_to(Uniform { unit: Reg::i64(), total: size, force_array: false });
} else {
ret.make_indirect();
}
@ -139,7 +139,7 @@ where
let rest_size = size - Size::from_bytes(8) * prefix_index as u64;
arg.cast_to(CastTarget {
prefix,
rest: Uniform { unit: Reg::i64(), total: rest_size },
rest: Uniform { unit: Reg::i64(), total: rest_size, force_array: false },
attrs: ArgAttributes {
regular: ArgAttribute::default(),
arg_ext: ArgExtension::None,

View File

@ -255,11 +255,14 @@ pub struct Uniform {
/// for 64-bit integers with a total size of 20 bytes. When the argument is actually passed,
/// this size will be rounded up to the nearest multiple of `unit.size`.
pub total: Size,
/// Force the use of an array, even if there is only a single element.
pub force_array: bool,
}
impl From<Reg> for Uniform {
fn from(unit: Reg) -> Uniform {
Uniform { unit, total: unit.size }
Uniform { unit, total: unit.size, force_array: false }
}
}

View File

@ -35,7 +35,7 @@ where
16 => Reg::i128(),
_ => unreachable!("Align is given as power of 2 no larger than 16 bytes"),
};
arg.cast_to(Uniform { unit, total: Size::from_bytes(2 * align_bytes) });
arg.cast_to(Uniform { unit, total: Size::from_bytes(2 * align_bytes), force_array: false });
} else {
// FIXME: find a better way to do this. See https://github.com/rust-lang/rust/issues/117271.
arg.make_direct_deprecated();

View File

@ -2,7 +2,7 @@
// Alignment of 128 bit types is not currently handled, this will
// need to be fixed when PowerPC vector support is added.
use crate::abi::call::{ArgAbi, FnAbi, Reg, RegKind, Uniform};
use crate::abi::call::{Align, ArgAbi, FnAbi, Reg, RegKind, Uniform};
use crate::abi::{Endian, HasDataLayout, TyAbiInterface};
use crate::spec::HasTargetSpec;
@ -37,7 +37,7 @@ where
RegKind::Vector => arg.layout.size.bits() == 128,
};
valid_unit.then_some(Uniform { unit, total: arg.layout.size })
valid_unit.then_some(Uniform { unit, total: arg.layout.size, force_array: false })
})
}
@ -81,7 +81,7 @@ where
Reg::i64()
};
ret.cast_to(Uniform { unit, total: size });
ret.cast_to(Uniform { unit, total: size, force_array: false });
return;
}
@ -108,18 +108,21 @@ where
}
let size = arg.layout.size;
let (unit, total) = if size.bits() <= 64 {
if size.bits() <= 64 {
// Aggregates smaller than a doubleword should appear in
// the least-significant bits of the parameter doubleword.
(Reg { kind: RegKind::Integer, size }, size)
arg.cast_to(Reg { kind: RegKind::Integer, size })
} else {
// Aggregates larger than a doubleword should be padded
// at the tail to fill out a whole number of doublewords.
let reg_i64 = Reg::i64();
(reg_i64, size.align_to(reg_i64.align(cx)))
// Aggregates larger than i64 should be padded at the tail to fill out a whole number
// of i64s or i128s, depending on the aggregate alignment. Always use an array for
// this, even if there is only a single element.
let reg = if arg.layout.align.abi.bytes() > 8 { Reg::i128() } else { Reg::i64() };
arg.cast_to(Uniform {
unit: reg,
total: size.align_to(Align::from_bytes(reg.size.bytes()).unwrap()),
force_array: true,
})
};
arg.cast_to(Uniform { unit, total });
}
pub fn compute_abi_info<'a, Ty, C>(cx: &C, fn_abi: &mut FnAbi<'a, Ty>)

View File

@ -201,7 +201,11 @@ where
if total.bits() <= xlen {
arg.cast_to(xlen_reg);
} else {
arg.cast_to(Uniform { unit: xlen_reg, total: Size::from_bits(xlen * 2) });
arg.cast_to(Uniform {
unit: xlen_reg,
total: Size::from_bits(xlen * 2),
force_array: false,
});
}
return false;
}
@ -287,6 +291,7 @@ fn classify_arg<'a, Ty, C>(
arg.cast_to(Uniform {
unit: if align_regs { double_xlen_reg } else { xlen_reg },
total: Size::from_bits(xlen * 2),
force_array: false,
});
}
if align_regs && is_vararg {

View File

@ -27,7 +27,10 @@ where
if arg.layout.is_aggregate() {
let pad_i32 = !offset.is_aligned(align);
arg.cast_to_and_pad_i32(Uniform { unit: Reg::i32(), total: size }, pad_i32);
arg.cast_to_and_pad_i32(
Uniform { unit: Reg::i32(), total: size, force_array: false },
pad_i32,
);
} else {
arg.extend_integer_width_to(32);
}

View File

@ -192,7 +192,7 @@ where
arg.cast_to(CastTarget {
prefix: data.prefix,
rest: Uniform { unit: Reg::i64(), total: rest_size },
rest: Uniform { unit: Reg::i64(), total: rest_size, force_array: false },
attrs: ArgAttributes {
regular: data.arg_attribute,
arg_ext: ArgExtension::None,
@ -205,7 +205,7 @@ where
}
}
arg.cast_to(Uniform { unit: Reg::i64(), total });
arg.cast_to(Uniform { unit: Reg::i64(), total, force_array: false });
}
pub fn compute_abi_info<'a, Ty, C>(cx: &C, fn_abi: &mut FnAbi<'a, Ty>)

View File

@ -1,4 +1,4 @@
use crate::abi::call::{ArgAbi, FnAbi, Uniform};
use crate::abi::call::{ArgAbi, FnAbi};
use crate::abi::{HasDataLayout, TyAbiInterface};
fn unwrap_trivial_aggregate<'a, Ty, C>(cx: &C, val: &mut ArgAbi<'a, Ty>) -> bool
@ -10,7 +10,7 @@ where
if let Some(unit) = val.layout.homogeneous_aggregate(cx).ok().and_then(|ha| ha.unit()) {
let size = val.layout.size;
if unit.size == size {
val.cast_to(Uniform { unit, total: size });
val.cast_to(unit);
return true;
}
}

View File

@ -0,0 +1,93 @@
// Test that structs aligned to 128 bits are passed with the correct ABI on powerpc64le.
// This is similar to aarch64-struct-align-128.rs, but for ppc.
//@ compile-flags: --target powerpc64le-unknown-linux-gnu
//@ needs-llvm-components: powerpc
#![feature(no_core, lang_items)]
#![crate_type = "lib"]
#![no_core]
// Local stand-ins for the `sized`, `freeze` and `copy` lang items. They are
// declared here because this crate is `#![no_core]` and therefore has no
// library providing them.
#[lang="sized"]
trait Sized { }
#[lang="freeze"]
trait Freeze { }
#[lang="copy"]
trait Copy { }
// Baseline aggregate: two `u64` fields in `repr(C)` layout with no explicit
// alignment attribute.
#[repr(C)]
pub struct Align8 {
pub a: u64,
pub b: u64,
}
// `repr(transparent)` wrapper whose only field is an `Align8`.
#[repr(transparent)]
pub struct Transparent8 {
a: Align8
}
// `repr(C)` struct whose only field is an `Align8` (nested-aggregate variant
// of the baseline case).
#[repr(C)]
pub struct Wrapped8 {
a: Align8,
}
// Foreign declaration taking the three non-overaligned variants by value.
// The expectation below requires each of them to be lowered to an array of
// two `i64` values.
extern "C" {
// CHECK: declare void @test_8([2 x i64], [2 x i64], [2 x i64])
fn test_8(a: Align8, b: Transparent8, c: Wrapped8);
}
// Two `u64` fields in C layout, with the alignment raised to 16 bytes.
// Both the size and the alignment of this type are 16 bytes.
#[repr(C, align(16))]
pub struct Align16 {
    pub a: u64,
    pub b: u64,
}
// `repr(transparent)` wrapper whose only field is an `Align16`.
#[repr(transparent)]
pub struct Transparent16 {
a: Align16
}
// `repr(C)` struct whose only field is an `Align16`; it carries no alignment
// attribute of its own.
#[repr(C)]
pub struct Wrapped16 {
pub a: Align16,
}
// Foreign declaration taking the three 16-byte-aligned variants by value.
// Each one is expected to be lowered to a single-element array of `i128`,
// not to a bare `i128` scalar.
extern "C" {
// It's important that this produces [1 x i128] rather than just i128!
// CHECK: declare void @test_16([1 x i128], [1 x i128], [1 x i128])
fn test_16(a: Align16, b: Transparent16, c: Wrapped16);
}
// Three `u64` fields in C layout, with the alignment raised to 32 bytes.
// The 24 bytes of field data are padded up to a total size of 32 bytes.
#[repr(C, align(32))]
pub struct Align32 {
    pub a: u64,
    pub b: u64,
    pub c: u64,
}
// `repr(transparent)` wrapper whose only field is an `Align32`.
#[repr(transparent)]
pub struct Transparent32 {
a: Align32
}
// `repr(C)` struct whose only field is an `Align32`; it carries no alignment
// attribute of its own.
#[repr(C)]
pub struct Wrapped32 {
pub a: Align32,
}
// Foreign declaration taking the three 32-byte-aligned variants by value.
// Each one is expected to be lowered to an array of two `i128` values.
extern "C" {
// CHECK: declare void @test_32([2 x i128], [2 x i128], [2 x i128])
fn test_32(a: Align32, b: Transparent32, c: Wrapped32);
}
// Forwards every argument to the matching foreign function, calling each of
// `test_8`, `test_16` and `test_32` exactly once.
// NOTE(review): presumably the calls exist so that the `declare` lines checked
// above are emitted for this crate -- confirm.
// The function is `unsafe` because it calls foreign functions directly.
pub unsafe fn main(
a1: Align8, a2: Transparent8, a3: Wrapped8,
b1: Align16, b2: Transparent16, b3: Wrapped16,
c1: Align32, c2: Transparent32, c3: Wrapped32,
) {
test_8(a1, a2, a3);
test_16(b1, b2, b3);
test_32(c1, c2, c3);
}