llvm-project/clang/test/CodeGen/aapcs-bitfield.c

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
[ARM] Follow AAPCS standard for volatile bit-field access width
2020-08-28 22:08:02 +08:00

This patch resumes the work of D16586. According to the AAPCS, volatile
bit-fields should be accessed using containers of the width of their
declared type. In that case:

```
struct S1 { short a : 1; };
```

should be accessed using loads and stores of width sizeof(short), whereas
previously the compiler loaded only the minimum required width (char in
this case). However, as discussed in D16586, that could overwrite
non-volatile bit-fields, which conflicted with the C and C++ object models
by creating data races on memory locations that are not part of the
bit-field, e.g.

```
struct S2 { short a; int b : 16; };
```

Accessing `S2.b` would also access `S2.a`.

The AAPCS Release 2020Q2
(https://documentation-service.arm.com/static/5efb7fbedbdee951c1ccf186?token=),
section 8.1 Data Types, page 36, "Volatile bit-fields - preserving number
and width of container accesses", has been updated to avoid conflict with
the C++ memory model. The note now reads:

```
This ABI does not place any restrictions on the access widths of bit-fields
where the container overlaps with a non-bit-field member or where the
container overlaps with any zero length bit-field placed between two other
bit-fields. This is because the C/C++ memory model defines these as being
separate memory locations, which can be accessed by two threads
simultaneously. For this reason, compilers must be permitted to use a
narrower memory access width (including splitting the access into multiple
instructions) to avoid writing to a different memory location. For example,
in struct S { int a:24; char b; }; a write to a must not also write to the
location occupied by b; this requires at least two memory accesses in all
current Arm architectures. In the same way, in
struct S { int a:24; int:0; int b:8; };, writes to a or b must not
overwrite each other.
```

Patch D16586 was updated to follow this behavior by verifying that we only
change a volatile bit-field access when:
- it does not overlap with any non-bit-field member
- we only access memory inside the bounds of the record
- it does not overlap a zero-length bit-field.

The number of memory accesses should also be preserved; that will be
implemented by D67399.

Differential Revision: https://reviews.llvm.org/D72932

The following people contributed to this patch:
- Diogo Sampaio
- Ties Stuij
// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=LE
// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=BE
// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=LENUMLOADS
// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=BENUMLOADS
// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=LEWIDTH
// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BEWIDTH
// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=LEWIDTHNUM
// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=BEWIDTHNUM
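// This test checks the IR access widths Clang emits for (volatile) bit-field
// loads and stores on AAPCS targets. The FileCheck prefixes correspond to the
// RUN configurations above:
//   LE / BE                 : little-/big-endian, AAPCS volatile bit-field
//                             container width disabled (-fno-aapcs-bitfield-width)
//   LENUMLOADS / BENUMLOADS : as LE/BE, plus -faapcs-bitfield-load
//   LEWIDTH / BEWIDTH       : AAPCS volatile bit-field container width enabled
//                             (no -fno-aapcs-bitfield-width)
//   LEWIDTHNUM / BEWIDTHNUM : container width enabled, plus -faapcs-bitfield-load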
struct st0 {
short c : 7;
};
// LE-LABEL: @st0_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LE-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LE-NEXT: ret i32 [[CONV]]
//
// BE-LABEL: @st0_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BE-NEXT: ret i32 [[CONV]]
//
// LENUMLOADS-LABEL: @st0_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: ret i32 [[CONV]]
//
// BENUMLOADS-LABEL: @st0_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: ret i32 [[CONV]]
//
// LEWIDTH-LABEL: @st0_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: ret i32 [[CONV]]
//
// BEWIDTH-LABEL: @st0_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: ret i32 [[CONV]]
//
// LEWIDTHNUM-LABEL: @st0_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
// BEWIDTHNUM-LABEL: @st0_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
int st0_check_load(struct st0 *m) {
return m->c;
}
// LE-LABEL: @st0_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LE-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// LE-NEXT: ret void
//
// BE-LABEL: @st0_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BE-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st0_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st0_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st0_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st0_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st0_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st0_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2
// BEWIDTHNUM-NEXT: ret void
//
void st0_check_store(struct st0 *m) {
m->c = 1;
}
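// In st1 the 10-bit and 6-bit fields share a single 16-bit storage unit, so
// st1.c is accessed with i16 loads and stores in every configuration.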
struct st1 {
int a : 10;
short c : 6;
};
// LE-LABEL: @st1_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10
// LE-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// LE-NEXT: ret i32 [[CONV]]
//
// BE-LABEL: @st1_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10
// BE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10
// BE-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// BE-NEXT: ret i32 [[CONV]]
//
// LENUMLOADS-LABEL: @st1_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10
// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: ret i32 [[CONV]]
//
// BENUMLOADS-LABEL: @st1_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10
// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: ret i32 [[CONV]]
//
// LEWIDTH-LABEL: @st1_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: ret i32 [[CONV]]
//
// BEWIDTH-LABEL: @st1_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: ret i32 [[CONV]]
//
// LEWIDTHNUM-LABEL: @st1_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
// BEWIDTHNUM-LABEL: @st1_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
int st1_check_load(struct st1 *m) {
return m->c;
}
// LE-LABEL: @st1_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023
// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024
// LE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @st1_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64
// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st1_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024
// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st1_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st1_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024
// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st1_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st1_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024
// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st1_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void st1_check_store(struct st1 *m) {
m->c = 1;
}
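// In st2 the 10-bit and 7-bit fields do not fit in one 16-bit storage unit,
// so st2.c gets its own byte of storage and is accessed with i8 loads and
// stores in every configuration.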
struct st2 {
int a : 10;
short c : 7;
};
// LE-LABEL: @st2_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LE-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LE-NEXT: ret i32 [[CONV]]
//
// BE-LABEL: @st2_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BE-NEXT: ret i32 [[CONV]]
//
// LENUMLOADS-LABEL: @st2_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: ret i32 [[CONV]]
//
// BENUMLOADS-LABEL: @st2_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: ret i32 [[CONV]]
//
// LEWIDTH-LABEL: @st2_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: ret i32 [[CONV]]
//
// BEWIDTH-LABEL: @st2_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: ret i32 [[CONV]]
//
// LEWIDTHNUM-LABEL: @st2_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
// BEWIDTHNUM-LABEL: @st2_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
int st2_check_load(struct st2 *m) {
return m->c;
}
// LE-LABEL: @st2_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LE-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// LE-NEXT: ret void
//
// BE-LABEL: @st2_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BE-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st2_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st2_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st2_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st2_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st2_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st2_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2
// BEWIDTHNUM-NEXT: ret void
//
void st2_check_store(struct st2 *m) {
m->c = 1;
}
// Volatile access to st3.c is allowed to use a 16-bit container, matching its
// declared type (short), when AAPCS bit-field width is enabled.
struct st3 {
volatile short c : 7;
};
// LE-LABEL: @st3_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// LE-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LE-NEXT: ret i32 [[CONV]]
//
// BE-LABEL: @st3_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BE-NEXT: ret i32 [[CONV]]
//
// LENUMLOADS-LABEL: @st3_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1
// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: ret i32 [[CONV]]
//
// BENUMLOADS-LABEL: @st3_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: ret i32 [[CONV]]
//
// LEWIDTH-LABEL: @st3_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: ret i32 [[CONV]]
//
// BEWIDTH-LABEL: @st3_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: ret i32 [[CONV]]
//
// LEWIDTHNUM-LABEL: @st3_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
// BEWIDTHNUM-LABEL: @st3_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
int st3_check_load(struct st3 *m) {
return m->c;
}
// LE-LABEL: @st3_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LE-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2
// LE-NEXT: ret void
//
// BE-LABEL: @st3_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BE-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st3_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st3_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st3_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st3_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512
// BEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st3_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st3_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512
// BEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2
// BEWIDTHNUM-NEXT: ret void
//
void st3_check_store(struct st3 *m) {
m->c = 1;
}
// Volatile access to st4.c should use a char-width (i8) load/store, matching
// its declared type (char).
struct st4 {
int b : 9;
volatile char c : 5;
};
// LE-LABEL: @st4_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2
// LE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11
// LE-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32
// LE-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24
// LE-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24
// LE-NEXT: ret i32 [[CONV]]
//
// BE-LABEL: @st4_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11
// BE-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32
// BE-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24
// BE-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24
// BE-NEXT: ret i32 [[CONV]]
//
// LENUMLOADS-LABEL: @st4_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24
// LENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24
// LENUMLOADS-NEXT: ret i32 [[CONV]]
//
// BENUMLOADS-LABEL: @st4_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24
// BENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24
// BENUMLOADS-NEXT: ret i32 [[CONV]]
//
// LEWIDTH-LABEL: @st4_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: ret i32 [[CONV]]
//
// BEWIDTH-LABEL: @st4_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: ret i32 [[CONV]]
//
// LEWIDTHNUM-LABEL: @st4_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
// BEWIDTHNUM-LABEL: @st4_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
int st4_check_load(struct st4 *m) {
return m->c;
}
// LE-LABEL: @st4_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873
// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512
// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @st4_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125
// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4
// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st4_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512
// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st4_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4
// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st4_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st4_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4
// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st4_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2
// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st4_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8*
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4
// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void st4_check_store(struct st4 *m) {
m->c = 1;
}
// LE-LABEL: @st4_check_nonv_store(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512
// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @st4_check_nonv_store(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127
// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
// BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st4_check_nonv_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st4_check_nonv_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st4_check_nonv_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st4_check_nonv_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st4_check_nonv_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st4_check_nonv_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void st4_check_nonv_store(struct st4 *m) {
m->b = 1;
}
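// st5: a volatile char bit-field placed after a non-volatile int bit-field.
// As the checks below show, it gets its own char-sized storage unit, so every
// configuration accesses it with i8 loads and stores.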
struct st5 {
int a : 12;
volatile char c : 5;
};
// LE-LABEL: @st5_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LE-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LE-NEXT: ret i32 [[CONV]]
//
// BE-LABEL: @st5_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BE-NEXT: ret i32 [[CONV]]
//
// LENUMLOADS-LABEL: @st5_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: ret i32 [[CONV]]
//
// BENUMLOADS-LABEL: @st5_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: ret i32 [[CONV]]
//
// LEWIDTH-LABEL: @st5_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: ret i32 [[CONV]]
//
// BEWIDTH-LABEL: @st5_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: ret i32 [[CONV]]
//
// LEWIDTHNUM-LABEL: @st5_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
// BEWIDTHNUM-LABEL: @st5_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: ret i32 [[CONV]]
//
int st5_check_load(struct st5 *m) {
return m->c;
}
// LE-LABEL: @st5_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LE-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// LE-NEXT: ret void
//
// BE-LABEL: @st5_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8
// BE-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st5_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st5_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8
// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st5_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st5_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8
// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st5_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1
// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st5_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8
// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2
// BEWIDTHNUM-NEXT: ret void
//
void st5_check_store(struct st5 *m) {
m->c = 1;
}
struct st6 {
int a : 12;
char b;
int c : 5;
};
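// st6 has no volatile members; st6_check_load reads through a volatile-qualified
// pointer, so its accesses are volatile, while st6_check_store below uses plain
// loads and stores.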
// LE-LABEL: @st6_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4
// LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4
// LE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// LE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// LE-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3
// LE-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3
// LE-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32
// LE-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]]
// LE-NEXT: ret i32 [[ADD5]]
//
// BE-LABEL: @st6_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4
// BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// BE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// BE-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3
// BE-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32
// BE-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]]
// BE-NEXT: ret i32 [[ADD4]]
//
// LENUMLOADS-LABEL: @st6_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3
// LENUMLOADS-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3
// LENUMLOADS-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32
// LENUMLOADS-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]]
// LENUMLOADS-NEXT: ret i32 [[ADD5]]
//
// BENUMLOADS-LABEL: @st6_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// BENUMLOADS-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3
// BENUMLOADS-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32
// BENUMLOADS-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]]
// BENUMLOADS-NEXT: ret i32 [[ADD4]]
//
// LEWIDTH-LABEL: @st6_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4
// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3
// LEWIDTH-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3
// LEWIDTH-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32
// LEWIDTH-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]]
// LEWIDTH-NEXT: ret i32 [[ADD5]]
//
// BEWIDTH-LABEL: @st6_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4
// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// BEWIDTH-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3
// BEWIDTH-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32
// BEWIDTH-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]]
// BEWIDTH-NEXT: ret i32 [[ADD4]]
//
// LEWIDTHNUM-LABEL: @st6_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4
// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3
// LEWIDTHNUM-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3
// LEWIDTHNUM-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32
// LEWIDTHNUM-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]]
// LEWIDTHNUM-NEXT: ret i32 [[ADD5]]
//
// BEWIDTHNUM-LABEL: @st6_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4
// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]]
// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1
// BEWIDTHNUM-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3
// BEWIDTHNUM-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32
// BEWIDTHNUM-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]]
// BEWIDTHNUM-NEXT: ret i32 [[ADD4]]
//
int st6_check_load(volatile struct st6 *m) {
int x = m->a;
x += m->b;
x += m->c;
return x;
}
// LE-LABEL: @st6_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096
// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// LE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32
// LE-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3
// LE-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @st6_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15
// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16
// BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// BE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7
// BE-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24
// BE-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st6_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// LENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32
// LENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3
// LENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st6_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16
// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// BENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7
// BENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24
// BENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st6_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// LEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32
// LEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3
// LEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st6_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16
// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// BEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7
// BEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24
// BEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st6_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1
// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// LEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// LEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32
// LEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3
// LEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st6_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16
// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1
// BEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3
// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1
// BEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7
// BEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24
// BEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void st6_check_store(struct st6 *m) {
m->a = 1;
m->b = 2;
m->c = 3;
}
// Nested structs and bitfields.
struct st7a {
char a;
int b : 5;
};
struct st7b {
char x;
volatile struct st7a y;
};
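// Only the nested st7a member y is volatile; x is not, so the checks below mix
// a plain load of x with volatile accesses to y.a and the bit-field y.b.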
// LE-LABEL: @st7_check_load(
// LE-NEXT: entry:
// LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// LE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// LE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LE-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// LE-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// LE-NEXT: ret i32 [[ADD3]]
//
// BE-LABEL: @st7_check_load(
// BE-NEXT: entry:
// BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// BE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// BE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// BE-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// BE-NEXT: ret i32 [[ADD3]]
//
// LENUMLOADS-LABEL: @st7_check_load(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// LENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// LENUMLOADS-NEXT: ret i32 [[ADD3]]
//
// BENUMLOADS-LABEL: @st7_check_load(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// BENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// BENUMLOADS-NEXT: ret i32 [[ADD3]]
//
// LEWIDTH-LABEL: @st7_check_load(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// LEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// LEWIDTH-NEXT: ret i32 [[ADD3]]
//
// BEWIDTH-LABEL: @st7_check_load(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// BEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// BEWIDTH-NEXT: ret i32 [[ADD3]]
//
// LEWIDTHNUM-LABEL: @st7_check_load(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// LEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3
// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// LEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// LEWIDTHNUM-NEXT: ret i32 [[ADD3]]
//
// BEWIDTHNUM-LABEL: @st7_check_load(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11
// BEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3
// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
// BEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]]
// BEWIDTHNUM-NEXT: ret i32 [[ADD3]]
//
int st7_check_load(struct st7b *m) {
int r = m->x;
r += m->y.a;
r += m->y.b;
return r;
}
// LE-LABEL: @st7_check_store(
// LE-NEXT: entry:
// LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3
// LE-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @st7_check_store(
// BE-NEXT: entry:
// BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BE-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24
// BE-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @st7_check_store(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3
// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @st7_check_store(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24
// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @st7_check_store(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3
// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @st7_check_store(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24
// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @st7_check_store(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// LEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3
// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @st7_check_store(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8
// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0
// BEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11
// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24
// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void st7_check_store(struct st7b *m) {
m->x = 1;
m->y.a = 2;
m->y.b = 3;
}
// Check overflowing assignments to bitfields.
struct st8 {
unsigned f : 16;
};
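// Assigning 0xffff exactly fills the 16-bit field, so all prefixes expect a
// plain store of i16 -1 and the assignment expression to evaluate to 65535.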
// LE-LABEL: @st8_check_assignment(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// LE-NEXT: store i16 -1, i16* [[TMP0]], align 4
// LE-NEXT: ret i32 65535
//
// BE-LABEL: @st8_check_assignment(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// BE-NEXT: store i16 -1, i16* [[TMP0]], align 4
// BE-NEXT: ret i32 65535
//
// LENUMLOADS-LABEL: @st8_check_assignment(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret i32 65535
//
// BENUMLOADS-LABEL: @st8_check_assignment(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret i32 65535
//
// LEWIDTH-LABEL: @st8_check_assignment(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4
// LEWIDTH-NEXT: ret i32 65535
//
// BEWIDTH-LABEL: @st8_check_assignment(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4
// BEWIDTH-NEXT: ret i32 65535
//
// LEWIDTHNUM-LABEL: @st8_check_assignment(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret i32 65535
//
// BEWIDTHNUM-LABEL: @st8_check_assignment(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret i32 65535
//
int st8_check_assignment(struct st8 *m) {
return m->f = 0xffff;
}
struct st9{
int f : 8;
};
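// f is an 8-bit bit-field declared as int. The LE/BE and *NUMLOADS prefixes
// expect volatile accesses through an i8 container, while the LEWIDTH*/BEWIDTH*
// prefixes expect the full i32 container of the declared type, with stores and
// increments lowered to volatile load/mask/or/store sequences.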
// LE-LABEL: @read_st9(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
// LE-NEXT: ret i32 [[BF_CAST]]
//
// BE-LABEL: @read_st9(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
// BE-NEXT: ret i32 [[BF_CAST]]
//
// LENUMLOADS-LABEL: @read_st9(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
// LENUMLOADS-NEXT: ret i32 [[BF_CAST]]
//
// BENUMLOADS-LABEL: @read_st9(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
// BENUMLOADS-NEXT: ret i32 [[BF_CAST]]
//
// LEWIDTH-LABEL: @read_st9(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24
// LEWIDTH-NEXT: ret i32 [[BF_ASHR]]
//
// BEWIDTH-LABEL: @read_st9(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24
// BEWIDTH-NEXT: ret i32 [[BF_ASHR]]
//
// LEWIDTHNUM-LABEL: @read_st9(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24
// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
//
// BEWIDTHNUM-LABEL: @read_st9(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24
// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
//
int read_st9(volatile struct st9 *m) {
return m->f;
}
// LE-LABEL: @store_st9(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// LE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @store_st9(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// BE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @store_st9(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @store_st9(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @store_st9(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @store_st9(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @store_st9(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @store_st9(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void store_st9(volatile struct st9 *m) {
m->f = 1;
}
// LE-LABEL: @increment_st9(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_st9(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_st9(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_st9(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_st9(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_st9(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_st9(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_st9(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_st9(volatile struct st9 *m) {
++m->f;
}
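// st10 packs two int bit-fields (e:1, f:8) with no non-bit-field members,
// so the WIDTH runs below access f through an i32 container (the declared
// type width), while the non-width runs use the narrower i16 access.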
struct st10{
int e : 1;
int f : 8;
};
// LE-LABEL: @read_st10(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7
// LE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
// LE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// LE-NEXT: ret i32 [[BF_CAST]]
//
// BE-LABEL: @read_st10(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
// BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// BE-NEXT: ret i32 [[BF_CAST]]
//
// LENUMLOADS-LABEL: @read_st10(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// LENUMLOADS-NEXT: ret i32 [[BF_CAST]]
//
// BENUMLOADS-LABEL: @read_st10(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32
// BENUMLOADS-NEXT: ret i32 [[BF_CAST]]
//
// LEWIDTH-LABEL: @read_st10(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
// LEWIDTH-NEXT: ret i32 [[BF_ASHR]]
//
// BEWIDTH-LABEL: @read_st10(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
// BEWIDTH-NEXT: ret i32 [[BF_ASHR]]
//
// LEWIDTHNUM-LABEL: @read_st10(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
//
// BEWIDTHNUM-LABEL: @read_st10(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24
// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
//
int read_st10(volatile struct st10 *m) {
return m->f;
}
// LE-LABEL: @store_st10(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511
// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2
// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @store_st10(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641
// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @store_st10(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2
// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @store_st10(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128
// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @store_st10(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @store_st10(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @store_st10(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @store_st10(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void store_st10(volatile struct st10 *m) {
m->f = 1;
}
// LE-LABEL: @increment_st10(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2
// LE-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 510
// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -511
// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_st10(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128
// BE-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 32640
// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -32641
// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_st10(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2
// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 510
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -511
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_st10(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128
// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 32640
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -32641
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]]
// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_st10(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2
// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_st10(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608
// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_st10(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2
// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_st10(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608
// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_st10(volatile struct st10 *m) {
++m->f;
}
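// st11 places a 16-bit bit-field after a plain char member; widening the
// container for f would overlap e, so all runs below keep the i16 access.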
struct st11{
char e;
int f : 16;
};
// LE-LABEL: @read_st11(
// LE-NEXT: entry:
// LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// LE-NEXT: ret i32 [[BF_CAST]]
//
// BE-LABEL: @read_st11(
// BE-NEXT: entry:
// BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// BE-NEXT: ret i32 [[BF_CAST]]
//
// LENUMLOADS-LABEL: @read_st11(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// LENUMLOADS-NEXT: ret i32 [[BF_CAST]]
//
// BENUMLOADS-LABEL: @read_st11(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// BENUMLOADS-NEXT: ret i32 [[BF_CAST]]
//
// LEWIDTH-LABEL: @read_st11(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// LEWIDTH-NEXT: ret i32 [[BF_CAST]]
//
// BEWIDTH-LABEL: @read_st11(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// BEWIDTH-NEXT: ret i32 [[BF_CAST]]
//
// LEWIDTHNUM-LABEL: @read_st11(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// LEWIDTHNUM-NEXT: ret i32 [[BF_CAST]]
//
// BEWIDTHNUM-LABEL: @read_st11(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32
// BEWIDTHNUM-NEXT: ret i32 [[BF_CAST]]
//
int read_st11(volatile struct st11 *m) {
return m->f;
}
// LE-LABEL: @store_st11(
// LE-NEXT: entry:
// LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LE-NEXT: store volatile i16 1, i16* [[F]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @store_st11(
// BE-NEXT: entry:
// BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BE-NEXT: store volatile i16 1, i16* [[F]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @store_st11(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @store_st11(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @store_st11(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @store_st11(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @store_st11(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @store_st11(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void store_st11(volatile struct st11 *m) {
m->f = 1;
}
// LE-LABEL: @increment_st11(
// LE-NEXT: entry:
// LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// LE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @increment_st11(
// BE-NEXT: entry:
// BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// BE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_st11(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
// LENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_st11(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
// BENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_st11(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_st11(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_st11(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
// LEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_st11(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1
// BEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_st11(volatile struct st11 *m) {
++m->f;
}
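// e is an ordinary char member rather than a bit-field, so every run below
// accesses it as a plain volatile i8 load/store carrying TBAA metadata.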
// LE-LABEL: @increment_e_st11(
// LE-NEXT: entry:
// LE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// LE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// LE-NEXT: ret void
//
// BE-LABEL: @increment_e_st11(
// BE-NEXT: entry:
// BE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// BE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_e_st11(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_e_st11(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_e_st11(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_e_st11(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_e_st11(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_e_st11(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1
// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12
// BEWIDTHNUM-NEXT: ret void
//
void increment_e_st11(volatile struct st11 *m) {
++m->e;
}
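// st12 contains only int bit-fields (e:8, f:16), so every run below accesses
// f through the full i32 container regardless of the width flags.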
struct st12{
int e : 8;
int f : 16;
};
// LE-LABEL: @read_st12(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// LE-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// LE-NEXT: ret i32 [[BF_ASHR]]
//
// BE-LABEL: @read_st12(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// BE-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// BE-NEXT: ret i32 [[BF_ASHR]]
//
// LENUMLOADS-LABEL: @read_st12(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// LENUMLOADS-NEXT: ret i32 [[BF_ASHR]]
//
// BENUMLOADS-LABEL: @read_st12(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// BENUMLOADS-NEXT: ret i32 [[BF_ASHR]]
//
// LEWIDTH-LABEL: @read_st12(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// LEWIDTH-NEXT: ret i32 [[BF_ASHR]]
//
// BEWIDTH-LABEL: @read_st12(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// BEWIDTH-NEXT: ret i32 [[BF_ASHR]]
//
// LEWIDTHNUM-LABEL: @read_st12(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
//
// BEWIDTHNUM-LABEL: @read_st12(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8
// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16
// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]]
//
int read_st12(volatile struct st12 *m) {
return m->f;
}
// LE-LABEL: @store_st12(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @store_st12(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @store_st12(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @store_st12(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @store_st12(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @store_st12(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @store_st12(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @store_st12(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void store_st12(volatile struct st12 *m) {
m->f = 1;
}
// LE-LABEL: @increment_st12(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// LE-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_st12(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// BE-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_st12(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_st12(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_st12(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_st12(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_st12(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_st12(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256
// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_st12(volatile struct st12 *m) {
++m->f;
}
// LE-LABEL: @increment_e_st12(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_e_st12(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
// BE-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_e_st12(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_e_st12(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_e_st12(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_e_st12(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_e_st12(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_e_st12(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_e_st12(volatile struct st12 *m) {
++m->e;
}
struct st13 {
char a : 8;
int b : 32;
} __attribute__((packed));
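// st13 is packed, so the 8-bit 'a' and the 32-bit 'b' bit-fields share a single
// 40-bit storage unit; the checks below expect byte-aligned volatile i40
// container accesses under every prefix.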
// LE-LABEL: @increment_b_st13(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LE-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// LE-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// LE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LE-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LE-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// LE-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// LE-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// LE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @increment_b_st13(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BE-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// BE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BE-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BE-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// BE-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_b_st13(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_b_st13(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_b_st13(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_b_st13(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_b_st13(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_b_st13(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_b_st13(volatile struct st13 *s) {
s->b++;
}
struct st14 {
char a : 8;
} __attribute__((packed));
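// st14 holds a single 8-bit bit-field in a packed struct, so every prefix
// expects a plain volatile i8 access; the *NUM prefixes additionally expect a
// second volatile load before the store, preserving the number of container loads.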
// LE-LABEL: @increment_a_st14(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @increment_a_st14(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_a_st14(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_a_st14(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_a_st14(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_a_st14(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_a_st14(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_a_st14(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_a_st14(volatile struct st14 *s) {
s->a++;
}
struct st15 {
short a : 8;
} __attribute__((packed));
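// st15 is the same layout with a short-typed 8-bit bit-field; being packed, it
// still gets volatile i8 accesses under all prefixes, with the *NUM runs again
// checking for the extra load.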
// LE-LABEL: @increment_a_st15(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @increment_a_st15(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_a_st15(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_a_st15(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_a_st15(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_a_st15(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_a_st15(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_a_st15(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_a_st15(volatile struct st15 *s) {
s->a++;
}
struct st16 {
int a : 32;
int b : 16;
int c : 32;
int d : 16;
};
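// st16 is not packed and the pointer parameters below are not volatile: a/b and
// c/d each share a 64-bit storage unit, so the checks expect ordinary
// (non-volatile) i64 loads and stores for all prefixes.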
// LE-LABEL: @increment_a_st16(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LE-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_a_st16(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BE-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BE-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_a_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_a_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_a_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_a_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_a_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_a_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_a_st16(struct st16 *s) {
s->a++;
}
// LE-LABEL: @increment_b_st16(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LE-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LE-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LE-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LE-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_b_st16(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BE-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BE-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BE-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_b_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_b_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_b_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_b_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_b_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_b_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_b_st16(struct st16 *s) {
s->b++;
}
// LE-LABEL: @increment_c_st16(
// LE-NEXT: entry:
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LE-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LE-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_c_st16(
// BE-NEXT: entry:
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BE-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BE-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BE-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_c_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_c_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_c_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_c_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_c_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_c_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_c_st16(struct st16 *s) {
s->c++;
}
// LE-LABEL: @increment_d_st16(
// LE-NEXT: entry:
// LE-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LE-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LE-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LE-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LE-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LE-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_d_st16(
// BE-NEXT: entry:
// BE-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BE-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BE-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BE-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BE-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_d_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_d_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_d_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_d_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_d_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_d_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_d_st16(struct st16 *s) {
s->d++;
}
// LE-LABEL: @increment_v_a_st16(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LE-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_v_a_st16(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BE-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_v_a_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_v_a_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_v_a_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_v_a_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_v_a_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_v_a_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_v_a_st16(volatile struct st16 *s) {
s->a++;
}
// LE-LABEL: @increment_v_b_st16(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LE-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LE-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LE-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_v_b_st16(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BE-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BE-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BE-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_v_b_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_v_b_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_v_b_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_v_b_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_v_b_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_v_b_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_v_b_st16(volatile struct st16 *s) {
s->b++;
}
// LE-LABEL: @increment_v_c_st16(
// LE-NEXT: entry:
// LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LE-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LE-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_v_c_st16(
// BE-NEXT: entry:
// BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BE-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BE-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BE-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_v_c_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]]
// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_v_c_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32
// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]]
// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_v_c_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_v_c_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_v_c_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_v_c_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_v_c_st16(volatile struct st16 *s) {
s->c++;
}
// LE-LABEL: @increment_v_d_st16(
// LE-NEXT: entry:
// LE-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LE-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LE-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LE-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LE-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LE-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361
// LE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_v_d_st16(
// BE-NEXT: entry:
// BE-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BE-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BE-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BE-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BE-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BE-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761
// BE-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_v_d_st16(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32
// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535
// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64
// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]]
// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_v_d_st16(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32
// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536
// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536
// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]]
// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_v_d_st16(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_v_d_st16(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_v_d_st16(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_v_d_st16(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32*
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_v_d_st16(volatile struct st16 *s) {
s->d++;
}
// st17 has alignment = 1; the AAPCS does not specify an access width
// for b, but accesses to c should use a char container.
struct st17 {
int b : 32;
char c : 8;
} __attribute__((packed));
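// A minimal sketch of the expected lowering (not part of the generated
// checks, and the exact IR value names are illustrative): under the
// AAPCS width-preserving mode, a volatile access such as
//   volatile struct st17 *p; p->c++;
// should turn into a char-width (i8) load/modify/store of the byte
// holding 'c', e.g.
//   %byte = load volatile i8, i8* %addr_of_c, align 1
//   %inc  = add i8 %byte, 1
//   store volatile i8 %inc, i8* %addr_of_c, align 1
// while 'b', which fills the whole 32-bit container of this packed,
// align-1 struct, keeps an access of its declared 32-bit width.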
// LE-LABEL: @increment_v_b_st17(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LE-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// LE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LE-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LE-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// LE-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// LE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @increment_v_b_st17(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BE-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// BE-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// BE-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BE-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BE-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// BE-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// BE-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_v_b_st17(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_v_b_st17(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_v_b_st17(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_v_b_st17(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_v_b_st17(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_v_b_st17(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8
// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_v_b_st17(volatile struct st17 *s) {
s->b++;
}
// LE-LABEL: @increment_v_c_st17(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LE-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 32
// LE-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i8
// LE-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1
// LE-NEXT: [[TMP2:%.*]] = zext i8 [[INC]] to i40
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LE-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 32
// LE-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 4294967295
// LE-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// LE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @increment_v_c_st17(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BE-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i8
// BE-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1
// BE-NEXT: [[TMP1:%.*]] = zext i8 [[INC]] to i40
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BE-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -256
// BE-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_v_c_st17(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 32
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i8
// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1
// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[INC]] to i40
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 32
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 4294967295
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]]
// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_v_c_st17(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i8
// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1
// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i8 [[INC]] to i40
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -256
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]]
// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_v_c_st17(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_v_c_st17(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_v_c_st17(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_v_c_st17(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_v_c_st17(volatile struct st17 *s) {
s->c++;
}
// A zero-width bit-field should block widening of the volatile accesses,
// as the C11 specification requires a and b to be separate memory locations
struct zero_bitfield {
int a : 8;
char : 0;
int b : 8;
};
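// Illustrative sketch (hypothetical helper, not part of the checked tests):
// because the zero-width bit-field ends the memory location containing 'a',
// a store to 'a' must not also write the byte holding 'b'.
static inline void write_a_zero_bitfield(volatile struct zero_bitfield *s,
                                         int v) {
  s->a = v; // expected to read-modify-write only the container of 'a'
}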
// LE-LABEL: @increment_a_zero_bitfield(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_a_zero_bitfield(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_a_zero_bitfield(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_a_zero_bitfield(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_a_zero_bitfield(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_a_zero_bitfield(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_a_zero_bitfield(volatile struct zero_bitfield *s) {
s->a++;
}
// LE-LABEL: @increment_b_zero_bitfield(
// LE-NEXT: entry:
// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// LE-NEXT: ret void
//
// BE-LABEL: @increment_b_zero_bitfield(
// BE-NEXT: entry:
// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_b_zero_bitfield(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_b_zero_bitfield(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_b_zero_bitfield(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_b_zero_bitfield(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1
// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1
// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_b_zero_bitfield(volatile struct zero_bitfield *s) {
s->b++;
}
// The zero-width bit-field here does not affect the volatile bit-field
// access widths, since a/a1 and b already live in separate containers
struct zero_bitfield_ok {
short a : 8;
char a1 : 8;
long : 0;
int b : 24;
};
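// Illustrative sketch (hypothetical helper, not part of the checked tests):
// the 'long : 0' above starts a new container for 'b', so a volatile read of
// 'b' is expected to stay within its own 32-bit storage unit and never touch
// the unit shared by 'a' and 'a1'.
static inline int read_b_zero_bitfield_ok(const volatile struct zero_bitfield_ok *s) {
  return s->b; // expected to load only the 32-bit container of 'b'
}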
// LE-LABEL: @increment_a_zero_bitfield_ok(
// LE-NEXT: entry:
// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8
// LE-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8
// LE-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]]
// LE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16
// LE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LE-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8
// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255
// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]]
// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_a_zero_bitfield_ok(
// BE-NEXT: entry:
// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8
// BE-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8
// BE-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]]
// BE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16
// BE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256
// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]]
// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_a_zero_bitfield_ok(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8
// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8
// LENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]]
// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16
// LENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]]
// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_a_zero_bitfield_ok(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8
// BENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8
// BENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]]
// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16
// BENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]]
// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_a_zero_bitfield_ok(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8
// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8*
// LEWIDTH-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1
// LEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]]
// LEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_a_zero_bitfield_ok(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8
// BEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8
// BEWIDTH-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8*
// BEWIDTH-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1
// BEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]]
// BEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8
// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8*
// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1
// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]]
// LEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP2]], align 1
// LEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8
// BEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8
// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8*
// BEWIDTHNUM-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1
// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]]
// BEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP3]], align 1
// BEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1
// BEWIDTHNUM-NEXT: ret void
//
void increment_a_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) {
s->a1 += s->a;
}
// LE-LABEL: @increment_b_zero_bitfield_ok(
// LE-NEXT: entry:
// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// LE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LE-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215
// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216
// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LE-NEXT: ret void
//
// BE-LABEL: @increment_b_zero_bitfield_ok(
// BE-NEXT: entry:
// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// BE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BE-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256
// BE-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256
// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255
// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BE-NEXT: ret void
//
// LENUMLOADS-LABEL: @increment_b_zero_bitfield_ok(
// LENUMLOADS-NEXT: entry:
// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215
// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216
// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LENUMLOADS-NEXT: ret void
//
// BENUMLOADS-LABEL: @increment_b_zero_bitfield_ok(
// BENUMLOADS-NEXT: entry:
// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256
// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256
// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255
// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BENUMLOADS-NEXT: ret void
//
// LEWIDTH-LABEL: @increment_b_zero_bitfield_ok(
// LEWIDTH-NEXT: entry:
// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215
// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216
// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTH-NEXT: ret void
//
// BEWIDTH-LABEL: @increment_b_zero_bitfield_ok(
// BEWIDTH-NEXT: entry:
// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256
// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256
// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255
// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTH-NEXT: ret void
//
// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok(
// LEWIDTHNUM-NEXT: entry:
// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1
// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215
// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216
// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// LEWIDTHNUM-NEXT: ret void
//
// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok(
// BEWIDTHNUM-NEXT: entry:
// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1
// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32*
// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256
// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256
// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255
// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]]
// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4
// BEWIDTHNUM-NEXT: ret void
//
void increment_b_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) {
s->b++;
}