2013-07-04 22:58:42 +08:00
|
|
|
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s \
|
2012-12-09 15:26:04 +08:00
|
|
|
// RUN: | FileCheck -check-prefix=CHECK-X86-64 %s
|
2013-07-04 22:58:42 +08:00
|
|
|
// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -emit-llvm -o - %s \
|
2012-12-09 15:26:04 +08:00
|
|
|
// RUN: | FileCheck -check-prefix=CHECK-PPC64 %s
|
2012-12-06 19:14:44 +08:00
|
|
|
//
|
|
|
|
// Tests for bitfield access patterns in C++ with special attention to
|
|
|
|
// conformance to C++11 memory model requirements.
|
|
|
|
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
namespace N0 {
|
|
|
|
// Test basic bitfield layout access across interesting byte and word
|
|
|
|
// boundaries on both little endian and big endian platforms.
|
|
|
|
struct __attribute__((packed)) S {
|
|
|
|
unsigned b00 : 14;
|
|
|
|
unsigned b01 : 2;
|
|
|
|
unsigned b20 : 6;
|
|
|
|
unsigned b21 : 2;
|
|
|
|
unsigned b30 : 30;
|
|
|
|
unsigned b31 : 2;
|
|
|
|
unsigned b70 : 6;
|
|
|
|
unsigned b71 : 2;
|
|
|
|
};
|
|
|
|
unsigned read00(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read00
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i64 %[[val]], 16383
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read00
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 50
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[shr]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b00;
|
|
|
|
}
|
|
|
|
unsigned read01(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read01
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 14
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read01
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 48
|
|
|
|
// CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b01;
|
|
|
|
}
|
|
|
|
unsigned read20(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read20
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 16
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 63
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read20
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 42
|
|
|
|
// CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 63
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b20;
|
|
|
|
}
|
|
|
|
unsigned read21(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read21
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 22
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read21
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 40
|
|
|
|
// CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b21;
|
|
|
|
}
|
|
|
|
unsigned read30(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read30
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 24
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 1073741823
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read30
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 10
|
|
|
|
// CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 1073741823
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b30;
|
|
|
|
}
|
|
|
|
unsigned read31(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read31
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 54
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 3
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read31
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 8
|
|
|
|
// CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 3
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b31;
|
|
|
|
}
|
|
|
|
unsigned read70(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read70
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 56
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i64 %[[shr]], 63
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read70
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i64 %[[val]], 2
|
|
|
|
// CHECK-PPC64: %[[and:.*]] = and i64 %[[shr]], 63
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b70;
|
|
|
|
}
|
|
|
|
unsigned read71(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N06read71
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[shr:.*]] = lshr i64 %[[val]], 62
|
|
|
|
// CHECK-X86-64: %[[trunc:.*]] = trunc i64 %[[shr]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[trunc]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N06read71
|
Add a test case that I've been using to clarify the bitfield layout for
both LE and BE targets.
AFAICT, Clang gets this correct for PPC64. I've compared it to GCC 4.8
output for PPC64 (thanks Roman!) and to my limited ability to read power
assembly, it looks functionally equivalent. It would be really good to
fill in the assertions on this test case for x86-32, PPC32, ARM, etc.,
but I've reached the limit of my time and energy... Hopefully other
folks can chip in as it would be good to have this in place to test any
subsequent changes.
To those who care about PPC64 performance, a side note: there is some
*obnoxiously* bad code generated for these test cases. It would be worth
someone's time to sit down and teach the PPC backend to pattern match
these IR constructs better. It appears that things like '(shr %foo,
<imm>)' turn into 'rldicl R, R, 64-<imm>, <imm>' or some such. They
don't even get combined with other 'rldicl' instructions *immediately
adjacent*. I'll add a couple of these patterns to the README, but
I think it would be better to look at all the patterns produced by this
and other bitfield access code, and systematically build up a collection
of patterns that efficiently reduce them to the minimal code.
llvm-svn: 169693
2012-12-09 18:08:22 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i64*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i64* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[and:.*]] = and i64 %[[val]], 3
|
|
|
|
// CHECK-PPC64: %[[trunc:.*]] = trunc i64 %[[and]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[trunc]]
|
|
|
|
return s->b71;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-06 19:14:44 +08:00
|
|
|
namespace N1 {
|
|
|
|
// Ensure that neither loads nor stores to bitfields are widened into
|
|
|
|
// other memory locations. (PR13691)
|
|
|
|
//
|
|
|
|
// NOTE: We could potentially widen loads based on their alignment if we are
|
|
|
|
// comfortable requiring that subsequent memory locations within the
|
|
|
|
// alignment-widened load are not volatile.
|
|
|
|
struct S {
|
|
|
|
char a;
|
|
|
|
unsigned b : 1;
|
|
|
|
char c;
|
|
|
|
};
|
|
|
|
unsigned read(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N14read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = getelementptr inbounds %{{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i8* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i8 %[[val]], 1
|
|
|
|
// CHECK-X86-64: %[[ext:.*]] = zext i8 %[[and]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[ext]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N14read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = getelementptr inbounds %{{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i8* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i8 %[[val]], 7
|
|
|
|
// CHECK-PPC64: %[[ext:.*]] = zext i8 %[[shr]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[ext]]
|
2012-12-06 19:14:44 +08:00
|
|
|
return s->b;
|
|
|
|
}
|
|
|
|
void write(S* s, unsigned x) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define void @_ZN2N15write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = getelementptr inbounds %{{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: %[[x_trunc:.*]] = trunc i32 %{{.*}} to i8
|
|
|
|
// CHECK-X86-64: %[[old:.*]] = load i8* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[x_and:.*]] = and i8 %[[x_trunc]], 1
|
|
|
|
// CHECK-X86-64: %[[old_and:.*]] = and i8 %[[old]], -2
|
|
|
|
// CHECK-X86-64: %[[new:.*]] = or i8 %[[old_and]], %[[x_and]]
|
|
|
|
// CHECK-X86-64: store i8 %[[new]], i8* %[[ptr]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define void @_ZN2N15write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = getelementptr inbounds %{{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: %[[x_trunc:.*]] = trunc i32 %{{.*}} to i8
|
|
|
|
// CHECK-PPC64: %[[old:.*]] = load i8* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[x_and:.*]] = and i8 %[[x_trunc]], 1
|
|
|
|
// CHECK-PPC64: %[[x_shl:.*]] = shl i8 %[[x_and]], 7
|
|
|
|
// CHECK-PPC64: %[[old_and:.*]] = and i8 %[[old]], 127
|
|
|
|
// CHECK-PPC64: %[[new:.*]] = or i8 %[[old_and]], %[[x_shl]]
|
|
|
|
// CHECK-PPC64: store i8 %[[new]], i8* %[[ptr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
s->b = x;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace N2 {
|
|
|
|
// Do widen loads and stores to bitfields when those bitfields have padding
|
|
|
|
// within the struct following them.
|
|
|
|
// A 24-bit field followed by a pointer member.
struct S {
|
|
|
|
unsigned b : 24;
|
|
|
|
void *p;
|
|
|
|
};
|
|
|
|
// Returns s->b. The expected IR reads the field through a full i32 load, then
// masks off the top byte (x86-64) or shifts right by 8 (PPC64).
unsigned read(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N24read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 16777215
|
|
|
|
// CHECK-X86-64: ret i32 %[[and]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N24read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i32 %[[val]], 8
|
|
|
|
// CHECK-PPC64: ret i32 %[[shr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
return s->b;
|
|
|
|
}
|
|
|
|
// Assigns x to s->b. The expected IR is an i32 read-modify-write: load the old
// word, keep the 8 bits that do not belong to 'b', merge in the masked value of
// x, and store the i32 back.
void write(S* s, unsigned x) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define void @_ZN2N25write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-X86-64: %[[old:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[x_and:.*]] = and i32 %{{.*}}, 16777215
|
|
|
|
// CHECK-X86-64: %[[old_and:.*]] = and i32 %[[old]], -16777216
|
|
|
|
// CHECK-X86-64: %[[new:.*]] = or i32 %[[old_and]], %[[x_and]]
|
|
|
|
// CHECK-X86-64: store i32 %[[new]], i32* %[[ptr]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define void @_ZN2N25write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-PPC64: %[[old:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[x_and:.*]] = and i32 %{{.*}}, 16777215
|
|
|
|
// CHECK-PPC64: %[[x_shl:.*]] = shl i32 %[[x_and]], 8
|
|
|
|
// CHECK-PPC64: %[[old_and:.*]] = and i32 %[[old]], 255
|
|
|
|
// CHECK-PPC64: %[[new:.*]] = or i32 %[[old_and]], %[[x_shl]]
|
|
|
|
// CHECK-PPC64: store i32 %[[new]], i32* %[[ptr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
s->b = x;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace N3 {
|
|
|
|
// Do widen loads and stores to bitfields through the trailing padding at the
|
|
|
|
// end of a struct.
|
|
|
|
// A 24-bit field is the only member declared in this struct.
struct S {
|
|
|
|
unsigned b : 24;
|
|
|
|
};
|
|
|
|
// Returns s->b. The expected IR reads the field through a full i32 load, then
// masks off the top byte (x86-64) or shifts right by 8 (PPC64).
unsigned read(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N34read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 16777215
|
|
|
|
// CHECK-X86-64: ret i32 %[[and]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N34read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i32 %[[val]], 8
|
|
|
|
// CHECK-PPC64: ret i32 %[[shr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
return s->b;
|
|
|
|
}
|
|
|
|
// Assigns x to s->b. The expected IR is an i32 read-modify-write: load the old
// word, keep the 8 bits that do not belong to 'b', merge in the masked value of
// x, and store the i32 back.
void write(S* s, unsigned x) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define void @_ZN2N35write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-X86-64: %[[old:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[x_and:.*]] = and i32 %{{.*}}, 16777215
|
|
|
|
// CHECK-X86-64: %[[old_and:.*]] = and i32 %[[old]], -16777216
|
|
|
|
// CHECK-X86-64: %[[new:.*]] = or i32 %[[old_and]], %[[x_and]]
|
|
|
|
// CHECK-X86-64: store i32 %[[new]], i32* %[[ptr]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define void @_ZN2N35write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-PPC64: %[[old:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[x_and:.*]] = and i32 %{{.*}}, 16777215
|
|
|
|
// CHECK-PPC64: %[[x_shl:.*]] = shl i32 %[[x_and]], 8
|
|
|
|
// CHECK-PPC64: %[[old_and:.*]] = and i32 %[[old]], 255
|
|
|
|
// CHECK-PPC64: %[[new:.*]] = or i32 %[[old_and]], %[[x_shl]]
|
|
|
|
// CHECK-PPC64: store i32 %[[new]], i32* %[[ptr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
s->b = x;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace N4 {
|
|
|
|
// Do NOT widen loads and stores to bitfields into padding at the end of
|
|
|
|
// a class which might end up with members inside of it when inside a derived
|
|
|
|
// class.
|
|
|
|
// A class with a virtual destructor and a 24-bit field as its only data member.
struct Base {
|
|
|
|
virtual ~Base() {}
|
|
|
|
|
|
|
|
unsigned b : 24;
|
|
|
|
};
|
|
|
|
// Imagine some other translation unit introduces:
|
|
|
|
#if 0
|
|
|
|
struct Derived : public Base {
|
|
|
|
char c;
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
// Returns s->b. The expected IR on both targets addresses field index 1 as a
// [3 x i8], loads exactly an i24 from it, and zero-extends the result to i32.
unsigned read(Base* s) {
|
|
|
|
|
// FIXME: We should widen this load as long as the function isn't being
|
|
|
|
// instrumented by thread-sanitizer.
|
|
|
|
//
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N44read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i24* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[ext:.*]] = zext i24 %[[val]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[ext]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N44read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i24* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[ext:.*]] = zext i24 %[[val]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[ext]]
|
2012-12-06 19:14:44 +08:00
|
|
|
return s->b;
|
|
|
|
}
|
|
|
|
// Assigns x to s->b. The expected IR on both targets truncates x to i24 and
// stores exactly that i24 to field index 1, with no preceding load.
void write(Base* s, unsigned x) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define void @_ZN2N45write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-X86-64: %[[new:.*]] = trunc i32 %{{.*}} to i24
|
|
|
|
// CHECK-X86-64: store i24 %[[new]], i24* %[[ptr]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define void @_ZN2N45write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-PPC64: %[[new:.*]] = trunc i32 %{{.*}} to i24
|
|
|
|
// CHECK-PPC64: store i24 %[[new]], i24* %[[ptr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
s->b = x;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace N5 {
|
|
|
|
// Widen through padding at the end of a struct even if that struct
|
|
|
|
// participates in a union with another struct which has a separate field in
|
|
|
|
// that location. The reasoning is that if the operation is storing to that
|
|
|
|
// member of the union, it must be the active member, and thus we can write
|
|
|
|
// through the padding. If it is a load, it might be a load of a common
|
|
|
|
// prefix through a non-active member, but in such a case the extra bits
|
|
|
|
// loaded are masked off anyways.
|
|
|
|
// Both union members begin with a 24-bit field; only X declares a char after it.
union U {
|
|
|
|
struct X { unsigned b : 24; char c; } x;
|
|
|
|
struct Y { unsigned b : 24; } y;
|
|
|
|
};
|
|
|
|
// Returns u->y.b. The expected IR reads the field through a full i32 load, then
// masks off the top byte (x86-64) or shifts right by 8 (PPC64).
unsigned read(U* u) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N54read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[and:.*]] = and i32 %[[val]], 16777215
|
|
|
|
// CHECK-X86-64: ret i32 %[[and]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N54read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[shr:.*]] = lshr i32 %[[val]], 8
|
|
|
|
// CHECK-PPC64: ret i32 %[[shr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
return u->y.b;
|
|
|
|
}
|
|
|
|
// Assigns x to u->y.b. The expected IR is an i32 read-modify-write: load the
// old word, keep the 8 bits that do not belong to 'b', merge in the masked
// value of x, and store the i32 back.
void write(U* u, unsigned x) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define void @_ZN2N55write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-X86-64: %[[old:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[x_and:.*]] = and i32 %{{.*}}, 16777215
|
|
|
|
// CHECK-X86-64: %[[old_and:.*]] = and i32 %[[old]], -16777216
|
|
|
|
// CHECK-X86-64: %[[new:.*]] = or i32 %[[old_and]], %[[x_and]]
|
|
|
|
// CHECK-X86-64: store i32 %[[new]], i32* %[[ptr]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define void @_ZN2N55write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast %{{.*}}* %{{.*}} to i32*
|
|
|
|
// CHECK-PPC64: %[[old:.*]] = load i32* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[x_and:.*]] = and i32 %{{.*}}, 16777215
|
|
|
|
// CHECK-PPC64: %[[x_shl:.*]] = shl i32 %[[x_and]], 8
|
|
|
|
// CHECK-PPC64: %[[old_and:.*]] = and i32 %[[old]], 255
|
|
|
|
// CHECK-PPC64: %[[new:.*]] = or i32 %[[old_and]], %[[x_shl]]
|
|
|
|
// CHECK-PPC64: store i32 %[[new]], i32* %[[ptr]]
|
2012-12-06 19:14:44 +08:00
|
|
|
u->y.b = x;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace N6 {
|
|
|
|
// Zero-length bitfields partition the memory locations of bitfields for the
|
|
|
|
// purposes of the memory model. That means stores must not span zero-length
|
|
|
|
// bitfields and loads may only span them when we are not instrumenting with
|
|
|
|
// thread sanitizer.
|
|
|
|
// FIXME: We currently don't widen loads even without thread sanitizer, even
|
|
|
|
// though we could.
|
|
|
|
// A 24-bit field and an 8-bit field separated by an unnamed zero-length bitfield.
struct S {
|
|
|
|
unsigned b1 : 24;
|
|
|
|
unsigned char : 0;
|
|
|
|
unsigned char b2 : 8;
|
|
|
|
};
|
|
|
|
// Returns s->b1 + s->b2. The expected IR on both targets uses two separate
// loads (an i24 for b1 and an i8 for b2), zero-extends each to i32, and adds.
unsigned read(S* s) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N64read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr1:.*]] = bitcast {{.*}}* %{{.*}} to i24*
|
|
|
|
// CHECK-X86-64: %[[val1:.*]] = load i24* %[[ptr1]]
|
|
|
|
// CHECK-X86-64: %[[ext1:.*]] = zext i24 %[[val1]] to i32
|
|
|
|
// CHECK-X86-64: %[[ptr2:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: %[[val2:.*]] = load i8* %[[ptr2]]
|
|
|
|
// CHECK-X86-64: %[[ext2:.*]] = zext i8 %[[val2]] to i32
|
|
|
|
// CHECK-X86-64: %[[add:.*]] = add nsw i32 %[[ext1]], %[[ext2]]
|
|
|
|
// CHECK-X86-64: ret i32 %[[add]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N64read
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr1:.*]] = bitcast {{.*}}* %{{.*}} to i24*
|
|
|
|
// CHECK-PPC64: %[[val1:.*]] = load i24* %[[ptr1]]
|
|
|
|
// CHECK-PPC64: %[[ext1:.*]] = zext i24 %[[val1]] to i32
|
|
|
|
// CHECK-PPC64: %[[ptr2:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: %[[val2:.*]] = load i8* %[[ptr2]]
|
|
|
|
// CHECK-PPC64: %[[ext2:.*]] = zext i8 %[[val2]] to i32
|
|
|
|
// CHECK-PPC64: %[[add:.*]] = add nsw i32 %[[ext1]], %[[ext2]]
|
|
|
|
// CHECK-PPC64: ret i32 %[[add]]
|
2012-12-06 19:14:44 +08:00
|
|
|
return s->b1 + s->b2;
|
|
|
|
}
|
|
|
|
// Assigns x to s->b1 and then to s->b2. The expected IR on both targets is two
// independent stores: an i24 store for b1 followed by an i8 store for b2.
void write(S* s, unsigned x) {
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-X86-64-LABEL: define void @_ZN2N65write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-X86-64: %[[ptr1:.*]] = bitcast {{.*}}* %{{.*}} to i24*
|
|
|
|
// CHECK-X86-64: %[[new1:.*]] = trunc i32 %{{.*}} to i24
|
|
|
|
// CHECK-X86-64: store i24 %[[new1]], i24* %[[ptr1]]
|
|
|
|
// CHECK-X86-64: %[[new2:.*]] = trunc i32 %{{.*}} to i8
|
|
|
|
// CHECK-X86-64: %[[ptr2:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: store i8 %[[new2]], i8* %[[ptr2]]
|
2013-08-15 14:47:53 +08:00
|
|
|
// CHECK-PPC64-LABEL: define void @_ZN2N65write
|
2012-12-09 15:26:04 +08:00
|
|
|
// CHECK-PPC64: %[[ptr1:.*]] = bitcast {{.*}}* %{{.*}} to i24*
|
|
|
|
// CHECK-PPC64: %[[new1:.*]] = trunc i32 %{{.*}} to i24
|
|
|
|
// CHECK-PPC64: store i24 %[[new1]], i24* %[[ptr1]]
|
|
|
|
// CHECK-PPC64: %[[new2:.*]] = trunc i32 %{{.*}} to i8
|
|
|
|
// CHECK-PPC64: %[[ptr2:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: store i8 %[[new2]], i8* %[[ptr2]]
|
2012-12-06 19:14:44 +08:00
|
|
|
s->b1 = x;
|
|
|
|
s->b2 = x;
|
|
|
|
}
|
|
|
|
}
|
2014-01-29 08:09:16 +08:00
|
|
|
|
|
|
|
namespace N7 {
|
|
|
|
// Similar to N4 except that this adds a virtual base to the picture. (PR18430)
|
|
|
|
// Do NOT widen loads and stores to bitfields into padding at the end of
|
|
|
|
// a class which might end up with members inside of it when inside a derived
|
|
|
|
// class.
|
|
|
|
// A class with a virtual function and a 24-bit field; used as a virtual base below.
struct B1 {
|
|
|
|
virtual void f();
|
|
|
|
unsigned b1 : 24;
|
|
|
|
};
|
|
|
|
// Derives virtually from B1 and adds its own 24-bit field 'b', which is the
// field accessed by read() and write() below.
struct B2 : virtual B1 {
|
|
|
|
virtual ~B2();
|
|
|
|
unsigned b : 24;
|
|
|
|
};
|
|
|
|
// Imagine some other translation unit introduces:
|
|
|
|
#if 0
|
|
|
|
struct Derived : public B2 {
|
|
|
|
char c;
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
// Returns s->b. The expected IR on both targets addresses field index 1 as a
// [3 x i8], loads exactly an i24 from it, and zero-extends the result to i32.
unsigned read(B2* s) {
|
|
|
|
|
// FIXME: We should widen this load as long as the function isn't being
|
|
|
|
// instrumented by thread-sanitizer.
|
|
|
|
//
|
|
|
|
// CHECK-X86-64-LABEL: define i32 @_ZN2N74read
|
|
|
|
// CHECK-X86-64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-X86-64: %[[val:.*]] = load i24* %[[ptr]]
|
|
|
|
// CHECK-X86-64: %[[ext:.*]] = zext i24 %[[val]] to i32
|
|
|
|
// CHECK-X86-64: ret i32 %[[ext]]
|
|
|
|
// CHECK-PPC64-LABEL: define zeroext i32 @_ZN2N74read
|
|
|
|
// CHECK-PPC64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-PPC64: %[[val:.*]] = load i24* %[[ptr]]
|
|
|
|
// CHECK-PPC64: %[[ext:.*]] = zext i24 %[[val]] to i32
|
|
|
|
// CHECK-PPC64: ret i32 %[[ext]]
|
|
|
|
return s->b;
|
|
|
|
}
|
|
|
|
// Assigns x to s->b. The expected IR on both targets truncates x to i24 and
// stores exactly that i24 to field index 1, with no preceding load.
void write(B2* s, unsigned x) {
|
|
|
|
// CHECK-X86-64-LABEL: define void @_ZN2N75write
|
|
|
|
// CHECK-X86-64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-X86-64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-X86-64: %[[new:.*]] = trunc i32 %{{.*}} to i24
|
|
|
|
// CHECK-X86-64: store i24 %[[new]], i24* %[[ptr]]
|
|
|
|
// CHECK-PPC64-LABEL: define void @_ZN2N75write
|
|
|
|
// CHECK-PPC64: %[[gep:.*]] = getelementptr inbounds {{.*}}* %{{.*}}, i32 0, i32 1
|
|
|
|
// CHECK-PPC64: %[[ptr:.*]] = bitcast [3 x i8]* %[[gep]] to i24*
|
|
|
|
// CHECK-PPC64: %[[new:.*]] = trunc i32 %{{.*}} to i24
|
|
|
|
// CHECK-PPC64: store i24 %[[new]], i24* %[[ptr]]
|
|
|
|
s->b = x;
|
|
|
|
}
|
|
|
|
}
|