From 461bd680c33f35b90843072fd558bbd03b12de3c Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Thu, 14 Apr 2016 14:32:57 +0000
Subject: [PATCH] Handle bit fields on big-endian systems correctly

Currently, the DataExtractor::GetMaxU64Bitfield and GetMaxS64Bitfield
routines assume the incoming "bitfield_bit_offset" parameter uses
little-endian bit numbering, i.e. a bitfield_bit_offset 0 refers to
a bitfield whose least-significant bit coincides with the least-
significant bit of the surrounding integer.

On many big-endian systems, however, the big-endian bit numbering
is used for bit fields.  Here, a bitfield_bit_offset 0 refers to
a bitfield whose most-significant bit conincides with the most-
significant bit of the surrounding integer.

Now, in principle LLDB could arbitrarily choose which semantics of
bitfield_bit_offset to use.  However, there are two problems with
the current approach:

- When parsing DWARF, LLDB decodes bit offsets in little-endian
  bit numbering on LE systems, but in big-endian bit numbering
  on BE systems.  Passing those offsets later on into the
  DataExtractor routines gives incorrect results on BE.

- In the interim, LLDB's type layer combines byte and bit offsets
  into a single number.  I.e. instead of recording bitfields by
  specifying the byte offset and byte size of the surrounding
  integer *plus* the bit offset of the bit field within that field,
  it simply records a single bit offset number.

  Now, note that converting from byte offset + bit offset to a
  single offset value and back is well-defined if we either use
  little-endian byte order *and* little-endian bit numbering,
  or use big-endian byte order *and* big-endian bit numbering.
  Any other combination will yield incorrect results.

Therefore, the simplest approach would seem to be to always use
the bit numbering that matches the system byte order.  This makes
storing a single bit offset valid, and makes the existing DWARF
code correct.  The only place to fix is to teach DataExtractor
to use big-endian bit numbering on big endian systems.

However, there is only additional caveat: we also get bit offsets
from LLDB synthetic bitfields.  While the exact semantics of those
doesn't seem to be well-defined, from test cases it appears that
the intent was for the user-provided synthetic bitfield offset to
always use little-endian bit numbering.  Therefore, on a big-endian
system we now have to convert those to big-endian bit numbering
to remain consistent.

Differential Revision: http://reviews.llvm.org/D18982

llvm-svn: 266312
---
 lldb/include/lldb/Core/DataExtractor.h    | 12 ++++---
 lldb/source/Core/DataExtractor.cpp        | 14 +++++---
 lldb/source/Core/ValueObject.cpp          |  8 +++--
 lldb/unittests/Core/CMakeLists.txt        |  1 +
 lldb/unittests/Core/DataExtractorTest.cpp | 39 +++++++++++++++++++++++
 5 files changed, 64 insertions(+), 10 deletions(-)
 create mode 100644 lldb/unittests/Core/DataExtractorTest.cpp

diff --git a/lldb/include/lldb/Core/DataExtractor.h b/lldb/include/lldb/Core/DataExtractor.h
index d5cb5e8ba4bc..51ecade2d374 100644
--- a/lldb/include/lldb/Core/DataExtractor.h
+++ b/lldb/include/lldb/Core/DataExtractor.h
@@ -763,8 +763,10 @@ public:
     ///
     /// @param[in] bitfield_bit_offset
     ///     The bit offset of the bitfield value in the extracted
-    ///     integer (the number of bits to shift the integer to the
-    ///     right).
+    ///     integer.  For little-endian data, this is the offset of
+    ///     the LSB of the bitfield from the LSB of the integer.
+    ///     For big-endian data, this is the offset of the MSB of the
+    ///     bitfield from the MSB of the integer.
     ///
     /// @return
     ///     The unsigned bitfield integer value that was extracted, or
@@ -805,8 +807,10 @@ public:
     ///
     /// @param[in] bitfield_bit_offset
     ///     The bit offset of the bitfield value in the extracted
-    ///     integer (the number of bits to shift the integer to the
-    ///     right).
+    ///     integer.  For little-endian data, this is the offset of
+    ///     the LSB of the bitfield from the LSB of the integer.
+    ///     For big-endian data, this is the offset of the MSB of the
+    ///     bitfield from the MSB of the integer.
     ///
     /// @return
     ///     The signed bitfield integer value that was extracted, or
diff --git a/lldb/source/Core/DataExtractor.cpp b/lldb/source/Core/DataExtractor.cpp
index 8cc2a2890c72..05c43da248e0 100644
--- a/lldb/source/Core/DataExtractor.cpp
+++ b/lldb/source/Core/DataExtractor.cpp
@@ -733,8 +733,11 @@ DataExtractor::GetMaxU64Bitfield (offset_t *offset_ptr, size_t size, uint32_t bi
     uint64_t uval64 = GetMaxU64 (offset_ptr, size);
     if (bitfield_bit_size > 0)
     {
-        if (bitfield_bit_offset > 0)
-            uval64 >>= bitfield_bit_offset;
+        int32_t lsbcount = bitfield_bit_offset;
+        if (m_byte_order == eByteOrderBig)
+            lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size;
+        if (lsbcount > 0)
+            uval64 >>= lsbcount;
         uint64_t bitfield_mask = ((1ul << bitfield_bit_size) - 1);
         if (!bitfield_mask && bitfield_bit_offset == 0 && bitfield_bit_size == 64)
             return uval64;
@@ -749,8 +752,11 @@ DataExtractor::GetMaxS64Bitfield (offset_t *offset_ptr, size_t size, uint32_t bi
     int64_t sval64 = GetMaxS64 (offset_ptr, size);
     if (bitfield_bit_size > 0)
     {
-        if (bitfield_bit_offset > 0)
-            sval64 >>= bitfield_bit_offset;
+        int32_t lsbcount = bitfield_bit_offset;
+        if (m_byte_order == eByteOrderBig)
+            lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size;
+        if (lsbcount > 0)
+            sval64 >>= lsbcount;
         uint64_t bitfield_mask = (((uint64_t)1) << bitfield_bit_size) - 1;
         sval64 &= bitfield_mask;
         // sign extend if needed
diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp
index 075f031d4673..69205aaf754f 100644
--- a/lldb/source/Core/ValueObject.cpp
+++ b/lldb/source/Core/ValueObject.cpp
@@ -2146,6 +2146,10 @@ ValueObject::GetSyntheticBitFieldChild (uint32_t from, uint32_t to, bool can_cre
         synthetic_child_sp = GetSyntheticChild (index_const_str);
         if (!synthetic_child_sp)
         {
+            uint32_t bit_field_size = to - from + 1;
+            uint32_t bit_field_offset = from;
+            if (GetDataExtractor().GetByteOrder() == eByteOrderBig)
+                bit_field_offset = GetByteSize() * 8 - bit_field_size - bit_field_offset;
             // We haven't made a synthetic array member for INDEX yet, so
             // lets make one and cache it for any future reference.
             ValueObjectChild *synthetic_child = new ValueObjectChild (*this,
@@ -2153,8 +2157,8 @@ ValueObject::GetSyntheticBitFieldChild (uint32_t from, uint32_t to, bool can_cre
                                                                       index_const_str,
                                                                       GetByteSize(),
                                                                       0,
-                                                                      to-from+1,
-                                                                      from,
+                                                                      bit_field_size,
+                                                                      bit_field_offset,
                                                                       false,
                                                                       false,
                                                                       eAddressTypeInvalid,
diff --git a/lldb/unittests/Core/CMakeLists.txt b/lldb/unittests/Core/CMakeLists.txt
index 90fbe7fc1e20..ad9def181de5 100644
--- a/lldb/unittests/Core/CMakeLists.txt
+++ b/lldb/unittests/Core/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_lldb_unittest(LLDBCoreTests
+  DataExtractorTest.cpp
   ScalarTest.cpp
   )
diff --git a/lldb/unittests/Core/DataExtractorTest.cpp b/lldb/unittests/Core/DataExtractorTest.cpp
new file mode 100644
index 000000000000..f22883875055
--- /dev/null
+++ b/lldb/unittests/Core/DataExtractorTest.cpp
@@ -0,0 +1,39 @@
+//===-- DataExtractorTest.cpp -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(_MSC_VER) && (_HAS_EXCEPTIONS == 0)
+// Workaround for MSVC standard library bug, which fails to include <thread> when
+// exceptions are disabled.
+#include <eh.h>
+#endif
+
+#include "gtest/gtest.h"
+
+#include "lldb/Core/DataExtractor.h"
+
+using namespace lldb_private;
+
+TEST(DataExtractorTest, GetBitfield)
+{
+    char buffer[] = { 0x01, 0x23, 0x45, 0x67 };
+    DataExtractor LE(buffer, sizeof(buffer), lldb::eByteOrderLittle, sizeof(void *));
+    DataExtractor BE(buffer, sizeof(buffer), lldb::eByteOrderBig, sizeof(void *));
+
+    lldb::offset_t offset;
+
+    offset = 0;
+    ASSERT_EQ(buffer[1], LE.GetMaxU64Bitfield(&offset, sizeof(buffer), 8, 8));
+    offset = 0;
+    ASSERT_EQ(buffer[1], BE.GetMaxU64Bitfield(&offset, sizeof(buffer), 8, 8));
+
+    offset = 0;
+    ASSERT_EQ(buffer[1], LE.GetMaxS64Bitfield(&offset, sizeof(buffer), 8, 8));
+    offset = 0;
+    ASSERT_EQ(buffer[1], BE.GetMaxS64Bitfield(&offset, sizeof(buffer), 8, 8));
+}