[obj2yaml] [yaml2obj] Support for MachO Load Command data

This re-applies r270115.

Many of the MachO load commands can have data appended after the command structure. This data is frequently strings, but can actually be anything. This patch adds support for three optional fields on load command yaml descriptions.

The new PayloadString YAML field is populated with the data after load commands known to have strings as extra data.

The new ZeroPadBytes YAML field is a count of zero'd bytes after the end of the load command structure before the next command. This can apply anywhere in the file. MachO2YAML verifies that bytes are zero before populating this field, and YAML2MachO will add zero'd bytes.

The new PayloadBytes YAML field stores all bytes after the end of the load command structure before the next command if they are non-zero. This is a catch all for all unhandled bytes. If MachO2Yaml populates PayloadBytes it will not populate ZeroPadBytes, instead zero'd bytes will be in the PayloadBytes structure.

llvm-svn: 270124
This commit is contained in:
Chris Bieneman 2016-05-19 20:54:43 +00:00
parent f605d10a06
commit 9f243e9a1c
4 changed files with 170 additions and 31 deletions

View File

@ -52,6 +52,9 @@ struct LoadCommand {
virtual ~LoadCommand();
llvm::MachO::macho_load_command Data;
std::vector<Section> Sections;
std::vector<llvm::yaml::Hex8> PayloadBytes;
std::string PayloadString;
uint64_t ZeroPadBytes;
};
struct Object {
@ -65,6 +68,7 @@ struct Object {
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::LoadCommand)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::Section)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex8)
namespace llvm {
namespace yaml {

View File

@ -96,6 +96,33 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
IO.setContext(nullptr);
}
template <typename StructType>
void mapLoadCommandData(IO &IO, MachOYAML::LoadCommand &LoadCommand) {}
template <>
void mapLoadCommandData<MachO::segment_command>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapOptional("Sections", LoadCommand.Sections);
}
template <>
void mapLoadCommandData<MachO::segment_command_64>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapOptional("Sections", LoadCommand.Sections);
}
template <>
void mapLoadCommandData<MachO::dylib_command>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapOptional("PayloadString", LoadCommand.PayloadString);
}
template <>
void mapLoadCommandData<MachO::dylinker_command>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapOptional("PayloadString", LoadCommand.PayloadString);
}
void MappingTraits<MachOYAML::LoadCommand>::mapping(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapRequired(
@ -106,15 +133,14 @@ void MappingTraits<MachOYAML::LoadCommand>::mapping(
case MachO::LCName: \
MappingTraits<MachO::LCStruct>::mapping(IO, \
LoadCommand.Data.LCStruct##_data); \
mapLoadCommandData<MachO::LCStruct>(IO, LoadCommand); \
break;
switch (LoadCommand.Data.load_command_data.cmd) {
#include "llvm/Support/MachO.def"
}
if (LoadCommand.Data.load_command_data.cmd == MachO::LC_SEGMENT ||
LoadCommand.Data.load_command_data.cmd == MachO::LC_SEGMENT_64) {
IO.mapOptional("Sections", LoadCommand.Sections);
}
IO.mapOptional("PayloadBytes", LoadCommand.PayloadBytes);
IO.mapOptional("ZeroPadBytes", LoadCommand.ZeroPadBytes, (uint64_t)0ull);
}
void MappingTraits<MachO::dyld_info_command>::mapping(

View File

@ -19,6 +19,11 @@ using namespace llvm;
class MachODumper {
template <typename StructType>
const char *processLoadCommandData(
MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd);
const object::MachOObjectFile &Obj;
public:
@ -32,6 +37,7 @@ public:
sizeof(MachO::LCStruct)); \
if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \
MachO::swapStruct(LC.Data.LCStruct##_data); \
EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd); \
break;
template <typename SectionType>
@ -68,9 +74,10 @@ template <> MachOYAML::Section constructSection(MachO::section_64 Sec) {
}
template <typename SectionType, typename SegmentType>
void extractSections(
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
std::vector<MachOYAML::Section> &Sections, bool IsLittleEndian) {
const char *
extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
std::vector<MachOYAML::Section> &Sections,
bool IsLittleEndian) {
auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
const SectionType *Curr =
reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
@ -84,6 +91,55 @@ void extractSections(
Sections.push_back(constructSection(*Curr));
}
}
return reinterpret_cast<const char *>(Curr);
}
template <typename StructType>
const char *MachODumper::processLoadCommandData(
MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
return LoadCmd.Ptr + sizeof(StructType);
}
template <>
const char *MachODumper::processLoadCommandData<MachO::segment_command>(
MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
return extractSections<MachO::section, MachO::segment_command>(
LoadCmd, LC.Sections, Obj.isLittleEndian());
}
template <>
const char *MachODumper::processLoadCommandData<MachO::segment_command_64>(
MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
return extractSections<MachO::section_64, MachO::segment_command_64>(
LoadCmd, LC.Sections, Obj.isLittleEndian());
}
template <typename StructType>
const char *
readString(MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
auto Start = LoadCmd.Ptr + sizeof(StructType);
auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
auto Size = strnlen(Start, MaxSize);
LC.PayloadString = StringRef(Start, Size).str();
return Start + Size;
}
template <>
const char *MachODumper::processLoadCommandData<MachO::dylib_command>(
MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
return readString<MachO::dylib_command>(LC, LoadCmd);
}
template <>
const char *MachODumper::processLoadCommandData<MachO::dylinker_command>(
MachOYAML::LoadCommand &LC,
const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
return readString<MachO::dylinker_command>(LC, LoadCmd);
}
Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
@ -99,25 +155,25 @@ Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
for (auto LoadCmd : Obj.load_commands()) {
MachOYAML::LoadCommand LC;
const char *EndPtr = LoadCmd.Ptr;
switch (LoadCmd.C.cmd) {
default:
memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
sizeof(MachO::load_command));
if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
MachO::swapStruct(LC.Data.load_command_data);
EndPtr = processLoadCommandData<MachO::load_command>(LC, LoadCmd);
break;
#include "llvm/Support/MachO.def"
}
switch (LoadCmd.C.cmd) {
case MachO::LC_SEGMENT:
extractSections<MachO::section, MachO::segment_command>(
LoadCmd, LC.Sections, Obj.isLittleEndian());
break;
case MachO::LC_SEGMENT_64:
extractSections<MachO::section_64, MachO::segment_command_64>(
LoadCmd, LC.Sections, Obj.isLittleEndian());
break;
auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
[](const char C) { return C == 0; })) {
LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
&EndPtr[RemainingBytes]);
RemainingBytes = 0;
}
LC.ZeroPadBytes = RemainingBytes;
Y->LoadCommands.push_back(std::move(LC));
}

View File

@ -94,6 +94,60 @@ SectionType constructSection(MachOYAML::Section Sec) {
return TempSec;
}
template <typename StructType>
size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, raw_ostream &OS) {
return 0;
}
template <>
size_t writeLoadCommandData<MachO::segment_command>(MachOYAML::LoadCommand &LC,
raw_ostream &OS) {
size_t BytesWritten = 0;
for (auto Sec : LC.Sections) {
auto TempSec = constructSection<MachO::section>(Sec);
OS.write(reinterpret_cast<const char *>(&(TempSec)),
sizeof(MachO::section));
BytesWritten += sizeof(MachO::section);
}
return BytesWritten;
}
template <>
size_t
writeLoadCommandData<MachO::segment_command_64>(MachOYAML::LoadCommand &LC,
raw_ostream &OS) {
size_t BytesWritten = 0;
for (auto Sec : LC.Sections) {
auto TempSec = constructSection<MachO::section_64>(Sec);
TempSec.reserved3 = Sec.reserved3;
OS.write(reinterpret_cast<const char *>(&(TempSec)),
sizeof(MachO::section_64));
BytesWritten += sizeof(MachO::section_64);
}
return BytesWritten;
}
size_t writePayloadString(MachOYAML::LoadCommand &LC, raw_ostream &OS) {
size_t BytesWritten = 0;
if (!LC.PayloadString.empty()) {
OS.write(LC.PayloadString.c_str(), LC.PayloadString.length());
BytesWritten = LC.PayloadString.length();
}
return BytesWritten;
}
template <>
size_t writeLoadCommandData<MachO::dylib_command>(MachOYAML::LoadCommand &LC,
raw_ostream &OS) {
return writePayloadString(LC, OS);
}
template <>
size_t writeLoadCommandData<MachO::dylinker_command>(MachOYAML::LoadCommand &LC,
raw_ostream &OS) {
return writePayloadString(LC, OS);
}
Error MachOWriter::writeLoadCommands(raw_ostream &OS) {
for (auto &LC : Obj.LoadCommands) {
size_t BytesWritten = 0;
@ -102,6 +156,7 @@ Error MachOWriter::writeLoadCommands(raw_ostream &OS) {
OS.write(reinterpret_cast<const char *>(&(LC.Data.LCStruct##_data)), \
sizeof(MachO::LCStruct)); \
BytesWritten = sizeof(MachO::LCStruct); \
BytesWritten += writeLoadCommandData<MachO::LCStruct>(LC, OS); \
break;
switch (LC.Data.load_command_data.cmd) {
@ -109,27 +164,25 @@ Error MachOWriter::writeLoadCommands(raw_ostream &OS) {
OS.write(reinterpret_cast<const char *>(&(LC.Data.load_command_data)),
sizeof(MachO::load_command));
BytesWritten = sizeof(MachO::load_command);
BytesWritten += writeLoadCommandData<MachO::load_command>(LC, OS);
break;
#include "llvm/Support/MachO.def"
}
if(LC.Data.load_command_data.cmd == MachO::LC_SEGMENT) {
for(auto Sec : LC.Sections) {
auto TempSec = constructSection<MachO::section>(Sec);
OS.write(reinterpret_cast<const char *>(&(TempSec)), sizeof(MachO::section));
BytesWritten += sizeof(MachO::section);
}
} else if(LC.Data.load_command_data.cmd == MachO::LC_SEGMENT_64) {
for(auto Sec : LC.Sections) {
auto TempSec = constructSection<MachO::section_64>(Sec);
TempSec.reserved3 = Sec.reserved3;
OS.write(reinterpret_cast<const char *>(&(TempSec)), sizeof(MachO::section_64));
BytesWritten += sizeof(MachO::section_64);
}
if (LC.PayloadBytes.size() > 0) {
OS.write(reinterpret_cast<const char *>(LC.PayloadBytes.data()),
LC.PayloadBytes.size());
BytesWritten += LC.PayloadBytes.size();
}
auto BytesRemaining =
LC.Data.load_command_data.cmdsize - BytesWritten;
if (LC.ZeroPadBytes > 0) {
std::vector<uint8_t> FillData;
FillData.insert(FillData.begin(), LC.ZeroPadBytes, 0);
OS.write(reinterpret_cast<char *>(FillData.data()), LC.ZeroPadBytes);
BytesWritten += LC.ZeroPadBytes;
}
auto BytesRemaining = LC.Data.load_command_data.cmdsize - BytesWritten;
if (BytesRemaining > 0) {
// TODO: Replace all this once the load command data is present in yaml.
// For now I fill with 0xDEADBEEF because it is easy to spot on a hex