Compiler projects using llvm
//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MachOLayoutBuilder.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;
using namespace llvm::objcopy::macho;

/// Picks the string table flavour for \p O. Relocatable objects (MH_OBJECT)
/// get the plain MachO/MachO64 kinds, every other file type gets the
/// corresponding "Linked" kind; \p Is64Bit selects between the two widths.
StringTableBuilder::Kind
MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
  const bool IsRelocatable =
      O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
  if (Is64Bit)
    return IsRelocatable ? StringTableBuilder::MachO64
                         : StringTableBuilder::MachO64Linked;
  return IsRelocatable ? StringTableBuilder::MachO
                       : StringTableBuilder::MachOLinked;
}

/// Returns the number of bytes taken by all load commands of the object.
/// Segment commands are sized from their current section count; any other
/// command handled by MachO.def is sized as its fixed structure plus its
/// payload. A command matched by no case adds nothing to the total.
uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
  uint32_t TotalSize = 0;
  for (const LoadCommand &Command : O.LoadCommands) {
    const auto Cmd = Command.MachOLoadCommand.load_command_data.cmd;

    // A segment command is followed by one section header per section, so it
    // is handled separately from the generic table below.
    if (Cmd == MachO::LC_SEGMENT) {
      TotalSize += sizeof(MachO::segment_command) +
                   sizeof(MachO::section) * Command.Sections.size();
      continue;
    }
    if (Cmd == MachO::LC_SEGMENT_64) {
      TotalSize += sizeof(MachO::segment_command_64) +
                   sizeof(MachO::section_64) * Command.Sections.size();
      continue;
    }

    // Every remaining command: fixed-size structure plus trailing payload.
    switch (Cmd) {
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName:                                                          \
    TotalSize += sizeof(MachO::LCStruct) + Command.Payload.size();             \
    break;
#include "llvm/BinaryFormat/MachO.def"
#undef HANDLE_LOAD_COMMAND
    }
  }

  return TotalSize;
}

/// Adds the name of every symbol in the symbol table to the string table
/// builder and then finalizes the builder.
void MachOLayoutBuilder::constructStringTable() {
  for (const std::unique_ptr<SymbolEntry> &Entry : O.SymTable.Symbols) {
    StrTableBuilder.add(Entry->Name);
  }

  StrTableBuilder.finalize();
}

/// Renumbers the symbols so that each entry's Index equals its position in
/// the symbol table, starting from zero.
void MachOLayoutBuilder::updateSymbolIndexes() {
  uint32_t NextIndex = 0;
  for (auto &Entry : O.SymTable.Symbols) {
    Entry->Index = NextIndex;
    ++NextIndex;
  }
}

// Rewrites the start index and count of the local, defined external and
// undefined symbol groups in the given LC_DYSYMTAB load command, based on the
// current contents of the symbol table.
void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
  assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
  // The nlist entries in the symbol table must already be grouped by type, in
  // the order: local < defined external < undefined external.
  assert(llvm::is_sorted(O.SymTable.Symbols,
                         [](const std::unique_ptr<SymbolEntry> &A,
                            const std::unique_ptr<SymbolEntry> &B) {
                           const bool LHSLocal = A->isLocalSymbol();
                           const bool RHSLocal = B->isLocalSymbol();
                           if (LHSLocal != RHSLocal)
                             return LHSLocal;
                           return !LHSLocal && !A->isUndefinedSymbol() &&
                                  B->isUndefinedSymbol();
                         }) &&
         "Symbols are not sorted by their types.");

  auto &Symbols = O.SymTable.Symbols;
  auto Cursor = Symbols.begin();
  const auto Last = Symbols.end();

  // Leading run of symbols up to the first external one.
  uint32_t LocalCount = 0;
  while (Cursor != Last && !(*Cursor)->isExternalSymbol()) {
    ++LocalCount;
    ++Cursor;
  }

  // Following run of symbols up to the first undefined one.
  uint32_t ExtDefCount = 0;
  while (Cursor != Last && !(*Cursor)->isUndefinedSymbol()) {
    ++ExtDefCount;
    ++Cursor;
  }

  // The three groups are laid out back to back; whatever remains after the
  // first two groups is counted as undefined.
  auto &DySymTab = MLC.dysymtab_command_data;
  DySymTab.ilocalsym = 0;
  DySymTab.nlocalsym = LocalCount;
  DySymTab.iextdefsym = LocalCount;
  DySymTab.nextdefsym = ExtDefCount;
  DySymTab.iundefsym = LocalCount + ExtDefCount;
  DySymTab.nundefsym = Symbols.size() - (LocalCount + ExtDefCount);
}

// Recomputes the offset and size fields of every segment load command (other
// than __LINKEDIT, which is only recorded here) and of the sections it holds,
// since they may have been modified. Returns the running file offset after
// the last processed segment.
uint64_t MachOLayoutBuilder::layoutSegments() {
  const auto MachHeaderSize =
      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
  const bool IsObjectFile =
      O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;

  // In a relocatable object the contents start after the header and the load
  // commands; for other file types the running offset starts at zero.
  uint64_t Offset = 0;
  if (IsObjectFile)
    Offset = MachHeaderSize + O.Header.SizeOfCmds;

  for (LoadCommand &LC : O.LoadCommands) {
    MachO::macho_load_command &MLC = LC.MachOLoadCommand;
    const auto Cmd = MLC.load_command_data.cmd;
    const bool IsSegment32 = Cmd == MachO::LC_SEGMENT;
    const bool IsSegment64 = Cmd == MachO::LC_SEGMENT_64;
    // Only segment commands are laid out here.
    if (!IsSegment32 && !IsSegment64)
      continue;

    // Read the original address, size and (possibly unterminated) name from
    // whichever segment structure is in use.
    const uint64_t SegmentVmAddr = IsSegment32
                                       ? MLC.segment_command_data.vmaddr
                                       : MLC.segment_command_64_data.vmaddr;
    const uint64_t SegmentVmSize = IsSegment32
                                       ? MLC.segment_command_data.vmsize
                                       : MLC.segment_command_64_data.vmsize;
    StringRef Segname;
    if (IsSegment32)
      Segname = StringRef(MLC.segment_command_data.segname,
                          strnlen(MLC.segment_command_data.segname,
                                  sizeof(MLC.segment_command_data.segname)));
    else
      Segname = StringRef(MLC.segment_command_64_data.segname,
                          strnlen(MLC.segment_command_64_data.segname,
                                  sizeof(MLC.segment_command_64_data.segname)));

    if (Segname == "__LINKEDIT") {
      // We update the __LINKEDIT segment later (in layoutTail).
      assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
      LinkEditLoadCommand = &MLC;
      continue;
    }

    // Update file offsets and sizes of sections.
    const uint64_t SegOffset = Offset;
    uint64_t SegFileSize = 0;
    uint64_t VMSize = 0;
    for (std::unique_ptr<Section> &Sec : LC.Sections) {
      assert(SegmentVmAddr <= Sec->Addr &&
             "Section's address cannot be smaller than Segment's one");
      const uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
      if (!Sec->hasValidOffset()) {
        // Such a section gets no file offset; its Size is left untouched.
        Sec->Offset = 0;
      } else if (IsObjectFile) {
        // Object files: pack sections one after another, padding each to its
        // own alignment.
        const uint64_t PaddingSize =
            offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
        Sec->Offset = SegOffset + SegFileSize + PaddingSize;
        Sec->Size = Sec->Content.size();
        SegFileSize += PaddingSize + Sec->Size;
      } else {
        // Other files: the file offset mirrors the section's distance from
        // the start of the segment in virtual memory.
        Sec->Offset = SegOffset + SectOffset;
        Sec->Size = Sec->Content.size();
        SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
      }
      VMSize = std::max(VMSize, SectOffset + Sec->Size);
    }

    if (IsObjectFile) {
      Offset += SegFileSize;
    } else {
      // The next offset is derived from the unaligned file size, so it must
      // be computed before SegFileSize itself is rounded up.
      Offset = alignTo(Offset + SegFileSize, PageSize);
      SegFileSize = alignTo(SegFileSize, PageSize);
      // Use the original vmsize if the segment is __PAGEZERO.
      if (Segname == "__PAGEZERO")
        VMSize = SegmentVmSize;
      else
        VMSize = alignTo(VMSize, PageSize);
    }

    // Write the recomputed values back into the load command.
    if (IsSegment32) {
      auto &Seg = MLC.segment_command_data;
      Seg.cmdsize = sizeof(MachO::segment_command) +
                    sizeof(MachO::section) * LC.Sections.size();
      Seg.nsects = LC.Sections.size();
      Seg.fileoff = SegOffset;
      Seg.vmsize = VMSize;
      Seg.filesize = SegFileSize;
    } else {
      auto &Seg = MLC.segment_command_64_data;
      Seg.cmdsize = sizeof(MachO::segment_command_64) +
                    sizeof(MachO::section_64) * LC.Sections.size();
      Seg.nsects = LC.Sections.size();
      Seg.fileoff = SegOffset;
      Seg.vmsize = VMSize;
      Seg.filesize = SegFileSize;
    }
  }

  return Offset;
}

uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
  for (LoadCommand &LC : O.LoadCommands)
    for (std::unique_ptr<Section> &Sec : LC.Sections) {
      Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
      Sec->NReloc = Sec->Relocations.size();
      Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
    }

  return Offset;
}

/// Lays out the data that follows the segments and relocations, starting at
/// \p Offset, and updates the offset/size fields of every load command that
/// refers to it (including the __LINKEDIT segment command recorded by
/// layoutSegments, if any).
///
/// Returns an errc::not_supported error if an LC_DYSYMTAB command has a
/// nonzero ntoc, nmodtab, nextrefsyms, nlocrel or nextrel field, or if a load
/// command is not in the list of commands this function knows how to handle.
Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
  // If we are building the layout of an executable or dynamic library
  // which does not have any segments other than __LINKEDIT,
  // the Offset can be equal to zero by this time. It happens because of the
  // convention that in such cases the file offsets specified by LC_SEGMENT
  // start with zero (unlike the case of a relocatable object file).
  const uint64_t HeaderSize =
      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
  assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
          Offset >= HeaderSize + O.Header.SizeOfCmds) &&
         "Incorrect tail offset");
  // Never start the tail inside the header or the load commands.
  Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);

  // The order of LINKEDIT elements is as follows:
  // rebase info, binding info, weak binding info, lazy binding info, export
  // trie, function starts, dyld exports trie, chained fixups, data-in-code,
  // linker optimization hints, symbol table, indirect symbol table, symbol
  // table strings, code signature.
  // Each start offset below is the previous start plus the previous element's
  // size, so the elements are packed back to back.
  uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
  uint64_t StartOfLinkEdit = Offset;
  uint64_t StartOfRebaseInfo = StartOfLinkEdit;
  uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
  uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
  uint64_t StartOfLazyBindingInfo =
      StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
  uint64_t StartOfExportTrie =
      StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
  uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
  uint64_t StartOfDyldExportsTrie =
      StartOfFunctionStarts + O.FunctionStarts.Data.size();
  uint64_t StartOfChainedFixups =
      StartOfDyldExportsTrie + O.ExportsTrie.Data.size();
  uint64_t StartOfDataInCode =
      StartOfChainedFixups + O.ChainedFixups.Data.size();
  uint64_t StartOfLinkerOptimizationHint =
      StartOfDataInCode + O.DataInCode.Data.size();
  uint64_t StartOfSymbols =
      StartOfLinkerOptimizationHint + O.LinkerOptimizationHint.Data.size();
  uint64_t StartOfIndirectSymbols =
      StartOfSymbols + NListSize * O.SymTable.Symbols.size();
  // Each indirect symbol table entry occupies sizeof(uint32_t) bytes.
  uint64_t StartOfSymbolStrings =
      StartOfIndirectSymbols +
      sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
  uint64_t StartOfCodeSignature =
      StartOfSymbolStrings + StrTableBuilder.getSize();
  uint32_t CodeSignatureSize = 0;
  // The code signature is only laid out when the object records the index of
  // a code signature load command.
  if (O.CodeSignatureCommandIndex) {
    // The signature starts on a 16-byte boundary.
    StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);

    // Note: These calculations are to be kept in sync with the same
    // calculations performed in LLD's CodeSignatureSection.
    // Fixed headers plus the NUL-terminated output file name, rounded up to
    // the signature alignment.
    const uint32_t AllHeadersSize =
        alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
                CodeSignature.Align);
    // Number of BlockSize-sized blocks (rounded up) needed to cover everything
    // that precedes the signature.
    const uint32_t BlockCount =
        (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
        CodeSignature.BlockSize;
    // Headers followed by HashSize bytes per block, rounded up to the
    // signature alignment.
    const uint32_t Size =
        alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
                CodeSignature.Align);

    CodeSignature.StartOffset = StartOfCodeSignature;
    CodeSignature.AllHeadersSize = AllHeadersSize;
    CodeSignature.BlockCount = BlockCount;
    CodeSignature.OutputFileName = OutputFileName;
    CodeSignature.Size = Size;
    CodeSignatureSize = Size;
  }
  // Covers everything from the start of the rebase info to the end of the
  // code signature, including any padding inserted before the signature.
  uint64_t LinkEditSize =
      StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;

  // Now we have determined the layout of the contents of the __LINKEDIT
  // segment. Update its load command.
  if (LinkEditLoadCommand) {
    MachO::macho_load_command *MLC = LinkEditLoadCommand;
    switch (LinkEditLoadCommand->load_command_data.cmd) {
    case MachO::LC_SEGMENT:
      // cmdsize is just the segment structure: no section headers are added.
      MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
      MLC->segment_command_data.fileoff = StartOfLinkEdit;
      MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
      MLC->segment_command_data.filesize = LinkEditSize;
      break;
    case MachO::LC_SEGMENT_64:
      MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
      MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
      MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
      MLC->segment_command_64_data.filesize = LinkEditSize;
      break;
    }
  }

  // Point every load command that refers to data laid out above at its new
  // location and size.
  for (LoadCommand &LC : O.LoadCommands) {
    auto &MLC = LC.MachOLoadCommand;
    auto cmd = MLC.load_command_data.cmd;
    switch (cmd) {
    case MachO::LC_CODE_SIGNATURE:
      MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
      MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
      break;
    case MachO::LC_SYMTAB:
      MLC.symtab_command_data.symoff = StartOfSymbols;
      MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
      MLC.symtab_command_data.stroff = StartOfSymbolStrings;
      MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
      break;
    case MachO::LC_DYSYMTAB: {
      // These tables are not laid out by this function, so a command that
      // uses any of them is rejected.
      if (MLC.dysymtab_command_data.ntoc != 0 ||
          MLC.dysymtab_command_data.nmodtab != 0 ||
          MLC.dysymtab_command_data.nextrefsyms != 0 ||
          MLC.dysymtab_command_data.nlocrel != 0 ||
          MLC.dysymtab_command_data.nextrel != 0)
        return createStringError(llvm::errc::not_supported,
                                 "shared library is not yet supported");

      // The indirect symbol fields are only rewritten when there are indirect
      // symbols; otherwise they keep whatever values they already had.
      if (!O.IndirectSymTable.Symbols.empty()) {
        MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
        MLC.dysymtab_command_data.nindirectsyms =
            O.IndirectSymTable.Symbols.size();
      }

      updateDySymTab(MLC);
      break;
    }
    case MachO::LC_DATA_IN_CODE:
      MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
      MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
      break;
    case MachO::LC_LINKER_OPTIMIZATION_HINT:
      MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint;
      MLC.linkedit_data_command_data.datasize =
          O.LinkerOptimizationHint.Data.size();
      break;
    case MachO::LC_FUNCTION_STARTS:
      MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
      MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
      break;
    case MachO::LC_DYLD_CHAINED_FIXUPS:
      MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
      MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
      break;
    case MachO::LC_DYLD_EXPORTS_TRIE:
      MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
      MLC.linkedit_data_command_data.datasize = O.ExportsTrie.Data.size();
      break;
    case MachO::LC_DYLD_INFO:
    case MachO::LC_DYLD_INFO_ONLY:
      // An empty opcode stream or trie is recorded with offset zero rather
      // than with its (zero-length) position in the file.
      MLC.dyld_info_command_data.rebase_off =
          O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
      MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
      MLC.dyld_info_command_data.bind_off =
          O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
      MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
      MLC.dyld_info_command_data.weak_bind_off =
          O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
      MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
      MLC.dyld_info_command_data.lazy_bind_off =
          O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
      MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
      MLC.dyld_info_command_data.export_off =
          O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
      MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
      break;
    // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
    // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
    // relative virtual address. At the moment modification of the __TEXT
    // segment of executables isn't supported anyway (e.g. data in code entries
    // are not recalculated). Moreover, in general
    // LC_ENCRYPTION_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
    // without making additional assumptions (e.g. that the entire __TEXT
    // segment should be encrypted) we do not know how to recalculate the
    // boundaries of the encrypted part. For now just copy over these load
    // commands until we encounter a real world usecase where
    // LC_ENCRYPTION_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
    case MachO::LC_ENCRYPTION_INFO:
    case MachO::LC_ENCRYPTION_INFO_64:
    case MachO::LC_LOAD_DYLINKER:
    case MachO::LC_MAIN:
    case MachO::LC_RPATH:
    case MachO::LC_SEGMENT:
    case MachO::LC_SEGMENT_64:
    case MachO::LC_VERSION_MIN_MACOSX:
    case MachO::LC_VERSION_MIN_IPHONEOS:
    case MachO::LC_VERSION_MIN_TVOS:
    case MachO::LC_VERSION_MIN_WATCHOS:
    case MachO::LC_BUILD_VERSION:
    case MachO::LC_ID_DYLIB:
    case MachO::LC_LOAD_DYLIB:
    case MachO::LC_LOAD_WEAK_DYLIB:
    case MachO::LC_UUID:
    case MachO::LC_SOURCE_VERSION:
    case MachO::LC_THREAD:
    case MachO::LC_UNIXTHREAD:
    case MachO::LC_SUB_FRAMEWORK:
    case MachO::LC_SUB_UMBRELLA:
    case MachO::LC_SUB_CLIENT:
    case MachO::LC_SUB_LIBRARY:
    case MachO::LC_LINKER_OPTION:
      // Nothing to update.
      break;
    default:
      // Abort if it's unsupported in order to prevent corrupting the object.
      return createStringError(llvm::errc::not_supported,
                               "unsupported load command (cmd=0x%x)", cmd);
    }
  }

  return Error::success();
}

/// Recomputes the whole file layout: refreshes the header's load command
/// count and size, rebuilds the string table and symbol indexes, then lays
/// out segments, relocations and the tail in that order. Returns whatever
/// layoutTail returns.
Error MachOLayoutBuilder::layout() {
  // Header bookkeeping comes first: layoutSegments and layoutTail both read
  // SizeOfCmds.
  O.Header.NCmds = O.LoadCommands.size();
  O.Header.SizeOfCmds = computeSizeOfCmds();

  // The string table must be finalized before layoutTail queries its size.
  constructStringTable();
  updateSymbolIndexes();

  // Each stage starts where the previous one ended.
  const uint64_t SegmentsEnd = layoutSegments();
  const uint64_t RelocationsEnd = layoutRelocations(SegmentsEnd);
  return layoutTail(RelocationsEnd);
}