Compiler projects using llvm
//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This code rewrites include invocations into their expansions.  This gives you
// a file with all included files merged into it.
//
//===----------------------------------------------------------------------===//

#include "clang/Rewrite/Frontend/Rewriters.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace llvm;

namespace {

class InclusionRewriter : public PPCallbacks {
  /// Information about which #includes were actually performed,
  /// created by preprocessor callbacks.
  struct IncludedFile {
    FileID Id;
    SrcMgr::CharacteristicKind FileType;
    IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType)
        : Id(Id), FileType(FileType) {}
  };
  Preprocessor &PP; ///< Used to find inclusion directives.
  SourceManager &SM; ///< Used to read and manage source files.
  raw_ostream &OS; ///< The destination stream for rewritten contents.
  StringRef MainEOL; ///< The line ending marker to use.
  llvm::MemoryBufferRef PredefinesBuffer; ///< The preprocessor predefines.
  bool ShowLineMarkers; ///< Show #line markers.
  bool UseLineDirectives; ///< Use of line directives or line markers.
  /// Tracks where inclusions that change the file are found.
  std::map<SourceLocation, IncludedFile> FileIncludes;
  /// Tracks where inclusions that import modules are found.
  std::map<SourceLocation, const Module *> ModuleIncludes;
  /// Tracks where inclusions that enter modules (in a module build) are found.
  std::map<SourceLocation, const Module *> ModuleEntryIncludes;
  /// Tracks where #if and #elif directives get evaluated and whether to true.
  std::map<SourceLocation, bool> IfConditions;
  /// Used transitively for building up the FileIncludes mapping over the
  /// various \c PPCallbacks callbacks.
  SourceLocation LastInclusionLocation;
public:
  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,
                    bool UseLineDirectives);
  void Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
  void setPredefinesBuffer(const llvm::MemoryBufferRef &Buf) {
    PredefinesBuffer = Buf;
  }
  void detectMainFileEOL();
  void handleModuleBegin(Token &Tok) {
    assert(Tok.getKind() == tok::annot_module_begin);
    ModuleEntryIncludes.insert(
        {Tok.getLocation(), (Module *)Tok.getAnnotationValue()});
  }
private:
  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                   SrcMgr::CharacteristicKind FileType,
                   FileID PrevFID) override;
  void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
                   SrcMgr::CharacteristicKind FileType) override;
  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                          StringRef FileName, bool IsAngled,
                          CharSourceRange FilenameRange,
                          Optional<FileEntryRef> File, StringRef SearchPath,
                          StringRef RelativePath, const Module *Imported,
                          SrcMgr::CharacteristicKind FileType) override;
  void If(SourceLocation Loc, SourceRange ConditionRange,
          ConditionValueKind ConditionValue) override;
  void Elif(SourceLocation Loc, SourceRange ConditionRange,
            ConditionValueKind ConditionValue, SourceLocation IfLoc) override;
  void WriteLineInfo(StringRef Filename, int Line,
                     SrcMgr::CharacteristicKind FileType,
                     StringRef Extra = StringRef());
  void WriteImplicitModuleImport(const Module *Mod);
  void OutputContentUpTo(const MemoryBufferRef &FromFile, unsigned &WriteFrom,
                         unsigned WriteTo, StringRef EOL, int &lines,
                         bool EnsureNewline);
  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
                           const MemoryBufferRef &FromFile, StringRef EOL,
                           unsigned &NextToWrite, int &Lines);
  const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const;
  const Module *FindModuleAtLocation(SourceLocation Loc) const;
  const Module *FindEnteredModule(SourceLocation Loc) const;
  bool IsIfAtLocationTrue(SourceLocation Loc) const;
  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
};

}  // end anonymous namespace

/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
                                     bool ShowLineMarkers,
                                     bool UseLineDirectives)
    : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
      ShowLineMarkers(ShowLineMarkers), UseLineDirectives(UseLineDirectives),
      LastInclusionLocation(SourceLocation()) {}

/// Write appropriate line information as either #line directives or GNU line
/// markers depending on what mode we're in, including the \p Filename and
/// \p Line we are located at, using the specified \p EOL line separator, and
/// any \p Extra context specifiers in GNU line directives.
void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line,
                                      SrcMgr::CharacteristicKind FileType,
                                      StringRef Extra) {
  if (!ShowLineMarkers)
    return;
  if (UseLineDirectives) {
    OS << "#line" << ' ' << Line << ' ' << '"';
    OS.write_escaped(Filename);
    OS << '"';
  } else {
    // Use GNU linemarkers as described here:
    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
    OS << '#' << ' ' << Line << ' ' << '"';
    OS.write_escaped(Filename);
    OS << '"';
    if (!Extra.empty())
      OS << Extra;
    if (FileType == SrcMgr::C_System)
      // "`3' This indicates that the following text comes from a system header
      // file, so certain warnings should be suppressed."
      OS << " 3";
    else if (FileType == SrcMgr::C_ExternCSystem)
      // as above for `3', plus "`4' This indicates that the following text
      // should be treated as being wrapped in an implicit extern "C" block."
      OS << " 3 4";
  }
  OS << MainEOL;
}

void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
  OS << "#pragma clang module import " << Mod->getFullModuleName(true)
     << " /* clang -frewrite-includes: implicit import */" << MainEOL;
}

/// FileChanged - Whenever the preprocessor enters or exits a #include file
/// it invokes this handler.
void InclusionRewriter::FileChanged(SourceLocation Loc,
                                    FileChangeReason Reason,
                                    SrcMgr::CharacteristicKind NewFileType,
                                    FileID) {
  if (Reason != EnterFile)
    return;
  if (LastInclusionLocation.isInvalid())
    // we didn't reach this file (eg: the main file) via an inclusion directive
    return;
  FileID Id = FullSourceLoc(Loc, SM).getFileID();
  auto P = FileIncludes.insert(
      std::make_pair(LastInclusionLocation, IncludedFile(Id, NewFileType)));
  (void)P;
  assert(P.second && "Unexpected revisitation of the same include directive");
  LastInclusionLocation = SourceLocation();
}

/// Called whenever an inclusion is skipped due to canonical header protection
/// macros.
void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
                                    const Token & /*FilenameTok*/,
                                    SrcMgr::CharacteristicKind /*FileType*/) {
  assert(LastInclusionLocation.isValid() &&
         "A file, that wasn't found via an inclusion directive, was skipped");
  LastInclusionLocation = SourceLocation();
}

/// This should be called whenever the preprocessor encounters include
/// directives. It does not say whether the file has been included, but it
/// provides more information about the directive (hash location instead
/// of location inside the included file). It is assumed that the matching
/// FileChanged() or FileSkipped() is called after this (or neither is
/// called if this #include results in an error or does not textually include
/// anything).
void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
                                           const Token &/*IncludeTok*/,
                                           StringRef /*FileName*/,
                                           bool /*IsAngled*/,
                                           CharSourceRange /*FilenameRange*/,
                                           Optional<FileEntryRef> /*File*/,
                                           StringRef /*SearchPath*/,
                                           StringRef /*RelativePath*/,
                                           const Module *Imported,
                                           SrcMgr::CharacteristicKind FileType){
  if (Imported) {
    auto P = ModuleIncludes.insert(std::make_pair(HashLoc, Imported));
    (void)P;
    assert(P.second && "Unexpected revisitation of the same include directive");
  } else
    LastInclusionLocation = HashLoc;
}

void InclusionRewriter::If(SourceLocation Loc, SourceRange ConditionRange,
                           ConditionValueKind ConditionValue) {
  auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
  (void)P;
  assert(P.second && "Unexpected revisitation of the same if directive");
}

void InclusionRewriter::Elif(SourceLocation Loc, SourceRange ConditionRange,
                             ConditionValueKind ConditionValue,
                             SourceLocation IfLoc) {
  auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
  (void)P;
  assert(P.second && "Unexpected revisitation of the same elif directive");
}

/// Simple lookup for a SourceLocation (specifically one denoting the hash in
/// an inclusion directive) in the map of inclusion information, FileChanges.
const InclusionRewriter::IncludedFile *
InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const {
  const auto I = FileIncludes.find(Loc);
  if (I != FileIncludes.end())
    return &I->second;
  return nullptr;
}

/// Simple lookup for a SourceLocation (specifically one denoting the hash in
/// an inclusion directive) in the map of module inclusion information.
const Module *
InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const {
  const auto I = ModuleIncludes.find(Loc);
  if (I != ModuleIncludes.end())
    return I->second;
  return nullptr;
}

/// Simple lookup for a SourceLocation (specifically one denoting the hash in
/// an inclusion directive) in the map of module entry information.
const Module *
InclusionRewriter::FindEnteredModule(SourceLocation Loc) const {
  const auto I = ModuleEntryIncludes.find(Loc);
  if (I != ModuleEntryIncludes.end())
    return I->second;
  return nullptr;
}

bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const {
  const auto I = IfConditions.find(Loc);
  if (I != IfConditions.end())
    return I->second;
  return false;
}

void InclusionRewriter::detectMainFileEOL() {
  Optional<MemoryBufferRef> FromFile = *SM.getBufferOrNone(SM.getMainFileID());
  assert(FromFile);
  if (!FromFile)
    return; // Should never happen, but whatever.
  MainEOL = FromFile->getBuffer().detectEOL();
}

/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
/// \p WriteTo - 1.
void InclusionRewriter::OutputContentUpTo(const MemoryBufferRef &FromFile,
                                          unsigned &WriteFrom, unsigned WriteTo,
                                          StringRef LocalEOL, int &Line,
                                          bool EnsureNewline) {
  if (WriteTo <= WriteFrom)
    return;
  if (FromFile == PredefinesBuffer) {
    // Ignore the #defines of the predefines buffer.
    WriteFrom = WriteTo;
    return;
  }

  // If we would output half of a line ending, advance one character to output
  // the whole line ending.  All buffers are null terminated, so looking ahead
  // one byte is safe.
  if (LocalEOL.size() == 2 &&
      LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&
      LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])
    WriteTo++;

  StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,
                        WriteTo - WriteFrom);

  if (MainEOL == LocalEOL) {
    OS << TextToWrite;
    // count lines manually, it's faster than getPresumedLoc()
    Line += TextToWrite.count(LocalEOL);
    if (EnsureNewline && !TextToWrite.endswith(LocalEOL))
      OS << MainEOL;
  } else {
    // Output the file one line at a time, rewriting the line endings as we go.
    StringRef Rest = TextToWrite;
    while (!Rest.empty()) {
      StringRef LineText;
      std::tie(LineText, Rest) = Rest.split(LocalEOL);
      OS << LineText;
      Line++;
      if (!Rest.empty())
        OS << MainEOL;
    }
    if (TextToWrite.endswith(LocalEOL) || EnsureNewline)
      OS << MainEOL;
  }
  WriteFrom = WriteTo;
}

/// Print characters from \p FromFile starting at \p NextToWrite up until the
/// inclusion directive at \p StartToken, then print out the inclusion
/// inclusion directive disabled by a #if directive, updating \p NextToWrite
/// and \p Line to track the number of source lines visited and the progress
/// through the \p FromFile buffer.
void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
                                            const Token &StartToken,
                                            const MemoryBufferRef &FromFile,
                                            StringRef LocalEOL,
                                            unsigned &NextToWrite, int &Line) {
  OutputContentUpTo(FromFile, NextToWrite,
                    SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,
                    false);
  Token DirectiveToken;
  do {
    DirectiveLex.LexFromRawLexer(DirectiveToken);
  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
  if (FromFile == PredefinesBuffer) {
    // OutputContentUpTo() would not output anything anyway.
    return;
  }
  OS << "#if 0 /* expanded by -frewrite-includes */" << MainEOL;
  OutputContentUpTo(FromFile, NextToWrite,
                    SM.getFileOffset(DirectiveToken.getLocation()) +
                        DirectiveToken.getLength(),
                    LocalEOL, Line, true);
  OS << "#endif /* expanded by -frewrite-includes */" << MainEOL;
}

/// Find the next identifier in the pragma directive specified by \p RawToken.
StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
                                                Token &RawToken) {
  RawLex.LexFromRawLexer(RawToken);
  if (RawToken.is(tok::raw_identifier))
    PP.LookUpIdentifierInfo(RawToken);
  if (RawToken.is(tok::identifier))
    return RawToken.getIdentifierInfo()->getName();
  return StringRef();
}

/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
/// and including content of included files recursively.
void InclusionRewriter::Process(FileID FileId,
                                SrcMgr::CharacteristicKind FileType) {
  MemoryBufferRef FromFile;
  {
    auto B = SM.getBufferOrNone(FileId);
    assert(B && "Attempting to process invalid inclusion");
    if (B)
      FromFile = *B;
  }
  StringRef FileName = FromFile.getBufferIdentifier();
  Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());
  RawLex.SetCommentRetentionState(false);

  StringRef LocalEOL = FromFile.getBuffer().detectEOL();

  // Per the GNU docs: "1" indicates entering a new file.
  if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
    WriteLineInfo(FileName, 1, FileType, "");
  else
    WriteLineInfo(FileName, 1, FileType, " 1");

  if (SM.getFileIDSize(FileId) == 0)
    return;

  // The next byte to be copied from the source file, which may be non-zero if
  // the lexer handled a BOM.
  unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
  assert(SM.getLineNumber(FileId, NextToWrite) == 1);
  int Line = 1; // The current input file line number.

  Token RawToken;
  RawLex.LexFromRawLexer(RawToken);

  // TODO: Consider adding a switch that strips possibly unimportant content,
  // such as comments, to reduce the size of repro files.
  while (RawToken.isNot(tok::eof)) {
    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
      RawLex.setParsingPreprocessorDirective(true);
      Token HashToken = RawToken;
      RawLex.LexFromRawLexer(RawToken);
      if (RawToken.is(tok::raw_identifier))
        PP.LookUpIdentifierInfo(RawToken);
      if (RawToken.getIdentifierInfo() != nullptr) {
        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
          case tok::pp_include:
          case tok::pp_include_next:
          case tok::pp_import: {
            CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, NextToWrite,
              Line);
            if (FileId != PP.getPredefinesFileID())
              WriteLineInfo(FileName, Line - 1, FileType, "");
            StringRef LineInfoExtra;
            SourceLocation Loc = HashToken.getLocation();
            if (const Module *Mod = FindModuleAtLocation(Loc))
              WriteImplicitModuleImport(Mod);
            else if (const IncludedFile *Inc = FindIncludeAtLocation(Loc)) {
              const Module *Mod = FindEnteredModule(Loc);
              if (Mod)
                OS << "#pragma clang module begin "
                   << Mod->getFullModuleName(true) << "\n";

              // Include and recursively process the file.
              Process(Inc->Id, Inc->FileType);

              if (Mod)
                OS << "#pragma clang module end /*"
                   << Mod->getFullModuleName(true) << "*/\n";

              // Add line marker to indicate we're returning from an included
              // file.
              LineInfoExtra = " 2";
            }
            // fix up lineinfo (since commented out directive changed line
            // numbers) for inclusions that were skipped due to header guards
            WriteLineInfo(FileName, Line, FileType, LineInfoExtra);
            break;
          }
          case tok::pp_pragma: {
            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
            if (Identifier == "clang" || Identifier == "GCC") {
              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
                // keep the directive in, commented out
                CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
                  NextToWrite, Line);
                // update our own type
                FileType = SM.getFileCharacteristic(RawToken.getLocation());
                WriteLineInfo(FileName, Line, FileType);
              }
            } else if (Identifier == "once") {
              // keep the directive in, commented out
              CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
                NextToWrite, Line);
              WriteLineInfo(FileName, Line, FileType);
            }
            break;
          }
          case tok::pp_if:
          case tok::pp_elif: {
            bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
                         tok::pp_elif);
            bool isTrue = IsIfAtLocationTrue(RawToken.getLocation());
            OutputContentUpTo(FromFile, NextToWrite,
                              SM.getFileOffset(HashToken.getLocation()),
                              LocalEOL, Line, /*EnsureNewline=*/true);
            do {
              RawLex.LexFromRawLexer(RawToken);
            } while (!RawToken.is(tok::eod) && RawToken.isNot(tok::eof));
            // We need to disable the old condition, but that is tricky.
            // Trying to comment it out can easily lead to comment nesting.
            // So instead make the condition harmless by making it enclose
            // and empty block. Moreover, put it itself inside an #if 0 block
            // to disable it from getting evaluated (e.g. __has_include_next
            // warns if used from the primary source file).
            OS << "#if 0 /* disabled by -frewrite-includes */" << MainEOL;
            if (elif) {
              OS << "#if 0" << MainEOL;
            }
            OutputContentUpTo(FromFile, NextToWrite,
                              SM.getFileOffset(RawToken.getLocation()) +
                                  RawToken.getLength(),
                              LocalEOL, Line, /*EnsureNewline=*/true);
            // Close the empty block and the disabling block.
            OS << "#endif" << MainEOL;
            OS << "#endif /* disabled by -frewrite-includes */" << MainEOL;
            OS << (elif ? "#elif " : "#if ") << (isTrue ? "1" : "0")
               << " /* evaluated by -frewrite-includes */" << MainEOL;
            WriteLineInfo(FileName, Line, FileType);
            break;
          }
          case tok::pp_endif:
          case tok::pp_else: {
            // We surround every #include by #if 0 to comment it out, but that
            // changes line numbers. These are fixed up right after that, but
            // the whole #include could be inside a preprocessor conditional
            // that is not processed. So it is necessary to fix the line
            // numbers one the next line after each #else/#endif as well.
            RawLex.SetKeepWhitespaceMode(true);
            do {
              RawLex.LexFromRawLexer(RawToken);
            } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
            OutputContentUpTo(FromFile, NextToWrite,
                              SM.getFileOffset(RawToken.getLocation()) +
                                  RawToken.getLength(),
                              LocalEOL, Line, /*EnsureNewline=*/ true);
            WriteLineInfo(FileName, Line, FileType);
            RawLex.SetKeepWhitespaceMode(false);
            break;
          }
          default:
            break;
        }
      }
      RawLex.setParsingPreprocessorDirective(false);
    }
    RawLex.LexFromRawLexer(RawToken);
  }
  OutputContentUpTo(FromFile, NextToWrite,
                    SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,
                    Line, /*EnsureNewline=*/true);
}

/// InclusionRewriterInInput - Implement -frewrite-includes mode.
void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
                                   const PreprocessorOutputOptions &Opts) {
  SourceManager &SM = PP.getSourceManager();
  InclusionRewriter *Rewrite = new InclusionRewriter(
      PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives);
  Rewrite->detectMainFileEOL();

  PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite));
  PP.IgnorePragmas();

  // First let the preprocessor process the entire file and call callbacks.
  // Callbacks will record which #include's were actually performed.
  PP.EnterMainSourceFile();
  Token Tok;
  // Only preprocessor directives matter here, so disable macro expansion
  // everywhere else as an optimization.
  // TODO: It would be even faster if the preprocessor could be switched
  // to a mode where it would parse only preprocessor directives and comments,
  // nothing else matters for parsing or processing.
  PP.SetMacroExpansionOnlyInDirectives();
  do {
    PP.Lex(Tok);
    if (Tok.is(tok::annot_module_begin))
      Rewrite->handleModuleBegin(Tok);
  } while (Tok.isNot(tok::eof));
  Rewrite->setPredefinesBuffer(SM.getBufferOrFake(PP.getPredefinesFileID()));
  Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
  OS->flush();
}