//===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Serialization/ASTReader.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <unordered_map>
namespace clang {
namespace tooling {
namespace dependencies {
class DependencyConsumer;
/// Modular dependency that has already been built prior to the dependency scan.
struct PrebuiltModuleDep {
std::string ModuleName;
std::string PCMFile;
std::string ModuleMapFile;
explicit PrebuiltModuleDep(const Module *M)
: ModuleName(M->getTopLevelModuleName()),
PCMFile(M->getASTFile()->getName()),
ModuleMapFile(M->PresumedModuleMapFile) {}
};
/// This is used to identify a specific module.
struct ModuleID {
/// The name of the module. This may include `:` for C++20 module partitions,
/// or a header-name for C++20 header units.
std::string ModuleName;
/// The context hash of a module represents the set of compiler options that
/// may make one version of a module incompatible with another. This includes
/// things like language mode, predefined macros, header search paths, etc...
///
/// Modules with the same name but a different \c ContextHash should be
/// treated as separate modules for the purpose of a build.
std::string ContextHash;
bool operator==(const ModuleID &Other) const {
return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
}
};
struct ModuleIDHasher {
std::size_t operator()(const ModuleID &MID) const {
return llvm::hash_combine(MID.ModuleName, MID.ContextHash);
}
};
/// An output from a module compilation, such as the path of the module file.
enum class ModuleOutputKind {
/// The module file (.pcm). Required.
ModuleFile,
/// The path of the dependency file (.d), if any.
DependencyFile,
/// The null-separated list of names to use as the targets in the dependency
/// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
DependencyTargets,
/// The path of the serialized diagnostic file (.dia), if any.
DiagnosticSerializationFile,
};
struct ModuleDeps {
/// The identifier of the module.
ModuleID ID;
/// Whether this is a "system" module.
bool IsSystem;
/// The path to the modulemap file which defines this module.
///
/// This can be used to explicitly build this module. This file will
/// additionally appear in \c FileDeps as a dependency.
std::string ClangModuleMapFile;
/// The path to where an implicit build would put the PCM for this module.
std::string ImplicitModulePCMPath;
/// A collection of absolute paths to files that this module directly depends
/// on, not including transitive dependencies.
llvm::StringSet<> FileDeps;
/// A collection of absolute paths to module map files that this module needs
/// to know about.
std::vector<std::string> ModuleMapFileDeps;
/// A collection of prebuilt modular dependencies this module directly depends
/// on, not including transitive dependencies.
std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
/// A list of module identifiers this module directly depends on, not
/// including transitive dependencies.
///
/// This may include modules with a different context hash when it can be
/// determined that the differences are benign for this compilation.
std::vector<ModuleID> ClangModuleDeps;
// Used to track which modules that were discovered were directly imported by
// the primary TU.
bool ImportedByMainFile = false;
/// Whether the TU had a dependency file. The path in \c BuildInvocation is
/// cleared to avoid leaking the specific path from the TU into the module.
bool HadDependencyFile = false;
/// Whether the TU had serialized diagnostics. The path in \c BuildInvocation
/// is cleared to avoid leaking the specific path from the TU into the module.
bool HadSerializedDiagnostics = false;
/// Compiler invocation that can be used to build this module (without paths).
CompilerInvocation BuildInvocation;
/// Gets the canonical command line suitable for passing to clang.
///
/// \param LookupModuleOutput This function is called to fill in
/// "-fmodule-file=", "-o" and other output
/// arguments.
std::vector<std::string> getCanonicalCommandLine(
llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
LookupModuleOutput) const;
/// Gets the canonical command line suitable for passing to clang, excluding
/// "-fmodule-file=" and "-o" arguments.
std::vector<std::string> getCanonicalCommandLineWithoutModulePaths() const;
};
class ModuleDepCollector;
/// Callback that records textual includes and direct modular includes/imports
/// during preprocessing. At the end of the main file, it also collects
/// transitive modular dependencies and passes everything to the
/// \c DependencyConsumer of the parent \c ModuleDepCollector.
class ModuleDepCollectorPP final : public PPCallbacks {
public:
ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
FileID PrevFID) override;
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
StringRef FileName, bool IsAngled,
CharSourceRange FilenameRange,
Optional<FileEntryRef> File, StringRef SearchPath,
StringRef RelativePath, const Module *Imported,
SrcMgr::CharacteristicKind FileType) override;
void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
const Module *Imported) override;
void EndOfMainFile() override;
private:
/// The parent dependency collector.
ModuleDepCollector &MDC;
/// Working set of direct modular dependencies.
llvm::SetVector<const Module *> DirectModularDeps;
/// Working set of direct modular dependencies that have already been built.
llvm::SetVector<const Module *> DirectPrebuiltModularDeps;
void handleImport(const Module *Imported);
/// Adds direct modular dependencies that have already been built to the
/// ModuleDeps instance.
void
addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &SeenSubmodules);
void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &SeenSubmodules);
/// Traverses the previously collected direct modular dependencies to discover
/// transitive modular dependencies and fills the parent \c ModuleDepCollector
/// with both.
ModuleID handleTopLevelModule(const Module *M);
void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules);
void addModuleDep(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules);
};
/// Collects modular and non-modular dependencies of the main file by attaching
/// \c ModuleDepCollectorPP to the preprocessor.
class ModuleDepCollector final : public DependencyCollector {
public:
ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
CompilerInstance &ScanInstance, DependencyConsumer &C,
CompilerInvocation &&OriginalCI, bool OptimizeArgs);
void attachToPreprocessor(Preprocessor &PP) override;
void attachToASTReader(ASTReader &R) override;
private:
friend ModuleDepCollectorPP;
/// The compiler instance for scanning the current translation unit.
CompilerInstance &ScanInstance;
/// The consumer of collected dependency information.
DependencyConsumer &Consumer;
/// Path to the main source file.
std::string MainFile;
/// Hash identifying the compilation conditions of the current TU.
std::string ContextHash;
/// Non-modular file dependencies. This includes the main source file and
/// textually included header files.
std::vector<std::string> FileDeps;
/// Direct and transitive modular dependencies of the main source file.
llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
/// Options that control the dependency output generation.
std::unique_ptr<DependencyOutputOptions> Opts;
/// The original Clang invocation passed to dependency scanner.
CompilerInvocation OriginalInvocation;
/// Whether to optimize the modules' command-line arguments.
bool OptimizeArgs;
/// Checks whether the module is known as being prebuilt.
bool isPrebuiltModule(const Module *M);
/// Constructs a CompilerInvocation that can be used to build the given
/// module, excluding paths to discovered modular dependencies that are yet to
/// be built.
CompilerInvocation makeInvocationForModuleBuildWithoutPaths(
const ModuleDeps &Deps,
llvm::function_ref<void(CompilerInvocation &)> Optimize) const;
};
} // end namespace dependencies
} // end namespace tooling
} // end namespace clang
#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H