Compiler projects using llvm
//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Defines the basic structure of the syntax tree. There are two kinds of nodes:
//   - leaf nodes correspond to tokens,
//   - tree nodes correspond to language grammar constructs.
//
// The tree is initially built from an AST. Each node of a newly built tree
// covers a continous subrange of expanded tokens (i.e. tokens after
// preprocessing), the specific tokens coverered are stored in the leaf nodes of
// a tree. A post-order traversal of a tree will visit leaf nodes in an order
// corresponding the original order of expanded tokens.
//
// This is still work in progress and highly experimental, we leave room for
// ourselves to completely change the design and/or implementation.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H

#include "clang/Basic/TokenKinds.h"
#include "clang/Tooling/Syntax/TokenManager.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/Allocator.h"
#include <cstdint>
#include <vector>

namespace clang {
namespace syntax {

/// A memory arena for syntax trees.
// FIXME: use BumpPtrAllocator directly.
class Arena {
public:
  llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
private:
  /// Keeps all the allocated nodes and their intermediate data structures.
  llvm::BumpPtrAllocator Allocator;
};

class Tree;
class TreeBuilder;
class FactoryImpl;
class MutationsImpl;

enum class NodeKind : uint16_t;
enum class NodeRole : uint8_t;

/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
/// a Tree (representing language constructrs).
class Node {
protected:
  /// Newly created nodes are detached from a tree, parent and sibling links are
  /// set when the node is added as a child to another one.
  Node(NodeKind Kind);
  /// Nodes are allocated on Arenas; the destructor is never called.
  ~Node() = default;

public:
  /// Nodes cannot simply be copied without violating tree invariants.
  Node(const Node &) = delete;
  Node &operator=(const Node &) = delete;
  /// Idiomatically, nodes are allocated on an Arena and never moved.
  Node(Node &&) = delete;
  Node &operator=(Node &&) = delete;

  NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
  NodeRole getRole() const { return static_cast<NodeRole>(Role); }

  /// Whether the node is detached from a tree, i.e. does not have a parent.
  bool isDetached() const;
  /// Whether the node was created from the AST backed by the source code
  /// rather than added later through mutation APIs or created with factory
  /// functions.
  /// When this flag is true, all subtrees are also original.
  /// This flag is set to false on any modifications to the node or any of its
  /// subtrees, even if this simply involves swapping existing subtrees.
  bool isOriginal() const { return Original; }
  /// If this function return false, the tree cannot be modified because there
  /// is no reasonable way to produce the corresponding textual replacements.
  /// This can happen when the node crosses macro expansion boundaries.
  ///
  /// Note that even if the node is not modifiable, its child nodes can be
  /// modifiable.
  bool canModify() const { return CanModify; }

  const Tree *getParent() const { return Parent; }
  Tree *getParent() { return Parent; }

  const Node *getNextSibling() const { return NextSibling; }
  Node *getNextSibling() { return NextSibling; }
  const Node *getPreviousSibling() const { return PreviousSibling; }
  Node *getPreviousSibling() { return PreviousSibling; }

  /// Dumps the structure of a subtree. For debugging and testing purposes.
  std::string dump(const TokenManager &SM) const;
  /// Dumps the tokens forming this subtree.
  std::string dumpTokens(const TokenManager &SM) const;

  /// Asserts invariants on this node of the tree and its immediate children.
  /// Will not recurse into the subtree. No-op if NDEBUG is set.
  void assertInvariants() const;
  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
  void assertInvariantsRecursive() const;

private:
  // Tree is allowed to change the Parent link and Role.
  friend class Tree;
  // TreeBuilder is allowed to set the Original and CanModify flags.
  friend class TreeBuilder;
  // MutationsImpl sets roles and CanModify flag.
  friend class MutationsImpl;
  // FactoryImpl sets CanModify flag.
  friend class FactoryImpl;

  void setRole(NodeRole NR);

  Tree *Parent;
  Node *NextSibling;
  Node *PreviousSibling;
  unsigned Kind : 16;
  unsigned Role : 8;
  unsigned Original : 1;
  unsigned CanModify : 1;
};

/// A leaf node points to a single token.
// FIXME: add TokenKind field (borrow some bits from the Node::kind).
class Leaf final : public Node {
public:
  Leaf(TokenManager::Key K);
  static bool classof(const Node *N);

  TokenManager::Key getTokenKey() const { return K; }

private:
  TokenManager::Key K;
};

/// A node that has children and represents a syntactic language construct.
class Tree : public Node {
  /// Iterator over children (common base for const/non-const).
  /// Not invalidated by tree mutations (holds a stable node pointer).
  template <typename DerivedT, typename NodeT>
  class ChildIteratorBase
      : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
                                          NodeT> {
  protected:
    NodeT *N = nullptr;
    using Base = ChildIteratorBase;

  public:
    ChildIteratorBase() = default;
    explicit ChildIteratorBase(NodeT *N) : N(N) {}

    friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
      return LHS.N == RHS.N;
    }

    NodeT &operator*() const { return *N; }
    DerivedT &operator++() {
      N = N->getNextSibling();
      return *static_cast<DerivedT *>(this);
    }

    /// Truthy if valid (not past-the-end).
    /// This allows: if (auto It = find_if(N.children(), ...) )
    explicit operator bool() const { return N != nullptr; }
    /// The element, or nullptr if past-the-end.
    NodeT *asPointer() const { return N; }
  };

public:
  static bool classof(const Node *N);

  Node *getFirstChild() { return FirstChild; }
  const Node *getFirstChild() const { return FirstChild; }
  Node *getLastChild() { return LastChild; }
  const Node *getLastChild() const { return LastChild; }

  const Leaf *findFirstLeaf() const;
  Leaf *findFirstLeaf() {
    return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
  }

  const Leaf *findLastLeaf() const;
  Leaf *findLastLeaf() {
    return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
  }

  /// child_iterator is not invalidated by mutations.
  struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
    using Base::ChildIteratorBase;
  };
  struct ConstChildIterator
      : ChildIteratorBase<ConstChildIterator, const Node> {
    using Base::ChildIteratorBase;
    ConstChildIterator() = default;
    ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
  };

  llvm::iterator_range<ChildIterator> getChildren() {
    return {ChildIterator(getFirstChild()), ChildIterator()};
  }
  llvm::iterator_range<ConstChildIterator> getChildren() const {
    return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
  }

  /// Find the first node with a corresponding role.
  const Node *findChild(NodeRole R) const;
  Node *findChild(NodeRole R) {
    return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
  }

protected:
  using Node::Node;

private:
  /// Append \p Child to the list of children and sets the parent pointer.
  /// A very low-level operation that does not check any invariants, only used
  /// by TreeBuilder and FactoryImpl.
  /// EXPECTS: Role != Detached.
  void appendChildLowLevel(Node *Child, NodeRole Role);
  /// Similar but prepends.
  void prependChildLowLevel(Node *Child, NodeRole Role);

  /// Like the previous overloads, but does not set role for \p Child.
  /// EXPECTS: Child->Role != Detached
  void appendChildLowLevel(Node *Child);
  void prependChildLowLevel(Node *Child);
  friend class TreeBuilder;
  friend class FactoryImpl;

  /// Replace a range of children [Begin, End) with a list of
  /// new nodes starting at \p New.
  /// Only used by MutationsImpl to implement higher-level mutation operations.
  /// (!) \p New can be null to model removal of the child range.
  /// (!) \p End can be null to model one past the end.
  /// (!) \p Begin can be null to model an append.
  void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
  friend class MutationsImpl;

  Node *FirstChild = nullptr;
  Node *LastChild = nullptr;
};

/// A list of Elements separated or terminated by a fixed token.
///
/// This type models the following grammar construct:
/// delimited-list(element, delimiter, termination, canBeEmpty)
class List : public Tree {
public:
  template <typename Element> struct ElementAndDelimiter {
    Element *element;
    Leaf *delimiter;
  };

  enum class TerminationKind {
    Terminated,
    MaybeTerminated,
    Separated,
  };

  using Tree::Tree;
  static bool classof(const Node *N);
  /// Returns the elements and corresponding delimiters. Missing elements
  /// and delimiters are represented as null pointers.
  ///
  /// For example, in a separated list:
  /// "a, b, c"  <=> [("a" , ","), ("b" , "," ), ("c" , null)]
  /// "a,  , c"  <=> [("a" , ","), (null, "," ), ("c" , null)]
  /// "a, b  c"  <=> [("a" , ","), ("b" , null), ("c" , null)]
  /// "a, b,"    <=> [("a" , ","), ("b" , "," ), (null, null)]
  ///
  /// In a terminated or maybe-terminated list:
  /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
  /// "a;  ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
  /// "a; b  c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
  /// "a; b; c"  <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
  std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();

  /// Returns the elements of the list. Missing elements are represented
  /// as null pointers in the same way as in the return value of
  /// `getElementsAsNodesAndDelimiters()`.
  std::vector<Node *> getElementsAsNodes();

  // These can't be implemented with the information we have!

  /// Returns the appropriate delimiter for this list.
  ///
  /// Useful for discovering the correct delimiter to use when adding
  /// elements to empty or one-element lists.
  clang::tok::TokenKind getDelimiterTokenKind() const;

  TerminationKind getTerminationKind() const;

  /// Whether this list can be empty in syntactically and semantically correct
  /// code.
  ///
  /// This list may be empty when the source code has errors even if
  /// canBeEmpty() returns false.
  bool canBeEmpty() const;
};

} // namespace syntax
} // namespace clang

#endif