Compiler projects using llvm
//===- TypeHashing.h ---------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"

#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"

#include "llvm/Support/FormatProviders.h"

#include <type_traits>

namespace llvm {
class raw_ostream;
namespace codeview {

/// A locally hashed type represents a straightforward hash code of a serialized
/// record.  The record is simply serialized, and then the bytes are hashed by
/// a standard algorithm.  This is sufficient for the case of de-duplicating
/// records within a single sequence of types, because if two records both have
/// a back-reference to the same type in the same stream, they will both have
/// the same numeric value for the TypeIndex of the back reference.
struct LocallyHashedType {
  hash_code Hash;
  ArrayRef<uint8_t> RecordData;

  /// Given a type, compute its local hash.
  static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);

  /// Given a sequence of types, compute all of the local hashes.
  template <typename Range>
  static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
    std::vector<LocallyHashedType> Hashes;
    Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
    for (const auto &R : Records)
      Hashes.push_back(hashType(R));

    return Hashes;
  }

  static std::vector<LocallyHashedType>
  hashTypeCollection(TypeCollection &Types) {
    std::vector<LocallyHashedType> Hashes;
    Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
      Hashes.push_back(hashType(Type.RecordData));
    });
    return Hashes;
  }
};

enum class GlobalTypeHashAlg : uint16_t {
  SHA1 = 0, // standard 20-byte SHA1 hash
  SHA1_8    // last 8-bytes of standard SHA1 hash
};

/// A globally hashed type represents a hash value that is sufficient to
/// uniquely identify a record across multiple type streams or type sequences.
/// This works by, for any given record A which references B, replacing the
/// TypeIndex that refers to B with a previously-computed global hash for B.  As
/// this is a recursive algorithm (e.g. the global hash of B also depends on the
/// global hashes of the types that B refers to), a global hash can uniquely
/// identify identify that A occurs in another stream that has a completely
/// different graph structure.  Although the hash itself is slower to compute,
/// probing is much faster with a globally hashed type, because the hash itself
/// is considered "as good as" the original type.  Since type records can be
/// quite large, this makes the equality comparison of the hash much faster than
/// equality comparison of a full record.
struct GloballyHashedType {
  GloballyHashedType() = default;
  GloballyHashedType(StringRef H)
      : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
  GloballyHashedType(ArrayRef<uint8_t> H) {
    assert(H.size() == 8);
    ::memcpy(Hash.data(), H.data(), 8);
  }
  std::array<uint8_t, 8> Hash;

  bool empty() const { return *(const uint64_t*)Hash.data() == 0; }

  friend inline bool operator==(const GloballyHashedType &L,
                                const GloballyHashedType &R) {
    return L.Hash == R.Hash;
  }

  friend inline bool operator!=(const GloballyHashedType &L,
                                const GloballyHashedType &R) {
    return !(L.Hash == R.Hash);
  }

  /// Given a sequence of bytes representing a record, compute a global hash for
  /// this record.  Due to the nature of global hashes incorporating the hashes
  /// of referenced records, this function requires a list of types and ids
  /// that RecordData might reference, indexable by TypeIndex.
  static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
                                     ArrayRef<GloballyHashedType> PreviousTypes,
                                     ArrayRef<GloballyHashedType> PreviousIds);

  /// Given a sequence of bytes representing a record, compute a global hash for
  /// this record.  Due to the nature of global hashes incorporating the hashes
  /// of referenced records, this function requires a list of types and ids
  /// that RecordData might reference, indexable by TypeIndex.
  static GloballyHashedType hashType(CVType Type,
                                     ArrayRef<GloballyHashedType> PreviousTypes,
                                     ArrayRef<GloballyHashedType> PreviousIds) {
    return hashType(Type.RecordData, PreviousTypes, PreviousIds);
  }

  /// Given a sequence of combined type and ID records, compute global hashes
  /// for each of them, returning the results in a vector of hashed types.
  template <typename Range>
  static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
    std::vector<GloballyHashedType> Hashes;
    bool UnresolvedRecords = false;
    for (const auto &R : Records) {
      GloballyHashedType H = hashType(R, Hashes, Hashes);
      if (H.empty())
        UnresolvedRecords = true;
      Hashes.push_back(H);
    }

    // In some rare cases, there might be records with forward references in the
    // stream. Several passes might be needed to fully hash each record in the
    // Type stream. However this occurs on very small OBJs generated by MASM,
    // with a dozen records at most. Therefore this codepath isn't
    // time-critical, as it isn't taken in 99% of cases.
    while (UnresolvedRecords) {
      UnresolvedRecords = false;
      auto HashIt = Hashes.begin();
      for (const auto &R : Records) {
        if (HashIt->empty()) {
          GloballyHashedType H = hashType(R, Hashes, Hashes);
          if (H.empty())
            UnresolvedRecords = true;
          else
            *HashIt = H;
        }
        ++HashIt;
      }
    }

    return Hashes;
  }

  /// Given a sequence of combined type and ID records, compute global hashes
  /// for each of them, returning the results in a vector of hashed types.
  template <typename Range>
  static std::vector<GloballyHashedType>
  hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
    std::vector<GloballyHashedType> IdHashes;
    for (const auto &R : Records)
      IdHashes.push_back(hashType(R, TypeHashes, IdHashes));

    return IdHashes;
  }

  static std::vector<GloballyHashedType>
  hashTypeCollection(TypeCollection &Types) {
    std::vector<GloballyHashedType> Hashes;
    Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
      Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
    });
    return Hashes;
  }
};
static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
              "GloballyHashedType must be trivially copyable so that we can "
              "reinterpret_cast arrays of hash data to arrays of "
              "GloballyHashedType");
} // namespace codeview

template <> struct DenseMapInfo<codeview::LocallyHashedType> {
  static codeview::LocallyHashedType Empty;
  static codeview::LocallyHashedType Tombstone;

  static codeview::LocallyHashedType getEmptyKey() { return Empty; }

  static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }

  static unsigned getHashValue(codeview::LocallyHashedType Val) {
    return Val.Hash;
  }

  static bool isEqual(codeview::LocallyHashedType LHS,
                      codeview::LocallyHashedType RHS) {
    if (LHS.Hash != RHS.Hash)
      return false;
    return LHS.RecordData == RHS.RecordData;
  }
};

template <> struct DenseMapInfo<codeview::GloballyHashedType> {
  static codeview::GloballyHashedType Empty;
  static codeview::GloballyHashedType Tombstone;

  static codeview::GloballyHashedType getEmptyKey() { return Empty; }

  static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }

  static unsigned getHashValue(codeview::GloballyHashedType Val) {
    return *reinterpret_cast<const unsigned *>(Val.Hash.data());
  }

  static bool isEqual(codeview::GloballyHashedType LHS,
                      codeview::GloballyHashedType RHS) {
    return LHS == RHS;
  }
};

template <> struct format_provider<codeview::LocallyHashedType> {
public:
  static void format(const codeview::LocallyHashedType &V,
                     llvm::raw_ostream &Stream, StringRef Style) {
    write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
  }
};

template <> struct format_provider<codeview::GloballyHashedType> {
public:
  static void format(const codeview::GloballyHashedType &V,
                     llvm::raw_ostream &Stream, StringRef Style) {
    for (uint8_t B : V.Hash) {
      write_hex(Stream, B, HexPrintStyle::Upper, 2);
    }
  }
};

} // namespace llvm

#endif