Compiler projects using llvm
//===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines CachedHashString and CachedHashStringRef.  These are
/// owning and not-owning string types that store their hash in addition to
/// their string data.
///
/// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
/// (because, unlike std::string, CachedHashString lets us have empty and
/// tombstone values).
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_CACHEDHASHSTRING_H
#define LLVM_ADT_CACHEDHASHSTRING_H

#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/StringRef.h"

namespace llvm {

/// A container which contains a StringRef plus a precomputed hash.
class CachedHashStringRef {
  const char *P;
  uint32_t Size;
  uint32_t Hash;

public:
  // Explicit because hashing a string isn't free.
  explicit CachedHashStringRef(StringRef S)
      : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {}

  CachedHashStringRef(StringRef S, uint32_t Hash)
      : P(S.data()), Size(S.size()), Hash(Hash) {
    assert(S.size() <= std::numeric_limits<uint32_t>::max());
  }

  StringRef val() const { return StringRef(P, Size); }
  const char *data() const { return P; }
  uint32_t size() const { return Size; }
  uint32_t hash() const { return Hash; }
};

template <> struct DenseMapInfo<CachedHashStringRef> {
  static CachedHashStringRef getEmptyKey() {
    return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0);
  }
  static CachedHashStringRef getTombstoneKey() {
    return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1);
  }
  static unsigned getHashValue(const CachedHashStringRef &S) {
    assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
    assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
    return S.hash();
  }
  static bool isEqual(const CachedHashStringRef &LHS,
                      const CachedHashStringRef &RHS) {
    return LHS.hash() == RHS.hash() &&
           DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val());
  }
};

/// A container which contains a string, which it owns, plus a precomputed hash.
///
/// We do not null-terminate the string.
class CachedHashString {
  friend struct DenseMapInfo<CachedHashString>;

  char *P;
  uint32_t Size;
  uint32_t Hash;

  static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
  static char *getTombstoneKeyPtr() {
    return DenseMapInfo<char *>::getTombstoneKey();
  }

  bool isEmptyOrTombstone() const {
    return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
  }

  struct ConstructEmptyOrTombstoneTy {};

  CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr)
      : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
    assert(isEmptyOrTombstone());
  }

  // TODO: Use small-string optimization to avoid allocating.

public:
  explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {}

  // Explicit because copying and hashing a string isn't free.
  explicit CachedHashString(StringRef S)
      : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}

  CachedHashString(StringRef S, uint32_t Hash)
      : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
    memcpy(P, S.data(), S.size());
  }

  // Ideally this class would not be copyable.  But SetVector requires copyable
  // keys, and we want this to be usable there.
  CachedHashString(const CachedHashString &Other)
      : Size(Other.Size), Hash(Other.Hash) {
    if (Other.isEmptyOrTombstone()) {
      P = Other.P;
    } else {
      P = new char[Size];
      memcpy(P, Other.P, Size);
    }
  }

  CachedHashString &operator=(CachedHashString Other) {
    swap(*this, Other);
    return *this;
  }

  CachedHashString(CachedHashString &&Other) noexcept
      : P(Other.P), Size(Other.Size), Hash(Other.Hash) {
    Other.P = getEmptyKeyPtr();
  }

  ~CachedHashString() {
    if (!isEmptyOrTombstone())
      delete[] P;
  }

  StringRef val() const { return StringRef(P, Size); }
  uint32_t size() const { return Size; }
  uint32_t hash() const { return Hash; }

  operator StringRef() const { return val(); }
  operator CachedHashStringRef() const {
    return CachedHashStringRef(val(), Hash);
  }

  friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
    using std::swap;
    swap(LHS.P, RHS.P);
    swap(LHS.Size, RHS.Size);
    swap(LHS.Hash, RHS.Hash);
  }
};

template <> struct DenseMapInfo<CachedHashString> {
  static CachedHashString getEmptyKey() {
    return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
                            CachedHashString::getEmptyKeyPtr());
  }
  static CachedHashString getTombstoneKey() {
    return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
                            CachedHashString::getTombstoneKeyPtr());
  }
  static unsigned getHashValue(const CachedHashString &S) {
    assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
    assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
    return S.hash();
  }
  static bool isEqual(const CachedHashString &LHS,
                      const CachedHashString &RHS) {
    if (LHS.hash() != RHS.hash())
      return false;
    if (LHS.P == CachedHashString::getEmptyKeyPtr())
      return RHS.P == CachedHashString::getEmptyKeyPtr();
    if (LHS.P == CachedHashString::getTombstoneKeyPtr())
      return RHS.P == CachedHashString::getTombstoneKeyPtr();

    // This is safe because if RHS.P is the empty or tombstone key, it will have
    // length 0, so we'll never dereference its pointer.
    return LHS.val() == RHS.val();
  }
};

} // namespace llvm

#endif