Projet_SETI_RISC-V/riscv-gnu-toolchain/gcc/libsanitizer/tsan/tsan_clock.cpp

//===-- tsan_clock.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains fixed-size vector clock for maximum number of threads.
// SyncClock contains growable vector clock for currently necessary number of
// threads.
// Together they implement very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {
//     for (int i = 0; i < kMaxThreads; i++) {
//       tmp = clock[i];
//       clock[i] = max(clock[i], sc->clock[i]);
//       sc->clock[i] = tmp;
//     }
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cpp.
// However, the implementation is significantly more complex. The complexity
// allows to implement important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is in
// read-only phase, these reads will be O(1); if it later switches to read/write
// phase, the implementation will correctly handle that by switching to O(N).
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.

namespace __tsan {

// Returns a pointer to the reference counter of cb, which lives in the
// kRefIdx slot of the block's table and is accessed as an atomic u32.
static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  auto *counter_slot = &cb->table[ClockBlock::kRefIdx];
  return reinterpret_cast<atomic_uint32_t *>(counter_slot);
}

// Drops one reference to the first level block idx.
// While other references remain, the counter is simply decremented. When the
// caller holds the only reference, the `blocks` second level blocks recorded
// in the table and the first level block itself are returned to the allocator.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  ClockBlock *const first_level = ctx->clock_alloc.Map(idx);
  atomic_uint32_t *const counter = ref_ptr(first_level);
  u32 v = atomic_load(counter, memory_order_acquire);
  // v is handed to the CAS by pointer and re-checked on every iteration.
  while (true) {
    CHECK_GT(v, 0);
    if (v == 1)
      break;
    const bool decremented = atomic_compare_exchange_strong(
        counter, &v, v - 1, memory_order_acq_rel);
    if (decremented)
      return;
  }
  // The first level block owns the second level blocks, so free them as well.
  for (uptr i = 0; i != blocks; ++i) {
    const u32 second_level = first_level->table[ClockBlock::kBlockIdx - i];
    ctx->clock_alloc.Free(c, second_level);
  }
  ctx->clock_alloc.Free(c, idx);
}

// Creates the clock of thread `tid`. `reused` is stored incremented by one
// because the value 0 is reserved. The clock starts out all-zero and covers
// elements [0, tid].
ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid),
      reused_(reused + 1),  // 0 has special meaning
      last_acquire_(),
      global_acquire_(),
      cached_idx_(),
      cached_size_(),
      cached_blocks_() {
  CHECK_LT(tid, kMaxTidInClock);
  // reused_ must survive a round trip through the bits left above kClkBits.
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  internal_memset(clk_, 0, sizeof(clk_));
  nclk_ = tid_ + 1;
}

// Forgets the cached clock block, if there is one: drops the reference held on
// it and zeroes the cache bookkeeping fields.
void ThreadClock::ResetCached(ClockCache *c) {
  if (cached_idx_ == 0)
    return;  // Nothing is cached.
  UnrefClockBlock(c, cached_idx_, cached_blocks_);
  cached_blocks_ = 0;
  cached_size_ = 0;
  cached_idx_ = 0;
}

// Acquire operation: raises every element of this thread's clock to at least
// the corresponding element of src.
// Dirty entries of src are always merged. The full O(N) merge is skipped when
// src's element for this thread already carries this thread's reuse counter,
// i.e. the thread has acquired src since the last release on it.
void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);

  // An empty sync clock has nothing to offer.
  const uptr nclk = src->size_;
  if (nclk == 0)
    return;

  bool acquired = false;

  // Merge the dirty entries first; they are not covered by the flag check.
  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
    SyncClock::Dirty dirty = src->dirty_[slot];
    unsigned dirty_tid = dirty.tid();
    if (dirty_tid == kInvalidTid)
      continue;
    if (clk_[dirty_tid] < dirty.epoch) {
      clk_[dirty_tid] = dirty.epoch;
      acquired = true;
    }
  }

  // Have we already acquired src after the last release operation on it?
  const bool already_acquired =
      tid_ < nclk && src->elem(tid_).reused == reused_;
  if (!already_acquired) {
    // O(N) acquire.
    nclk_ = max(nclk_, nclk);
    uptr pos = 0;
    for (ClockElem &src_elem : *src) {
      const u64 epoch = src_elem.epoch;
      if (clk_[pos] < epoch) {
        clk_[pos] = epoch;
        acquired = true;
      }
      pos++;
    }

    // Leave a mark in src so that the next acquire can take the short path.
    if (tid_ < nclk)
      src->elem(tid_).reused = reused_;
  }

  if (!acquired)
    return;
  // Something new was learned: record when, and drop the cached clock.
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

// Combined operation: sc receives the clock this thread had on entry, while
// this thread's clock is raised to at least the previous contents of sc.
void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(sc->size_, kMaxTid);

  if (sc->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, sc);
    return;
  }

  nclk_ = max(nclk_, static_cast<uptr>(sc->size_));

  // Grow sc when it is shorter than this thread's clock.
  if (sc->size_ < nclk_)
    sc->Resize(c, nclk_);

  bool acquired = false;

  // sc is about to be modified: get a private copy and fold the dirty
  // entries into the main array before touching the elements.
  sc->Unshare(c);
  sc->FlushDirty();
  uptr pos = 0;
  for (ClockElem &ce : *sc) {
    const u64 mine = clk_[pos];
    const u64 theirs = ce.epoch;
    if (mine < theirs) {
      clk_[pos] = theirs;
      acquired = true;
    }
    // Store the pre-merge value of this thread's clock.
    ce.epoch = mine;
    ce.reused = 0;
    pos++;
  }
  sc->release_store_tid_ = kInvalidTid;
  sc->release_store_reused_ = 0;

  if (!acquired)
    return;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

// Release operation: dst becomes the element-wise max of dst and this
// thread's clock (see the model at the top of this file).
// An empty dst is handled by ReleaseStore. Otherwise, if this thread has not
// acquired anything since its last release to dst, only the thread's own
// entry is updated; else the whole clock is merged in O(N).
void ThreadClock::release(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->elem(tid_).
  if (!HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    // dst stays marked as a release-store result only if that release-store
    // was done by this very thread (same tid and same reuse counter).
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  dst->Unshare(c);
  // First, remember whether we've acquired dst.
  // This must be evaluated before FlushDirty() and the loop below, both of
  // which overwrite the state IsAlreadyAcquired() inspects.
  bool acquired = IsAlreadyAcquired(dst);
  // Update dst->clk_.
  dst->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = max(ce.epoch, clk_[i]);
    ce.reused = 0;
    i++;
  }
  // After a merge dst is no longer the result of a release-store by a
  // single thread.
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

// Release-store operation: dst is overwritten with this thread's clock.
// Three paths, tried in order:
//  1. dst is empty and this thread has a cached clock block: dst adopts the
//     cached block and records the thread's own time in a dirty entry.
//  2. dst already holds a release-store by this thread and nothing has been
//     acquired since: only the thread's own entry is refreshed.
//  3. Otherwise every element of dst is rewritten, and the resulting block is
//     cached for later release operations when it qualifies.
void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0 && cached_idx_ != 0) {
    // Reuse the cached clock.
    // Note: we could reuse/cache the cached clock in more cases:
    // we could update the existing clock and cache it, or replace it with the
    // currently cached clock and release the old one. And for a shared
    // existing clock, we could replace it with the currently cached;
    // or unshare, update and cache. But, for simplicity, we currently reuse
    // cached clock only when the target clock is empty.
    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
    dst->tab_idx_ = cached_idx_;
    dst->size_ = cached_size_;
    dst->blocks_ = cached_blocks_;
    CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid);
    // The cached clock is shared (immutable),
    // so this is where we store the current clock.
    dst->dirty_[0].set_tid(tid_);
    dst->dirty_[0].epoch = clk_[tid_];
    dst->release_store_tid_ = tid_;
    dst->release_store_reused_ = reused_;
    // Remember that we don't need to acquire it in future.
    dst->elem(tid_).reused = reused_;
    // Grab a reference.
    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // dst is already this thread's release-store and the thread has learned
  // nothing new since: refreshing the thread's own entry is enough.
  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      !HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    return;
  }

  // O(N) release-store.
  dst->Unshare(c);
  // Note: dst can be larger than this ThreadClock.
  // This is fine since clk_ beyond size is all zeros.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = clk_[i];
    ce.reused = 0;
    i++;
  }
  // The main array now holds the complete clock, so any dirty entries are
  // simply discarded rather than flushed.
  for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->elem(tid_).reused = reused_;

  // If the resulting clock is cachable, cache it for future release operations.
  // The clock is always cachable if we released to an empty sync object.
  if (cached_idx_ == 0 && dst->Cachable()) {
    // Grab a reference to the ClockBlock.
    // When the counter reads 1 a plain store of 2 is used instead of an
    // atomic read-modify-write; otherwise fall back to fetch_add.
    atomic_uint32_t *ref = ref_ptr(dst->tab_);
    if (atomic_load(ref, memory_order_acquire) == 1)
      atomic_store_relaxed(ref, 2);
    else
      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    cached_idx_ = dst->tab_idx_;
    cached_size_ = dst->size_;
    cached_blocks_ = dst->blocks_;
  }
}

// Acquire followed by release on the same sync clock.
// Note: the second step calls ReleaseStore() rather than release(). In terms
// of the model described at the top of this file the two give the same
// result here: after acquire() every element of this thread's clock is >=
// the corresponding element of dst, so the max taken by release() equals
// this thread's clock.
void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Publishes only this thread's own time into dst.
// Preferred path: record it in a dirty slot that is either free or already
// owned by this thread, which leaves the 'acquired' flags untouched.
// Fallback when both dirty slots belong to other threads: write the element
// directly, clear every 'acquired' flag and flush the dirty slots.
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
    SyncClock::Dirty &entry = dst->dirty_[slot];
    const unsigned owner = entry.tid();
    if (owner != tid_ && owner != kInvalidTid)
      continue;  // Slot is taken by another thread.
    entry.set_tid(tid_);
    entry.epoch = clk_[tid_];
    return;
  }
  // Reset all 'acquired' flags, O(N).
  // dst's elements are modified below, so it must not be shared.
  dst->Unshare(c);
  dst->elem(tid_).epoch = clk_[tid_];
  for (uptr pos = 0; pos < dst->size_; pos++) {
    dst->elem(pos).reused = 0;
  }
  dst->FlushDirty();
}

// Tells whether this thread's clock already includes everything in src:
// src's element for this thread must carry this thread's reuse counter, and
// no dirty entry of src may be ahead of this thread's clock.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  const bool flag_matches = src->elem(tid_).reused == reused_;
  if (!flag_matches)
    return false;
  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
    SyncClock::Dirty dirty = src->dirty_[slot];
    const unsigned dirty_tid = dirty.tid();
    if (dirty_tid != kInvalidTid && clk_[dirty_tid] < dirty.epoch)
      return false;
  }
  return true;
}

// Reports whether this thread has acquired anything from other clocks
// (directly or indirectly) after it last released to dst. The epoch stored
// for this thread in dst is compared against the epochs of the thread's last
// local acquire and of the last global acquire.
bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
  const u64 my_epoch = dst->elem(tid_).epoch;
  if (my_epoch <= last_acquire_)
    return true;
  return my_epoch <= atomic_load_relaxed(&global_acquire_);
}

// Overwrites one element of the vector clock with v, which must not be lower
// than the current value, and extends the clock if tid lies beyond its end.
// Treated like an acquire: last_acquire_ is refreshed and the cached clock
// is dropped.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid]);
  if (tid >= nclk_)
    nclk_ = tid + 1;
  clk_[tid] = v;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}

// Creates an empty sync clock: no table, zero size, no release-store owner
// and no dirty entries (all done by ResetImpl()).
SyncClock::SyncClock() {
  ResetImpl();
}

// The destructor releases nothing itself; it only verifies that the clock is
// already empty.
SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(blocks_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

// Returns the clock to the empty state. A non-empty clock first drops its
// reference to the underlying block.
void SyncClock::Reset(ClockCache *c) {
  const bool has_storage = size_ != 0;
  if (has_storage) {
    UnrefClockBlock(c, tab_idx_, blocks_);
  }
  ResetImpl();
}

void SyncClock::ResetImpl() {
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  blocks_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid);
}

// Grows the clock to hold nclk elements.
// The clock is unshared first. If the current capacity suffices only size_
// changes. Otherwise storage is added: an empty clock gets its first level
// block; a clock that still keeps elements in the first level block has them
// moved into a freshly allocated second level block; then zeroed second level
// blocks are appended until the capacity reaches nclk.
void SyncClock::Resize(ClockCache *c, uptr nclk) {
  Unshare(c);
  if (nclk <= capacity()) {
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (size_ == 0) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(blocks_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    // The new block starts with a single reference, held by this clock.
    atomic_store_relaxed(ref_ptr(tab_), 1);
    size_ = 1;
  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
    // Some elements (the last `top` ones) still live in the first level
    // block. Move them to a new second level block and zero the remainder of
    // that block as well as the vacated area of the first level block.
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
    CHECK_LT(top, ClockBlock::kClockCount);
    const uptr move = top * sizeof(tab_->clock[0]);
    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
    internal_memset(tab_->clock, 0, move);
    append_block(idx);
  }
  // At this point we have first level table allocated and all clock elements
  // are evacuated from it to a second level block.
  // Add second level tables as necessary.
  while (nclk > capacity()) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    append_block(idx);
  }
  size_ = nclk;
}

// Writes the epoch of every valid dirty entry into the matching element of
// the main clock array and marks the entry as unused.
void SyncClock::FlushDirty() {
  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
    Dirty *dirty = &dirty_[slot];
    if (dirty->tid() == kInvalidTid)
      continue;  // Unused entry.
    CHECK_LT(dirty->tid(), size_);
    elem(dirty->tid()).epoch = dirty->epoch;
    dirty->set_tid(kInvalidTid);
  }
}

// Returns true when the clock's block is referenced by more than one owner.
// An empty clock has no block and is never shared.
bool SyncClock::IsShared() const {
  if (size_ == 0)
    return false;
  u32 v = atomic_load(ref_ptr(tab_), memory_order_acquire);
  CHECK_GT(v, 0);
  return v >= 2;
}

// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
//
// Implementation: the current state is moved into a temporary SyncClock, this
// object is reset and resized to the same size (which allocates fresh
// storage), the elements and metadata are copied back, and finally the
// temporary drops its reference to the old storage.
void SyncClock::Unshare(ClockCache *c) {
  if (!IsShared())
    return;
  // First, copy current state into old.
  SyncClock old;
  old.tab_ = tab_;
  old.tab_idx_ = tab_idx_;
  old.size_ = size_;
  old.blocks_ = blocks_;
  old.release_store_tid_ = release_store_tid_;
  old.release_store_reused_ = release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    old.dirty_[i] = dirty_[i];
  // Then, clear current object.
  // ResetImpl() does not unref: the reference is now accounted to `old`.
  ResetImpl();
  // Allocate brand new clock in the current object.
  // (Resize() calls Unshare() again, which returns immediately because the
  // object is empty at this point.)
  Resize(c, old.size_);
  // Now copy state back into this object.
  Iter old_iter(&old);
  for (ClockElem &ce : *this) {
    ce = *old_iter;
    ++old_iter;
  }
  release_store_tid_ = old.release_store_tid_;
  release_store_reused_ = old.release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dirty_[i] = old.dirty_[i];
  // Drop reference to old and delete if necessary.
  // This also leaves `old` empty, as its destructor requires.
  old.Reset(c);
}

// Decides whether this clock may be cached for future release operations:
// it must be non-empty, have no valid dirty entries, and its block must
// currently have exactly one reference.
ALWAYS_INLINE bool SyncClock::Cachable() const {
  if (size_ == 0)
    return false;
  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
    const bool slot_in_use = dirty_[slot].tid() != kInvalidTid;
    if (slot_in_use)
      return false;
  }
  const u32 refs = atomic_load_relaxed(ref_ptr(tab_));
  return refs == 1;
}

// elem presents the two-level structure as a linear array: element `tid` is
// looked up either in one of the second level blocks or, for the trailing
// elements, in the first level block itself.
// Note: this is used only for one time accesses, vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  const uptr block = tid / ClockBlock::kClockCount;
  DCHECK_LE(block, blocks_);
  const uptr offset = tid % ClockBlock::kClockCount;
  if (block != blocks_) {
    // The element lives in a second level block.
    ClockBlock *second_level = ctx->clock_alloc.Map(get_block(block));
    return second_level->clock[offset];
  }
  // The element lives in the first level block.
  return tab_->clock[offset];
}

// Number of clock elements that fit into the currently allocated storage:
// all second level blocks plus whatever part of the first level block is not
// occupied by the block index table and the reference counter.
ALWAYS_INLINE uptr SyncClock::capacity() const {
  if (size_ == 0)
    return 0;
  // How many table entries take up the space of one clock element.
  const uptr ratio =
      sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
  // Clock-element slots consumed by the table; +1 for the ref counter.
  const uptr table_slots = RoundUpTo(blocks_ + 1, ratio) / ratio;
  // Clock elements that still fit into the first level block.
  const uptr first_level_room = ClockBlock::kClockCount - table_slots;
  return blocks_ * ClockBlock::kClockCount + first_level_room;
}

// Returns the allocator index of second level block number bi. Block indices
// are stored in the first level table going downwards from kBlockIdx.
ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
  DCHECK(size_);
  DCHECK_LT(bi, blocks_);
  const uptr table_slot = ClockBlock::kBlockIdx - bi;
  return tab_->table[table_slot];
}

// Registers idx as the next second level block: bumps blocks_ and stores idx
// in the table slot for the new block, which must still be zero.
ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
  const uptr bi = blocks_;
  blocks_++;
  CHECK_EQ(get_block(bi), 0);
  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}

// Used only by tests.
// Returns the epoch recorded for tid; a dirty entry for tid takes precedence
// over the value stored in the main clock array.
u64 SyncClock::get(unsigned tid) const {
  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
    const Dirty &entry = dirty_[slot];
    if (entry.tid() != tid)
      continue;
    return entry.epoch;
  }
  return elem(tid).epoch;
}

// Used only by Iter test.
// Returns the epoch stored for tid in the main clock array; dirty entries
// are not consulted.
u64 SyncClock::get_clean(unsigned tid) const {
  const ClockElem &stored = elem(tid);
  return stored.epoch;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
         release_store_tid_, release_store_reused_, dirty_[0].tid(),
         dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch);
}

// Advances the iterator to the next block of elements and sets pos_/end_ to
// the range to walk there. Second level blocks are visited first, in order;
// the elements kept in the first level block come last. When nothing is left
// parent_ is set to nullptr.
void SyncClock::Iter::Next() {
  // Finished with the current block, move on to the next one.
  block_++;
  if (block_ < parent_->blocks_) {
    // Iterate over the next second level block.
    u32 idx = parent_->get_block(block_);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    pos_ = &cb->clock[0];
    // The last used block may be only partially filled, hence the min().
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  // Past the second level blocks: the first level block is visited only if
  // size_ exceeds what the second level blocks hold.
  if (block_ == parent_->blocks_ &&
      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
    // Iterate over elements in the first level block.
    pos_ = &parent_->tab_->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  parent_ = nullptr;  // denotes end
}
}  // namespace __tsan
projet 2023-03-06 14:48:14 +01:00			`//===-- tsan_clock.cpp ----------------------------------------------------===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file is a part of ThreadSanitizer (TSan), a race detector.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`#include "tsan_clock.h"`
			`#include "tsan_rtl.h"`
			`#include "sanitizer_common/sanitizer_placement_new.h"`

			`// SyncClock and ThreadClock implement vector clocks for sync variables`
			`// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.`
			`// ThreadClock contains fixed-size vector clock for maximum number of threads.`
			`// SyncClock contains growable vector clock for currently necessary number of`
			`// threads.`
			`// Together they implement very simple model of operations, namely:`
			`//`
			`// void ThreadClock::acquire(const SyncClock *src) {`
			`// for (int i = 0; i < kMaxThreads; i++)`
			`// clock[i] = max(clock[i], src->clock[i]);`
			`// }`
			`//`
			`// void ThreadClock::release(SyncClock *dst) const {`
			`// for (int i = 0; i < kMaxThreads; i++)`
			`// dst->clock[i] = max(dst->clock[i], clock[i]);`
			`// }`
			`//`
			`// void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {`
			`// for (int i = 0; i < kMaxThreads; i++) {`
			`// tmp = clock[i];`
			`// clock[i] = max(clock[i], sc->clock[i]);`
			`// sc->clock[i] = tmp;`
			`// }`
			`// }`
			`//`
			`// void ThreadClock::ReleaseStore(SyncClock *dst) const {`
			`// for (int i = 0; i < kMaxThreads; i++)`
			`// dst->clock[i] = clock[i];`
			`// }`
			`//`
			`// void ThreadClock::acq_rel(SyncClock *dst) {`
			`// acquire(dst);`
			`// release(dst);`
			`// }`
			`//`
			`// Conformance to this model is extensively verified in tsan_clock_test.cpp.`
			`// However, the implementation is significantly more complex. The complexity`
			`// allows to implement important classes of use cases in O(1) instead of O(N).`
			`//`
			`// The use cases are:`
			`// 1. Singleton/once atomic that has a single release-store operation followed`
			`// by zillions of acquire-loads (the acquire-load is O(1)).`
			`// 2. Thread-local mutex (both lock and unlock can be O(1)).`
			`// 3. Leaf mutex (unlock is O(1)).`
			`// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).`
			`// 5. An atomic with a single writer (writes can be O(1)).`
			`// The implementation dynamically adopts to workload. So if an atomic is in`
			`// read-only phase, these reads will be O(1); if it later switches to read/write`
			`// phase, the implementation will correctly handle that by switching to O(N).`
			`//`
			`// Thread-safety note: all const operations on SyncClock's are conducted under`
			`// a shared lock; all non-const operations on SyncClock's are conducted under`
			`// an exclusive lock; ThreadClock's are private to respective threads and so`
			`// do not need any protection.`
			`//`
			`// Description of SyncClock state:`
			`// clk_ - variable size vector clock, low kClkBits hold timestamp,`
			`// the remaining bits hold "acquired" flag (the actual value is thread's`
			`// reused counter);`
			`// if acquired == thr->reused_, then the respective thread has already`
			`// acquired this clock (except possibly for dirty elements).`
			`// dirty_ - holds up to two indices in the vector clock that other threads`
			`// need to acquire regardless of "acquired" flag value;`
			`// release_store_tid_ - denotes that the clock state is a result of`
			`// release-store operation by the thread with release_store_tid_ index.`
			`// release_store_reused_ - reuse count of release_store_tid_.`

			`namespace __tsan {`

			`static atomic_uint32_t *ref_ptr(ClockBlock *cb) {`
			`return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);`
			`}`

			`// Drop reference to the first level block idx.`
			`static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {`
			`ClockBlock *cb = ctx->clock_alloc.Map(idx);`
			`atomic_uint32_t *ref = ref_ptr(cb);`
			`u32 v = atomic_load(ref, memory_order_acquire);`
			`for (;;) {`
			`CHECK_GT(v, 0);`
			`if (v == 1)`
			`break;`
			`if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))`
			`return;`
			`}`
			`// First level block owns second level blocks, so them as well.`
			`for (uptr i = 0; i < blocks; i++)`
			`ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);`
			`ctx->clock_alloc.Free(c, idx);`
			`}`

			`ThreadClock::ThreadClock(unsigned tid, unsigned reused)`
			`: tid_(tid)`
			`, reused_(reused + 1) // 0 has special meaning`
			`, last_acquire_()`
			`, global_acquire_()`
			`, cached_idx_()`
			`, cached_size_()`
			`, cached_blocks_() {`
			`CHECK_LT(tid, kMaxTidInClock);`
			`CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);`
			`nclk_ = tid_ + 1;`
			`internal_memset(clk_, 0, sizeof(clk_));`
			`}`

			`void ThreadClock::ResetCached(ClockCache *c) {`
			`if (cached_idx_) {`
			`UnrefClockBlock(c, cached_idx_, cached_blocks_);`
			`cached_idx_ = 0;`
			`cached_size_ = 0;`
			`cached_blocks_ = 0;`
			`}`
			`}`

			`void ThreadClock::acquire(ClockCache *c, SyncClock *src) {`
			`DCHECK_LE(nclk_, kMaxTid);`
			`DCHECK_LE(src->size_, kMaxTid);`

			`// Check if it's empty -> no need to do anything.`
			`const uptr nclk = src->size_;`
			`if (nclk == 0)`
			`return;`

			`bool acquired = false;`
			`for (unsigned i = 0; i < kDirtyTids; i++) {`
			`SyncClock::Dirty dirty = src->dirty_[i];`
			`unsigned tid = dirty.tid();`
			`if (tid != kInvalidTid) {`
			`if (clk_[tid] < dirty.epoch) {`
			`clk_[tid] = dirty.epoch;`
			`acquired = true;`
			`}`
			`}`
			`}`

			`// Check if we've already acquired src after the last release operation on src`
			`if (tid_ >= nclk \|\| src->elem(tid_).reused != reused_) {`
			`// O(N) acquire.`
			`nclk_ = max(nclk_, nclk);`
			`u64 *dst_pos = &clk_[0];`
			`for (ClockElem &src_elem : *src) {`
			`u64 epoch = src_elem.epoch;`
			`if (*dst_pos < epoch) {`
			`*dst_pos = epoch;`
			`acquired = true;`
			`}`
			`dst_pos++;`
			`}`

			`// Remember that this thread has acquired this clock.`
			`if (nclk > tid_)`
			`src->elem(tid_).reused = reused_;`
			`}`

			`if (acquired) {`
			`last_acquire_ = clk_[tid_];`
			`ResetCached(c);`
			`}`
			`}`

			`void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {`
			`DCHECK_LE(nclk_, kMaxTid);`
			`DCHECK_LE(sc->size_, kMaxTid);`

			`if (sc->size_ == 0) {`
			`// ReleaseStore will correctly set release_store_tid_,`
			`// which can be important for future operations.`
			`ReleaseStore(c, sc);`
			`return;`
			`}`

			`nclk_ = max(nclk_, (uptr) sc->size_);`

			`// Check if we need to resize sc.`
			`if (sc->size_ < nclk_)`
			`sc->Resize(c, nclk_);`

			`bool acquired = false;`

			`sc->Unshare(c);`
			`// Update sc->clk_.`
			`sc->FlushDirty();`
			`uptr i = 0;`
			`for (ClockElem &ce : *sc) {`
			`u64 tmp = clk_[i];`
			`if (clk_[i] < ce.epoch) {`
			`clk_[i] = ce.epoch;`
			`acquired = true;`
			`}`
			`ce.epoch = tmp;`
			`ce.reused = 0;`
			`i++;`
			`}`
			`sc->release_store_tid_ = kInvalidTid;`
			`sc->release_store_reused_ = 0;`

			`if (acquired) {`
			`last_acquire_ = clk_[tid_];`
			`ResetCached(c);`
			`}`
			`}`

			`void ThreadClock::release(ClockCache *c, SyncClock *dst) {`
			`DCHECK_LE(nclk_, kMaxTid);`
			`DCHECK_LE(dst->size_, kMaxTid);`

			`if (dst->size_ == 0) {`
			`// ReleaseStore will correctly set release_store_tid_,`
			`// which can be important for future operations.`
			`ReleaseStore(c, dst);`
			`return;`
			`}`

			`// Check if we need to resize dst.`
			`if (dst->size_ < nclk_)`
			`dst->Resize(c, nclk_);`

			`// Check if we had not acquired anything from other threads`
			`// since the last release on dst. If so, we need to update`
			`// only dst->elem(tid_).`
			`if (!HasAcquiredAfterRelease(dst)) {`
			`UpdateCurrentThread(c, dst);`
			`if (dst->release_store_tid_ != tid_ \|\|`
			`dst->release_store_reused_ != reused_)`
			`dst->release_store_tid_ = kInvalidTid;`
			`return;`
			`}`

			`// O(N) release.`
			`dst->Unshare(c);`
			`// First, remember whether we've acquired dst.`
			`bool acquired = IsAlreadyAcquired(dst);`
			`// Update dst->clk_.`
			`dst->FlushDirty();`
			`uptr i = 0;`
			`for (ClockElem &ce : *dst) {`
			`ce.epoch = max(ce.epoch, clk_[i]);`
			`ce.reused = 0;`
			`i++;`
			`}`
			`// Clear 'acquired' flag in the remaining elements.`
			`dst->release_store_tid_ = kInvalidTid;`
			`dst->release_store_reused_ = 0;`
			`// If we've acquired dst, remember this fact,`
			`// so that we don't need to acquire it on next acquire.`
			`if (acquired)`
			`dst->elem(tid_).reused = reused_;`
			`}`

			`void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {`
			`DCHECK_LE(nclk_, kMaxTid);`
			`DCHECK_LE(dst->size_, kMaxTid);`

			`if (dst->size_ == 0 && cached_idx_ != 0) {`
			`// Reuse the cached clock.`
			`// Note: we could reuse/cache the cached clock in more cases:`
			`// we could update the existing clock and cache it, or replace it with the`
			`// currently cached clock and release the old one. And for a shared`
			`// existing clock, we could replace it with the currently cached;`
			`// or unshare, update and cache. But, for simplicity, we currently reuse`
			`// cached clock only when the target clock is empty.`
			`dst->tab_ = ctx->clock_alloc.Map(cached_idx_);`
			`dst->tab_idx_ = cached_idx_;`
			`dst->size_ = cached_size_;`
			`dst->blocks_ = cached_blocks_;`
			`CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid);`
			`// The cached clock is shared (immutable),`
			`// so this is where we store the current clock.`
			`dst->dirty_[0].set_tid(tid_);`
			`dst->dirty_[0].epoch = clk_[tid_];`
			`dst->release_store_tid_ = tid_;`
			`dst->release_store_reused_ = reused_;`
			`// Remember that we don't need to acquire it in future.`
			`dst->elem(tid_).reused = reused_;`
			`// Grab a reference.`
			`atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);`
			`return;`
			`}`

			`// Check if we need to resize dst.`
			`if (dst->size_ < nclk_)`
			`dst->Resize(c, nclk_);`

			`if (dst->release_store_tid_ == tid_ &&`
			`dst->release_store_reused_ == reused_ &&`
			`!HasAcquiredAfterRelease(dst)) {`
			`UpdateCurrentThread(c, dst);`
			`return;`
			`}`

			`// O(N) release-store.`
			`dst->Unshare(c);`
			`// Note: dst can be larger than this ThreadClock.`
			`// This is fine since clk_ beyond size is all zeros.`
			`uptr i = 0;`
			`for (ClockElem &ce : *dst) {`
			`ce.epoch = clk_[i];`
			`ce.reused = 0;`
			`i++;`
			`}`
			`for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);`
			`dst->release_store_tid_ = tid_;`
			`dst->release_store_reused_ = reused_;`
			`// Remember that we don't need to acquire it in future.`
			`dst->elem(tid_).reused = reused_;`

			`// If the resulting clock is cachable, cache it for future release operations.`
			`// The clock is always cachable if we released to an empty sync object.`
			`if (cached_idx_ == 0 && dst->Cachable()) {`
			`// Grab a reference to the ClockBlock.`
			`atomic_uint32_t *ref = ref_ptr(dst->tab_);`
			`if (atomic_load(ref, memory_order_acquire) == 1)`
			`atomic_store_relaxed(ref, 2);`
			`else`
			`atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);`
			`cached_idx_ = dst->tab_idx_;`
			`cached_size_ = dst->size_;`
			`cached_blocks_ = dst->blocks_;`
			`}`
			`}`

			`void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {`
			`acquire(c, dst);`
			`ReleaseStore(c, dst);`
			`}`

			`// Updates only single element related to the current thread in dst->clk_.`
			`void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {`
			`// Update the threads time, but preserve 'acquired' flag.`
			`for (unsigned i = 0; i < kDirtyTids; i++) {`
			`SyncClock::Dirty *dirty = &dst->dirty_[i];`
			`const unsigned tid = dirty->tid();`
			`if (tid == tid_ \|\| tid == kInvalidTid) {`
			`dirty->set_tid(tid_);`
			`dirty->epoch = clk_[tid_];`
			`return;`
			`}`
			`}`
			`// Reset all 'acquired' flags, O(N).`
			`// We are going to touch dst elements, so we need to unshare it.`
			`dst->Unshare(c);`
			`dst->elem(tid_).epoch = clk_[tid_];`
			`for (uptr i = 0; i < dst->size_; i++)`
			`dst->elem(i).reused = 0;`
			`dst->FlushDirty();`
			`}`

			`// Checks whether the current thread has already acquired src.`
			`bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {`
			`if (src->elem(tid_).reused != reused_)`
			`return false;`
			`for (unsigned i = 0; i < kDirtyTids; i++) {`
			`SyncClock::Dirty dirty = src->dirty_[i];`
			`if (dirty.tid() != kInvalidTid) {`
			`if (clk_[dirty.tid()] < dirty.epoch)`
			`return false;`
			`}`
			`}`
			`return true;`
			`}`

			// Checks whether the current thread has acquired anything
			// from other clocks after releasing to dst (directly or indirectly).
			// The epoch stored for this thread in dst is compared against the thread's
			// last_acquire_ and against global_acquire_; if either is not older than
			// the stored epoch, the answer is true.
			bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
			  const u64 my_epoch = dst->elem(tid_).epoch;
			  return my_epoch <= last_acquire_ ||
			         my_epoch <= atomic_load_relaxed(&global_acquire_);
			}

			// Overwrites one element of the thread's vector clock.
			// Called only from unusual places such as AcquireGlobal.
			//   c   - clock cache passed to ResetCached().
			//   tid - element to set; must be below kMaxTid.
			//   v   - new value; must not be smaller than the current one.
			void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
			  DCHECK_LT(tid, kMaxTid);
			  DCHECK_GE(v, clk_[tid]);
			  clk_[tid] = v;
			  // Grow the used prefix of the clock so that it covers tid.
			  if (tid >= nclk_)
			    nclk_ = tid + 1;
			  // Record the acquire and drop the cached clock.
			  last_acquire_ = clk_[tid_];
			  ResetCached(c);
			}

			// Prints the thread clock through the supplied printf-like callback:
			// the first nclk_ epochs as a comma-separated list, followed by the
			// thread id, its reuse counter and the last acquire epoch.
			void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
			  printf("clock=[");
			  for (uptr i = 0; i < nclk_; i++)
			    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
			  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
			}

			// Constructs an empty sync clock; all state is initialised by ResetImpl().
			SyncClock::SyncClock() { ResetImpl(); }

			// Destroys the sync clock. Reset() must have been called beforehand: the
			// checks below fail if the object still references any clock storage.
			SyncClock::~SyncClock() {
			  CHECK_EQ(size_, 0);
			  CHECK_EQ(blocks_, 0);
			  CHECK_EQ(tab_, 0);
			  CHECK_EQ(tab_idx_, 0);
			}

			// Returns the clock to the empty state. If the clock currently has storage
			// (non-zero size), its reference to the clock block is dropped first.
			void SyncClock::Reset(ClockCache *c) {
			  if (size_ != 0)
			    UnrefClockBlock(c, tab_idx_, blocks_);
			  ResetImpl();
			}

			`void SyncClock::ResetImpl() {`
			`tab_ = 0;`
			`tab_idx_ = 0;`
			`size_ = 0;`
			`blocks_ = 0;`
			`release_store_tid_ = kInvalidTid;`
			`release_store_reused_ = 0;`
			`for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid);`
			`}`

			`void SyncClock::Resize(ClockCache *c, uptr nclk) {`
			`Unshare(c);`
			`if (nclk <= capacity()) {`
			`// Memory is already allocated, just increase the size.`
			`size_ = nclk;`
			`return;`
			`}`
			`if (size_ == 0) {`
			`// Grow from 0 to one-level table.`
			`CHECK_EQ(size_, 0);`
			`CHECK_EQ(blocks_, 0);`
			`CHECK_EQ(tab_, 0);`
			`CHECK_EQ(tab_idx_, 0);`
			`tab_idx_ = ctx->clock_alloc.Alloc(c);`
			`tab_ = ctx->clock_alloc.Map(tab_idx_);`
			`internal_memset(tab_, 0, sizeof(*tab_));`
			`atomic_store_relaxed(ref_ptr(tab_), 1);`
			`size_ = 1;`
			`} else if (size_ > blocks_ * ClockBlock::kClockCount) {`
			`u32 idx = ctx->clock_alloc.Alloc(c);`
			`ClockBlock *new_cb = ctx->clock_alloc.Map(idx);`
			`uptr top = size_ - blocks_ * ClockBlock::kClockCount;`
			`CHECK_LT(top, ClockBlock::kClockCount);`
			`const uptr move = top * sizeof(tab_->clock[0]);`
			`internal_memcpy(&new_cb->clock[0], tab_->clock, move);`
			`internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);`
			`internal_memset(tab_->clock, 0, move);`
			`append_block(idx);`
			`}`
			`// At this point we have first level table allocated and all clock elements`
			`// are evacuated from it to a second level block.`
			`// Add second level tables as necessary.`
			`while (nclk > capacity()) {`
			`u32 idx = ctx->clock_alloc.Alloc(c);`
			`ClockBlock *cb = ctx->clock_alloc.Map(idx);`
			`internal_memset(cb, 0, sizeof(*cb));`
			`append_block(idx);`
			`}`
			`size_ = nclk;`
			`}`

			`// Flushes all dirty elements into the main clock array.`
			`void SyncClock::FlushDirty() {`
			`for (unsigned i = 0; i < kDirtyTids; i++) {`
			`Dirty *dirty = &dirty_[i];`
			`if (dirty->tid() != kInvalidTid) {`
			`CHECK_LT(dirty->tid(), size_);`
			`elem(dirty->tid()).epoch = dirty->epoch;`
			`dirty->set_tid(kInvalidTid);`
			`}`
			`}`
			`}`

			`bool SyncClock::IsShared() const {`
			`if (size_ == 0)`
			`return false;`
			`atomic_uint32_t *ref = ref_ptr(tab_);`
			`u32 v = atomic_load(ref, memory_order_acquire);`
			`CHECK_GT(v, 0);`
			`return v > 1;`
			`}`

			// Gives this object a private copy of its clock if the storage is shared.
			// Shared clocks are immutable, so this has to happen before any update.
			// Dirty entries are not shared; they are simply carried over.
			void SyncClock::Unshare(ClockCache *c) {
			  if (!IsShared())
			    return;
			  // Park the current state in a temporary clock object.
			  SyncClock prev;
			  prev.tab_ = tab_;
			  prev.tab_idx_ = tab_idx_;
			  prev.size_ = size_;
			  prev.blocks_ = blocks_;
			  prev.release_store_tid_ = release_store_tid_;
			  prev.release_store_reused_ = release_store_reused_;
			  for (unsigned slot = 0; slot < kDirtyTids; slot++)
			    prev.dirty_[slot] = dirty_[slot];
			  // Forget the shared storage and allocate a fresh clock of the same size.
			  ResetImpl();
			  Resize(c, prev.size_);
			  // Copy the elements over, walking both clocks in lock step.
			  Iter src(&prev);
			  for (ClockElem &target : *this) {
			    target = *src;
			    ++src;
			  }
			  release_store_tid_ = prev.release_store_tid_;
			  release_store_reused_ = prev.release_store_reused_;
			  for (unsigned slot = 0; slot < kDirtyTids; slot++)
			    dirty_[slot] = prev.dirty_[slot];
			  // Release the reference held on the old storage; this also leaves the
			  // temporary in the empty state its destructor checks for.
			  prev.Reset(c);
			}

			// Tells whether this clock may be cached for future release operations:
			// it must be non-empty, have no occupied dirty slot and a reference count
			// of exactly one.
			ALWAYS_INLINE bool SyncClock::Cachable() const {
			  if (size_ == 0)
			    return false;
			  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
			    const bool occupied = dirty_[slot].tid() != kInvalidTid;
			    if (occupied)
			      return false;
			  }
			  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
			}

			// Maps a linear tid onto the two-level storage and returns its element.
			// Intended for one-off accesses only; bulk operations use the iterator,
			// which is much faster.
			ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
			  DCHECK_LT(tid, size_);
			  const uptr block = tid / ClockBlock::kClockCount;
			  DCHECK_LE(block, blocks_);
			  const uptr offset = tid % ClockBlock::kClockCount;
			  // Elements past the last second-level block live in the first-level one.
			  if (block == blocks_)
			    return tab_->clock[offset];
			  return ctx->clock_alloc.Map(get_block(block))->clock[offset];
			}

			// Returns how many clock elements fit into the storage allocated so far:
			// all second-level blocks plus whatever part of the first-level block is
			// not taken by the block index table and the reference counter.
			// An empty clock has capacity 0.
			ALWAYS_INLINE uptr SyncClock::capacity() const {
			  if (size_ == 0)
			    return 0;
			  // Number of table entries that occupy the space of one clock element.
			  const uptr per_elem =
			      sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
			  // Clock elements displaced by the table entries; +1 for the ref counter.
			  const uptr displaced = RoundUpTo(blocks_ + 1, per_elem) / per_elem;
			  const uptr in_top = ClockBlock::kClockCount - displaced;
			  return blocks_ * ClockBlock::kClockCount + in_top;
			}

			// Returns the allocator index of second-level block number bi.
			// Indices are stored in the first-level table counting down from kBlockIdx.
			ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
			  DCHECK(size_);
			  DCHECK_LT(bi, blocks_);
			  const uptr slot = ClockBlock::kBlockIdx - bi;
			  return tab_->table[slot];
			}

			// Registers idx as the next second-level block. The block count is bumped
			// first, then the corresponding table slot is checked to be empty and
			// filled in.
			ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
			  const uptr added = blocks_++;
			  CHECK_EQ(get_block(added), 0);
			  tab_->table[ClockBlock::kBlockIdx - added] = idx;
			}

			// Test-only accessor: returns the epoch recorded for tid, preferring a
			// dirty entry for that tid over the value in the main clock array.
			u64 SyncClock::get(unsigned tid) const {
			  for (unsigned slot = 0; slot < kDirtyTids; slot++) {
			    Dirty entry = dirty_[slot];
			    if (entry.tid() != tid)
			      continue;
			    return entry.epoch;
			  }
			  return elem(tid).epoch;
			}

			// Accessor used only by the Iter test: returns the epoch stored in the
			// main clock array for tid, ignoring dirty entries.
			u64 SyncClock::get_clean(unsigned tid) const {
			  const ClockElem &stored = elem(tid);
			  return stored.epoch;
			}

			// Prints the sync clock through the supplied printf-like callback:
			// the epochs and the 'reused' markers of all size_ elements, followed by
			// the release-store owner and the contents of dirty slots 0 and 1.
			void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
			  printf("clock=[");
			  for (uptr i = 0; i < size_; i++)
			    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
			  printf("] reused=[");
			  for (uptr i = 0; i < size_; i++)
			    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
			  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
			         release_store_tid_, release_store_reused_, dirty_[0].tid(),
			         dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch);
			}

			// Advances the iterator to the next block of clock elements.
			// Second-level blocks are visited first; the elements stored in the
			// first-level block come last. When nothing is left, parent_ is cleared to
			// mark the end of iteration.
			void SyncClock::Iter::Next() {
			  // The current block is exhausted.
			  block_++;
			  const bool in_second_level = block_ < parent_->blocks_;
			  if (in_second_level) {
			    // Walk the next second-level block.
			    ClockBlock *cb = ctx->clock_alloc.Map(parent_->get_block(block_));
			    pos_ = &cb->clock[0];
			    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
			                      ClockBlock::kClockCount);
			    return;
			  }
			  const bool has_top_elems =
			      block_ == parent_->blocks_ &&
			      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount;
			  if (has_top_elems) {
			    // Walk the elements kept in the first-level block.
			    pos_ = &parent_->tab_->clock[0];
			    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
			                      ClockBlock::kClockCount);
			    return;
			  }
			  parent_ = nullptr;  // denotes end
			}
			`} // namespace __tsan`