#ifndef TSAN_ANNOTATIONS
#define TSAN_ANNOTATIONS 0
#endif
const unsigned __tsan_mutex_linker_init = 1 << 0;
void __tsan_mutex_pre_lock(void *addr, unsigned flags);
void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
void __tsan_mutex_post_unlock(void *addr, unsigned flags);
void __tsan_mutex_pre_signal(void *addr, unsigned flags);
void __tsan_mutex_post_signal(void *addr, unsigned flags);
namespace Synchronization {

// Call sites of the __tsan_mutex_* hooks, from the if_tsan_pre_lock /
// if_tsan_post_lock / if_tsan_pre_unlock / if_tsan_post_unlock /
// if_tsan_pre_signal / if_tsan_post_signal wrappers used throughout this file:
__tsan_mutex_pre_lock(mutex, __tsan_mutex_linker_init);
__tsan_mutex_post_lock(mutex, __tsan_mutex_linker_init, 1);
(void)__tsan_mutex_pre_unlock(mutex, __tsan_mutex_linker_init);
__tsan_mutex_post_unlock(mutex, __tsan_mutex_linker_init);
__tsan_mutex_pre_signal(cond, 0);
__tsan_mutex_post_signal(cond, 0);
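// When TSAN_ANNOTATIONS is 0, the same if_tsan_* wrappers presumably compile
// down to empty inline functions. A minimal sketch of that fallback shape
// (an assumption; the no-op variants are not shown in this listing):
#if !TSAN_ANNOTATIONS
ALWAYS_INLINE void if_tsan_pre_lock(void *) {}
ALWAYS_INLINE void if_tsan_post_lock(void *) {}
ALWAYS_INLINE void if_tsan_pre_unlock(void *) {}
ALWAYS_INLINE void if_tsan_post_unlock(void *) {}
ALWAYS_INLINE void if_tsan_pre_signal(void *) {}
ALWAYS_INLINE void if_tsan_post_signal(void *) {}
#endif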
// __sync-builtin fallback implementations of the atomic wrappers.
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __sync_and_and_fetch(addr, val);
}
template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __sync_fetch_and_add(addr, val);
}
template<typename T>
ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, T *expected, T *desired) {
    T oldval = *expected;
    T gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
    *expected = gotval;
    return oldval == gotval;
}
ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}
ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __sync_fetch_and_and(addr, val);
}
// atomic_load_acquire() pairs a plain load with a full barrier:
    __sync_synchronize();
ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __sync_or_and_fetch(addr, val);
}
ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    *addr = *val;
}
// atomic_store_release() and atomic_thread_fence_acquire() likewise fall back
// to full barriers:
    __sync_synchronize();
    __sync_synchronize();
// Preferred implementations using the __atomic builtins with explicit memory orders.
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
}
template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
}
ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}
ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}
ALWAYS_INLINE uintptr_t atomic_fetch_and_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
}
template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_RELAXED);
}
template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_ACQUIRE);
}
ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
}
ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    __atomic_store(addr, val, __ATOMIC_RELAXED);
}
template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    __atomic_store(addr, val, __ATOMIC_RELEASE);
}
ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
}
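// For reference only: the same operations expressed with std::atomic, to make
// the naming convention explicit -- each wrapper is named
// <operation>_<success order>[_<failure order>]. Illustrative sketch, not part
// of the runtime.
#include <atomic>
#include <cstdint>

inline void demo_atomic_equivalents(std::atomic<uintptr_t> &word) {
    uintptr_t expected = 0;
    uintptr_t desired = 1;

    // atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)
    word.compare_exchange_weak(expected, desired,
                               std::memory_order_acquire, std::memory_order_relaxed);

    // atomic_fetch_and_release(&state, ~(uintptr_t)lock_bit)
    word.fetch_and(~(uintptr_t)1, std::memory_order_release);

    // atomic_or_fetch_relaxed(&state, parked_bit): fetch_or returns the old
    // value, so or the bit back in to mirror the or_fetch result.
    uintptr_t after = word.fetch_or((uintptr_t)2, std::memory_order_relaxed) | (uintptr_t)2;
    (void)after;

    // atomic_load_acquire / atomic_store_release
    uintptr_t observed = word.load(std::memory_order_acquire);
    word.store(observed, std::memory_order_release);

    // atomic_thread_fence_acquire()
    std::atomic_thread_fence(std::memory_order_acquire);
}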
    if (spin_count > 0) {
        spin_count--;  // spin_control::should_spin(): consume one unit of spin budget
    }
    return spin_count > 0;
static constexpr uint8_t lock_bit = 0x01;
static constexpr uint8_t queue_lock_bit = 0x02;
static constexpr uint8_t parked_bit = 0x02;
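// The flag bits above share a single uintptr_t state word with a queue-head
// pointer; the masking in the lock code below relies on queue nodes being at
// least 4-byte aligned so the two low bits of their address are free. A
// standalone sketch of that packing (demo_* names are illustrative only):
#include <cassert>
#include <cstdint>

struct demo_node {
    demo_node *next = nullptr;
};

int main() {
    constexpr uintptr_t demo_lock_bit = 0x01;
    constexpr uintptr_t demo_queue_lock_bit = 0x02;

    alignas(4) demo_node node;
    uintptr_t state = (uintptr_t)&node | demo_lock_bit;  // locked, queue head = &node

    // Recover the pieces the same way the listing masks `expected`:
    demo_node *head = (demo_node *)(state & ~(uintptr_t)(demo_queue_lock_bit | demo_lock_bit));
    bool locked = (state & demo_lock_bit) != 0;

    assert(head == &node);
    assert(locked);
    return 0;
}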
// word_lock fast paths: lock() tries a single weak CAS and falls back to
// lock_full(); unlock() clears the lock bit and only calls unlock_full() when
// a queued waiter may need waking.
ALWAYS_INLINE void lock() {
    if_tsan_pre_lock(this);
    uintptr_t expected = 0;
    uintptr_t desired = lock_bit;
    // Fast path: grab the lock bit directly; otherwise take the queueing slow path.
    if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
        lock_full();
    }
    if_tsan_post_lock(this);
}

ALWAYS_INLINE void unlock() {
    if_tsan_pre_unlock(this);
    uintptr_t val = atomic_fetch_and_release(&state, ~(uintptr_t)lock_bit);
    // If another thread holds the queue lock, it will handle waking a waiter.
    bool no_thread_queuing = (val & queue_lock_bit) == 0;
    // Only wake somebody if the queue is non-empty.
    bool some_queued = (val & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0;
    if (no_thread_queuing && some_queued) {
        unlock_full();
    }
    if_tsan_post_unlock(this);
}
WEAK void word_lock::lock_full() {
    spin_control spinner;
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);
    while (true) {
        if (!(expected & lock_bit)) {
            uintptr_t desired = expected | lock_bit;
            if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) return;
            continue;
        }
        // Lock is held: spin for a while if a queue already exists.
        if (((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) != 0) && spinner.should_spin()) {
            halide_thread_yield();
            atomic_load_relaxed(&state, &expected);
            continue;
        }
        word_lock_queue_data node;
        node.parker.prepare_park();
        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        if (head == nullptr) {
            node.tail = &node;  // first waiter: it is both head and tail
        } else {
            node.next = head;   // push on the front; the tail is found lazily by unlock_full()
        }
        uintptr_t desired = ((uintptr_t)&node) | (expected & (queue_lock_bit | lock_bit));
        if (atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
            node.parker.park();
            spinner.reset();
            atomic_load_relaxed(&state, &expected);
        }
    }
}
WEAK void word_lock::unlock_full() {
    uintptr_t expected;
    atomic_load_relaxed(&state, &expected);

    // Phase 1: take the queue lock, or bail out if someone else has it or the queue is empty.
    while (true) {
        bool thread_queuing = (expected & queue_lock_bit);
        bool none_queued = (expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0;
        if (thread_queuing || none_queued) {
            return;
        }
        uintptr_t desired = expected | queue_lock_bit;
        if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
            break;
        }
    }

    // Phase 2: find the tail of the queue and wake it.
    while (true) {
        word_lock_queue_data *head = (word_lock_queue_data *)(expected & ~(uintptr_t)(queue_lock_bit | lock_bit));
        word_lock_queue_data *current = head;
        word_lock_queue_data *tail = current->tail;
        // Newly pushed nodes have tail == nullptr; walk forward, filling in
        // prev pointers, until a node that knows the tail is found.
        while (tail == nullptr) {
            word_lock_queue_data *next = current->next;
            next->prev = current;
            current = next;
            tail = current->tail;
        }

        // If the lock was re-taken, just drop the queue lock and let the new
        // owner do the wakeup later.
        if (expected & lock_bit) {
            uintptr_t desired = expected & ~(uintptr_t)queue_lock_bit;
            if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                return;
            }
            atomic_thread_fence_acquire();
            continue;
        }

        word_lock_queue_data *new_tail = tail->prev;
        if (new_tail == nullptr) {
            // tail was the only waiter: try to clear the queue entirely.
            bool continue_outer = false;
            while (!continue_outer) {
                uintptr_t desired = expected & lock_bit;
                if (atomic_cas_weak_relacq_relaxed(&state, &expected, &desired)) {
                    break;
                }
                if ((expected & ~(uintptr_t)(queue_lock_bit | lock_bit)) == 0) {
                    continue;
                }
                // A new waiter appeared; go around the outer loop again.
                atomic_thread_fence_acquire();
                continue_outer = true;
            }
            if (continue_outer) {
                continue;
            }
        } else {
            // Detach the tail and release the queue lock.
            head->tail = new_tail;
            atomic_and_fetch_release(&state, ~(uintptr_t)queue_lock_bit);
        }

        // Wake the thread at the tail of the queue.
        tail->parker.unpark_start();
        tail->parker.unpark();
        tail->parker.unpark_finish();
        break;
    }
}
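// Standalone sketch of the lazy tail discovery used by unlock_full() above:
// new waiters are pushed at the head with tail == nullptr, and the unlocking
// thread walks forward once, filling in prev pointers, until it reaches the
// node that still remembers the tail. demo_node is illustrative, not the
// runtime's queue type.
#include <cassert>

struct demo_queue_node {
    demo_queue_node *next = nullptr;
    demo_queue_node *prev = nullptr;
    demo_queue_node *tail = nullptr;  // only the oldest node is guaranteed to know the tail
};

int main() {
    demo_queue_node oldest, middle, newest;
    oldest.tail = &oldest;   // first waiter: knows it is the tail

    middle.next = &oldest;   // each later waiter is pushed on the front...
    newest.next = &middle;   // ...with tail left as nullptr

    // The walk from unlock_full(): find the tail, fixing up prev pointers on the way.
    demo_queue_node *head = &newest;
    demo_queue_node *current = head;
    demo_queue_node *tail = current->tail;
    while (tail == nullptr) {
        demo_queue_node *next = current->next;
        next->prev = current;
        current = next;
        tail = current->tail;
    }

    assert(tail == &oldest);
    assert(oldest.prev == &middle && middle.prev == &newest);
    return 0;
}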
// Debug helper: print the sleep address of every thread parked in every bucket.
WEAK void dump_hash() {
    // (outer loop over the bucket array elided in this listing)
    while (head != nullptr) {
        print(nullptr) << "Bucket index " << i << " addr "
                       << (void *)head->sleep_address << "\n";
        head = head->next;
    }
}

// addr_hash(): Fibonacci hashing of a sleep address down to a bucket index.
    if (sizeof(uintptr_t) >= 8) {
        return (addr * (uintptr_t)0x9E3779B97F4A7C15) >> (64 - HASH_TABLE_BITS);
    }
    // (32-bit variant elided in this listing)
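// The multiplier 0x9E3779B97F4A7C15 is 2^64 divided by the golden ratio, so the
// multiplication scrambles low address bits into the high bits and the shift
// keeps the top HASH_TABLE_BITS bits as the bucket index. A standalone demo;
// DEMO_HASH_TABLE_BITS = 10 is an assumed value for illustration only.
#include <cstdint>
#include <cstdio>

constexpr int DEMO_HASH_TABLE_BITS = 10;

uint64_t demo_addr_hash(uint64_t addr) {
    return (addr * (uint64_t)0x9E3779B97F4A7C15ull) >> (64 - DEMO_HASH_TABLE_BITS);
}

int main() {
    // Nearby, similarly-aligned addresses still spread across buckets.
    for (uint64_t addr = 0x7ffe00001000ull; addr < 0x7ffe00001100ull; addr += 0x40) {
        printf("addr %#llx -> bucket %llu\n",
               (unsigned long long)addr, (unsigned long long)demo_addr_hash(addr));
    }
    return 0;
}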
// lock_bucket(addr): hash the address and take that bucket's lock.
    uintptr_t hash = addr_hash(addr);
    // ...

// lock_bucket_pair(addr_from, addr_to): hash both addresses and always lock
// the two buckets in the same order so concurrent requeues cannot deadlock.
    uintptr_t hash_from = addr_hash(addr_from);
    uintptr_t hash_to = addr_hash(addr_to);
    // ...
    if (hash_from == hash_to) {
        // same bucket: lock it once
    } else if (hash_from < hash_to) {
        // lock the lower-indexed bucket first
    }

// unlock_bucket_pair(buckets): the mirror image, ordered by bucket address.
    if (&buckets.from == &buckets.to) {
        // single bucket: unlock once
    } else if (&buckets.from > &buckets.to) {
        // ...
    }
// parking_control interface: park/unpark_requeue plus virtual hooks that
// subclasses override (validate, before_sleep, unpark, requeue_callback).
    uintptr_t park(uintptr_t addr);
    int unpark_requeue(uintptr_t addr_from, uintptr_t addr_to, uintptr_t unpark_info);

    virtual uintptr_t unpark(int unparked, bool more_waiters) {
        return 0;
    }

// parking_control::park(): once validate() passes, append this thread's
// queue_data to the bucket's intrusive list before sleeping.
    if (bucket.head != nullptr) {
        // (append at the tail; otherwise this node becomes the head)
    }
// parking_control::unpark_one(addr): find one thread sleeping on addr, remove
// it from the bucket list, work out whether more waiters remain, and wake it.
    while (data != nullptr) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);
        if (cur_addr == addr) {
            queue_data *next = data->next;
            *data_location = next;

            bool more_waiters = false;
            if (bucket.tail == data) {
                bucket.tail = prev;  // removed the last node: pull the tail back
            } else {
                // Scan the rest of the bucket for another waiter on this address.
                queue_data *data2 = next;
                while (data2 != nullptr && !more_waiters) {
                    uintptr_t cur_addr2;
                    atomic_load_relaxed(&data2->sleep_address, &cur_addr2);
                    more_waiters = (cur_addr2 == addr);
                    data2 = data2->next;
                }
            }

            data->parker.unpark_start();
            // (bucket unlocked, then the thread is unparked)
            data->parker.unpark_finish();

            return more_waiters ? 1 : 0;
        }
        data_location = &data->next;
        prev = data;
        data = data->next;
    }
// parking_control::unpark_requeue(addr_from, addr_to, unpark_info): move the
// threads sleeping on addr_from over to addr_to, optionally waking one directly.
    while (data != nullptr) {
        uintptr_t cur_addr;
        atomic_load_relaxed(&data->sleep_address, &cur_addr);

        queue_data *next = data->next;
        if (cur_addr == addr_from) {
            // Remove the node from the source bucket's list.
            *data_location = next;
            // ...
            // Either remember it for a direct wakeup, or append it to the chain
            // that will be spliced onto the destination bucket:
            if (requeue == nullptr) {
                requeue = data;
            } else {
                requeue_tail->next = data;
            }
            requeue_tail = data;
            // ...
        } else {
            data_location = &data->next;
            // ...
        }
        data = next;
    }

    // Splice the requeued chain onto the destination bucket.
    if (requeue != nullptr) {
        requeue_tail->next = nullptr;
        if (buckets.to.head == nullptr) {
            buckets.to.head = requeue;
        } else {
            buckets.to.tail->next = requeue;
        }
        buckets.to.tail = requeue_tail;
    }

    // Wake the chosen thread, if any, after dropping the bucket locks.
    if (wakeup != nullptr) {
        wakeup->parker.unpark_start();
        // (bucket pair unlocked, then unpark)
        wakeup->parker.unpark_finish();
    }

    return wakeup != nullptr && action.unpark_one;
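// Conceptual sketch of the park/unpark_one contract the code above implements,
// using std::mutex and std::condition_variable as stand-ins for the bucket
// word_lock and thread_parker. Illustrative only -- the real runtime never
// allocates per-address state like this map does. The key point is that
// validate() is re-checked under the bucket lock, so a concurrent unpark_one
// cannot be missed between the caller's state change and the sleep.
#include <condition_variable>
#include <cstdint>
#include <map>
#include <mutex>

struct demo_parking_lot {
    struct waiter {
        int count = 0;
        std::condition_variable cv;
    };
    std::mutex bucket_lock;               // plays the role of the hash bucket's word_lock
    std::map<uintptr_t, waiter> waiters;  // plays the role of the bucket queue

    template<typename Validate>
    void park(uintptr_t addr, Validate validate) {
        std::unique_lock<std::mutex> guard(bucket_lock);
        if (!validate()) {
            return;  // the protected state changed; do not sleep
        }
        waiter &w = waiters[addr];
        w.count++;
        w.cv.wait(guard);  // (spurious wakeups ignored for brevity)
        w.count--;
    }

    bool unpark_one(uintptr_t addr) {
        std::lock_guard<std::mutex> guard(bucket_lock);
        auto it = waiters.find(addr);
        if (it == waiters.end() || it->second.count == 0) {
            return false;  // nobody parked on this address
        }
        it->second.cv.notify_one();
        return true;
    }
};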
// mutex_parking_control: validate() only allows parking while the mutex is
// still locked with the parked bit set; unpark() publishes the residual parked
// bit back to the lock word with release ordering.
    bool validate(validate_action &action) final {
        uintptr_t result;
        atomic_load_relaxed(lock_state, &result);
        return result == (lock_bit | parked_bit);
    }

    uintptr_t unpark(int unparked, bool more_waiters) final {
        uintptr_t return_state = more_waiters ? parked_bit : 0;
        atomic_store_release(lock_state, &return_state);
        return return_state;
    }
// fast_mutex slow lock path: the same shape as word_lock::lock_full(), but it
// parks through a mutex_parking_control keyed on the mutex's own address.
    atomic_load_relaxed(&state, &expected);
    while (true) {
        if (!(expected & lock_bit)) {
            uintptr_t desired = expected | lock_bit;
            if (atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) return;
            continue;
        }
        // ...spin briefly, then make sure the parked bit is set before parking...
        if ((expected & parked_bit) == 0) {
            uintptr_t desired = expected | parked_bit;
            if (!atomic_cas_weak_relaxed_relaxed(&state, &expected, &desired)) {
                continue;
            }
        }
        mutex_parking_control control(&state);
        uintptr_t result = control.park((uintptr_t)this);
        if (result == (uintptr_t)this) {
            return;  // the unlocking thread handed the lock state over directly
        }
        atomic_load_relaxed(&state, &expected);
    }
// fast_mutex slow unlock path: a strong CAS back to zero, else wake one waiter.
    uintptr_t expected = lock_bit;
    uintptr_t desired = 0;
    if (atomic_cas_strong_release_relaxed(&state, &expected, &desired)) {
        return;
    }
    // ...unpark_one((uintptr_t)this) via mutex_parking_control...
// fast_mutex::lock() fast path.
    uintptr_t expected = 0;
    uintptr_t desired = lock_bit;
    if (!atomic_cas_weak_acquire_relaxed(&state, &expected, &desired)) {
        lock_full();
    }
// fast_mutex::unlock() fast path.
    uintptr_t expected = lock_bit;
    uintptr_t desired = 0;
    if (!atomic_cas_weak_release_relaxed(&state, &expected, &desired)) {
        unlock_full();
    }
// fast_mutex::make_parked_if_locked(): set the parked bit only while locked.
    atomic_load_relaxed(&state, &val);
    while (true) {
        if (!(val & lock_bit)) {
            return false;
        }
        uintptr_t desired = val | parked_bit;
        if (atomic_cas_weak_relaxed_relaxed(&state, &val, &desired)) {
            return true;
        }
    }
// fast_mutex::make_parked(): unconditionally set the parked bit.
    atomic_or_fetch_relaxed(&state, parked_bit);
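// Standalone sketch (std::atomic, not the runtime's wrappers) of the
// fast-path/slow-path split used by fast_mutex: one weak CAS inline, and a
// slow path only when that CAS fails. Here the slow path merely spins; the
// real slow path sets the parked bit and parks the thread as shown above.
// demo_fast_path_mutex and its members are illustrative names.
#include <atomic>
#include <cstdint>

class demo_fast_path_mutex {
    static constexpr uintptr_t demo_lock_bit = 0x01;
    std::atomic<uintptr_t> state{0};

    void lock_slow() {
        // Placeholder for the spin/park loop; just keeps retrying the CAS.
        uintptr_t expected = 0;
        while (!state.compare_exchange_weak(expected, demo_lock_bit,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed)) {
            expected = 0;
        }
    }

public:
    void lock() {
        uintptr_t expected = 0;
        // Weak CAS: spurious failure just sends us to the slow path.
        if (!state.compare_exchange_weak(expected, demo_lock_bit,
                                         std::memory_order_acquire,
                                         std::memory_order_relaxed)) {
            lock_slow();
        }
    }

    void unlock() {
        // With no parked bit in this demo a release store suffices; the real
        // unlock() CASes lock_bit -> 0 and wakes a parked waiter if that fails.
        state.store(0, std::memory_order_release);
    }
};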
// signal_parking_control::unpark(): the woken waiter receives the mutex
// address as its park() result, so fast_cond::wait() below knows whether the
// lock was handed over directly.
    uintptr_t unpark(int unparked, bool more_waiters) final {
        // ...
        return (uintptr_t)mutex;
    }

// broadcast_parking_control::validate(): if the cond has since been associated
// with a different mutex, there is nothing for this broadcast to do.
    if (val != (uintptr_t)mutex) {
        return false;
    }

// broadcast_parking_control::requeue_callback(): if one waiter was woken
// directly and the rest were requeued onto the mutex, mark the mutex parked.
    if (action.unpark_one && some_requeued) {
        mutex->make_parked();
    }

// wait_parking_control::validate(): associate the cond with this mutex on the
// first wait; waiting on the same cond with a different mutex is an error.
    if (val == 0) {
        val = (uintptr_t)mutex;
        atomic_store_relaxed(cond_state, &val);
    } else if (val != (uintptr_t)mutex) {
        action.invalid_unpark_info = (uintptr_t)mutex;
        return false;
    }

// wait_parking_control::unpark():
    uintptr_t unpark(int unparked, bool more_waiters) final {
        // (body elided in this listing)
    }
// fast_cond::signal(): nothing to do if no mutex is associated with the cond
// yet; otherwise wake one waiter through a signal_parking_control.
    if_tsan_pre_signal(this);
    uintptr_t val;
    atomic_load_relaxed(&state, &val);
    if (val == 0) {
        if_tsan_post_signal(this);
        return;
    }
    // ...unpark_one((uintptr_t)this)...
    if_tsan_post_signal(this);

// fast_cond::broadcast(): same early-out, then requeue all waiters onto the
// associated mutex.
    if_tsan_pre_signal(this);
    uintptr_t val;
    atomic_load_relaxed(&state, &val);
    if (val == 0) {
        if_tsan_post_signal(this);
        return;
    }
    // ...unpark_requeue((uintptr_t)this, ...)...
    if_tsan_post_signal(this);

// fast_cond::wait(mutex): park on the cond; if the unparking thread did not
// hand the mutex over directly, reacquire it the normal way.
    uintptr_t result = control.park((uintptr_t)this);
    if (result != (uintptr_t)mutex) {
        mutex->lock();
    } else {
        if_tsan_pre_lock(mutex);
        uintptr_t val;
        atomic_load_relaxed((uintptr_t *)mutex, &val);  // debug check that the mutex is held
        if_tsan_post_lock(mutex);
    }
// halide_cond_wait(): the public entry point casts the opaque structs to the
// internal types and forwards to fast_cond::wait().
    fast_cond->wait(fast_mutex);

// halide_mutex_array_create(sz): allocate the wrapper and its mutexes, bailing
// out (and freeing the wrapper) if either allocation fails.
    if (array == nullptr) {
        return nullptr;
    }
    if (array->array == nullptr) {
        halide_free(nullptr, array);
        return nullptr;
    }
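// Typical use of the public API this file implements (a sketch; demo_queue and
// its fields are illustrative). halide_cond_wait must be called with the mutex
// held and re-checked in a loop, exactly like pthread_cond_wait; zero-initialized
// halide_mutex / halide_cond structs are ready to use.
#include "HalideRuntime.h"

struct demo_queue {
    halide_mutex mutex = {};
    halide_cond not_empty = {};
    int pending = 0;
};

void demo_produce(demo_queue *q) {
    halide_mutex_lock(&q->mutex);
    q->pending++;
    halide_cond_signal(&q->not_empty);  // wake one waiter; halide_cond_broadcast wakes all
    halide_mutex_unlock(&q->mutex);
}

void demo_consume(demo_queue *q) {
    halide_mutex_lock(&q->mutex);
    while (q->pending == 0) {
        // Atomically releases the mutex, parks on the condition variable, and
        // reacquires the mutex before returning.
        halide_cond_wait(&q->not_empty, &q->mutex);
    }
    q->pending--;
    halide_mutex_unlock(&q->mutex);
}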