// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <trace-engine/handler.h>

#include <atomic>
#include <stdio.h>
#include <string.h>
#include <utility>

#include <zircon/assert.h>

#include <fbl/auto_lock.h>
#include <fbl/mutex.h>
#include <fbl/vector.h>
#include <lib/async/cpp/task.h>
#include <lib/async/cpp/wait.h>
#include <lib/zx/event.h>
#include <trace-engine/instrumentation.h>

#include "context_impl.h"

namespace {

// Amount of time to allow for other threads to release their references
// to the trace buffer during shutdown.  See point of use for details.
constexpr zx::duration kSynchronousShutdownTimeout = zx::msec(1000);

// Trace engine lock.
// See rules below for how this is used.
fbl::Mutex g_engine_mutex;

// Trace instrumentation state.
// Rules:
//   - can only be modified while holding g_engine_mutex
//   - can be read atomically at any time
std::atomic<int> g_state{TRACE_STOPPED};

// Trace disposition.
// This is the status that will be reported to the trace handler when the
// trace finishes.
// Rules:
//   - can only be accessed or modified while holding g_engine_mutex
zx_status_t g_disposition __TA_GUARDED(g_engine_mutex) {ZX_OK};

// Trace asynchronous dispatcher.
// Rules:
//   - can only be modified while holding g_engine_mutex and engine is stopped
//   - can be read outside the lock only while the engine is not stopped
async_dispatcher_t* g_dispatcher{nullptr};

// Trace handler.
// Rules:
//   - can only be modified while holding g_engine_mutex and engine is stopped
//   - can be read outside the lock only while the engine is not stopped
trace_handler_t* g_handler{nullptr};

// Trace observer table.
// Rules:
//   - can only be accessed or modified while holding g_engine_mutex
struct Observer {
    // The event handle that we notify the observer through.
    zx_handle_t event;
    // Set to true when the engine starts, to indicate that we are waiting for
    // this observer to call us back, via |trace_notify_observer_updated()|,
    // to report that it has started. Once it does call us back, this is reset
    // to false.
    bool awaiting_update_after_start;
};
fbl::Vector<Observer> g_observers __TA_GUARDED(g_engine_mutex);

// Trace context reference count.
// This functions as a non-exclusive lock for the engine's trace context.
// Rules:
//   - acquiring a reference acts as an ACQUIRE fence
//   - releasing a reference acts as a RELEASE fence
//   - always 0 when engine stopped
//   - transition from 0 to non-zero only happens when engine is started
//   - the engine stops when the reference count goes to 0
//     (in other words, holding a context reference prevents the engine from stopping)
//
// There are two separate counters here that collectively provide the full
// count: buffer acquisitions and prolonged acquisitions. Buffer acquisitions
// are for the purpose of writing to the trace buffer. Prolonged acquisitions
// are for things like ad hoc trace providers that want to maintain a
// reference to the context for the duration of the trace.
// Buffer acquisitions increment/decrement the count by
// |kBufferCounterIncrement|. Prolonged acquisitions increment/decrement the
// count by |kProlongedCounterIncrement|.
// To maintain the property that the full count only transitions from 0 to 1
// when the engine is started, |kProlongedCounterIncrement| == 1.
std::atomic_uint32_t g_context_refs{0u};

// The uint32_t context ref count is split this way:
// |31 ... 8| = buffer acquisition count
// |7 ... 0| = prolonged acquisition count
// There are generally only a handful of prolonged acquisitions. The code will
// assert-fail if there are more. This allows for 2^24 buffer acquisitions,
// which is basically 2^24 threads. The values are also chosen so that the
// full count is easily interpreted when printed in hex.
constexpr uint32_t kProlongedCounterShift = 0;
constexpr uint32_t kProlongedCounterIncrement = 1 << kProlongedCounterShift;
constexpr uint32_t kMaxProlongedCounter = 127;
constexpr uint32_t kProlongedCounterMask = 0xff;
constexpr uint32_t kBufferCounterShift = 8;
constexpr uint32_t kBufferCounterIncrement = 1 << kBufferCounterShift;
constexpr uint32_t kBufferCounterMask = 0xffffff00;
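
// As a readability aid, an illustrative decode of the split (the raw value
// 0x302 is chosen purely for illustration): the high 24 bits hold the buffer
// acquisition count and the low 8 bits hold the prolonged acquisition count.
static_assert(((0x302u & kBufferCounterMask) >> kBufferCounterShift) == 3u);
static_assert(((0x302u & kProlongedCounterMask) >> kProlongedCounterShift) == 2u);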

// Trace context.
// Rules:
//   - can only be modified while holding g_engine_mutex and engine is stopped
//   - can be accessed outside the lock while holding a context reference
trace_context_t* g_context{nullptr};

// Event for tracking:
// - when all observers have started
//   (SIGNAL_ALL_OBSERVERS_STARTED)
// - when the trace context reference count has dropped to zero
//   (SIGNAL_CONTEXT_RELEASED)
// Rules:
//   - can only be modified while holding g_engine_mutex and engine is stopped
//   - can be read outside the lock while the engine is not stopped
zx::event g_event;
constexpr zx_signals_t SIGNAL_ALL_OBSERVERS_STARTED = ZX_USER_SIGNAL_0;
constexpr zx_signals_t SIGNAL_CONTEXT_RELEASED = ZX_USER_SIGNAL_1;

// Asynchronous operations posted to the asynchronous dispatcher while the
// engine is running.  Use of these structures is guarded by the engine lock.
async_wait_t g_event_wait;

inline uint32_t get_prolonged_context_refs(uint32_t raw) {
    return (raw & kProlongedCounterMask) >> kProlongedCounterShift;
}

inline uint32_t get_buffer_context_refs(uint32_t raw) {
    return (raw & kBufferCounterMask) >> kBufferCounterShift;
}

void handle_event(async_dispatcher_t* dispatcher, async_wait_t* wait,
                  zx_status_t status, const zx_packet_signal_t* signal);

// must hold g_engine_mutex
inline void update_disposition_locked(zx_status_t disposition) __TA_REQUIRES(g_engine_mutex) {
    if (g_disposition == ZX_OK)
        g_disposition = disposition;
}

void notify_observers_locked() __TA_REQUIRES(g_engine_mutex) {
    for (auto& observer : g_observers) {
        zx_status_t status = zx_object_signal(observer.event, 0u, ZX_EVENT_SIGNALED);
        ZX_DEBUG_ASSERT(status == ZX_OK);
    }
}

void notify_engine_all_observers_started_if_needed_locked() __TA_REQUIRES(g_engine_mutex) {
    for (auto& item : g_observers) {
        if (item.awaiting_update_after_start)
            return;
    }
    g_event.signal(0u, SIGNAL_ALL_OBSERVERS_STARTED);
}

// Table of per-call-site cached category enabled/disabled flags.
// This is done by chaining all the
// |trace_acquire_context_for_category_cached()| call sites at runtime,
// and recording the enabled/disabled flag alongside the chain pointer.
//
// Operation:
// 1. When tracing starts each value is zero (kSiteStateUnknown).
//    The value is generally a static local at the call site.
//    Note that any call sites that got cached while tracing was off are all
//    reset to zero.
// 2. When a TRACE_*() macro is called, it calls
//    trace_acquire_context_for_category_cached().
// 3. If the DISABLED bit is set, skip, we're done.
// 4. Call trace_acquire_context_for_category().
// 5. If the ENABLED bit is set, return, we're done.
// 6. Insert the call site at the head of the chain with the
//    enabled/disabled bits set appropriately.
// 7. When tracing stops, empty the list. This includes resetting all chained
//    values to "unknown". We know they're actually disabled, but the important
//    part here is to flush the cache. A minor improvement would be to keep
//    the current list.
//    This is done both when the state transitions to STOPPING and again when
//    the state transitions to STOPPED.
// 8. When tracing starts again, reset all chained values to "unknown" and
//    flush the cache.
//
// The trick is doing this in as lock-free a way as possible.
// Atomics are used for accessing the static local at the call site, and when
// the list needs to be traversed it is first atomically unchained from the
// main list and then operated on.
// Generally there aren't that many call sites, and we only need to traverse
// the list at trace start/stop time; so using a list isn't that much of a
// performance issue.

using trace_site_atomic_state_t = std::atomic<trace_site_state_t>;

// A sentinel is used so that there is no ambiguity between a null value
// being the end of the chain and a null value being the initial value of
// a chain slot.
trace_site_t g_site_cache_sentinel{};

std::atomic<trace_site_t*> g_site_cache{&g_site_cache_sentinel};

// Extra bits that are combined with the chain pointer to provide
// the full state.
constexpr trace_site_state_t kSiteStateUnknown = 0u;
constexpr trace_site_state_t kSiteStateDisabled = 1u;
constexpr trace_site_state_t kSiteStateEnabled = 2u;
constexpr trace_site_state_t kSiteStateFlagsMask = 3u;
// We don't export this value to the API; the API just says these values
// must be initialized to zero.
static_assert(kSiteStateUnknown == 0u);
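
// Illustrative layout of a cached site's state word on a 64-bit target (this
// packing is implied by the helpers below; it relies on trace_site_t being at
// least 4-byte aligned so the low two bits of the successor pointer are free):
// |63 ... 2| = pointer to the next trace_site_t in the chain (0 if not chained)
// |1 ... 0| = kSiteState* flags for this call site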

// For clarity when reading the source.
using trace_site_flags_t = trace_site_state_t;

trace_site_state_t get_trace_site_raw_successor(trace_site_state_t state) {
    return state & ~kSiteStateFlagsMask;
}

trace_site_t* get_trace_site_successor(trace_site_state_t state) {
    return reinterpret_cast<trace_site_t*>(get_trace_site_raw_successor(state));
}

trace_site_flags_t get_trace_site_flags(trace_site_state_t state) {
    return state & kSiteStateFlagsMask;
}

trace_site_atomic_state_t* get_trace_site_state_as_atomic(trace_site_t* site) {
    return reinterpret_cast<trace_site_atomic_state_t*>(&site->state);
}

trace_site_state_t make_trace_site_state(trace_site_state_t successor,
                                         trace_site_flags_t flags) {
    return successor | flags;
}

trace_site_state_t make_trace_site_state(trace_site_t* successor,
                                         trace_site_flags_t flags) {
    return reinterpret_cast<trace_site_state_t>(successor) | flags;
}

trace_site_t* unchain_site_cache() {
    trace_site_t* empty_cache = &g_site_cache_sentinel;
    return g_site_cache.exchange(empty_cache, std::memory_order_relaxed);
}

void flush_site_cache() {
    // Atomically swap in an empty cache with the current one.
    trace_site_t* chain_head = unchain_site_cache();

    trace_site_t* chain = chain_head;
    while (chain != &g_site_cache_sentinel) {
        trace_site_atomic_state_t* state_ptr = get_trace_site_state_as_atomic(chain);
        trace_site_state_t curr_state = state_ptr->load(std::memory_order_relaxed);
        trace_site_state_t new_state = kSiteStateUnknown;
        state_ptr->store(new_state, std::memory_order_relaxed);
        chain = get_trace_site_successor(curr_state);
    }
}

// Update the state at |*site|.
// Note that multiple threads may race here for the same site.
void add_to_site_cache(trace_site_t* site, trace_site_state_t current_state, bool enabled) {
    trace_site_atomic_state_t* state_ptr = get_trace_site_state_as_atomic(site);

    // Even when tracing is on, generally only a subset of categories is
    // traced, so the test uses "unlikely".
    trace_site_flags_t new_flags;
    if (unlikely(enabled)) {
        new_flags = kSiteStateEnabled;
    } else {
        new_flags = kSiteStateDisabled;
    }

    // At this point the recorded flags are zero. If we're the first to set
    // them then we're good to add our entry to the cache (if not already in
    // the cache). Otherwise punt. Note that this first setting of the flags
    // won't be the last if we need to also chain this entry into the cache.
    ZX_DEBUG_ASSERT(get_trace_site_flags(current_state) == kSiteStateUnknown);

    trace_site_state_t new_state =
        make_trace_site_state(get_trace_site_raw_successor(current_state),
                              new_flags);
    // If someone else changed our state, punt. This can happen when another
    // thread is tracing and gets there first.
    if (unlikely(!state_ptr->compare_exchange_strong(current_state, new_state,
                                                     std::memory_order_acquire,
                                                     std::memory_order_relaxed))) {
        return;
    }

    if (get_trace_site_raw_successor(new_state)) {
        // Already in chain.
        return;
    }

    // Add to chain.
    trace_site_t* old_cache_ptr =
        g_site_cache.load(std::memory_order_relaxed);
    new_state = make_trace_site_state(old_cache_ptr, new_flags);
    state_ptr->store(new_state, std::memory_order_relaxed);

    // Atomically update both:
    // - |g_site_cache| to point to |new_cache_ptr| (which is our entry)
    // - |*state_ptr| (our entry) to point to the old |g_site_cache|
    // This works because until our entry is live only its flag values
    // matter to other threads. See the discussion in |trace_stop_engine()|.
    trace_site_t* new_cache_ptr = site;
    while (!g_site_cache.compare_exchange_weak(
               old_cache_ptr, new_cache_ptr,
               std::memory_order_relaxed,
               std::memory_order_relaxed)) {
        // Someone else updated |g_site_cache|. Reset our chain pointer
        // and try again.
        new_state = make_trace_site_state(old_cache_ptr, new_flags);
        state_ptr->store(new_state, std::memory_order_relaxed);
    }
}

} // namespace

/*** Trace engine functions ***/

// thread-safe
EXPORT_NO_DDK zx_status_t trace_start_engine(
        async_dispatcher_t* dispatcher, trace_handler_t* handler,
        trace_buffering_mode_t buffering_mode,
        void* buffer, size_t buffer_num_bytes) {
    ZX_DEBUG_ASSERT(dispatcher);
    ZX_DEBUG_ASSERT(handler);
    ZX_DEBUG_ASSERT(buffer);

    switch (buffering_mode) {
    case TRACE_BUFFERING_MODE_ONESHOT:
    case TRACE_BUFFERING_MODE_CIRCULAR:
    case TRACE_BUFFERING_MODE_STREAMING:
        break;
    default:
        return ZX_ERR_INVALID_ARGS;
    }

    // The buffer size must be a multiple of 4096 (simplifies buffer size
    // calcs).
    if ((buffer_num_bytes & 0xfff) != 0) {
        return ZX_ERR_INVALID_ARGS;
    }
    if (buffer_num_bytes < trace_context::min_buffer_size() ||
        buffer_num_bytes > trace_context::max_buffer_size()) {
        return ZX_ERR_INVALID_ARGS;
    }

    fbl::AutoLock lock(&g_engine_mutex);

    // We must have fully stopped a prior tracing session before starting a new one.
    if (g_state.load(std::memory_order_relaxed) != TRACE_STOPPED)
        return ZX_ERR_BAD_STATE;
    ZX_DEBUG_ASSERT(g_context_refs.load(std::memory_order_relaxed) == 0u);

    zx::event event;
    zx_status_t status = zx::event::create(0u, &event);
    if (status != ZX_OK)
        return status;

    // Schedule a waiter for |event|.
    g_event_wait = {
        .state = {ASYNC_STATE_INIT},
        .handler = &handle_event,
        .object = event.get(),
        .trigger = (SIGNAL_ALL_OBSERVERS_STARTED |
                    SIGNAL_CONTEXT_RELEASED)};
    status = async_begin_wait(dispatcher, &g_event_wait);
    if (status != ZX_OK)
        return status;

    // Initialize the trace engine state and context.
    g_state.store(TRACE_STARTED, std::memory_order_relaxed);
    g_dispatcher = dispatcher;
    g_handler = handler;
    g_disposition = ZX_OK;
    g_context = new trace_context(buffer, buffer_num_bytes, buffering_mode, handler);
    g_event = std::move(event);

    g_context->InitBufferHeader();

    // Write the trace initialization record before allowing clients to get in
    // and write their own trace records.
    trace_context_write_initialization_record(g_context, zx_ticks_per_second());

    // After this point clients can acquire references to the trace context.
    g_context_refs.store(kProlongedCounterIncrement, std::memory_order_release);

    // Flush the call-site cache.
    // Do this after clients can acquire the trace context so that any cached
    // values that got recorded prior to this are reset, and any new values
    // from this point on will see that tracing is on.
    flush_site_cache();

    // Notify observers that the state changed.
    if (g_observers.is_empty()) {
        g_event.signal(0u, SIGNAL_ALL_OBSERVERS_STARTED);
    } else {
        for (auto& observer : g_observers)
            observer.awaiting_update_after_start = true;
        notify_observers_locked();
    }

    return ZX_OK;
}
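
// A minimal usage sketch (illustrative only, not part of this translation
// unit's API surface): a trace provider typically owns the handler and the
// buffer, and the names used here (|my_handler|, |my_buffer|) are
// hypothetical.
//
//   // |my_buffer_size| must be a multiple of 4096 bytes and within
//   // [trace_context::min_buffer_size(), trace_context::max_buffer_size()].
//   zx_status_t status = trace_start_engine(dispatcher, &my_handler,
//                                           TRACE_BUFFERING_MODE_ONESHOT,
//                                           my_buffer, my_buffer_size);
//   ...
//   // Later, to begin shutting the trace down:
//   trace_stop_engine(ZX_OK);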

// thread-safe
EXPORT_NO_DDK zx_status_t trace_stop_engine(zx_status_t disposition) {
    fbl::AutoLock lock(&g_engine_mutex);

    // We must have an active trace in order to stop it.
    int state = g_state.load(std::memory_order_relaxed);
    if (state == TRACE_STOPPED)
        return ZX_ERR_BAD_STATE;

    update_disposition_locked(disposition);
    if (state == TRACE_STOPPING)
        return ZX_OK; // already stopping

    ZX_DEBUG_ASSERT(state == TRACE_STARTED);
    ZX_DEBUG_ASSERT(g_context_refs.load(std::memory_order_relaxed) != 0u);

    // Begin stopping the trace.
    g_state.store(TRACE_STOPPING, std::memory_order_relaxed);

    // Flush the call-site cache.
    // Do this after tracing is marked as stopping so that any cached
    // values that got recorded prior to this are reset, and any new
    // values from this point on will see that tracing is stopping.
    // It's still possible that a cached value could be in the process of
    // being recorded as being enabled. So we might reset the site's state
    // and then it gets subsequently marked as enabled by another thread.
    // This is perhaps clumsy but ok: If the site got marked as enabled then a
    // trace context was acquired and engine state cannot change to STOPPED
    // until that context is released, after which we will reset the state back
    // to disabled.
    flush_site_cache();

    // Notify observers that the state changed.
    notify_observers_locked();

    // Release the trace engine's own reference to the trace context.
    // |handle_context_released()| will be called asynchronously when the last
    // reference is released.
    trace_release_prolonged_context(reinterpret_cast<trace_prolonged_context_t*>(g_context));

    return ZX_OK;
}

// This is an internal function, only called from context.cpp.
// thread-safe
bool trace_engine_is_buffer_context_released() {
    return (g_context_refs.load(std::memory_order_relaxed) &
            kBufferCounterMask) == 0;
}

// This is an internal function, only called from context.cpp.
// thread-safe
void trace_engine_request_save_buffer(uint32_t wrapped_count,
                                      uint64_t durable_data_end) {
    // Handle the request on the engine's async loop. This may get called
    // while servicing a client trace request, and we don't want to handle it
    // there.
    async::PostTask(g_dispatcher, [wrapped_count, durable_data_end] () {
        auto context = trace_acquire_prolonged_context();
        if (context) {
            auto tcontext = reinterpret_cast<trace_context_t*>(context);
            tcontext->HandleSaveRollingBufferRequest(wrapped_count, durable_data_end);
            trace_release_prolonged_context(context);
        }
    });
}

// This is called by the handler after it has saved a buffer.
// |wrapped_count| and |durable_data_end| are the values that were passed to
// it, and are passed back to us for sanity checking purposes.
// thread-safe
EXPORT_NO_DDK zx_status_t trace_engine_mark_buffer_saved(
    uint32_t wrapped_count, uint64_t durable_data_end) {
    auto context = trace_acquire_prolonged_context();

    // No point in updating if there's no active trace.
    if (!context) {
        return ZX_ERR_BAD_STATE;
    }

    // Do this now, instead of as a separate iteration on the async loop.
    // The concern is that we want to update buffer state ASAP to reduce the
    // window where records might be dropped because the buffer is full.
    auto tcontext = reinterpret_cast<trace_context_t*>(context);
    tcontext->MarkRollingBufferSaved(wrapped_count, durable_data_end);

    trace_release_prolonged_context(context);
    return ZX_OK;
}
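
// For orientation, a sketch of the streaming-mode flow implied by the two
// functions above (the handler-side details live outside this file):
//   1. context.cpp notices a rolling buffer needs saving and calls
//      |trace_engine_request_save_buffer()|.
//   2. The request is handled on the engine's async loop, which forwards it
//      to the handler via |HandleSaveRollingBufferRequest()|.
//   3. After the handler has saved the buffer (possibly asynchronously), it
//      calls |trace_engine_mark_buffer_saved()| with the same |wrapped_count|
//      and |durable_data_end| it was given, letting the engine reuse that
//      buffer.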

namespace {

void handle_all_observers_started() {
    // TODO(TO-530): Allow indicating an observer failed to start.

    // Clear the signal, otherwise we'll keep getting called.
    g_event.signal(SIGNAL_ALL_OBSERVERS_STARTED, 0u);

    // Note: There's no race in the use of |g_handler| here. If it is going to
    // be set to NULL, that will happen later (|handle_context_released()| is
    // called by |handle_event()| after we are).
    if (g_handler) {
        g_handler->ops->trace_started(g_handler);
    }
}

void handle_context_released(async_dispatcher_t* dispatcher) {
    // All ready to clean up.
    // Grab the mutex while modifying shared state.
    zx_status_t disposition;
    trace_handler_t* handler;
    size_t buffer_bytes_written;
    {
        fbl::AutoLock lock(&g_engine_mutex);

        ZX_DEBUG_ASSERT(g_state.load(std::memory_order_relaxed) == TRACE_STOPPING);
        ZX_DEBUG_ASSERT(g_context_refs.load(std::memory_order_relaxed) == 0u);
        ZX_DEBUG_ASSERT(g_context != nullptr);

        // Update final buffer state.
        g_context->UpdateBufferHeaderAfterStopped();

        // Get final disposition.
        if (g_context->WasRecordDropped())
            update_disposition_locked(ZX_ERR_NO_MEMORY);
        disposition = g_disposition;
        handler = g_handler;
        buffer_bytes_written = (g_context->RollingBytesAllocated() +
                                g_context->DurableBytesAllocated());

        // Tidy up.
        g_dispatcher = nullptr;
        g_handler = nullptr;
        g_disposition = ZX_OK;
        g_event.reset();
        delete g_context;
        g_context = nullptr;

        // After this point, it's possible for the engine to be restarted
        // (once we release the lock).
        g_state.store(TRACE_STOPPED, std::memory_order_relaxed);

        // Flush the call-site cache.
        // Do this after tracing is marked as stopped so that any cached
        // values that got recorded prior to this are reset, and any new
        // values from this point on will see that tracing is stopped.
        // Any call sites already chained are ok; the concern is with the
        // timing of call sites about to be added to the chain. We're ok
        // here because at this point it's impossible to acquire a trace
        // context, and therefore it's impossible for a category to be
        // cached as enabled.
        flush_site_cache();

        // Notify observers that the state changed.
        notify_observers_locked();
    }

    // Notify the handler about the final disposition.
    handler->ops->trace_stopped(handler, dispatcher, disposition, buffer_bytes_written);
}

// Handles the case where the asynchronous dispatcher has encountered an error
// and will no longer be servicing the wait callback.  Consequently, this is
// our last chance to stop the engine and wait for all contexts to be released.
void handle_hard_shutdown(async_dispatcher_t* dispatcher) {
    // Stop the engine, in case it hasn't noticed yet.
    trace_stop_engine(ZX_ERR_CANCELED);

    // There may still be outstanding references to the trace context.
    // We don't know when or whether they will be cleared, but we can't
    // complete shutdown until they are gone since there might still be live
    // pointers into the trace buffer, so allow a brief timeout.  If the
    // release event hasn't been signaled by then, declare the trace engine
    // dead in the water to prevent dangling pointers.  This situation should
    // be very rare as it only occurs when the asynchronous dispatcher is
    // shutting down, typically just prior to process exit.
    auto status = g_event.wait_one(
        SIGNAL_CONTEXT_RELEASED,
        zx::deadline_after(kSynchronousShutdownTimeout),
        nullptr);
    if (status == ZX_OK) {
        handle_context_released(dispatcher);
        return;
    }

    // Uh oh.
    auto context_refs = g_context_refs.load(std::memory_order_relaxed);
    fprintf(stderr,
            "TraceEngine: Timed out waiting for %u buffer, %u prolonged trace context\n"
            "references (raw 0x%x) to be released after %lu ns\n"
            "while the asynchronous dispatcher was shutting down.\n"
            "Tracing will no longer be available in this process.",
            get_buffer_context_refs(context_refs),
            get_prolonged_context_refs(context_refs),
            context_refs,
            kSynchronousShutdownTimeout.get());
}

void handle_event(async_dispatcher_t* dispatcher, async_wait_t* wait,
                  zx_status_t status, const zx_packet_signal_t* signal) {
    // Note: This function may get all signals at the same time.

    if (status == ZX_OK) {
        if (signal->observed & SIGNAL_ALL_OBSERVERS_STARTED) {
            handle_all_observers_started();
        }
        if (signal->observed & SIGNAL_CONTEXT_RELEASED) {
            handle_context_released(dispatcher);
            return; // trace engine is completely stopped now
        }
        status = async_begin_wait(dispatcher, &g_event_wait);
    }

    if (status != ZX_OK) {
        handle_hard_shutdown(dispatcher);
    }
}

} // namespace

/*** Trace instrumentation functions ***/

// thread-safe, lock-free
EXPORT trace_state_t trace_state() {
    return static_cast<trace_state_t>(g_state.load(std::memory_order_relaxed));
}

// thread-safe
EXPORT bool trace_is_category_enabled(const char* category_literal) {
    trace_context_t* context = trace_acquire_context();
    if (likely(!context))
        return false;
    bool result = trace_context_is_category_enabled(context, category_literal);
    trace_release_context(context);
    return result;
}

// thread-safe, fail-fast, lock-free
EXPORT trace_context_t* trace_acquire_context() {
    // Fail fast: Check whether we could possibly write into the trace buffer.
    // The count must be at least 1 to indicate that the buffer is initialized.
    // This is marked likely because tracing is usually disabled and we want
    // to return as quickly as possible from this function.
    uint32_t count = g_context_refs.load(std::memory_order_relaxed);
    if (likely(count == 0u))
        return nullptr;

    // Attempt to increment the reference count.
    // This also acts as a fence for future access to buffer state variables.
    //
    // Note the ACQUIRE fence here since the trace context may have changed
    // from the perspective of this thread.
    while (!g_context_refs.compare_exchange_weak(count,
                                                 count + kBufferCounterIncrement,
                                                 std::memory_order_acquire,
                                                 std::memory_order_relaxed)) {
        if (unlikely(count == 0u))
            return nullptr;
    }
    return g_context;
}

// thread-safe, fail-fast, lock-free
EXPORT trace_context_t* trace_acquire_context_for_category(
        const char* category_literal, trace_string_ref_t* out_ref) {
    // This is marked likely because tracing is usually disabled and we want
    // to return as quickly as possible from this function.
    trace_context_t* context = trace_acquire_context();
    if (likely(!context))
        return nullptr;

    if (!trace_context_register_category_literal(context, category_literal, out_ref)) {
        trace_release_context(context);
        return nullptr;
    }

    return context;
}
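
// Illustrative call pattern (a sketch of what instrumentation macros do; the
// category string and the record-writing step are placeholders, and the
// trace_context_write_*() functions are declared in <trace-engine/context.h>):
//
//   trace_string_ref_t category_ref;
//   trace_context_t* context =
//       trace_acquire_context_for_category("example:category", &category_ref);
//   if (context) {
//       // ... write one or more records via trace_context_write_*() ...
//       trace_release_context(context);
//   }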

// TODO(PT-84): This function is split out from
// |trace_acquire_context_for_category_cached()| because gcc doesn't
// optimize the prologue as well as it could: It creates the stack frame
// for the entire function prior to the "is disabled?" early-exit test.
// Clang does fine, but for now to achieve optimum performance for the common
// case of tracing off, regardless of compiler, we employ this workaround.
// Both gcc and clang do the expected tail-call optimization, so all this
// costs is an extra branch when tracing is on.
//
// |current_state| is appended as an argument, violating the convention to
// put output parameters last, in order to minimize the changes in the
// caller's tail call.
static __NO_INLINE trace_context_t* trace_acquire_context_for_category_cached_worker(
    const char* category_literal, trace_site_t* site,
    trace_string_ref_t* out_ref, trace_site_state_t current_state) {
    trace_context_t* context =
        trace_acquire_context_for_category(category_literal, out_ref);

    if (likely((current_state & kSiteStateFlagsMask) != kSiteStateUnknown)) {
        return context;
    }

    // First time through for this trace run. Note that multiple threads may
    // get to this point for the same call-site.
    add_to_site_cache(site, current_state, context != nullptr);

    return context;
}

// thread-safe, fail-fast, lock-free
EXPORT trace_context_t* trace_acquire_context_for_category_cached(
    const char* category_literal, trace_site_t* site,
    trace_string_ref_t* out_ref) {
    trace_site_atomic_state_t* state_ptr = get_trace_site_state_as_atomic(site);

    trace_site_state_t current_state =
        state_ptr->load(std::memory_order_relaxed);
    if (likely(current_state & kSiteStateDisabled)) {
        return nullptr;
    }

    return trace_acquire_context_for_category_cached_worker(
        category_literal, site, out_ref, current_state);
}

// thread-safe
EXPORT zx_status_t trace_engine_flush_category_cache(void) {
    fbl::AutoLock lock(&g_engine_mutex);

    if (g_state.load(std::memory_order_relaxed) != TRACE_STOPPED)
        return ZX_ERR_BAD_STATE;

    // Empty the site cache. The next time the app tries to emit a trace event
    // the call site will be re-added to the cache, but that's ok.
    flush_site_cache();

    return ZX_OK;
}

// thread-safe, never-fail, lock-free
EXPORT void trace_release_context(trace_context_t* context) {
    ZX_DEBUG_ASSERT(context == g_context);
    ZX_DEBUG_ASSERT(get_buffer_context_refs(g_context_refs.load(std::memory_order_relaxed)) != 0u);

    // Note the RELEASE fence here since the trace context and trace buffer
    // contents may have changed from the perspective of other threads.
    auto previous = g_context_refs.fetch_sub(kBufferCounterIncrement,
                                             std::memory_order_release);
    if (unlikely(previous == kBufferCounterIncrement)) {
        // Notify the engine that the last reference was released.
        zx_status_t status = g_event.signal(0u, SIGNAL_CONTEXT_RELEASED);
        ZX_DEBUG_ASSERT(status == ZX_OK);
    }
}

// thread-safe, fail-fast, lock-free
EXPORT_NO_DDK trace_prolonged_context_t* trace_acquire_prolonged_context() {
    // There's no need for extreme efficiency here, but for consistency with
    // |trace_acquire_context()| we copy what it does.
    uint32_t count = g_context_refs.load(std::memory_order_relaxed);
    if (likely(count == 0u))
        return nullptr;

    // Attempt to increment the reference count.
    // This also acts as a fence for future access to buffer state variables.
    //
    // Note the ACQUIRE fence here since the trace context may have changed
    // from the perspective of this thread.
    while (!g_context_refs.compare_exchange_weak(count,
                                                 count + kProlongedCounterIncrement,
                                                 std::memory_order_acquire,
                                                 std::memory_order_relaxed)) {
        if (unlikely(count == 0u))
            return nullptr;
    }
    ZX_DEBUG_ASSERT(get_prolonged_context_refs(g_context_refs.load(std::memory_order_relaxed)) <=
                    kMaxProlongedCounter);
    return reinterpret_cast<trace_prolonged_context_t*>(g_context);
}

// thread-safe, never-fail, lock-free
EXPORT_NO_DDK void trace_release_prolonged_context(trace_prolonged_context_t* context) {
    auto tcontext = reinterpret_cast<trace_context_t*>(context);
    ZX_DEBUG_ASSERT(tcontext == g_context);
    ZX_DEBUG_ASSERT(get_prolonged_context_refs(g_context_refs.load(std::memory_order_relaxed)) != 0u);

    // Note the RELEASE fence here since the trace context and trace buffer
    // contents may have changed from the perspective of other threads.
    auto previous = g_context_refs.fetch_sub(kProlongedCounterIncrement,
                                             std::memory_order_release);
    if (unlikely(previous == kProlongedCounterIncrement)) {
        // Notify the engine that the last reference was released.
        zx_status_t status = g_event.signal(0u, SIGNAL_CONTEXT_RELEASED);
        ZX_DEBUG_ASSERT(status == ZX_OK);
    }
}

/*** Asynchronous observers ***/

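// A sketch of how an observer uses this API (illustrative only; error
// handling is omitted and the waiting mechanics are up to the caller):
//
//   zx::event observer_event;
//   zx::event::create(0u, &observer_event);
//   trace_register_observer(observer_event.get());
//   // ... wait for ZX_EVENT_SIGNALED, clear it, then inspect trace_state();
//   // after reacting to TRACE_STARTED, acknowledge with:
//   trace_notify_observer_updated(observer_event.get());
//   // ... and eventually:
//   trace_unregister_observer(observer_event.get());
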
EXPORT zx_status_t trace_register_observer(zx_handle_t event) {
    fbl::AutoLock lock(&g_engine_mutex);

    for (const auto& item : g_observers) {
        if (item.event == event)
            return ZX_ERR_INVALID_ARGS;
    }

    g_observers.push_back(Observer{event, false});
    return ZX_OK;
}

EXPORT zx_status_t trace_unregister_observer(zx_handle_t event) {
    fbl::AutoLock lock(&g_engine_mutex);

    for (size_t i = 0; i < g_observers.size(); i++) {
        if (g_observers[i].event == event) {
            bool awaited = g_observers[i].awaiting_update_after_start;
            g_observers.erase(i);
            if (awaited) {
                notify_engine_all_observers_started_if_needed_locked();
            }
            return ZX_OK;
        }
    }
    return ZX_ERR_NOT_FOUND;
}

EXPORT void trace_notify_observer_updated(zx_handle_t event) {
    fbl::AutoLock lock(&g_engine_mutex);

    for (auto& item : g_observers) {
        if (item.event == event) {
            if (item.awaiting_update_after_start) {
                item.awaiting_update_after_start = false;
                notify_engine_all_observers_started_if_needed_locked();
            }
            return;
        }
    }
}