// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#pragma once

#include <atomic>

#include <zircon/assert.h>

#include <fbl/mutex.h>
#include <lib/zx/event.h>
#include <trace-engine/buffer_internal.h>
#include <trace-engine/context.h>
#include <trace-engine/handler.h>

// Two preprocessor symbols control which symbols we export in a .so:
// EXPORT and EXPORT_NO_DDK:
// - EXPORT is for symbols exported to both driver and non-driver versions of
//   the library ("non-driver" is the normal case).
// - EXPORT_NO_DDK is for symbols *not* exported in the DDK.
// A third variant, which exports nothing, is also supported. This is for
// cases like libvulkan, which wants tracing but does not have access to
// libtrace-engine.so.
// Two preprocessor symbols are provided by the build system to select which
// variant we are building: STATIC_LIBRARY and DDK_TRACING. Either neither of
// them is defined (the normal case), or exactly one of them is defined.
#if defined(STATIC_LIBRARY)
#define EXPORT
#define EXPORT_NO_DDK
#elif defined(DDK_TRACING)
#define EXPORT __EXPORT
#define EXPORT_NO_DDK
#else
#define EXPORT __EXPORT
#define EXPORT_NO_DDK __EXPORT
#endif
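
// Illustrative usage (hypothetical function names, not part of this header):
//   EXPORT void trace_example_api();                  // exported in all .so variants
//   EXPORT_NO_DDK void trace_example_non_ddk_api();   // omitted from the DDK .so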

using trace::internal::trace_buffer_header;

// Return true if there are no buffer acquisitions of the trace context.
bool trace_engine_is_buffer_context_released();

// Called from trace_context to notify the engine a buffer needs saving.
void trace_engine_request_save_buffer(uint32_t wrapped_count,
                                      uint64_t durable_data_end);

// Maintains state for a single trace session.
// This structure is accessed concurrently from many threads which hold trace
// context references.
// Implements the opaque type declared in <trace-engine/context.h>.
struct trace_context {
    trace_context(void* buffer, size_t buffer_num_bytes, trace_buffering_mode_t buffering_mode,
                  trace_handler_t* handler);

    ~trace_context();

    const trace_buffer_header* buffer_header() const { return header_; }

    static size_t min_buffer_size() { return kMinPhysicalBufferSize; }

    static size_t max_buffer_size() { return kMaxPhysicalBufferSize; }

    static size_t MaxUsableBufferOffset() {
        return (1ull << kUsableBufferOffsetBits) - sizeof(uint64_t);
    }

    uint32_t generation() const { return generation_; }

    trace_handler_t* handler() const { return handler_; }

    trace_buffering_mode_t buffering_mode() const { return buffering_mode_; }

    uint64_t num_records_dropped() const {
        return num_records_dropped_.load(std::memory_order_relaxed);
    }

    bool UsingDurableBuffer() const {
        return buffering_mode_ != TRACE_BUFFERING_MODE_ONESHOT;
    }

    // Return true if at least one record was dropped.
    bool WasRecordDropped() const { return num_records_dropped() != 0u; }

    // Return the number of bytes currently allocated in the rolling buffer(s).
    size_t RollingBytesAllocated() const;

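    // Return the number of bytes currently allocated in the durable buffer.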
    size_t DurableBytesAllocated() const;

    void InitBufferHeader();
    void UpdateBufferHeaderAfterStopped();

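    // Record and index allocation: AllocRecord() and AllocDurableRecord()
    // carve |num_bytes| out of the rolling and durable buffers respectively,
    // while AllocThreadIndex() and AllocStringIndex() assign the next
    // thread-table and string-table index.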
    uint64_t* AllocRecord(size_t num_bytes);
    uint64_t* AllocDurableRecord(size_t num_bytes);
    bool AllocThreadIndex(trace_thread_index_t* out_index);
    bool AllocStringIndex(trace_string_index_t* out_index);

    // This is called by the handler when it has been notified that a buffer
    // has been saved.
    // |wrapped_count| is the wrapped count at the time the buffer save request
    // was made. Similarly for |durable_data_end|.
    void MarkRollingBufferSaved(uint32_t wrapped_count, uint64_t durable_data_end);

    // This is only called from the engine to initiate a buffer save.
    void HandleSaveRollingBufferRequest(uint32_t wrapped_count,
                                        uint64_t durable_data_end);
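
    // Typical streaming-mode sequence (a sketch of the handshake implied by
    // the declarations above): when a rolling buffer fills, the context asks
    // the engine to save it via trace_engine_request_save_buffer(); the
    // engine calls HandleSaveRollingBufferRequest(), which notifies the
    // handler; and once the save completes, the handler calls
    // MarkRollingBufferSaved() so the buffer can be reused.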

private:
    // The maximum rolling buffer size in bits.
    static constexpr size_t kRollingBufferSizeBits = 32;

    // Maximum size, in bytes, of a rolling buffer.
    static constexpr size_t kMaxRollingBufferSize = 1ull << kRollingBufferSizeBits;

    // The number of usable bits in the buffer pointer.
    // This is several bits more than the maximum buffer size to allow a
    // buffer pointer to grow without overflow while TraceManager is saving a
    // buffer in streaming mode.
    // In this case we don't snap the offset to the end, as doing so requires
    // modifying state and thus obtaining the lock (streaming mode is not
    // lock-free). Instead the offset keeps growing.
    // kUsableBufferOffsetBits = 40 bits = 1TB.
    // Max rolling buffer size = 32 bits = 4GB.
    // Thus we assume TraceManager can save 4GB of trace before the client
    // writes 1TB of trace data (lest the offset part of
    // |rolling_buffer_current_| overflow). But, just in case, if
    // TraceManager still can't keep up we stop tracing when the offset
    // approaches overflowing. See AllocRecord().
    static constexpr int kUsableBufferOffsetBits = kRollingBufferSizeBits + 8;

    // The number of bits used to record the buffer pointer.
    // This includes one more bit to support overflow in offset calculations.
    static constexpr int kBufferOffsetBits = kUsableBufferOffsetBits + 1;

    // The number of bits in the wrapped counter.
    // It is important that this counter not wrap (well, technically it can,
    // since the lost information isn't that important, but if it wraps too
    // quickly the transition from one buffer to the other can break).
    // The current values allow for a 20-bit counter, which is plenty.
    // A value of 20 also has the benefit that when the entire
    // offset_plus_counter value is printed in hex the counter is easily read.
    static constexpr int kWrappedCounterBits = 20;
    static constexpr int kWrappedCounterShift = 64 - kWrappedCounterBits;

    static_assert(kBufferOffsetBits + kWrappedCounterBits <= 64, "");
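
    // Resulting layout of an offset-plus-counter value (an illustrative
    // sketch derived from the constants above):
    //   bits [63..44]: wrapped counter (kWrappedCounterBits = 20)
    //   bits [43..41]: unused headroom
    //   bits [40..0]:  buffer offset (kBufferOffsetBits = 41)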

    // The physical buffer must be at least this big.
    // Mostly this is here to simplify buffer size calculations.
    // It's as small as it is to simplify some testcases.
    static constexpr size_t kMinPhysicalBufferSize = 4096;

    // The physical buffer can be at most this big.
    // To keep things simple we ignore the header.
    static constexpr size_t kMaxPhysicalBufferSize = kMaxRollingBufferSize;

    // The minimum size of the durable buffer.
    // There must be enough space for at least the initialization record.
    static constexpr size_t kMinDurableBufferSize = 16;

    // The maximum size of the durable buffer.
    // We need enough space for:
    // - initialization record = 16 bytes
    // - string table (max TRACE_ENCODED_STRING_REF_MAX_INDEX = 0x7fffu entries)
    // - thread table (max TRACE_ENCODED_THREAD_REF_MAX_INDEX = 0xff entries)
    // String entries are 8 bytes + length-rounded-to-8-bytes.
    // Strings have a max size of TRACE_ENCODED_STRING_REF_MAX_LENGTH bytes
    // = 32000. We assume most are < 64 bytes.
    // Thread entries are 8 bytes + pid + tid = 24 bytes.
    // If we assume 10000 registered strings, typically 64 bytes, plus the
    // maximum number of registered threads, that works out to:
    // 16 /*initialization record*/
    // + 10000 * (8 + 64) /*strings*/
    // + 255 * 24 /*threads*/
    // = 726136.
    // We round this up to 1MB.
    static constexpr size_t kMaxDurableBufferSize = 1024 * 1024;

    // Given a buffer of size |size| in bytes, not including the header,
    // return how much to use for the durable buffer. This is further adjusted
    // to be at most |kMaxDurableBufferSize|, and to account for rolling
    // buffer size alignment constraints.
#define GET_DURABLE_BUFFER_SIZE(size) ((size) / 16)
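    // For example, a 1MB buffer (excluding the header) yields 64KB for the
    // durable buffer before the adjustments described above.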

    // Ensure the smallest buffer is still large enough to hold
    // |kMinDurableBufferSize|.
    static_assert(GET_DURABLE_BUFFER_SIZE(kMinPhysicalBufferSize - sizeof(trace_buffer_header)) >=
                  kMinDurableBufferSize, "");

    static uintptr_t GetBufferOffset(uint64_t offset_plus_counter) {
        return offset_plus_counter & ((1ul << kBufferOffsetBits) - 1);
    }

    static uint32_t GetWrappedCount(uint64_t offset_plus_counter) {
        return static_cast<uint32_t>(offset_plus_counter >> kWrappedCounterShift);
    }

    static uint64_t MakeOffsetPlusCounter(uintptr_t offset, uint32_t counter) {
        return offset | (static_cast<uint64_t>(counter) << kWrappedCounterShift);
    }

    static int GetBufferNumber(uint32_t wrapped_count) {
        return wrapped_count & 1;
    }
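
    // For example, MakeOffsetPlusCounter(0x100, 3) == (3ull << 44) | 0x100;
    // GetBufferOffset() and GetWrappedCount() recover 0x100 and 3, and
    // GetBufferNumber(3) == 1 selects the second rolling buffer.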

    bool IsDurableBufferFull() const {
        return durable_buffer_full_mark_.load(std::memory_order_relaxed) != 0;
    }

    // Return true if |buffer_number| is ready to be written to.
    bool IsRollingBufferReady(int buffer_number) const {
        return rolling_buffer_full_mark_[buffer_number].load(std::memory_order_relaxed) == 0;
    }

    // Return true if the other rolling buffer is ready to be written to.
    bool IsOtherRollingBufferReady(int buffer_number) const {
        return IsRollingBufferReady(!buffer_number);
    }

    uint32_t CurrentWrappedCount() const {
        auto current = rolling_buffer_current_.load(std::memory_order_relaxed);
        return GetWrappedCount(current);
    }

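    // Buffer bookkeeping helpers: ComputeBufferSizes() derives the durable
    // and rolling buffer sizes from the physical buffer, the Mark*Full()
    // methods record the relevant full marks, and SwitchRollingBuffer() /
    // SwitchRollingBufferLocked() handle the transition to the other rolling
    // buffer in circular and streaming modes.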
    void ComputeBufferSizes();

    void MarkDurableBufferFull(uint64_t last_offset);

    void MarkOneshotBufferFull(uint64_t last_offset);

    void MarkRollingBufferFull(uint32_t wrapped_count, uint64_t last_offset);

    bool SwitchRollingBuffer(uint32_t wrapped_count, uint64_t buffer_offset);

    void SwitchRollingBufferLocked(uint32_t prev_wrapped_count, uint64_t prev_last_offset)
        __TA_REQUIRES(buffer_switch_mutex_);

    void StreamingBufferFullCheck(uint32_t wrapped_count,
                                  uint64_t buffer_offset);

    void MarkTracingArtificiallyStopped();

    void SnapToEnd(uint32_t wrapped_count) {
        // Snap to the endpoint for simplicity.
        // Several threads could all hit buffer-full with each one
        // continually incrementing the offset.
        uint64_t full_offset_plus_counter =
            MakeOffsetPlusCounter(rolling_buffer_size_, wrapped_count);
        rolling_buffer_current_.store(full_offset_plus_counter,
                                      std::memory_order_relaxed);
    }

    void MarkRecordDropped() {
        num_records_dropped_.fetch_add(1, std::memory_order_relaxed);
    }

    void NotifyRollingBufferFullLocked(uint32_t wrapped_count,
                                       uint64_t durable_data_end)
        __TA_REQUIRES(buffer_switch_mutex_);

    // The generation counter associated with this context to distinguish
    // it from previously created contexts.
    uint32_t const generation_;

    // The buffering mode.
    trace_buffering_mode_t const buffering_mode_;

    // Buffer start and end pointers.
    // These encapsulate the entire physical buffer.
    uint8_t* const buffer_start_;
    uint8_t* const buffer_end_;

    // Same as |buffer_start_|, but as a header pointer.
    trace_buffer_header* const header_;

    // Durable-record buffer start.
    uint8_t* durable_buffer_start_;

    // The size of the durable buffer.
    size_t durable_buffer_size_;

    // Rolling buffer start.
    // To simplify switching between them we don't record the buffer end,
    // and instead record their size (which is identical).
    uint8_t* rolling_buffer_start_[2];

    // The size of both rolling buffers.
    size_t rolling_buffer_size_;

    // Current allocation pointer for durable records.
    // This is only used in circular and streaming modes.
    // Starts at |durable_buffer_start| and grows from there.
    // May exceed |durable_buffer_end| when the buffer is full.
    std::atomic<uint64_t> durable_buffer_current_;

    // Offset beyond the last successful allocation, or zero if not full.
    // This is only used in circular and streaming modes: there is no separate
    // buffer for durable records in oneshot mode.
    // Only ever set to non-zero once in the lifetime of the trace context.
    std::atomic<uint64_t> durable_buffer_full_mark_;

    // Allocation pointer of the current buffer for non-durable records,
    // plus a wrapped counter. These are combined into one word so that they
    // can be atomically fetched together.
    // The lower |kBufferOffsetBits| bits comprise the offset into the buffer
    // of the next record to write. The upper |kWrappedCounterBits| bits
    // comprise the wrapped counter. Bit zero of this counter is the number
    // of the buffer currently being written to. The counter is used in part
    // for record keeping purposes, and to support the transition from one
    // buffer to the next.
    //
    // To construct: MakeOffsetPlusCounter().
    // To get the buffer offset: GetBufferOffset().
    // To get the wrapped count: GetWrappedCount().
    //
    // This value is also used for durable records in oneshot mode: in
    // oneshot mode durable and non-durable records share the same buffer.
    std::atomic<uint64_t> rolling_buffer_current_;

    // Offset beyond the last successful allocation, or zero if not full.
    // Only ever set to non-zero once when the buffer fills.
    // This will only be set in oneshot and streaming modes.
    std::atomic<uint64_t> rolling_buffer_full_mark_[2];

    // A count of the number of records that have been dropped.
    std::atomic<uint64_t> num_records_dropped_{0};

    // A count of the number of records dropped after a rolling buffer switch.
    std::atomic<uint64_t> num_records_dropped_after_buffer_switch_{0};

    // Set to true if the engine needs to stop tracing for some reason.
    bool tracing_artificially_stopped_ __TA_GUARDED(buffer_switch_mutex_) = false;

    // This is used when switching rolling buffers.
    // It's a relatively rare operation, and this simplifies reasoning about
    // correctness.
    mutable fbl::Mutex buffer_switch_mutex_; // TODO(dje): more guards?

    // Handler associated with the trace session.
    trace_handler_t* const handler_;

    // The next thread index to be assigned.
    std::atomic<trace_thread_index_t> next_thread_index_{
        TRACE_ENCODED_THREAD_REF_MIN_INDEX};

    // The next string table index to be assigned.
    std::atomic<trace_string_index_t> next_string_index_{
        TRACE_ENCODED_STRING_REF_MIN_INDEX};
};