1 /* SPDX-License-Identifier: MIT */ 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #ifndef _I915_PERF_TYPES_H_ 7 #define _I915_PERF_TYPES_H_ 8 9 #include <linux/atomic.h> 10 #include <linux/device.h> 11 #include <linux/hrtimer.h> 12 #include <linux/llist.h> 13 #include <linux/poll.h> 14 #include <linux/sysfs.h> 15 #include <linux/types.h> 16 #include <linux/uuid.h> 17 #include <linux/wait.h> 18 #include <uapi/drm/i915_drm.h> 19 20 #include "gt/intel_sseu.h" 21 #include "i915_reg_defs.h" 22 #include "intel_wakeref.h" 23 24 struct drm_i915_private; 25 struct file; 26 struct i915_active; 27 struct i915_gem_context; 28 struct i915_perf; 29 struct i915_vma; 30 struct intel_context; 31 struct intel_engine_cs; 32 33 struct i915_oa_format { 34 u32 format; 35 int size; 36 }; 37 38 struct i915_oa_reg { 39 i915_reg_t addr; 40 u32 value; 41 }; 42 43 struct i915_oa_config { 44 struct i915_perf *perf; 45 46 char uuid[UUID_STRING_LEN + 1]; 47 int id; 48 49 const struct i915_oa_reg *mux_regs; 50 u32 mux_regs_len; 51 const struct i915_oa_reg *b_counter_regs; 52 u32 b_counter_regs_len; 53 const struct i915_oa_reg *flex_regs; 54 u32 flex_regs_len; 55 56 struct attribute_group sysfs_metric; 57 struct attribute *attrs[2]; 58 struct kobj_attribute sysfs_metric_id; 59 60 struct kref ref; 61 struct rcu_head rcu; 62 }; 63 64 struct i915_perf_stream; 65 66 /** 67 * struct i915_perf_stream_ops - the OPs to support a specific stream type 68 */ 69 struct i915_perf_stream_ops { 70 /** 71 * @enable: Enables the collection of HW samples, either in response to 72 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened 73 * without `I915_PERF_FLAG_DISABLED`. 74 */ 75 void (*enable)(struct i915_perf_stream *stream); 76 77 /** 78 * @disable: Disables the collection of HW samples, either in response 79 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying 80 * the stream. 81 */ 82 void (*disable)(struct i915_perf_stream *stream); 83 84 /** 85 * @poll_wait: Call poll_wait, passing a wait queue that will be woken 86 * once there is something ready to read() for the stream 87 */ 88 void (*poll_wait)(struct i915_perf_stream *stream, 89 struct file *file, 90 poll_table *wait); 91 92 /** 93 * @wait_unlocked: For handling a blocking read, wait until there is 94 * something to ready to read() for the stream. E.g. wait on the same 95 * wait queue that would be passed to poll_wait(). 96 */ 97 int (*wait_unlocked)(struct i915_perf_stream *stream); 98 99 /** 100 * @read: Copy buffered metrics as records to userspace 101 * **buf**: the userspace, destination buffer 102 * **count**: the number of bytes to copy, requested by userspace 103 * **offset**: zero at the start of the read, updated as the read 104 * proceeds, it represents how many bytes have been copied so far and 105 * the buffer offset for copying the next record. 106 * 107 * Copy as many buffered i915 perf samples and records for this stream 108 * to userspace as will fit in the given buffer. 109 * 110 * Only write complete records; returning -%ENOSPC if there isn't room 111 * for a complete record. 112 * 113 * Return any error condition that results in a short read such as 114 * -%ENOSPC or -%EFAULT, even though these may be squashed before 115 * returning to userspace. 116 */ 117 int (*read)(struct i915_perf_stream *stream, 118 char __user *buf, 119 size_t count, 120 size_t *offset); 121 122 /** 123 * @destroy: Cleanup any stream specific resources. 124 * 125 * The stream will always be disabled before this is called. 126 */ 127 void (*destroy)(struct i915_perf_stream *stream); 128 }; 129 130 /** 131 * struct i915_perf_stream - state for a single open stream FD 132 */ 133 struct i915_perf_stream { 134 /** 135 * @perf: i915_perf backpointer 136 */ 137 struct i915_perf *perf; 138 139 /** 140 * @uncore: mmio access path 141 */ 142 struct intel_uncore *uncore; 143 144 /** 145 * @engine: Engine associated with this performance stream. 146 */ 147 struct intel_engine_cs *engine; 148 149 /** 150 * @lock: Lock associated with operations on stream 151 */ 152 struct mutex lock; 153 154 /** 155 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` 156 * properties given when opening a stream, representing the contents 157 * of a single sample as read() by userspace. 158 */ 159 u32 sample_flags; 160 161 /** 162 * @sample_size: Considering the configured contents of a sample 163 * combined with the required header size, this is the total size 164 * of a single sample record. 165 */ 166 int sample_size; 167 168 /** 169 * @ctx: %NULL if measuring system-wide across all contexts or a 170 * specific context that is being monitored. 171 */ 172 struct i915_gem_context *ctx; 173 174 /** 175 * @enabled: Whether the stream is currently enabled, considering 176 * whether the stream was opened in a disabled state and based 177 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. 178 */ 179 bool enabled; 180 181 /** 182 * @hold_preemption: Whether preemption is put on hold for command 183 * submissions done on the @ctx. This is useful for some drivers that 184 * cannot easily post process the OA buffer context to subtract delta 185 * of performance counters not associated with @ctx. 186 */ 187 bool hold_preemption; 188 189 /** 190 * @ops: The callbacks providing the implementation of this specific 191 * type of configured stream. 192 */ 193 const struct i915_perf_stream_ops *ops; 194 195 /** 196 * @oa_config: The OA configuration used by the stream. 197 */ 198 struct i915_oa_config *oa_config; 199 200 /** 201 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily 202 * each time @oa_config changes. 203 */ 204 struct llist_head oa_config_bos; 205 206 /** 207 * @pinned_ctx: The OA context specific information. 208 */ 209 struct intel_context *pinned_ctx; 210 211 /** 212 * @specific_ctx_id: The id of the specific context. 213 */ 214 u32 specific_ctx_id; 215 216 /** 217 * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. 218 */ 219 u32 specific_ctx_id_mask; 220 221 /** 222 * @poll_check_timer: High resolution timer that will periodically 223 * check for data in the circular OA buffer for notifying userspace 224 * (e.g. during a read() or poll()). 225 */ 226 struct hrtimer poll_check_timer; 227 228 /** 229 * @poll_wq: The wait queue that hrtimer callback wakes when it 230 * sees data ready to read in the circular OA buffer. 231 */ 232 wait_queue_head_t poll_wq; 233 234 /** 235 * @pollin: Whether there is data available to read. 236 */ 237 bool pollin; 238 239 /** 240 * @periodic: Whether periodic sampling is currently enabled. 241 */ 242 bool periodic; 243 244 /** 245 * @period_exponent: The OA unit sampling frequency is derived from this. 246 */ 247 int period_exponent; 248 249 /** 250 * @oa_buffer: State of the OA buffer. 251 */ 252 struct { 253 const struct i915_oa_format *format; 254 struct i915_vma *vma; 255 u8 *vaddr; 256 u32 last_ctx_id; 257 int size_exponent; 258 259 /** 260 * @ptr_lock: Locks reads and writes to all head/tail state 261 * 262 * Consider: the head and tail pointer state needs to be read 263 * consistently from a hrtimer callback (atomic context) and 264 * read() fop (user context) with tail pointer updates happening 265 * in atomic context and head updates in user context and the 266 * (unlikely) possibility of read() errors needing to reset all 267 * head/tail state. 268 * 269 * Note: Contention/performance aren't currently a significant 270 * concern here considering the relatively low frequency of 271 * hrtimer callbacks (5ms period) and that reads typically only 272 * happen in response to a hrtimer event and likely complete 273 * before the next callback. 274 * 275 * Note: This lock is not held *while* reading and copying data 276 * to userspace so the value of head observed in htrimer 277 * callbacks won't represent any partial consumption of data. 278 */ 279 spinlock_t ptr_lock; 280 281 /** 282 * @aging_tail: The last HW tail reported by HW. The data 283 * might not have made it to memory yet though. 284 */ 285 u32 aging_tail; 286 287 /** 288 * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer 289 * was read; used to determine when it is old enough to trust. 290 */ 291 u64 aging_timestamp; 292 293 /** 294 * @head: Although we can always read back the head pointer register, 295 * we prefer to avoid trusting the HW state, just to avoid any 296 * risk that some hardware condition could * somehow bump the 297 * head pointer unpredictably and cause us to forward the wrong 298 * OA buffer data to userspace. 299 */ 300 u32 head; 301 302 /** 303 * @tail: The last verified tail that can be read by userspace. 304 */ 305 u32 tail; 306 } oa_buffer; 307 308 /** 309 * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be 310 * reprogrammed. 311 */ 312 struct i915_vma *noa_wait; 313 314 /** 315 * @poll_oa_period: The period in nanoseconds at which the OA 316 * buffer should be checked for available data. 317 */ 318 u64 poll_oa_period; 319 }; 320 321 /** 322 * struct i915_oa_ops - Gen specific implementation of an OA unit stream 323 */ 324 struct i915_oa_ops { 325 /** 326 * @is_valid_b_counter_reg: Validates register's address for 327 * programming boolean counters for a particular platform. 328 */ 329 bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); 330 331 /** 332 * @is_valid_mux_reg: Validates register's address for programming mux 333 * for a particular platform. 334 */ 335 bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); 336 337 /** 338 * @is_valid_flex_reg: Validates register's address for programming 339 * flex EU filtering for a particular platform. 340 */ 341 bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); 342 343 /** 344 * @enable_metric_set: Selects and applies any MUX configuration to set 345 * up the Boolean and Custom (B/C) counters that are part of the 346 * counter reports being sampled. May apply system constraints such as 347 * disabling EU clock gating as required. 348 */ 349 int (*enable_metric_set)(struct i915_perf_stream *stream, 350 struct i915_active *active); 351 352 /** 353 * @disable_metric_set: Remove system constraints associated with using 354 * the OA unit. 355 */ 356 void (*disable_metric_set)(struct i915_perf_stream *stream); 357 358 /** 359 * @oa_enable: Enable periodic sampling 360 */ 361 void (*oa_enable)(struct i915_perf_stream *stream); 362 363 /** 364 * @oa_disable: Disable periodic sampling 365 */ 366 void (*oa_disable)(struct i915_perf_stream *stream); 367 368 /** 369 * @read: Copy data from the circular OA buffer into a given userspace 370 * buffer. 371 */ 372 int (*read)(struct i915_perf_stream *stream, 373 char __user *buf, 374 size_t count, 375 size_t *offset); 376 377 /** 378 * @oa_hw_tail_read: read the OA tail pointer register 379 * 380 * In particular this enables us to share all the fiddly code for 381 * handling the OA unit tail pointer race that affects multiple 382 * generations. 383 */ 384 u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); 385 }; 386 387 struct i915_perf_gt { 388 /* 389 * Lock associated with anything below within this structure. 390 */ 391 struct mutex lock; 392 393 /** 394 * @sseu: sseu configuration selected to run while perf is active, 395 * applies to all contexts. 396 */ 397 struct intel_sseu sseu; 398 399 /* 400 * @exclusive_stream: The stream currently using the OA unit. This is 401 * sometimes accessed outside a syscall associated to its file 402 * descriptor. 403 */ 404 struct i915_perf_stream *exclusive_stream; 405 }; 406 407 struct i915_perf { 408 struct drm_i915_private *i915; 409 410 struct kobject *metrics_kobj; 411 412 /* 413 * Lock associated with adding/modifying/removing OA configs 414 * in perf->metrics_idr. 415 */ 416 struct mutex metrics_lock; 417 418 /* 419 * List of dynamic configurations (struct i915_oa_config), you 420 * need to hold perf->metrics_lock to access it. 421 */ 422 struct idr metrics_idr; 423 424 /** 425 * For rate limiting any notifications of spurious 426 * invalid OA reports 427 */ 428 struct ratelimit_state spurious_report_rs; 429 430 /** 431 * For rate limiting any notifications of tail pointer 432 * race. 433 */ 434 struct ratelimit_state tail_pointer_race; 435 436 u32 gen7_latched_oastatus1; 437 u32 ctx_oactxctrl_offset; 438 u32 ctx_flexeu0_offset; 439 440 /** 441 * The RPT_ID/reason field for Gen8+ includes a bit 442 * to determine if the CTX ID in the report is valid 443 * but the specific bit differs between Gen 8 and 9 444 */ 445 u32 gen8_valid_ctx_bit; 446 447 struct i915_oa_ops ops; 448 const struct i915_oa_format *oa_formats; 449 450 /** 451 * Use a format mask to store the supported formats 452 * for a platform. 453 */ 454 #define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG) 455 unsigned long format_mask[FORMAT_MASK_SIZE]; 456 457 atomic64_t noa_programming_delay; 458 }; 459 460 #endif /* _I915_PERF_TYPES_H_ */ 461