// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.

syntax = "proto2";

package tflite.proto;

// ExecutionPreference is used to match accelerators against the preferences of
// the current application or usecase. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which usecases
// compatibility list entries have been developed for and what settings are
// used for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Allowlist (semantically - value is same as
  // on input).
  ANY = 0;
  // Match low latency preference. Both compatibility list and input.
  LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
  LOW_POWER = 2;
  // Never accelerate. Can be used for input to compatibility list or for
  // standalone Acceleration configuration.
  FORCE_CPU = 3;
}

// TFLite accelerator to use.
enum Delegate {
  // No delegate: use the default TFLite execution path.
  NONE = 0;

  NNAPI = 1;
  GPU = 2;
  HEXAGON = 3;
  XNNPACK = 4;
  // The EdgeTpu in Pixel devices.
  EDGETPU = 5;
  // The Coral EdgeTpu Dev Board / USB accelerator.
  EDGETPU_CORAL = 6;
}

// NNAPI execution preference, passed via
// NNAPISettings.execution_preference below.
enum NNAPIExecutionPreference {
  // Undefined.
  UNDEFINED = 0;
  // Prefer executing in a way that minimizes battery drain.
  NNAPI_LOW_POWER = 1;
  // Prefer returning a single answer as fast as possible, even if this causes
  // more power consumption.
  NNAPI_FAST_SINGLE_ANSWER = 2;
  // Prefer maximizing the throughput of successive frames, for example when
  // processing successive frames coming from the camera.
  NNAPI_SUSTAINED_SPEED = 3;
}

// Relative execution priority, passed via
// NNAPISettings.execution_priority below.
enum NNAPIExecutionPriority {
  NNAPI_PRIORITY_UNDEFINED = 0;
  NNAPI_PRIORITY_LOW = 1;
  NNAPI_PRIORITY_MEDIUM = 2;
  NNAPI_PRIORITY_HIGH = 3;
}

// One possible acceleration configuration.
message ComputeSettings {
  // Which preference to use this accelerator for.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite.
  optional TFLiteSettings tflite_settings = 2;
  // Identifiers to use for instrumentation and telemetry.
  optional string model_namespace_for_statistics = 3;
  optional string model_identifier_for_statistics = 4;
}

// NNAPI delegate settings.
message NNAPISettings {
  // Which instance (NNAPI accelerator) to use. One driver may provide several
  // accelerators (though a driver may also hide several back-ends behind one
  // name, at the choice of the driver vendor).
  // Note that driver introspection is only available in Android Q and later.
  optional string accelerator_name = 1;

  // NNAPI model compilation caching settings to be passed to
  // tflite::StatefulNnApiDelegate.
  optional string cache_directory = 2;
  optional string model_token = 3;

  // NNAPI execution preference to pass. See
  // https://developer.android.com/ndk/reference/group/neural-networks.html
  optional NNAPIExecutionPreference execution_preference = 4;

  // Number of instances to cache for the same model (for input size
  // changes). This is mandatory for getting reasonable performance in that
  // case.
  optional int32 no_of_nnapi_instances_to_cache = 5;

  // Deprecated; use the fallback_settings in TFLiteSettings.
  //
  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 6 [deprecated = true];

  // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on
  // Android 10+ when an accelerator name is not specified. The NNAPI CPU
  // typically performs less well than the TfLite built-in kernels, but
  // allowing it lets a model be partially accelerated, which may be a win.
  optional bool allow_nnapi_cpu_on_android_10_plus = 7;

  optional NNAPIExecutionPriority execution_priority = 8;

  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with a dynamic dimension must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in the model being rejected
  // by the accelerator. This should only be enabled if the target device
  // supports dynamic dimensions of the model.
  // By default this is set to false.
  optional bool allow_dynamic_dimensions = 9;

  // Whether to allow the NNAPI accelerator to optionally use lower-precision
  // float16 (16-bit floating point) arithmetic when doing calculations on
  // float32 (32-bit floating point).
  optional bool allow_fp16_precision_for_fp32 = 10;

  // Whether to use NNAPI Burst mode.
  // Burst mode allows accelerators to efficiently manage resources, which
  // would significantly reduce overhead especially if the same delegate
  // instance is to be used for multiple inferences.
  optional bool use_burst_computation = 11;
}

// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and if it's not available fall back to OpenGL.
enum GPUBackend {
  UNSET = 0;
  OPENCL = 1;
  OPENGL = 2;
  // Not yet supported.
  // VULKAN = 3;
  // METAL = 4;
}

// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
  optional bool is_precision_loss_allowed = 1;
  optional bool enable_quantized_inference = 2 [default = true];
  optional GPUBackend force_backend = 3;
  // TODO(b/152019007): add remaining options.
}

// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
  optional int32 debug_level = 1;
  optional int32 powersave_level = 2;
  optional bool print_graph_profile = 3;
  optional bool print_graph_debug = 4;
}

// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
message XNNPackSettings {
  optional int32 num_threads = 1;
}

// EdgeTPU device spec.
message EdgeTpuDeviceSpec {
  // EdgeTPU platform types.
  enum PlatformType {
    MMIO = 0;
    REFERENCE = 1;
    SIMULATOR = 2;
    REMOTE_SIMULATOR = 3;
  }

  // Execution platform for the EdgeTPU device.
  optional PlatformType platform_type = 1;

  // Number of chips to use for the EdgeTPU device.
  optional int32 num_chips = 2;

  // Paths to the EdgeTPU devices.
  repeated string device_paths = 3;

  // Chip family used by the EdgeTpu device.
  optional int32 chip_family = 4;
}

// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
  // Undefined power state.
  UNDEFINED_POWERSTATE = 0;

  // TPU core is off but control cluster is on.
  TPU_CORE_OFF = 1;

  // A non-active low-power state that has much smaller transition time to
  // active compared to off.
  READY = 2;

  // Minimum power active state.
  ACTIVE_MIN_POWER = 3;

  // Very low performance, very low power.
  ACTIVE_VERY_LOW_POWER = 4;

  // Low performance, low power.
  ACTIVE_LOW_POWER = 5;

  // The normal performance and power. This setting usually provides the
  // optimal perf/power trade-off for the average use-case.
  ACTIVE = 6;

  // Maximum performance level. Potentially higher power and thermal. This
  // setting may not be allowed in production depending on the system.
  OVER_DRIVE = 7;
}

// Power state to use between inferences, with its activation timeout.
message EdgeTpuInactivePowerConfig {
  // Inactive power states between inferences.
  optional EdgeTpuPowerState inactive_power_state = 1;

  // Inactive timeout in microseconds between inferences.
  optional int64 inactive_timeout_us = 2;
}

// EdgeTPU Delegate settings.
message EdgeTpuSettings {
  // Target inference power state for running the model.
  optional EdgeTpuPowerState inference_power_state = 1;

  // Inactive power states between inferences.
  repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;

  // Priority for the inference request.
  optional int32 inference_priority = 3 [default = -1];

  // Device spec for creating the EdgeTpu device.
  optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;

  // A unique identifier of the input TfLite model.
  optional string model_token = 5;
}

// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
  enum Performance {
    UNDEFINED = 0;
    MAXIMUM = 1;
    HIGH = 2;
    MEDIUM = 3;
    LOW = 4;
  }

  // The Edge Tpu device to be used. See
  // https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
  optional string device = 1;
  // The desired performance level. This setting adjusts the internal clock
  // rate to achieve different performance / power balance. Higher performance
  // values improve speed, but increase power usage.
  optional Performance performance = 2 [default = MAXIMUM];
  // If true, always perform device firmware update (DFU) after reset. DFU is
  // usually only necessary after power cycle.
  optional bool usb_always_dfu = 3;
  // The maximum bulk in queue length. Larger queue length may improve USB
  // performance on the direction from device to host. When not specified (or
  // zero), `usb_max_bulk_in_queue_length` will default to 32 according to the
  // current EdgeTpu Coral implementation.
  optional int32 usb_max_bulk_in_queue_length = 4;
}

// CPU execution settings.
message CPUSettings {
  // Set to -1 to let the interpreter choose. Otherwise, must be > 0.
  optional int32 num_threads = 1 [default = -1];
}

// How to configure TFLite.
message TFLiteSettings {
  // Which delegate to use.
  optional Delegate delegate = 1;

  // How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into a
  // nested anonymous table rather than a union. See
  // https://github.com/google/flatbuffers/issues/4628).
  optional NNAPISettings nnapi_settings = 2;
  optional GPUSettings gpu_settings = 3;
  optional HexagonSettings hexagon_settings = 4;
  optional XNNPackSettings xnnpack_settings = 5;

  // How to configure CPU execution.
  optional CPUSettings cpu_settings = 6;

  // Shared delegation settings.
  optional int32 max_delegated_partitions = 7;

  // For configuring the EdgeTpuDelegate.
  optional EdgeTpuSettings edgetpu_settings = 8;

  // For configuring the Coral EdgeTpu Delegate.
  optional CoralSettings coral_settings = 10;

  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 9;
}

// Whether to automatically fallback to TFLite CPU path on delegation errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
  // Whether to allow automatically falling back to TfLite CPU path on
  // compilation failure. Default is not allowing automatic fallback.
  //
  // This is useful in naive production usecases where the caller would prefer
  // for the model to run even if it's not accelerated. More advanced users
  // will implement fallback themselves; e.g., by using a different model on
  // CPU.
  //
  // Note that compilation errors may occur either at initial
  // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
  // resizing.
  optional bool allow_automatic_fallback_on_compilation_error = 7;
  // Whether to allow automatically falling back to TfLite CPU path on
  // execution error. Default is not allowing automatic fallback.
  //
  // Experimental, use with care (only when you have complete control over the
  // client code).
  //
  // The caveat above for compilation error holds. Additionally,
  // execution-time errors are harder to handle automatically as they require
  // invalidating the TfLite interpreter which most client code has not been
  // designed to deal with.
  optional bool allow_automatic_fallback_on_execution_error = 8;
}

// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is
// a single top-level event that is used for all data).
//
// These definitions don't need a proto-to-flatbuffer conversion, since they
// are not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
  UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
  // Benchmark start. A start without an end can be interpreted as a test that
  // has crashed or hung.
  START = 1;
  // Benchmarking completion. A model was successfully loaded, acceleration
  // configured and inference run without errors. There may still be an issue
  // with correctness of results, or with performance.
  END = 2;
  // Benchmark was not completed due to an error. The error may be a handled
  // error (e.g., failure in a delegate), or a crash.
  ERROR = 3;
  // Benchmark data has been sent for logging.
  LOGGED = 4;
}

// A correctness metric from a benchmark, for example KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
  optional string name = 1;
  repeated float values = 2 [packed = true];
}

// Outcome of a successfully completed benchmark run. This information is
// intended to both be used on-device to select the best compute configuration
// as well as sent to server for monitoring.
//
// Used with event type END.
message BenchmarkResult {
  // Time to load the model and apply acceleration, in microseconds.
  // Initialization may get run multiple times to get information on variance.
  repeated int64 initialization_time_us = 1 [packed = true];
  // Time to run inference (call Invoke()), in microseconds. Inference may get
  // run multiple times to get information on variance.
  repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used, in kilobytes. Measures the size of the application
  // heap (does not necessarily take into account driver-side allocation).
  optional int32 max_memory_kb = 3;
  // Whether the inference produced correct results (validation graph output
  // 'ok' for all test inputs). Used on-device to disallow configurations that
  // produce incorrect results (e.g., due to OpenCL driver bugs).
  optional bool ok = 4;
  // Metrics that were used to determine the 'ok' status.
  repeated BenchmarkMetric metrics = 5;
}

// A handled error.
message ErrorCode {
  // Which delegate the error comes from (or NONE, if it comes from the tflite
  // framework).
  optional Delegate source = 1;
  // What the tflite level error is.
  optional int32 tflite_error = 2;
  // What the underlying error is (e.g., NNAPI or OpenGL error).
  optional int64 underlying_api_error = 3;
}

// When during benchmark execution an error occurred.
enum BenchmarkStage {
  UNKNOWN = 0;
  // During model loading or delegation.
  INITIALIZATION = 1;
  // During inference.
  INFERENCE = 2;
}

// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
  // How far benchmarking got.
  optional BenchmarkStage stage = 1;
  // Process exit code.
  optional int32 exit_code = 2;
  // Signal the process received.
  optional int32 signal = 3;
  // Handled errors.
  repeated ErrorCode error_code = 4;
}

// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
  // Which settings were used for benchmarking.
  optional TFLiteSettings tflite_settings = 1;
  // Type of the event.
  optional BenchmarkEventType event_type = 2;
  // Result of benchmark, used when type is END.
  optional BenchmarkResult result = 3;
  // Error during benchmark, used when type is ERROR.
  optional BenchmarkError error = 4;
  // Start timestamps. These are used for
  // 1. Checking whether a test was started but not completed within a given
  //    deadline.
  // 2. Optionally, telemetry timestamps.
  optional int64 boottime_us = 5;
  optional int64 wallclock_us = 6;
}