// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as the output of a compatibility
// list, in benchmarking tools, and to decouple delegate instantiation from
// code.
//
// The schema is a work in progress, covering the most broadly used delegates
// and options.
syntax = "proto2";

package tflite.proto;

// ExecutionPreference is used to match accelerators against the preferences of
// the current application or use case. Some of the values here can appear both
// in the compatibility list and as input; some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between the use cases that
// compatibility list entries have been developed for and the settings used for
// NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Allowlist (semantically - the value is the
  // same as on input).
  ANY = 0;
  // Match the low-latency preference. Both compatibility list and input.
  LOW_LATENCY = 1;
  // Match the low-power preference. Both compatibility list and input.
  LOW_POWER = 2;
  // Never accelerate. Can be used as input to the compatibility list or for
  // standalone acceleration configuration.
  FORCE_CPU = 3;
}

// TFLite accelerator to use.
enum Delegate {
  NONE = 0;

  NNAPI = 1;
  GPU = 2;
  HEXAGON = 3;
  XNNPACK = 4;
  // The EdgeTpu in Pixel devices.
  EDGETPU = 5;
  // The Coral EdgeTpu Dev Board / USB accelerator.
  EDGETPU_CORAL = 6;
}

enum NNAPIExecutionPreference {
  // Undefined.
  UNDEFINED = 0;
  // Prefer executing in a way that minimizes battery drain.
  NNAPI_LOW_POWER = 1;
  // Prefer returning a single answer as fast as possible, even if this causes
  // more power consumption.
  NNAPI_FAST_SINGLE_ANSWER = 2;
  // Prefer maximizing the throughput of successive frames, for example when
  // processing successive frames coming from the camera.
  NNAPI_SUSTAINED_SPEED = 3;
}

enum NNAPIExecutionPriority {
  NNAPI_PRIORITY_UNDEFINED = 0;
  NNAPI_PRIORITY_LOW = 1;
  NNAPI_PRIORITY_MEDIUM = 2;
  NNAPI_PRIORITY_HIGH = 3;
}

// One possible acceleration configuration.
message ComputeSettings {
  // Which execution preference this acceleration configuration is for.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite.
  optional TFLiteSettings tflite_settings = 2;
  // Identifiers to use for instrumentation and telemetry.
  optional string model_namespace_for_statistics = 3;
  optional string model_identifier_for_statistics = 4;
}
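
// A minimal textproto sketch of a ComputeSettings instance (the identifier
// strings here are illustrative assumptions, not required values):
//
//   preference: LOW_LATENCY
//   tflite_settings { delegate: GPU }
//   model_namespace_for_statistics: "examples"
//   model_identifier_for_statistics: "image_classifier_v1"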

// NNAPI delegate settings.
message NNAPISettings {
  // Which instance (NNAPI accelerator) to use. One driver may provide several
  // accelerators (though a driver may also hide several back-ends behind one
  // name, at the choice of the driver vendor).
  // Note that driver introspection is only available in Android Q and later.
  optional string accelerator_name = 1;

  // NNAPI model compilation caching settings to be passed to
  // tflite::StatefulNnApiDelegate.
  optional string cache_directory = 2;
  optional string model_token = 3;

  // NNAPI execution preference to pass. See
  // https://developer.android.com/ndk/reference/group/neural-networks.html
  optional NNAPIExecutionPreference execution_preference = 4;

  // Number of instances to cache for the same model (for input size
  // changes). This is mandatory for getting reasonable performance in that
  // case.
  optional int32 no_of_nnapi_instances_to_cache = 5;

  // Deprecated; use the fallback_settings in TFLiteSettings.
  //
  // Whether to automatically fall back to the TFLite CPU path.
  optional FallbackSettings fallback_settings = 6 [deprecated = true];

  // Whether to allow use of the NNAPI CPU (the nnapi-reference accelerator) on
  // Android 10+ when an accelerator name is not specified. The NNAPI CPU
  // typically performs less well than the TfLite built-in kernels, but
  // allowing it lets a model be partially accelerated, which may be a win.
  optional bool allow_nnapi_cpu_on_android_10_plus = 7;

  optional NNAPIExecutionPriority execution_priority = 8;

  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with a dynamic dimension must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in the model being rejected
  // by the accelerator. This should only be enabled if the target device
  // supports dynamic dimensions of the model.
  // By default this is set to false.
  optional bool allow_dynamic_dimensions = 9;

  // Whether to allow the NNAPI accelerator to optionally use lower-precision
  // float16 (16-bit floating point) arithmetic when doing calculations on
  // float32 (32-bit floating point).
  optional bool allow_fp16_precision_for_fp32 = 10;

  // Whether to use NNAPI Burst mode.
  // Burst mode allows accelerators to efficiently manage resources, which can
  // significantly reduce overhead, especially when the same delegate instance
  // is used for multiple inferences.
  optional bool use_burst_computation = 11;
}
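
// A textproto sketch of NNAPISettings as it might appear inside
// TFLiteSettings.nnapi_settings. The accelerator name and cache paths are
// illustrative assumptions; valid accelerator names are device-specific:
//
//   accelerator_name: "qti-dsp"
//   cache_directory: "/data/local/tmp/nnapi_cache"
//   model_token: "my_model_v1"
//   execution_preference: NNAPI_SUSTAINED_SPEED
//   use_burst_computation: true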

// Which GPU backend to select. The default behaviour on Android is to try
// OpenCL and, if it's not available, fall back to OpenGL.
enum GPUBackend {
  UNSET = 0;
  OPENCL = 1;
  OPENGL = 2;
  // Not yet supported.
  // VULKAN = 3;
  // METAL = 4;
}

// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
  optional bool is_precision_loss_allowed = 1;
  optional bool enable_quantized_inference = 2 [default = true];
  optional GPUBackend force_backend = 3;
  // TODO(b/152019007): add remaining options.
}
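
// A textproto sketch of GPUSettings forcing the OpenCL backend; the values
// are illustrative, not recommendations:
//
//   is_precision_loss_allowed: true
//   enable_quantized_inference: true
//   force_backend: OPENCL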

// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
  optional int32 debug_level = 1;
  optional int32 powersave_level = 2;
  optional bool print_graph_profile = 3;
  optional bool print_graph_debug = 4;
}
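
// A textproto sketch of HexagonSettings with graph profiling enabled; the
// level values are illustrative assumptions (see the header linked above for
// their semantics):
//
//   debug_level: 0
//   powersave_level: 0
//   print_graph_profile: true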

// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
message XNNPackSettings {
  optional int32 num_threads = 1;
}

// EdgeTPU device spec.
message EdgeTpuDeviceSpec {
  // EdgeTPU platform types.
  enum PlatformType {
    MMIO = 0;
    REFERENCE = 1;
    SIMULATOR = 2;
    REMOTE_SIMULATOR = 3;
  }

  // Execution platform for the EdgeTPU device.
  optional PlatformType platform_type = 1;

  // Number of chips to use for the EdgeTPU device.
  optional int32 num_chips = 2;

  // Paths to the EdgeTPU devices.
  repeated string device_paths = 3;

  // Chip family used by the EdgeTpu device.
  optional int32 chip_family = 4;
}

// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
  // Undefined power state.
  UNDEFINED_POWERSTATE = 0;

  // TPU core is off but the control cluster is on.
  TPU_CORE_OFF = 1;

  // A non-active low-power state that has a much shorter transition time to
  // active compared to off.
  READY = 2;

  // Minimum-power active state.
  ACTIVE_MIN_POWER = 3;

  // Very low performance, very low power.
  ACTIVE_VERY_LOW_POWER = 4;

  // Low performance, low power.
  ACTIVE_LOW_POWER = 5;

  // Normal performance and power. This setting usually provides the optimal
  // perf/power trade-off for the average use case.
  ACTIVE = 6;

  // Maximum performance level. Potentially higher power and thermal load.
  // This setting may not be allowed in production depending on the system.
  OVER_DRIVE = 7;
}

message EdgeTpuInactivePowerConfig {
  // Inactive power state between inferences.
  optional EdgeTpuPowerState inactive_power_state = 1;

  // Inactive timeout in microseconds between inferences.
  optional int64 inactive_timeout_us = 2;
}

// EdgeTPU Delegate settings.
message EdgeTpuSettings {
  // Target inference power state for running the model.
  optional EdgeTpuPowerState inference_power_state = 1;

  // Inactive power states between inferences.
  repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;

  // Priority for the inference request.
  optional int32 inference_priority = 3 [default = -1];

  // Device spec for creating the EdgeTpu device.
  optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;

  // A unique identifier of the input TfLite model.
  optional string model_token = 5;
}
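
// A textproto sketch of EdgeTpuSettings; the power states, chip count, and
// timeout are illustrative assumptions:
//
//   inference_power_state: ACTIVE
//   inactive_power_configs {
//     inactive_power_state: READY
//     inactive_timeout_us: 1000000
//   }
//   edgetpu_device_spec {
//     platform_type: MMIO
//     num_chips: 1
//   }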

// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
  enum Performance {
    UNDEFINED = 0;
    MAXIMUM = 1;
    HIGH = 2;
    MEDIUM = 3;
    LOW = 4;
  }

  // The EdgeTpu device to be used. See
  // https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
  optional string device = 1;
  // The desired performance level. This setting adjusts the internal clock
  // rate to achieve different performance/power trade-offs. Higher performance
  // values improve speed but increase power usage.
  optional Performance performance = 2 [default = MAXIMUM];
  // If true, always perform a device firmware update (DFU) after reset. DFU is
  // usually only necessary after a power cycle.
  optional bool usb_always_dfu = 3;
  // The maximum bulk-in queue length. A larger queue length may improve USB
  // performance in the device-to-host direction. When not specified (or zero),
  // `usb_max_bulk_in_queue_length` defaults to 32 according to the current
  // EdgeTpu Coral implementation.
  optional int32 usb_max_bulk_in_queue_length = 4;
}
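
// A textproto sketch of CoralSettings for a USB-attached accelerator. The
// device string is an illustrative assumption following the libcoral naming
// linked above:
//
//   device: "usb:0"
//   performance: HIGH
//   usb_max_bulk_in_queue_length: 32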

message CPUSettings {
  // Set to -1 to let the interpreter choose. Otherwise, must be > 0.
  optional int32 num_threads = 1 [default = -1];
}

// How to configure TFLite.
message TFLiteSettings {
  // Which delegate to use.
  optional Delegate delegate = 1;

  // How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into a
  // nested anonymous table rather than a union. See
  // https://github.com/google/flatbuffers/issues/4628.)
  optional NNAPISettings nnapi_settings = 2;
  optional GPUSettings gpu_settings = 3;
  optional HexagonSettings hexagon_settings = 4;
  optional XNNPackSettings xnnpack_settings = 5;

  // How to configure CPU execution.
  optional CPUSettings cpu_settings = 6;

  // Shared delegation settings.
  optional int32 max_delegated_partitions = 7;

  // For configuring the EdgeTpuDelegate.
  optional EdgeTpuSettings edgetpu_settings = 8;

  // For configuring the Coral EdgeTpu Delegate.
  optional CoralSettings coral_settings = 10;

  // Whether to automatically fall back to the TFLite CPU path.
  optional FallbackSettings fallback_settings = 9;
}
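
// A textproto sketch of a complete TFLiteSettings instance that selects the
// NNAPI delegate with CPU fallback; the values are illustrative assumptions:
//
//   delegate: NNAPI
//   nnapi_settings {
//     execution_preference: NNAPI_FAST_SINGLE_ANSWER
//   }
//   cpu_settings { num_threads: 4 }
//   max_delegated_partitions: 3
//   fallback_settings {
//     allow_automatic_fallback_on_compilation_error: true
//   }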

// Whether to automatically fall back to the TFLite CPU path on delegation
// errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they exercise the intended path.
message FallbackSettings {
  // Whether to allow automatically falling back to the TfLite CPU path on
  // compilation failure. The default is to not allow automatic fallback.
  //
  // This is useful in naive production use cases where the caller would prefer
  // the model to run even if it's not accelerated. More advanced users will
  // implement fallback themselves, e.g., by using a different model on CPU.
  //
  // Note that compilation errors may occur either at initial
  // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
  // resizing.
  optional bool allow_automatic_fallback_on_compilation_error = 7;
  // Whether to allow automatically falling back to the TfLite CPU path on
  // execution error. The default is to not allow automatic fallback.
  //
  // Experimental, use with care (only when you have complete control over the
  // client code).
  //
  // The caveat above for compilation errors holds. Additionally,
  // execution-time errors are harder to handle automatically as they require
  // invalidating the TfLite interpreter, which most client code has not been
  // designed to deal with.
  optional bool allow_automatic_fallback_on_execution_error = 8;
}
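
// A textproto sketch of a typical production configuration: fall back on
// compilation errors but not on the riskier execution errors (a judgment
// call, not a mandated default):
//
//   allow_automatic_fallback_on_compilation_error: true
//   allow_automatic_fallback_on_execution_error: false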

// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is a
// single top-level event that is used for all data.)
//
// These definitions don't need a proto-to-flatbuffer conversion, since they
// are not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
  UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
  // Benchmark start. A start without an end can be interpreted as a test that
  // has crashed or hung.
  START = 1;
  // Benchmarking completion. A model was successfully loaded, acceleration
  // configured, and inference run without errors. There may still be an issue
  // with the correctness of results, or with performance.
  END = 2;
  // The benchmark was not completed due to an error. The error may be a
  // handled error (e.g., a failure in a delegate), or a crash.
  ERROR = 3;
  // Benchmark data has been sent for logging.
  LOGGED = 4;
}

// A correctness metric from a benchmark, for example the KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
  optional string name = 1;
  repeated float values = 2 [packed = true];
}

// Outcome of a successfully completed benchmark run. This information is
// intended both to be used on-device to select the best compute configuration
// and to be sent to the server for monitoring.
//
// Used with event type END.
message BenchmarkResult {
  // Time to load the model and apply acceleration. Initialization may get run
  // multiple times to get information on variance.
  repeated int64 initialization_time_us = 1 [packed = true];
  // Time to run inference (call Invoke()). Inference may get run multiple
  // times to get information on variance.
  repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used. Measures the size of the application heap (does not
  // necessarily take into account driver-side allocations).
  optional int32 max_memory_kb = 3;
  // Whether the inference produced correct results (validation graph output
  // 'ok' for all test inputs). Used on-device to disallow configurations that
  // produce incorrect results (e.g., due to OpenCL driver bugs).
  optional bool ok = 4;
  // Metrics that were used to determine the 'ok' status.
  repeated BenchmarkMetric metrics = 5;
}

// A handled error.
message ErrorCode {
  // Which delegate the error comes from (or NONE, if it comes from the tflite
  // framework).
  optional Delegate source = 1;
  // What the tflite-level error is.
  optional int32 tflite_error = 2;
  // What the underlying error is (e.g., an NNAPI or OpenGL error).
  optional int64 underlying_api_error = 3;
}

// At which stage of benchmark execution the error occurred.
enum BenchmarkStage {
  UNKNOWN = 0;
  // During model loading or delegation.
  INITIALIZATION = 1;
  // During inference.
  INFERENCE = 2;
}

// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
  // How far benchmarking got.
  optional BenchmarkStage stage = 1;
  // Process exit code.
  optional int32 exit_code = 2;
  // Signal the process received.
  optional int32 signal = 3;
  // Handled errors.
  repeated ErrorCode error_code = 4;
}

// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
  // Which settings were used for benchmarking.
  optional TFLiteSettings tflite_settings = 1;
  // Type of the event.
  optional BenchmarkEventType event_type = 2;
  // Result of the benchmark; used when the event type is END.
  optional BenchmarkResult result = 3;
  // Error during the benchmark; used when the event type is ERROR.
  optional BenchmarkError error = 4;
  // Start timestamps. These are used for:
  // 1. Checking whether a test was started but not completed within a given
  //    deadline.
  // 2. Optionally, telemetry timestamps.
  optional int64 boottime_us = 5;
  optional int64 wallclock_us = 6;
}
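
// A textproto sketch of a successful benchmark event as it might appear in
// the on-device log; the timings and timestamps are illustrative assumptions:
//
//   tflite_settings { delegate: GPU }
//   event_type: END
//   result {
//     initialization_time_us: 52000
//     inference_time_us: 9800
//     inference_time_us: 9650
//     max_memory_kb: 18432
//     ok: true
//   }
//   boottime_us: 123456789
//   wallclock_us: 1600000000000000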