1 // Copyright 2017 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <inttypes.h>
6 #include <stddef.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <threads.h>
10
11 #include <ddk/debug.h>
12 #include <ddk/device.h>
13 #include <fbl/algorithm.h>
14 #include <fbl/alloc_checker.h>
15 #include <fbl/auto_call.h>
16 #include <fbl/auto_lock.h>
17 #include <fbl/unique_ptr.h>
18 #include <lib/zx/port.h>
19 #include <lib/zx/vmar.h>
20 #include <lib/zx/vmo.h>
21 #include <zircon/compiler.h>
22 #include <zircon/device/block.h>
23 #include <zircon/errors.h>
24 #include <zircon/status.h>
25 #include <zircon/thread_annotations.h>
26 #include <zircon/types.h>
27 #include <zxcrypt/volume.h>
28
29 #include <utility>
30
31 #include "debug.h"
32 #include "device.h"
33 #include "extra.h"
34 #include "worker.h"
35
36 namespace zxcrypt {
37 namespace {
38
39 // Cap largest transaction to a quarter of the VMO buffer.
40 const uint32_t kMaxTransferSize = Volume::kBufferSize / 4;
41
42 // Kick off |Init| thread when binding.
InitThread(void * arg)43 int InitThread(void* arg) {
44 return static_cast<Device*>(arg)->Init();
45 }
46
47 } // namespace
48
49 // Public methods
50
Device(zx_device_t * parent)51 Device::Device(zx_device_t* parent)
52 : DeviceType(parent), active_(false), stalled_(false), num_ops_(0), info_(nullptr), hint_(0) {
53 LOG_ENTRY();
54
55 list_initialize(&queue_);
56 }
57
~Device()58 Device::~Device() {
59 LOG_ENTRY();
60 }
61
62 // Public methods called from global context
63
Bind()64 zx_status_t Device::Bind() {
65 LOG_ENTRY();
66 ZX_DEBUG_ASSERT(!info_);
67 zx_status_t rc;
68
69 // Add the (invisible) device to devmgr
70 if ((rc = DdkAdd("zxcrypt", DEVICE_ADD_INVISIBLE)) != ZX_OK) {
71 zxlogf(ERROR, "DdkAdd('zxcrypt', DEVICE_ADD_INVISIBLE) failed: %s\n",
72 zx_status_get_string(rc));
73 return rc;
74 }
75 // This call to |DdkRemove| only occurs if the thread below fails to start. Any calls to
76 // |DdkUnbind| will be a no-op as |active_| is false.
77 auto cleanup = fbl::MakeAutoCall([this] { DdkRemove(); });
78
79 // Launch the init thread.
80 if (thrd_create(&init_, InitThread, this) != thrd_success) {
81 zxlogf(ERROR, "zxcrypt device %p initialization aborted: failed to start thread\n", this);
82 return ZX_ERR_INTERNAL;
83 }
84
85 cleanup.cancel();
86 return ZX_OK;
87 }
88
Init()89 zx_status_t Device::Init() {
90 LOG_ENTRY();
91 ZX_DEBUG_ASSERT(!info_);
92 zx_status_t rc;
93 fbl::AutoLock lock(&mtx_);
94
95 zxlogf(TRACE, "zxcrypt device %p initializing\n", this);
96 // This call to |DdkRemove| only occurs if the thread starts but encounters an error. Any calls
97 // to |DdkUnbind| will be a no-op as |active_| is false.
98 auto cleanup = fbl::MakeAutoCall([this]() {
99 zxlogf(ERROR, "zxcrypt device %p failed to initialize\n", this);
100 DdkRemove();
101 });
102
103 fbl::AllocChecker ac;
104 fbl::unique_ptr<DeviceInfo> info(new (&ac) DeviceInfo());
105 if (!ac.check()) {
106 zxlogf(ERROR, "failed to allocate %zu bytes\n", sizeof(DeviceInfo));
107 return ZX_ERR_NO_MEMORY;
108 }
109 info->base = nullptr;
110 info->num_workers = 0;
111 info_ = info.get();
112
113 // Open the zxcrypt volume. The volume may adjust the block info, so get it again and determine
114 // the multiplicative factor needed to transform this device's blocks into its parent's.
115 // TODO(security): ZX-1130 workaround. Use null key of a fixed length until fixed
116 crypto::Secret root_key;
117 uint8_t* buf;
118 if ((rc = root_key.Allocate(kZx1130KeyLen, &buf)) != ZX_OK) {
119 zxlogf(ERROR, "failed to key of %zu bytes: %s\n", kZx1130KeyLen, zx_status_get_string(rc));
120 return rc;
121 }
122 memset(buf, 0, root_key.len());
123 fbl::unique_ptr<Volume> volume;
124 if ((rc = Volume::Unlock(parent(), root_key, 0, &volume)) != ZX_OK) {
125 zxlogf(ERROR, "failed to unlock volume: %s\n", zx_status_get_string(rc));
126 return rc;
127 }
128
129 // Get the parent device's block interface
130 block_info_t blk;
131 if ((rc = device_get_protocol(parent(), ZX_PROTOCOL_BLOCK, &info->proto)) != ZX_OK) {
132 zxlogf(ERROR, "failed to get block protocol: %s\n", zx_status_get_string(rc));
133 return rc;
134 }
135 info->proto.ops->query(info->proto.ctx, &blk, &info->op_size);
136
137 // Save device sizes
138 info->block_size = blk.block_size;
139 info->op_size += sizeof(extra_op_t);
140 info->reserved_blocks = volume->reserved_blocks();
141 info->reserved_slices = volume->reserved_slices();
142
143 // Reserve space for shadow I/O transactions
144 if ((rc = zx::vmo::create(Volume::kBufferSize, 0, &info->vmo)) != ZX_OK) {
145 zxlogf(ERROR, "zx::vmo::create failed: %s\n", zx_status_get_string(rc));
146 return rc;
147 }
148 constexpr uint32_t flags = ZX_VM_PERM_READ | ZX_VM_PERM_WRITE;
149 uintptr_t address;
150 if ((rc = zx::vmar::root_self()->map(0, info->vmo, 0, Volume::kBufferSize, flags, &address)) !=
151 ZX_OK) {
152 zxlogf(ERROR, "zx::vmar::map failed: %s\n", zx_status_get_string(rc));
153 return rc;
154 }
155 info->base = reinterpret_cast<uint8_t*>(address);
156
157 // Set up allocation bitmap
158 if ((rc = map_.Reset(Volume::kBufferSize / info->block_size)) != ZX_OK) {
159 zxlogf(ERROR, "bitmap allocation failed: %s\n", zx_status_get_string(rc));
160 return rc;
161 }
162
163 // Start workers
164 // TODO(aarongreen): Investigate performance implications of adding more workers.
165 if ((rc = zx::port::create(0, &port_)) != ZX_OK) {
166 zxlogf(ERROR, "zx::port::create failed: %s\n", zx_status_get_string(rc));
167 return rc;
168 }
169 for (size_t i = 0; i < kNumWorkers; ++i) {
170 zx::port port;
171 port_.duplicate(ZX_RIGHT_SAME_RIGHTS, &port);
172 if ((rc = workers_[i].Start(this, *volume, std::move(port))) != ZX_OK) {
173 zxlogf(ERROR, "failed to start worker %zu: %s\n", i, zx_status_get_string(rc));
174 return rc;
175 }
176 ++info->num_workers;
177 }
178
179 // |info_| now holds the pointer; it is reclaimed in |DdkRelease|.
180 DeviceInfo* released __attribute__((unused)) = info.release();
181
182 // Enable the device. Holding the lock at function scope guarantees that |active_| becomes true
183 // if and only if |cleanup| is canceled.
184 active_.store(true);
185 DdkMakeVisible();
186 zxlogf(TRACE, "zxcrypt device %p initialized\n", this);
187
188 cleanup.cancel();
189 return ZX_OK;
190 }
191
192 ////////////////////////////////////////////////////////////////
193 // ddk::Device methods
194
DdkIoctl(uint32_t op,const void * in,size_t in_len,void * out,size_t out_len,size_t * actual)195 zx_status_t Device::DdkIoctl(uint32_t op, const void* in, size_t in_len, void* out, size_t out_len,
196 size_t* actual) {
197 LOG_ENTRY_ARGS("op=%0x" PRIx32 ", in=%p, in_len=%zu, out=%p, out_len=%zu, actual=%p", op, in,
198 in_len, out, out_len, actual);
199 ZX_DEBUG_ASSERT(info_);
200 zx_status_t rc;
201
202 // Modify inputs
203 switch (op) {
204 case IOCTL_BLOCK_FVM_EXTEND:
205 case IOCTL_BLOCK_FVM_SHRINK: {
206 extend_request_t mod;
207 if (!in || in_len < sizeof(mod)) {
208 zxlogf(ERROR, "bad parameter(s): in=%p, in_len=%zu\n", in, in_len);
209 return ZX_ERR_INVALID_ARGS;
210 }
211 memcpy(&mod, in, sizeof(mod));
212 mod.offset += info_->reserved_slices;
213 rc = device_ioctl(parent(), op, &mod, sizeof(mod), out, out_len, actual);
214 break;
215 }
216 case IOCTL_BLOCK_FVM_VSLICE_QUERY: {
217 query_request_t mod;
218 if (!in || in_len < sizeof(mod)) {
219 zxlogf(ERROR, "bad parameter(s): in=%p, in_len=%zu\n", in, in_len);
220 return ZX_ERR_INVALID_ARGS;
221 }
222 memcpy(&mod, in, sizeof(mod));
223 for (size_t i = 0; i < mod.count; ++i) {
224 mod.vslice_start[i] += info_->reserved_slices;
225 }
226 rc = device_ioctl(parent(), op, &mod, sizeof(mod), out, out_len, actual);
227 break;
228 }
229 default:
230 rc = device_ioctl(parent(), op, in, in_len, out, out_len, actual);
231 break;
232 }
233 if (rc < 0) {
234 zxlogf(ERROR, "parent device returned failure for ioctl 0x%" PRIx32 ": %s\n", op,
235 zx_status_get_string(rc));
236 return rc;
237 }
238
239 // Modify outputs
240 switch (op) {
241 case IOCTL_BLOCK_GET_INFO: {
242 block_info_t* mod = static_cast<block_info_t*>(out);
243 mod->block_count -= info_->reserved_blocks;
244 if (mod->max_transfer_size > kMaxTransferSize) {
245 mod->max_transfer_size = kMaxTransferSize;
246 }
247 break;
248 }
249 case IOCTL_BLOCK_FVM_QUERY: {
250 fvm_info_t* mod = static_cast<fvm_info_t*>(out);
251 mod->vslice_count -= info_->reserved_slices;
252 break;
253 }
254 default:
255 break;
256 }
257 return ZX_OK;
258 }
259
DdkGetSize()260 zx_off_t Device::DdkGetSize() {
261 LOG_ENTRY();
262
263 zx_off_t reserved, size;
264 if (mul_overflow(info_->block_size, info_->reserved_blocks, &reserved) ||
265 sub_overflow(device_get_size(parent()), reserved, &size)) {
266 zxlogf(ERROR, "device_get_size returned less than what has been reserved\n");
267 return 0;
268 }
269
270 return size;
271 }
272
273 // TODO(aarongreen): See ZX-1138. Currently, there's no good way to trigger
274 // this on demand.
DdkUnbind()275 void Device::DdkUnbind() {
276 LOG_ENTRY();
277 // We call |DdkRemove| exactly once after |Init| completes successfully, which is the only place
278 // |active_| becaomes true. The lock is required to prevent |DdkUnbind| from being called
279 // during a call to |Init|.
280 fbl::AutoLock lock(&mtx_);
281 if (active_.exchange(false)) {
282 DdkRemove();
283 }
284 }
285
DdkRelease()286 void Device::DdkRelease() {
287 LOG_ENTRY();
288 zx_status_t rc;
289
290 // One way or another we need to release the memory
291 auto cleanup = fbl::MakeAutoCall([this]() {
292 zxlogf(TRACE, "zxcrypt device %p released\n", this);
293 delete this;
294 });
295
296 // Make sure |Init()| is complete
297 thrd_join(init_, &rc);
298 if (rc != ZX_OK) {
299 zxlogf(WARN, "init thread returned %s\n", zx_status_get_string(rc));
300 }
301
302 // If we died early enough (e.g. OOM), this doesn't exist
303 if (!info_) {
304 return;
305 }
306
307 // Stop workers; send a stop message to each, then join each (possibly in different order).
308 StopWorkersIfDone();
309 for (size_t i = 0; i < info_->num_workers; ++i) {
310 workers_[i].Stop();
311 }
312
313 // Reclaim |info_| to ensure its memory is freed.
314 fbl::unique_ptr<DeviceInfo> info(const_cast<DeviceInfo*>(info_));
315
316 // Release write buffer
317 const uintptr_t address = reinterpret_cast<uintptr_t>(info->base);
318 if (address != 0 &&
319 (rc = zx::vmar::root_self()->unmap(address, Volume::kBufferSize)) != ZX_OK) {
320 zxlogf(WARN, "failed to unmap %" PRIu32 " bytes at %" PRIuPTR ": %s\n", Volume::kBufferSize,
321 address, zx_status_get_string(rc));
322 }
323 }
324
325 ////////////////////////////////////////////////////////////////
326 // ddk::BlockProtocol methods
327
BlockImplQuery(block_info_t * out_info,size_t * out_op_size)328 void Device::BlockImplQuery(block_info_t* out_info, size_t* out_op_size) {
329 LOG_ENTRY_ARGS("out_info=%p, out_op_size=%p", out_info, out_op_size);
330 ZX_DEBUG_ASSERT(info_);
331
332 info_->proto.ops->query(info_->proto.ctx, out_info, out_op_size);
333 out_info->block_count -= info_->reserved_blocks;
334 *out_op_size = info_->op_size;
335 }
336
BlockImplQueue(block_op_t * block,block_impl_queue_callback completion_cb,void * cookie)337 void Device::BlockImplQueue(block_op_t* block, block_impl_queue_callback completion_cb,
338 void* cookie) {
339 LOG_ENTRY_ARGS("block=%p", block);
340 ZX_DEBUG_ASSERT(info_);
341
342 // Check if the device is active.
343 if (!active_.load()) {
344 zxlogf(ERROR, "rejecting I/O request: device is not active\n");
345 completion_cb(cookie, ZX_ERR_BAD_STATE, block);
346 return;
347 }
348 num_ops_.fetch_add(1);
349
350 // Initialize our extra space and save original values
351 extra_op_t* extra = BlockToExtra(block, info_->op_size);
352 zx_status_t rc = extra->Init(block, completion_cb, cookie, info_->reserved_blocks);
353 if (rc != ZX_OK) {
354 zxlogf(ERROR, "failed to initialize extra info: %s\n", zx_status_get_string(rc));
355 BlockComplete(block, rc);
356 return;
357 }
358
359 switch (block->command & BLOCK_OP_MASK) {
360 case BLOCK_OP_WRITE:
361 EnqueueWrite(block);
362 break;
363 case BLOCK_OP_READ:
364 default:
365 BlockForward(block, ZX_OK);
366 break;
367 }
368 }
369
BlockForward(block_op_t * block,zx_status_t status)370 void Device::BlockForward(block_op_t* block, zx_status_t status) {
371 LOG_ENTRY_ARGS("block=%p, status=%s", block, zx_status_get_string(status));
372 ZX_DEBUG_ASSERT(info_);
373
374 if (!block) {
375 zxlogf(SPEW, "early return; no block provided\n");
376 return;
377 }
378 if (status != ZX_OK) {
379 zxlogf(ERROR, "aborting request due to failure: %s\n", zx_status_get_string(status));
380 BlockComplete(block, status);
381 return;
382 }
383 // Check if the device is active (i.e. |DdkUnbind| has not been called).
384 if (!active_.load()) {
385 zxlogf(ERROR, "aborting request; device is not active\n");
386 BlockComplete(block, ZX_ERR_BAD_STATE);
387 return;
388 }
389
390 // Send the request to the parent device
391 block_impl_queue(&info_->proto, block, BlockCallback, this);
392 }
393
BlockComplete(block_op_t * block,zx_status_t status)394 void Device::BlockComplete(block_op_t* block, zx_status_t status) {
395 LOG_ENTRY_ARGS("block=%p, status=%s", block, zx_status_get_string(status));
396 ZX_DEBUG_ASSERT(info_);
397 zx_status_t rc;
398
399 // If a portion of the write buffer was allocated, release it.
400 extra_op_t* extra = BlockToExtra(block, info_->op_size);
401 if (extra->data) {
402 uint64_t off = (extra->data - info_->base) / info_->block_size;
403 uint64_t len = block->rw.length;
404 extra->data = nullptr;
405
406 fbl::AutoLock lock(&mtx_);
407 ZX_DEBUG_ASSERT(map_.Get(off, off + len));
408 rc = map_.Clear(off, off + len);
409 ZX_DEBUG_ASSERT(rc == ZX_OK);
410 }
411
412 // Complete the request.
413 extra->completion_cb(extra->cookie, status, block);
414
415 // If we previously stalled, try to re-queue the deferred requests; otherwise, avoid taking the
416 // lock.
417 if (stalled_.exchange(false)) {
418 EnqueueWrite();
419 }
420
421 if (num_ops_.fetch_sub(1) == 1) {
422 StopWorkersIfDone();
423 }
424 }
425
426 ////////////////////////////////////////////////////////////////
427 // Private methods
428
EnqueueWrite(block_op_t * block)429 void Device::EnqueueWrite(block_op_t* block) {
430 LOG_ENTRY_ARGS("block=%p", block);
431 zx_status_t rc = ZX_OK;
432
433 fbl::AutoLock lock(&mtx_);
434
435 // Append the request to the write queue (if not null)
436 extra_op_t* extra;
437 if (block) {
438 extra = BlockToExtra(block, info_->op_size);
439 list_add_tail(&queue_, &extra->node);
440 }
441 if (stalled_.load()) {
442 zxlogf(SPEW, "early return; no requests completed since last stall\n");
443 return;
444 }
445
446 // Process as many pending write requests as we can right now.
447 list_node_t pending;
448 list_initialize(&pending);
449 while (!list_is_empty(&queue_)) {
450 extra = list_peek_head_type(&queue_, extra_op_t, node);
451 block = ExtraToBlock(extra, info_->op_size);
452
453 // Find an available offset in the write buffer
454 uint64_t off;
455 uint64_t len = block->rw.length;
456 if ((rc = map_.Find(false, hint_, map_.size(), len, &off)) == ZX_ERR_NO_RESOURCES &&
457 (rc = map_.Find(false, 0, map_.size(), len, &off)) == ZX_ERR_NO_RESOURCES) {
458 zxlogf(TRACE, "zxcrypt device %p stalled pending request completion\n", this);
459 stalled_.store(true);
460 break;
461 }
462
463 // We don't expect any other errors
464 ZX_DEBUG_ASSERT(rc == ZX_OK);
465 rc = map_.Set(off, off + len);
466 ZX_DEBUG_ASSERT(rc == ZX_OK);
467
468 // Save a hint as to where to start looking next time
469 hint_ = (off + len) % map_.size();
470
471 // Modify request to use write buffer
472 extra->data = info_->base + (off * info_->block_size);
473 block->rw.vmo = info_->vmo.get();
474 block->rw.offset_vmo = (extra->data - info_->base) / info_->block_size;
475
476 list_add_tail(&pending, list_remove_head(&queue_));
477 }
478
479 // Release the lock and send blocks that are ready to the workers
480 lock.release();
481 extra_op_t* tmp;
482 list_for_every_entry_safe (&pending, extra, tmp, extra_op_t, node) {
483 list_delete(&extra->node);
484 block = ExtraToBlock(extra, info_->op_size);
485 SendToWorker(block);
486 }
487 }
488
SendToWorker(block_op_t * block)489 void Device::SendToWorker(block_op_t* block) {
490 LOG_ENTRY_ARGS("block=%p", block);
491 zx_status_t rc;
492
493 zx_port_packet_t packet;
494 Worker::MakeRequest(&packet, Worker::kBlockRequest, block);
495 if ((rc = port_.queue(&packet)) != ZX_OK) {
496 zxlogf(ERROR, "zx::port::queue failed: %s\n", zx_status_get_string(rc));
497 BlockComplete(block, rc);
498 return;
499 }
500 }
501
BlockCallback(void * cookie,zx_status_t status,block_op_t * block)502 void Device::BlockCallback(void* cookie, zx_status_t status, block_op_t* block) {
503 LOG_ENTRY_ARGS("block=%p, status=%s", block, zx_status_get_string(status));
504
505 // Restore data that may have changed
506 Device* device = static_cast<Device*>(cookie);
507 extra_op_t* extra = BlockToExtra(block, device->op_size());
508 block->rw.vmo = extra->vmo;
509 block->rw.length = extra->length;
510 block->rw.offset_dev = extra->offset_dev;
511 block->rw.offset_vmo = extra->offset_vmo;
512
513 if (status != ZX_OK) {
514 zxlogf(TRACE, "parent device returned %s\n", zx_status_get_string(status));
515 device->BlockComplete(block, status);
516 return;
517 }
518 switch (block->command & BLOCK_OP_MASK) {
519 case BLOCK_OP_READ:
520 device->SendToWorker(block);
521 break;
522 case BLOCK_OP_WRITE:
523 default:
524 device->BlockComplete(block, ZX_OK);
525 break;
526 }
527 }
528
StopWorkersIfDone()529 void Device::StopWorkersIfDone() {
530 // Multiple threads may pass this check, but that's harmless.
531 if (!active_.load() && num_ops_.load() == 0) {
532 zx_port_packet_t packet;
533 Worker::MakeRequest(&packet, Worker::kStopRequest);
534 for (size_t i = 0; i < info_->num_workers; ++i) {
535 port_.queue(&packet);
536 }
537 }
538 }
539
540 } // namespace zxcrypt
541
zxcrypt_device_bind(void * ctx,zx_device_t * parent)542 extern "C" zx_status_t zxcrypt_device_bind(void* ctx, zx_device_t* parent) {
543 LOG_ENTRY_ARGS("ctx=%p, parent=%p", ctx, parent);
544 zx_status_t rc;
545
546 fbl::AllocChecker ac;
547 auto dev = fbl::make_unique_checked<zxcrypt::Device>(&ac, parent);
548 if (!ac.check()) {
549 zxlogf(ERROR, "failed to allocate %zu bytes\n", sizeof(zxcrypt::Device));
550 return ZX_ERR_NO_MEMORY;
551 }
552 if ((rc = dev->Bind()) != ZX_OK) {
553 zxlogf(ERROR, "failed to bind: %s\n", zx_status_get_string(rc));
554 return rc;
555 }
556 // devmgr is now in charge of the memory for |dev|
557 zxcrypt::Device* devmgr_owned __attribute__((unused));
558 devmgr_owned = dev.release();
559
560 return ZX_OK;
561 }
562