// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <threads.h>

#include <ddk/debug.h>
#include <ddk/device.h>
#include <fbl/algorithm.h>
#include <fbl/alloc_checker.h>
#include <fbl/auto_call.h>
#include <fbl/auto_lock.h>
#include <fbl/unique_ptr.h>
#include <lib/zx/port.h>
#include <lib/zx/vmar.h>
#include <lib/zx/vmo.h>
#include <zircon/compiler.h>
#include <zircon/device/block.h>
#include <zircon/errors.h>
#include <zircon/status.h>
#include <zircon/thread_annotations.h>
#include <zircon/types.h>
#include <zxcrypt/volume.h>

#include <utility>

#include "debug.h"
#include "device.h"
#include "extra.h"
#include "worker.h"

namespace zxcrypt {
namespace {

// Cap largest transaction to a quarter of the VMO buffer.
const uint32_t kMaxTransferSize = Volume::kBufferSize / 4;

// Kick off |Init| thread when binding.
int InitThread(void* arg) {
    return static_cast<Device*>(arg)->Init();
}

} // namespace

// Public methods

Device::Device(zx_device_t* parent)
    : DeviceType(parent), active_(false), stalled_(false), num_ops_(0), info_(nullptr), hint_(0) {
    LOG_ENTRY();

    list_initialize(&queue_);
}

Device::~Device() {
    LOG_ENTRY();
}

// Public methods called from global context

zx_status_t Device::Bind() {
    LOG_ENTRY();
    ZX_DEBUG_ASSERT(!info_);
    zx_status_t rc;

    // Add the (invisible) device to devmgr
    if ((rc = DdkAdd("zxcrypt", DEVICE_ADD_INVISIBLE)) != ZX_OK) {
        zxlogf(ERROR, "DdkAdd('zxcrypt', DEVICE_ADD_INVISIBLE) failed: %s\n",
               zx_status_get_string(rc));
        return rc;
    }
    // This call to |DdkRemove| only occurs if the thread below fails to start.  Any calls to
    // |DdkUnbind| will be a no-op as |active_| is false.
    auto cleanup = fbl::MakeAutoCall([this] { DdkRemove(); });

    // Launch the init thread.
    if (thrd_create(&init_, InitThread, this) != thrd_success) {
        zxlogf(ERROR, "zxcrypt device %p initialization aborted: failed to start thread\n", this);
        return ZX_ERR_INTERNAL;
    }

    cleanup.cancel();
    return ZX_OK;
}

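// Performs the deferred, potentially slow initialization started by |Bind|: unlocks the zxcrypt
// volume, queries the parent block device, maps the shadow write buffer, and starts the worker
// threads before making the device visible.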
zx_status_t Device::Init() {
    LOG_ENTRY();
    ZX_DEBUG_ASSERT(!info_);
    zx_status_t rc;
    fbl::AutoLock lock(&mtx_);

    zxlogf(TRACE, "zxcrypt device %p initializing\n", this);
    // This call to |DdkRemove| only occurs if the thread starts but encounters an error.  Any calls
    // to |DdkUnbind| will be a no-op as |active_| is false.
    auto cleanup = fbl::MakeAutoCall([this]() {
        zxlogf(ERROR, "zxcrypt device %p failed to initialize\n", this);
        DdkRemove();
    });

    fbl::AllocChecker ac;
    fbl::unique_ptr<DeviceInfo> info(new (&ac) DeviceInfo());
    if (!ac.check()) {
        zxlogf(ERROR, "failed to allocate %zu bytes\n", sizeof(DeviceInfo));
        return ZX_ERR_NO_MEMORY;
    }
    info->base = nullptr;
    info->num_workers = 0;
    info_ = info.get();

    // Open the zxcrypt volume.  The volume may adjust the block info, so get it again and determine
    // the multiplicative factor needed to transform this device's blocks into its parent's.
    // TODO(security): ZX-1130 workaround.  Use null key of a fixed length until fixed
    crypto::Secret root_key;
    uint8_t* buf;
    if ((rc = root_key.Allocate(kZx1130KeyLen, &buf)) != ZX_OK) {
        zxlogf(ERROR, "failed to allocate key of %zu bytes: %s\n", kZx1130KeyLen,
               zx_status_get_string(rc));
        return rc;
    }
    memset(buf, 0, root_key.len());
    fbl::unique_ptr<Volume> volume;
    if ((rc = Volume::Unlock(parent(), root_key, 0, &volume)) != ZX_OK) {
        zxlogf(ERROR, "failed to unlock volume: %s\n", zx_status_get_string(rc));
        return rc;
    }

    // Get the parent device's block interface
    block_info_t blk;
    if ((rc = device_get_protocol(parent(), ZX_PROTOCOL_BLOCK, &info->proto)) != ZX_OK) {
        zxlogf(ERROR, "failed to get block protocol: %s\n", zx_status_get_string(rc));
        return rc;
    }
    info->proto.ops->query(info->proto.ctx, &blk, &info->op_size);

    // Save device sizes
    info->block_size = blk.block_size;
    info->op_size += sizeof(extra_op_t);
    info->reserved_blocks = volume->reserved_blocks();
    info->reserved_slices = volume->reserved_slices();

    // Reserve space for shadow I/O transactions
    if ((rc = zx::vmo::create(Volume::kBufferSize, 0, &info->vmo)) != ZX_OK) {
        zxlogf(ERROR, "zx::vmo::create failed: %s\n", zx_status_get_string(rc));
        return rc;
    }
    constexpr uint32_t flags = ZX_VM_PERM_READ | ZX_VM_PERM_WRITE;
    uintptr_t address;
    if ((rc = zx::vmar::root_self()->map(0, info->vmo, 0, Volume::kBufferSize, flags, &address)) !=
        ZX_OK) {
        zxlogf(ERROR, "zx::vmar::map failed: %s\n", zx_status_get_string(rc));
        return rc;
    }
    info->base = reinterpret_cast<uint8_t*>(address);

    // Set up allocation bitmap
    if ((rc = map_.Reset(Volume::kBufferSize / info->block_size)) != ZX_OK) {
        zxlogf(ERROR, "bitmap allocation failed: %s\n", zx_status_get_string(rc));
        return rc;
    }

    // Start workers
    // TODO(aarongreen): Investigate performance implications of adding more workers.
    if ((rc = zx::port::create(0, &port_)) != ZX_OK) {
        zxlogf(ERROR, "zx::port::create failed: %s\n", zx_status_get_string(rc));
        return rc;
    }
    for (size_t i = 0; i < kNumWorkers; ++i) {
        zx::port port;
        port_.duplicate(ZX_RIGHT_SAME_RIGHTS, &port);
        if ((rc = workers_[i].Start(this, *volume, std::move(port))) != ZX_OK) {
            zxlogf(ERROR, "failed to start worker %zu: %s\n", i, zx_status_get_string(rc));
            return rc;
        }
        ++info->num_workers;
    }

    // |info_| now holds the pointer; it is reclaimed in |DdkRelease|.
    DeviceInfo* released __attribute__((unused)) = info.release();

    // Enable the device.  Holding the lock at function scope guarantees that |active_| becomes true
    // if and only if |cleanup| is canceled.
    active_.store(true);
    DdkMakeVisible();
    zxlogf(TRACE, "zxcrypt device %p initialized\n", this);

    cleanup.cancel();
    return ZX_OK;
}

////////////////////////////////////////////////////////////////
// ddk::Device methods

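// Passes ioctls through to the parent device, adjusting FVM slice offsets on the way in and
// block/slice counts on the way out so that callers never see the space reserved for zxcrypt
// metadata, and capping the reported maximum transfer size.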
zx_status_t Device::DdkIoctl(uint32_t op, const void* in, size_t in_len, void* out, size_t out_len,
                             size_t* actual) {
    LOG_ENTRY_ARGS("op=0x%" PRIx32 ", in=%p, in_len=%zu, out=%p, out_len=%zu, actual=%p", op, in,
                   in_len, out, out_len, actual);
    ZX_DEBUG_ASSERT(info_);
    zx_status_t rc;

    // Modify inputs
    switch (op) {
    case IOCTL_BLOCK_FVM_EXTEND:
    case IOCTL_BLOCK_FVM_SHRINK: {
        extend_request_t mod;
        if (!in || in_len < sizeof(mod)) {
            zxlogf(ERROR, "bad parameter(s): in=%p, in_len=%zu\n", in, in_len);
            return ZX_ERR_INVALID_ARGS;
        }
        memcpy(&mod, in, sizeof(mod));
        mod.offset += info_->reserved_slices;
        rc = device_ioctl(parent(), op, &mod, sizeof(mod), out, out_len, actual);
        break;
    }
    case IOCTL_BLOCK_FVM_VSLICE_QUERY: {
        query_request_t mod;
        if (!in || in_len < sizeof(mod)) {
            zxlogf(ERROR, "bad parameter(s): in=%p, in_len=%zu\n", in, in_len);
            return ZX_ERR_INVALID_ARGS;
        }
        memcpy(&mod, in, sizeof(mod));
        for (size_t i = 0; i < mod.count; ++i) {
            mod.vslice_start[i] += info_->reserved_slices;
        }
        rc = device_ioctl(parent(), op, &mod, sizeof(mod), out, out_len, actual);
        break;
    }
    default:
        rc = device_ioctl(parent(), op, in, in_len, out, out_len, actual);
        break;
    }
    if (rc < 0) {
        zxlogf(ERROR, "parent device returned failure for ioctl 0x%" PRIx32 ": %s\n", op,
               zx_status_get_string(rc));
        return rc;
    }

    // Modify outputs
    switch (op) {
    case IOCTL_BLOCK_GET_INFO: {
        block_info_t* mod = static_cast<block_info_t*>(out);
        mod->block_count -= info_->reserved_blocks;
        if (mod->max_transfer_size > kMaxTransferSize) {
            mod->max_transfer_size = kMaxTransferSize;
        }
        break;
    }
    case IOCTL_BLOCK_FVM_QUERY: {
        fvm_info_t* mod = static_cast<fvm_info_t*>(out);
        mod->vslice_count -= info_->reserved_slices;
        break;
    }
    default:
        break;
    }
    return ZX_OK;
}

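// Reports the device size visible to clients: the parent device's size minus the blocks reserved
// for zxcrypt metadata.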
zx_off_t Device::DdkGetSize() {
    LOG_ENTRY();

    zx_off_t reserved, size;
    if (mul_overflow(info_->block_size, info_->reserved_blocks, &reserved) ||
        sub_overflow(device_get_size(parent()), reserved, &size)) {
        zxlogf(ERROR, "device_get_size returned less than what has been reserved\n");
        return 0;
    }

    return size;
}

// TODO(aarongreen): See ZX-1138.  Currently, there's no good way to trigger
// this on demand.
void Device::DdkUnbind() {
    LOG_ENTRY();
    // We call |DdkRemove| exactly once after |Init| completes successfully, which is the only place
    // |active_| becomes true.  The lock is required to prevent |DdkUnbind| from being called
    // during a call to |Init|.
    fbl::AutoLock lock(&mtx_);
    if (active_.exchange(false)) {
        DdkRemove();
    }
}

void Device::DdkRelease() {
    LOG_ENTRY();
    zx_status_t rc;

    // One way or another we need to release the memory
    auto cleanup = fbl::MakeAutoCall([this]() {
        zxlogf(TRACE, "zxcrypt device %p released\n", this);
        delete this;
    });

    // Make sure |Init()| is complete
    thrd_join(init_, &rc);
    if (rc != ZX_OK) {
        zxlogf(WARN, "init thread returned %s\n", zx_status_get_string(rc));
    }

    // If we died early enough (e.g. OOM), this doesn't exist
    if (!info_) {
        return;
    }

    // Stop workers; send a stop message to each, then join each (possibly in different order).
    StopWorkersIfDone();
    for (size_t i = 0; i < info_->num_workers; ++i) {
        workers_[i].Stop();
    }

    // Reclaim |info_| to ensure its memory is freed.
    fbl::unique_ptr<DeviceInfo> info(const_cast<DeviceInfo*>(info_));

    // Release write buffer
    const uintptr_t address = reinterpret_cast<uintptr_t>(info->base);
    if (address != 0 &&
        (rc = zx::vmar::root_self()->unmap(address, Volume::kBufferSize)) != ZX_OK) {
        zxlogf(WARN, "failed to unmap %" PRIu32 " bytes at %" PRIuPTR ": %s\n", Volume::kBufferSize,
               address, zx_status_get_string(rc));
    }
}

////////////////////////////////////////////////////////////////
// ddk::BlockProtocol methods

void Device::BlockImplQuery(block_info_t* out_info, size_t* out_op_size) {
    LOG_ENTRY_ARGS("out_info=%p, out_op_size=%p", out_info, out_op_size);
    ZX_DEBUG_ASSERT(info_);

    info_->proto.ops->query(info_->proto.ctx, out_info, out_op_size);
    out_info->block_count -= info_->reserved_blocks;
    *out_op_size = info_->op_size;
}

void Device::BlockImplQueue(block_op_t* block, block_impl_queue_callback completion_cb,
                            void* cookie) {
    LOG_ENTRY_ARGS("block=%p", block);
    ZX_DEBUG_ASSERT(info_);

    // Check if the device is active.
    if (!active_.load()) {
        zxlogf(ERROR, "rejecting I/O request: device is not active\n");
        completion_cb(cookie, ZX_ERR_BAD_STATE, block);
        return;
    }
    num_ops_.fetch_add(1);

    // Initialize our extra space and save original values
    extra_op_t* extra = BlockToExtra(block, info_->op_size);
    zx_status_t rc = extra->Init(block, completion_cb, cookie, info_->reserved_blocks);
    if (rc != ZX_OK) {
        zxlogf(ERROR, "failed to initialize extra info: %s\n", zx_status_get_string(rc));
        BlockComplete(block, rc);
        return;
    }

    switch (block->command & BLOCK_OP_MASK) {
    case BLOCK_OP_WRITE:
        EnqueueWrite(block);
        break;
    case BLOCK_OP_READ:
    default:
        BlockForward(block, ZX_OK);
        break;
    }
}

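// Forwards |block| to the parent device, or completes it early if an error has already occurred
// or the device is no longer active.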
void Device::BlockForward(block_op_t* block, zx_status_t status) {
    LOG_ENTRY_ARGS("block=%p, status=%s", block, zx_status_get_string(status));
    ZX_DEBUG_ASSERT(info_);

    if (!block) {
        zxlogf(SPEW, "early return; no block provided\n");
        return;
    }
    if (status != ZX_OK) {
        zxlogf(ERROR, "aborting request due to failure: %s\n", zx_status_get_string(status));
        BlockComplete(block, status);
        return;
    }
    // Check if the device is active (i.e. |DdkUnbind| has not been called).
    if (!active_.load()) {
        zxlogf(ERROR, "aborting request; device is not active\n");
        BlockComplete(block, ZX_ERR_BAD_STATE);
        return;
    }

    // Send the request to the parent device
    block_impl_queue(&info_->proto, block, BlockCallback, this);
}

void Device::BlockComplete(block_op_t* block, zx_status_t status) {
    LOG_ENTRY_ARGS("block=%p, status=%s", block, zx_status_get_string(status));
    ZX_DEBUG_ASSERT(info_);
    zx_status_t rc;

    // If a portion of the write buffer was allocated, release it.
    extra_op_t* extra = BlockToExtra(block, info_->op_size);
    if (extra->data) {
        uint64_t off = (extra->data - info_->base) / info_->block_size;
        uint64_t len = block->rw.length;
        extra->data = nullptr;

        fbl::AutoLock lock(&mtx_);
        ZX_DEBUG_ASSERT(map_.Get(off, off + len));
        rc = map_.Clear(off, off + len);
        ZX_DEBUG_ASSERT(rc == ZX_OK);
    }

    // Complete the request.
    extra->completion_cb(extra->cookie, status, block);

    // If we previously stalled, try to re-queue the deferred requests; otherwise, avoid taking the
    // lock.
    if (stalled_.exchange(false)) {
        EnqueueWrite();
    }

    if (num_ops_.fetch_sub(1) == 1) {
        StopWorkersIfDone();
    }
}

////////////////////////////////////////////////////////////////
// Private methods

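// Queues a write request (or re-examines the queue when |block| is null).  Each request is
// assigned a free region of the shadow write buffer before being handed to a worker; if no region
// large enough is available, the device stalls and requests stay queued until an in-flight
// request completes and frees space.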
void Device::EnqueueWrite(block_op_t* block) {
    LOG_ENTRY_ARGS("block=%p", block);
    zx_status_t rc = ZX_OK;

    fbl::AutoLock lock(&mtx_);

    // Append the request to the write queue (if not null)
    extra_op_t* extra;
    if (block) {
        extra = BlockToExtra(block, info_->op_size);
        list_add_tail(&queue_, &extra->node);
    }
    if (stalled_.load()) {
        zxlogf(SPEW, "early return; no requests completed since last stall\n");
        return;
    }

    // Process as many pending write requests as we can right now.
    list_node_t pending;
    list_initialize(&pending);
    while (!list_is_empty(&queue_)) {
        extra = list_peek_head_type(&queue_, extra_op_t, node);
        block = ExtraToBlock(extra, info_->op_size);

        // Find an available offset in the write buffer
        uint64_t off;
        uint64_t len = block->rw.length;
        if ((rc = map_.Find(false, hint_, map_.size(), len, &off)) == ZX_ERR_NO_RESOURCES &&
            (rc = map_.Find(false, 0, map_.size(), len, &off)) == ZX_ERR_NO_RESOURCES) {
            zxlogf(TRACE, "zxcrypt device %p stalled pending request completion\n", this);
            stalled_.store(true);
            break;
        }

        // We don't expect any other errors
        ZX_DEBUG_ASSERT(rc == ZX_OK);
        rc = map_.Set(off, off + len);
        ZX_DEBUG_ASSERT(rc == ZX_OK);

        // Save a hint as to where to start looking next time
        hint_ = (off + len) % map_.size();

        // Modify request to use write buffer
        extra->data = info_->base + (off * info_->block_size);
        block->rw.vmo = info_->vmo.get();
        block->rw.offset_vmo = (extra->data - info_->base) / info_->block_size;

        list_add_tail(&pending, list_remove_head(&queue_));
    }

    // Release the lock and send blocks that are ready to the workers
    lock.release();
    extra_op_t* tmp;
    list_for_every_entry_safe (&pending, extra, tmp, extra_op_t, node) {
        list_delete(&extra->node);
        block = ExtraToBlock(extra, info_->op_size);
        SendToWorker(block);
    }
}

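// Wraps |block| in a port packet and queues it for one of the workers; if the packet cannot be
// queued, the request is completed with the resulting error.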
void Device::SendToWorker(block_op_t* block) {
    LOG_ENTRY_ARGS("block=%p", block);
    zx_status_t rc;

    zx_port_packet_t packet;
    Worker::MakeRequest(&packet, Worker::kBlockRequest, block);
    if ((rc = port_.queue(&packet)) != ZX_OK) {
        zxlogf(ERROR, "zx::port::queue failed: %s\n", zx_status_get_string(rc));
        BlockComplete(block, rc);
        return;
    }
}

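// Completion callback given to the parent device in |BlockForward|.  Restores the request fields
// the parent may have modified, then hands completed reads to a worker and completes all other
// requests directly.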
void Device::BlockCallback(void* cookie, zx_status_t status, block_op_t* block) {
    LOG_ENTRY_ARGS("block=%p, status=%s", block, zx_status_get_string(status));

    // Restore data that may have changed
    Device* device = static_cast<Device*>(cookie);
    extra_op_t* extra = BlockToExtra(block, device->op_size());
    block->rw.vmo = extra->vmo;
    block->rw.length = extra->length;
    block->rw.offset_dev = extra->offset_dev;
    block->rw.offset_vmo = extra->offset_vmo;

    if (status != ZX_OK) {
        zxlogf(TRACE, "parent device returned %s\n", zx_status_get_string(status));
        device->BlockComplete(block, status);
        return;
    }
    switch (block->command & BLOCK_OP_MASK) {
    case BLOCK_OP_READ:
        device->SendToWorker(block);
        break;
    case BLOCK_OP_WRITE:
    default:
        device->BlockComplete(block, ZX_OK);
        break;
    }
}

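// Queues a stop request for each worker once the device is inactive and no I/O operations remain
// outstanding.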
void Device::StopWorkersIfDone() {
    // Multiple threads may pass this check, but that's harmless.
    if (!active_.load() && num_ops_.load() == 0) {
        zx_port_packet_t packet;
        Worker::MakeRequest(&packet, Worker::kStopRequest);
        for (size_t i = 0; i < info_->num_workers; ++i) {
            port_.queue(&packet);
        }
    }
}

} // namespace zxcrypt

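// C entry point for the driver's bind hook: allocates the device and transfers ownership to
// devmgr once |Bind| succeeds.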
extern "C" zx_status_t zxcrypt_device_bind(void* ctx, zx_device_t* parent) {
    LOG_ENTRY_ARGS("ctx=%p, parent=%p", ctx, parent);
    zx_status_t rc;

    fbl::AllocChecker ac;
    auto dev = fbl::make_unique_checked<zxcrypt::Device>(&ac, parent);
    if (!ac.check()) {
        zxlogf(ERROR, "failed to allocate %zu bytes\n", sizeof(zxcrypt::Device));
        return ZX_ERR_NO_MEMORY;
    }
    if ((rc = dev->Bind()) != ZX_OK) {
        zxlogf(ERROR, "failed to bind: %s\n", zx_status_get_string(rc));
        return rc;
    }
    // devmgr is now in charge of the memory for |dev|
    zxcrypt::Device* devmgr_owned __attribute__((unused));
    devmgr_owned = dev.release();

    return ZX_OK;
}