// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <inttypes.h>

#ifdef __Fuchsia__
#include <fbl/auto_lock.h>
#include <lib/fzl/owned-vmo-mapper.h>
#include <lib/zx/vmo.h>
#include <zircon/syscalls.h>
#include <zircon/threads.h>
#endif

#include <fbl/algorithm.h>
#include <fbl/intrusive_hash_table.h>
#include <fbl/intrusive_single_list.h>
#include <fbl/macros.h>
#include <fbl/ref_ptr.h>
#include <fbl/type_support.h>
#include <fbl/unique_ptr.h>
#include <fs/block-txn.h>
#include <fs/trace.h>

#include "minfs-private.h"

#include <minfs/writeback.h>
#include <utility>

namespace minfs {

#ifdef __Fuchsia__

void WriteTxn::Enqueue(zx_handle_t vmo, uint64_t vmo_offset, uint64_t dev_offset,
                       uint64_t nblocks) {
    ValidateVmoSize(vmo, static_cast<blk_t>(vmo_offset));
    for (size_t i = 0; i < requests_.size(); i++) {
        if (requests_[i].vmo != vmo) {
            continue;
        }

        if (requests_[i].vmo_offset == vmo_offset) {
            // Take the longer of the operations (if operating on the same
            // blocks).
            requests_[i].length = (requests_[i].length > nblocks) ? requests_[i].length : nblocks;
            return;
        } else if ((requests_[i].vmo_offset + requests_[i].length == vmo_offset) &&
                   (requests_[i].dev_offset + requests_[i].length == dev_offset)) {
            // Combine with the previous request, if immediately following.
            requests_[i].length += nblocks;
            return;
        }
    }

    WriteRequest request;
    request.vmo = vmo;
    // NOTE: It's easier to compare everything when dealing with blocks (not
    // offsets!), so the following are described in terms of blocks until we
    // Flush().
    request.vmo_offset = vmo_offset;
    request.dev_offset = dev_offset;
    request.length = nblocks;
    requests_.push_back(std::move(request));
}

zx_status_t WriteTxn::Flush(zx_handle_t vmo, vmoid_t vmoid) {
    ZX_DEBUG_ASSERT(vmo != ZX_HANDLE_INVALID);
    ZX_DEBUG_ASSERT(vmoid != VMOID_INVALID);

    // Update all the outgoing transactions to be in "disk blocks",
    // not "Minfs blocks".
    block_fifo_request_t blk_reqs[requests_.size()];
    const uint32_t kDiskBlocksPerMinfsBlock = kMinfsBlockSize / bc_->DeviceBlockSize();
    for (size_t i = 0; i < requests_.size(); i++) {
        blk_reqs[i].group = bc_->BlockGroupID();
        blk_reqs[i].vmoid = vmoid;
        blk_reqs[i].opcode = BLOCKIO_WRITE;
        blk_reqs[i].vmo_offset = requests_[i].vmo_offset * kDiskBlocksPerMinfsBlock;
        blk_reqs[i].dev_offset = requests_[i].dev_offset * kDiskBlocksPerMinfsBlock;
        // TODO(ZX-2253): Remove this assertion.
        uint64_t length = requests_[i].length * kDiskBlocksPerMinfsBlock;
        ZX_ASSERT_MSG(length < UINT32_MAX, "Too many blocks");
        blk_reqs[i].length = static_cast<uint32_t>(length);
    }

    // Actually send the operations to the underlying block device.
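    // Transaction() submits all requests over the block FIFO as one batch and
    // blocks until the device has responded to each of them, collapsing any
    // per-request failure into the single status returned for the group.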
    zx_status_t status = bc_->Transaction(blk_reqs, requests_.size());
    requests_.reset();
    return status;
}

size_t WriteTxn::BlkCount() const {
    size_t blocks_needed = 0;
    for (size_t i = 0; i < requests_.size(); i++) {
        blocks_needed += requests_[i].length;
    }
    return blocks_needed;
}

#endif // __Fuchsia__

WritebackWork::WritebackWork(Bcache* bc) : WriteTxn(bc),
#ifdef __Fuchsia__
                                           closure_(nullptr),
#endif
                                           node_count_(0) {}

void WritebackWork::Reset() {
#ifdef __Fuchsia__
    ZX_DEBUG_ASSERT(Requests().size() == 0);
    closure_ = nullptr;
#endif
    while (0 < node_count_) {
        vn_[--node_count_] = nullptr;
    }
}

#ifdef __Fuchsia__
// Returns the number of blocks of the writeback buffer that have been
// consumed.
size_t WritebackWork::Complete(zx_handle_t vmo, vmoid_t vmoid) {
    size_t blk_count = BlkCount();
    zx_status_t status = Flush(vmo, vmoid);
    if (closure_) {
        closure_(status);
    }
    Reset();
    return blk_count;
}

void WritebackWork::SetClosure(SyncCallback closure) {
    ZX_DEBUG_ASSERT(!closure_);
    closure_ = std::move(closure);
}
#else
void WritebackWork::Complete() {
    Transact();
    Reset();
}
#endif // __Fuchsia__

// Allow "pinning" Vnodes so they aren't destroyed while we're completing
// this writeback operation.
void WritebackWork::PinVnode(fbl::RefPtr<VnodeMinfs> vn) {
    for (size_t i = 0; i < node_count_; i++) {
        if (vn_[i].get() == vn.get()) {
            // Already pinned.
            return;
        }
    }
    ZX_DEBUG_ASSERT(node_count_ < fbl::count_of(vn_));
    vn_[node_count_++] = std::move(vn);
}

#ifdef __Fuchsia__
zx_status_t WritebackBuffer::Create(Bcache* bc, fzl::OwnedVmoMapper mapper,
                                    fbl::unique_ptr<WritebackBuffer>* out) {
    fbl::unique_ptr<WritebackBuffer> wb(new WritebackBuffer(bc, std::move(mapper)));
    if (wb->mapper_.size() % kMinfsBlockSize != 0) {
        return ZX_ERR_INVALID_ARGS;
    } else if (cnd_init(&wb->consumer_cvar_) != thrd_success) {
        return ZX_ERR_NO_RESOURCES;
    } else if (cnd_init(&wb->producer_cvar_) != thrd_success) {
        return ZX_ERR_NO_RESOURCES;
    } else if (thrd_create_with_name(&wb->writeback_thrd_,
                                     WritebackBuffer::WritebackThread, wb.get(),
                                     "minfs-writeback") != thrd_success) {
        return ZX_ERR_NO_RESOURCES;
    }

    zx_status_t status = wb->bc_->AttachVmo(wb->mapper_.vmo(), &wb->buffer_vmoid_);
    if (status != ZX_OK) {
        return status;
    }

    *out = std::move(wb);
    return ZX_OK;
}

WritebackBuffer::WritebackBuffer(Bcache* bc, fzl::OwnedVmoMapper mapper) :
    bc_(bc), unmounting_(false), mapper_(std::move(mapper)),
    cap_(mapper_.size() / kMinfsBlockSize) {}

WritebackBuffer::~WritebackBuffer() {
    // Block until the background thread completes itself.
    {
        fbl::AutoLock lock(&writeback_lock_);
        unmounting_ = true;
        cnd_signal(&consumer_cvar_);
    }
    int r;
    thrd_join(writeback_thrd_, &r);

    if (buffer_vmoid_ != VMOID_INVALID) {
        block_fifo_request_t request;
        request.group = bc_->BlockGroupID();
        request.vmoid = buffer_vmoid_;
        request.opcode = BLOCKIO_CLOSE_VMO;
        bc_->Transaction(&request, 1);
    }
}

zx_status_t WritebackBuffer::EnsureSpaceLocked(size_t blocks) {
    if (blocks > cap_) {
        // There will never be enough room in the writeback buffer
        // for this request.
        return ZX_ERR_NO_RESOURCES;
    }
    while (len_ + blocks > cap_) {
        // Not enough room to write back work, yet. Wait until room is
        // available.
        Waiter w;
        producer_queue_.push(&w);
        do {
            cnd_wait(&producer_cvar_, writeback_lock_.GetInternal());
        } while ((&producer_queue_.front() != &w) || // We are first in line to enqueue...
                 (len_ + blocks > cap_));            // ... and there is enough space for us.
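        // We are now at the front of the producer queue and the buffer has
        // room for this request; dequeue our waiter before claiming the
        // space. Producers still queued behind us are woken by later
        // producer_cvar_ signals from the writeback thread.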
        producer_queue_.pop();
    }
    return ZX_OK;
}

void WritebackBuffer::CopyToBufferLocked(WriteTxn* txn) {
    auto& reqs = txn->Requests();
    // Write back to the buffer.
    for (size_t i = 0; i < reqs.size(); i++) {
        size_t vmo_offset = reqs[i].vmo_offset;
        size_t dev_offset = reqs[i].dev_offset;
        const size_t vmo_len = reqs[i].length;
        ZX_DEBUG_ASSERT(vmo_len > 0);
        size_t wb_offset = (start_ + len_) % cap_;
        size_t wb_len = (wb_offset + vmo_len > cap_) ? cap_ - wb_offset : vmo_len;
        ZX_DEBUG_ASSERT(wb_len <= vmo_len);
        ZX_DEBUG_ASSERT(wb_offset < cap_);
        zx_handle_t vmo = reqs[i].vmo;

        void* ptr = (void*)((uintptr_t)(mapper_.start()) +
                            (uintptr_t)(wb_offset * kMinfsBlockSize));
        zx_status_t status;
        ZX_DEBUG_ASSERT((start_ <= wb_offset) ? (start_ < wb_offset + wb_len) :
                        (wb_offset + wb_len <= start_)); // Wraparound
        ZX_ASSERT_MSG((status = zx_vmo_read(vmo, ptr, vmo_offset * kMinfsBlockSize,
                                            wb_len * kMinfsBlockSize)) == ZX_OK,
                      "VMO Read Fail: %d", status);
        len_ += wb_len;

        // Update the WriteRequest to transfer from the writeback buffer
        // out to disk, rather than the supplied VMO.
        reqs[i].vmo_offset = wb_offset;
        reqs[i].length = wb_len;

        if (wb_len != vmo_len) {
            // We wrapped around; write what remains from this request.
            vmo_offset += wb_len;
            dev_offset += wb_len;
            wb_len = vmo_len - wb_len;
            ptr = mapper_.start();
            ZX_DEBUG_ASSERT((start_ == 0) ? (start_ < wb_len) : (wb_len <= start_)); // Wraparound
            ZX_ASSERT(zx_vmo_read(vmo, ptr, vmo_offset * kMinfsBlockSize,
                                  wb_len * kMinfsBlockSize) == ZX_OK);
            len_ += wb_len;

            // Shift down all following write requests.
            static_assert(fbl::is_pod<WriteRequest>::value, "Can't memmove non-POD");

            // Insert the "new" request, which is the latter half of
            // the request we wrote out earlier.
            WriteRequest request;
            request.vmo = reqs[i].vmo;
            request.vmo_offset = 0;
            request.dev_offset = dev_offset;
            request.length = wb_len;
            i++;
            reqs.insert(i, request);
        }
    }
}

void WritebackBuffer::Enqueue(fbl::unique_ptr<WritebackWork> work) {
    TRACE_DURATION("minfs", "WritebackBuffer::Enqueue");
    TRACE_FLOW_BEGIN("minfs", "writeback", reinterpret_cast<trace_flow_id_t>(work.get()));
    fbl::AutoLock lock(&writeback_lock_);

    {
        TRACE_DURATION("minfs", "Allocating Writeback space");
        size_t blocks = work->BlkCount();
        // TODO(smklein): Experimentally, all filesystem operations cause between
        // 0 and 10 blocks to be updated, though the writeback buffer has space
        // for thousands of blocks.
        //
        // Hypothetically, an operation (most likely, an enormous write) could
        // cause a single operation to exceed the size of the writeback buffer,
        // but this is currently impossible as our writes are broken into 8KB
        // chunks.
        //
        // Regardless, there should either (1) exist a fallback mechanism for
        // these extremely large operations, or (2) the worst-case operation
        // should be calculated, and it should be proven that it will always
        // fit within the allocated writeback buffer.
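        // Until such a mechanism exists, a request that can never fit is
        // fatal: EnsureSpaceLocked() only fails when blocks > cap_, and
        // otherwise blocks until enough of the circular buffer has drained.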
        ZX_ASSERT_MSG(EnsureSpaceLocked(blocks) == ZX_OK,
                      "Requested txn (%zu blocks) larger than writeback buffer", blocks);
    }

    {
        TRACE_DURATION("minfs", "Copying to Writeback buffer");
        CopyToBufferLocked(work.get());
    }

    work_queue_.push(std::move(work));
    cnd_signal(&consumer_cvar_);
}

int WritebackBuffer::WritebackThread(void* arg) {
    WritebackBuffer* b = reinterpret_cast<WritebackBuffer*>(arg);

    b->writeback_lock_.Acquire();
    while (true) {
        while (!b->work_queue_.is_empty()) {
            auto work = b->work_queue_.pop();
            TRACE_DURATION("minfs", "WritebackBuffer::WritebackThread");

            // Stay unlocked while processing a unit of work.
            b->writeback_lock_.Release();

            // TODO(smklein): We could add additional validation that the blocks
            // in "work" are contiguous and in the range of [start_, len_)
            // (including wraparound).
            size_t blks_consumed = work->Complete(b->mapper_.vmo().get(), b->buffer_vmoid_);
            TRACE_FLOW_END("minfs", "writeback", reinterpret_cast<trace_flow_id_t>(work.get()));
            work = nullptr;

            // Relock before checking the state of the queue.
            b->writeback_lock_.Acquire();
            b->start_ = (b->start_ + blks_consumed) % b->cap_;
            b->len_ -= blks_consumed;
            cnd_signal(&b->producer_cvar_);
        }

        // Before waiting, we should check if we're unmounting.
        if (b->unmounting_) {
            b->writeback_lock_.Release();
            return 0;
        }
        cnd_wait(&b->consumer_cvar_, b->writeback_lock_.GetInternal());
    }
}

#endif // __Fuchsia__

} // namespace minfs
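
// Illustrative usage (a sketch, not part of this file): a caller typically
// builds a WritebackWork describing dirty blocks in some data VMO, pins the
// vnode that owns them, and hands the work off to the WritebackBuffer, which
// copies the data into its circular buffer and flushes it from the background
// thread. The variable names below are hypothetical; the real call sites live
// elsewhere in minfs:
//
//   fbl::unique_ptr<WritebackWork> work(new WritebackWork(bc));
//   work->Enqueue(data_vmo, vmo_block, dev_block, block_count); // block units
//   work->PinVnode(std::move(vnode));
//   writeback_buffer->Enqueue(std::move(work)); // copies, then flushes async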