// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <threads.h>

#include <ddk/binding.h>
#include <ddk/debug.h>
#include <ddk/device.h>
#include <ddk/driver.h>
#include <ddk/io-buffer.h>
#include <ddk/mmio-buffer.h>
#include <ddk/protocol/block.h>
#include <ddk/protocol/pci.h>
#include <ddk/protocol/pci-lib.h>

#include <hw/reg.h>
#include <hw/pci.h>

#include <lib/sync/completion.h>

#include <zircon/device/block.h>
#include <zircon/syscalls.h>
#include <zircon/types.h>
#include <zircon/listnode.h>

#include "nvme-hw.h"
#define TXN_FLAG_FAILED 1

typedef struct {
    block_op_t op;
    list_node_t node;
    block_impl_queue_callback completion_cb;
    void* cookie;
    uint16_t pending_utxns;
    uint8_t opcode;
    uint8_t flags;
} nvme_txn_t;

typedef struct {
    zx_paddr_t phys;  // io buffer phys base (1 page)
    void* virt;       // io buffer virt base
    zx_handle_t pmt;  // pinned memory
    nvme_txn_t* txn;  // related txn
    uint16_t id;
    uint16_t reserved0;
    uint32_t reserved1;
} nvme_utxn_t;

#define UTXN_COUNT 63
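// (A single-page submission queue holds 64 entries, but one slot must stay
// unused so that a full queue -- tail + 1 == head -- can be told apart from
// an empty one, leaving 63 usable command slots and thus 63 utxns.)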

// There's no system constant for this. Ensure it matches reality.
#define PAGE_SHIFT (12ULL)
static_assert(PAGE_SIZE == (1ULL << PAGE_SHIFT), "");

#define PAGE_MASK (PAGE_SIZE - 1ULL)

// Limit maximum transfer size to 1MB which fits comfortably
// within our single scatter gather page per utxn setup
#define MAX_XFER (1024*1024)

// Maximum submission and completion queue item counts, for
// queues that are a single page in size.
#define SQMAX (PAGE_SIZE / sizeof(nvme_cmd_t))
#define CQMAX (PAGE_SIZE / sizeof(nvme_cpl_t))

// global driver state bits
#define FLAG_IRQ_THREAD_STARTED 0x0001
#define FLAG_IO_THREAD_STARTED  0x0002
#define FLAG_SHUTDOWN           0x0004

#define FLAG_HAS_VWC            0x0100
typedef struct {
    mmio_buffer_t mmio;
    zx_handle_t irqh;
    zx_handle_t bti;
    uint32_t flags;
    mtx_t lock;

    // io queue doorbell registers
    void* io_sq_tail_db;
    void* io_cq_head_db;

    nvme_cpl_t* io_cq;
    nvme_cmd_t* io_sq;
    uint32_t io_nsid;
    uint16_t io_cq_head;
    uint16_t io_cq_toggle;
    uint16_t io_sq_tail;
    uint16_t io_sq_head;

    uint64_t utxn_avail; // bitmask of available utxns

    // The pending list is txns that have been received
    // via nvme_queue() and are waiting for io to start.
    // The exception is the head of the pending list which may
    // be partially started, waiting for more utxns to become
    // available.
    // The active list consists of txns where all utxns have
    // been created and we're waiting for them to complete or
    // error out.
    list_node_t pending_txns; // inbound txns to process
    list_node_t active_txns;  // txns in flight

    // The io signal completion is signaled from nvme_queue()
    // or from the irq thread, notifying the io thread that
    // it has work to do.
    sync_completion_t io_signal;

    uint32_t max_xfer;
    block_info_t info;

    // admin queue doorbell registers
    void* io_admin_sq_tail_db;
    void* io_admin_cq_head_db;

    // admin queues and state
    nvme_cpl_t* admin_cq;
    nvme_cmd_t* admin_sq;
    uint16_t admin_cq_head;
    uint16_t admin_cq_toggle;
    uint16_t admin_sq_tail;
    uint16_t admin_sq_head;

    // context for admin transactions
    // presently we serialize these under the admin_lock
    mtx_t admin_lock;
    sync_completion_t admin_signal;
    nvme_cpl_t admin_result;

    pci_protocol_t pci;
    zx_device_t* zxdev;

    size_t iosz;

    // source of physical pages for queues and admin commands
    io_buffer_t iob;

    thrd_t irqthread;
    thrd_t iothread;

    // pool of utxns
    nvme_utxn_t utxn[UTXN_COUNT];
} nvme_device_t;


// We break IO transactions down into one or more "micro transactions" (utxn)
// based on the transfer limits of the controller, etc. Each utxn has an
// id associated with it, which is used as the command id for the command
// queued to the NVME device. This id is the same as its index into the
// pool of utxns and the bitmask of free txns, to simplify management.
//
// We maintain a pool of 63 of these, which is the number of commands
// that can be submitted to NVME via a single page submit queue.
//
// The utxns are not protected by locks. Instead, after initialization,
// they may only be touched by the io thread, which is responsible for
// queueing commands and dequeuing completion messages.

static nvme_utxn_t* utxn_get(nvme_device_t* nvme) {
    uint64_t n = __builtin_ffsll(nvme->utxn_avail);
    if (n == 0) {
        return NULL;
    }
    n--;
    nvme->utxn_avail &= ~(1ULL << n);
    return nvme->utxn + n;
}

static void utxn_put(nvme_device_t* nvme, nvme_utxn_t* utxn) {
    uint64_t n = utxn->id;
    nvme->utxn_avail |= (1ULL << n);
}

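// Pull the next completion off the admin completion queue, if any. New
// entries are detected via the phase (toggle) bit in the status word; the
// CQ head doorbell is rung as each entry is consumed.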
static zx_status_t nvme_admin_cq_get(nvme_device_t* nvme, nvme_cpl_t* cpl) {
    if ((readw(&nvme->admin_cq[nvme->admin_cq_head].status) & 1) != nvme->admin_cq_toggle) {
        return ZX_ERR_SHOULD_WAIT;
    }
    *cpl = nvme->admin_cq[nvme->admin_cq_head];

    // advance the head pointer, wrapping and inverting toggle at max
    uint16_t next = (nvme->admin_cq_head + 1) & (CQMAX - 1);
    if ((nvme->admin_cq_head = next) == 0) {
        nvme->admin_cq_toggle ^= 1;
    }

    // note the new sq head reported by hw
    nvme->admin_sq_head = cpl->sq_head;

    // ring the doorbell
    writel(next, nvme->io_admin_cq_head_db);
    return ZX_OK;
}

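// Append a command to the admin submission queue and ring the SQ tail
// doorbell. Fails with ZX_ERR_SHOULD_WAIT if the queue is full.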
static zx_status_t nvme_admin_sq_put(nvme_device_t* nvme, nvme_cmd_t* cmd) {
    uint16_t next = (nvme->admin_sq_tail + 1) & (SQMAX - 1);

    // if tail + 1 == head: queue is full
    if (next == nvme->admin_sq_head) {
        return ZX_ERR_SHOULD_WAIT;
    }

    nvme->admin_sq[nvme->admin_sq_tail] = *cmd;
    nvme->admin_sq_tail = next;

    // ring the doorbell
    writel(next, nvme->io_admin_sq_tail_db);
    return ZX_OK;
}

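// Pull the next completion off the io completion queue, if any. Unlike the
// admin path, the CQ head doorbell is not rung here; the caller batches
// completions and acknowledges them once via nvme_io_cq_ack().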
static zx_status_t nvme_io_cq_get(nvme_device_t* nvme, nvme_cpl_t* cpl) {
    if ((readw(&nvme->io_cq[nvme->io_cq_head].status) & 1) != nvme->io_cq_toggle) {
        return ZX_ERR_SHOULD_WAIT;
    }
    *cpl = nvme->io_cq[nvme->io_cq_head];

    // advance the head pointer, wrapping and inverting toggle at max
    uint16_t next = (nvme->io_cq_head + 1) & (CQMAX - 1);
    if ((nvme->io_cq_head = next) == 0) {
        nvme->io_cq_toggle ^= 1;
    }

    // note the new sq head reported by hw
    nvme->io_sq_head = cpl->sq_head;
    return ZX_OK;
}

static void nvme_io_cq_ack(nvme_device_t* nvme) {
    // ring the doorbell
    writel(nvme->io_cq_head, nvme->io_cq_head_db);
}

static zx_status_t nvme_io_sq_put(nvme_device_t* nvme, nvme_cmd_t* cmd) {
    uint16_t next = (nvme->io_sq_tail + 1) & (SQMAX - 1);

    // if tail + 1 == head: queue is full
    if (next == nvme->io_sq_head) {
        return ZX_ERR_SHOULD_WAIT;
    }

    nvme->io_sq[nvme->io_sq_tail] = *cmd;
    nvme->io_sq_tail = next;

    // ring the doorbell
    writel(next, nvme->io_sq_tail_db);
    return ZX_OK;
}

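// Interrupt handler thread: waits on the device interrupt, forwards any
// admin completion to the waiting admin transaction, and always pokes the
// io thread so it can reap io completions.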
static int irq_thread(void* arg) {
    nvme_device_t* nvme = arg;
    for (;;) {
        zx_status_t r;
        if ((r = zx_interrupt_wait(nvme->irqh, NULL)) != ZX_OK) {
            zxlogf(ERROR, "nvme: irq wait failed: %d\n", r);
            break;
        }

        nvme_cpl_t cpl;
        if (nvme_admin_cq_get(nvme, &cpl) == ZX_OK) {
            nvme->admin_result = cpl;
            sync_completion_signal(&nvme->admin_signal);
        }

        sync_completion_signal(&nvme->io_signal);
    }
    return 0;
}

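// Issue a single admin command and wait (up to one second) for its
// completion. Admin transactions are serialized under admin_lock.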
static zx_status_t nvme_admin_txn(nvme_device_t* nvme, nvme_cmd_t* cmd, nvme_cpl_t* cpl) {
    zx_status_t r;
    mtx_lock(&nvme->admin_lock);
    sync_completion_reset(&nvme->admin_signal);
    if ((r = nvme_admin_sq_put(nvme, cmd)) != ZX_OK) {
        goto done;
    }
    if ((r = sync_completion_wait(&nvme->admin_signal, ZX_SEC(1))) != ZX_OK) {
        zxlogf(ERROR, "nvme: admin txn: timed out\n");
        goto done;
    }

    unsigned code = NVME_CPL_STATUS_CODE(nvme->admin_result.status);
    if (code != 0) {
        zxlogf(ERROR, "nvme: admin txn: nvm error %03x\n", code);
        r = ZX_ERR_IO;
    }
    if (cpl != NULL) {
        *cpl = nvme->admin_result;
    }
done:
    mtx_unlock(&nvme->admin_lock);
    return r;
}

static inline void txn_complete(nvme_txn_t* txn, zx_status_t status) {
    txn->completion_cb(txn->cookie, status, &txn->op);
}

// Attempt to generate utxns and queue nvme commands for a txn.
// Returns true if the txn could not be fully processed due to a temporary
// lack of resources (caller should retain it), or false if it was fully
// queued or errored out.
static bool io_process_txn(nvme_device_t* nvme, nvme_txn_t* txn) {
    zx_handle_t vmo = txn->op.rw.vmo;
    nvme_utxn_t* utxn;
    zx_paddr_t* pages;
    zx_status_t r;

    for (;;) {
        // If there are no available utxns, we can't proceed
        // and we tell the caller to retain the txn (true)
        if ((utxn = utxn_get(nvme)) == NULL) {
            return true;
        }

        uint32_t blocks = txn->op.rw.length;
        if (blocks > nvme->max_xfer) {
            blocks = nvme->max_xfer;
        }

        // Total transfer size in bytes
        size_t bytes = ((size_t) blocks) * ((size_t) nvme->info.block_size);

        // Page offset of first page of transfer
        size_t pageoffset = txn->op.rw.offset_vmo & (~PAGE_MASK);

        // Byte offset into first page of transfer
        size_t byteoffset = txn->op.rw.offset_vmo & PAGE_MASK;

        // Total pages mapped / touched
        size_t pagecount = (byteoffset + bytes + PAGE_MASK) >> PAGE_SHIFT;

        // read disk (OP_READ) -> memory (PERM_WRITE) or
        // write memory (PERM_READ) -> disk (OP_WRITE)
        uint32_t opt = (txn->opcode == NVME_OP_READ) ? ZX_BTI_PERM_WRITE : ZX_BTI_PERM_READ;

        pages = utxn->virt;

        if ((r = zx_bti_pin(nvme->bti, opt, vmo, pageoffset, pagecount << PAGE_SHIFT,
                            pages, pagecount, &utxn->pmt)) != ZX_OK) {
            zxlogf(ERROR, "nvme: could not pin pages: %d\n", r);
            break;
        }

        nvme_cmd_t cmd;
        memset(&cmd, 0, sizeof(cmd));
        cmd.cmd = NVME_CMD_CID(utxn->id) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(txn->opcode);
        cmd.nsid = 1;
        cmd.u.rw.start_lba = txn->op.rw.offset_dev;
        cmd.u.rw.block_count = blocks - 1;
        // The NVME command has room for two data pointers inline.
        // The first is always the pointer to the first page where data is.
        // The second is the second page when pagecount is 2, or the address
        // of an array of pages 2..n when pagecount > 2.
        cmd.dptr.prp[0] = pages[0] | byteoffset;
        if (pagecount == 2) {
            cmd.dptr.prp[1] = pages[1];
        } else if (pagecount > 2) {
            cmd.dptr.prp[1] = utxn->phys + sizeof(uint64_t);
        }

        zxlogf(TRACE, "nvme: txn=%p utxn id=%u pages=%zu op=%s\n", txn, utxn->id, pagecount,
               txn->opcode == NVME_OP_WRITE ? "WR" : "RD");
        zxlogf(SPEW, "nvme: prp[0]=%016zx prp[1]=%016zx\n", cmd.dptr.prp[0], cmd.dptr.prp[1]);
        zxlogf(SPEW, "nvme: pages[] = { %016zx, %016zx, %016zx, %016zx, ... }\n",
               pages[0], pages[1], pages[2], pages[3]);

        if ((r = nvme_io_sq_put(nvme, &cmd)) != ZX_OK) {
            zxlogf(ERROR, "nvme: could not submit cmd (txn=%p id=%u)\n", txn, utxn->id);
            break;
        }

        utxn->txn = txn;

        // keep track of where we are
        txn->op.rw.offset_dev += blocks;
        txn->op.rw.offset_vmo += bytes;
        txn->op.rw.length -= blocks;
        txn->pending_utxns++;

        // If there's no more remaining, we're done, and we
        // move this txn to the active list and tell the
        // caller not to retain the txn (false)
        if (txn->op.rw.length == 0) {
            mtx_lock(&nvme->lock);
            list_add_tail(&nvme->active_txns, &txn->node);
            mtx_unlock(&nvme->lock);
            return false;
        }
    }

    // failure
    if ((r = zx_pmt_unpin(utxn->pmt)) != ZX_OK) {
        zxlogf(ERROR, "nvme: cannot unpin io buffer: %d\n", r);
    }
    utxn_put(nvme, utxn);

    mtx_lock(&nvme->lock);
    txn->flags |= TXN_FLAG_FAILED;
    if (txn->pending_utxns) {
        // if there are earlier uncompleted IOs we become active now
        // and will finish erroring out when they complete
        list_add_tail(&nvme->active_txns, &txn->node);
        txn = NULL;
    }
    mtx_unlock(&nvme->lock);

    if (txn != NULL) {
        txn_complete(txn, ZX_ERR_INTERNAL);
    }

    // Either way we tell the caller not to retain the txn (false)
    return false;
}

static void io_process_txns(nvme_device_t* nvme) {
    nvme_txn_t* txn;

    for (;;) {
        mtx_lock(&nvme->lock);
        txn = list_remove_head_type(&nvme->pending_txns, nvme_txn_t, node);
        mtx_unlock(&nvme->lock);

        if (txn == NULL) {
            return;
        }

        if (io_process_txn(nvme, txn)) {
            // put txn back at front of queue for further processing later
            mtx_lock(&nvme->lock);
            list_add_head(&nvme->pending_txns, &txn->node);
            mtx_unlock(&nvme->lock);
            return;
        }
    }
}

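// Reap all available io completions: unpin the buffers, return the utxns to
// the pool, and complete any txn whose outstanding work has fully finished.
// The CQ head doorbell is rung once at the end for the whole batch.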
static void io_process_cpls(nvme_device_t* nvme) {
    bool ring_doorbell = false;
    nvme_cpl_t cpl;

    while (nvme_io_cq_get(nvme, &cpl) == ZX_OK) {
        ring_doorbell = true;

        if (cpl.cmd_id >= UTXN_COUNT) {
            zxlogf(ERROR, "nvme: unexpected cmd id %u\n", cpl.cmd_id);
            continue;
        }
        nvme_utxn_t* utxn = nvme->utxn + cpl.cmd_id;
        nvme_txn_t* txn = utxn->txn;

        if (txn == NULL) {
            zxlogf(ERROR, "nvme: inactive utxn #%u completed?!\n", cpl.cmd_id);
            continue;
        }

        uint32_t code = NVME_CPL_STATUS_CODE(cpl.status);
        if (code != 0) {
            zxlogf(ERROR, "nvme: utxn #%u txn %p failed: status=%03x\n",
                   cpl.cmd_id, txn, code);
            txn->flags |= TXN_FLAG_FAILED;
            // discard any remaining bytes -- no reason to keep creating
            // further utxns once one has failed
            txn->op.rw.length = 0;
        } else {
            zxlogf(SPEW, "nvme: utxn #%u txn %p OKAY\n", cpl.cmd_id, txn);
        }

        zx_status_t r;
        if ((r = zx_pmt_unpin(utxn->pmt)) != ZX_OK) {
            zxlogf(ERROR, "nvme: cannot unpin io buffer: %d\n", r);
        }

        // release the microtransaction
        utxn->txn = NULL;
        utxn_put(nvme, utxn);

        txn->pending_utxns--;
        if ((txn->pending_utxns == 0) && (txn->op.rw.length == 0)) {
            // remove from either pending or active list
            mtx_lock(&nvme->lock);
            list_delete(&txn->node);
            mtx_unlock(&nvme->lock);
            zxlogf(TRACE, "nvme: txn %p %s\n", txn, txn->flags & TXN_FLAG_FAILED ? "error" : "okay");
            txn_complete(txn, txn->flags & TXN_FLAG_FAILED ? ZX_ERR_IO : ZX_OK);
        }
    }

    if (ring_doorbell) {
        nvme_io_cq_ack(nvme);
    }
}

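// Main io worker: woken by nvme_queue() or the irq thread, it reaps
// completions first and then starts as many pending txns as resources allow.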
static int io_thread(void* arg) {
    nvme_device_t* nvme = arg;
    for (;;) {
        if (sync_completion_wait(&nvme->io_signal, ZX_TIME_INFINITE)) {
            break;
        }
        if (nvme->flags & FLAG_SHUTDOWN) {
            //TODO: cancel out pending IO
            zxlogf(INFO, "nvme: io thread exiting\n");
            break;
        }

        sync_completion_reset(&nvme->io_signal);

        // process completion messages
        io_process_cpls(nvme);

        // process work queue
        io_process_txns(nvme);
    }
    return 0;
}

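// block_impl protocol: queue a block operation. Reads and writes are placed
// on the pending list and handed to the io thread; flush is currently
// completed immediately (TODO), and anything else is rejected.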
static void nvme_queue(void* ctx, block_op_t* op, block_impl_queue_callback completion_cb,
                       void* cookie) {
    nvme_device_t* nvme = ctx;
    nvme_txn_t* txn = containerof(op, nvme_txn_t, op);
    txn->completion_cb = completion_cb;
    txn->cookie = cookie;

    switch (txn->op.command & BLOCK_OP_MASK) {
    case BLOCK_OP_READ:
        txn->opcode = NVME_OP_READ;
        break;
    case BLOCK_OP_WRITE:
        txn->opcode = NVME_OP_WRITE;
        break;
    case BLOCK_OP_FLUSH:
        // TODO
        txn_complete(txn, ZX_OK);
        return;
    default:
        txn_complete(txn, ZX_ERR_NOT_SUPPORTED);
        return;
    }

    if (txn->op.rw.length == 0) {
        txn_complete(txn, ZX_ERR_INVALID_ARGS);
        return;
    }
    // Transaction must fit within device
    if ((txn->op.rw.offset_dev >= nvme->info.block_count) ||
        (nvme->info.block_count - txn->op.rw.offset_dev < txn->op.rw.length)) {
        txn_complete(txn, ZX_ERR_OUT_OF_RANGE);
        return;
    }

    // convert vmo offset to a byte offset
    txn->op.rw.offset_vmo *= nvme->info.block_size;

    txn->pending_utxns = 0;
    txn->flags = 0;

    zxlogf(SPEW, "nvme: io: %s: %ublks @ blk#%zu\n",
           txn->opcode == NVME_OP_WRITE ? "wr" : "rd",
           txn->op.rw.length, txn->op.rw.offset_dev);

    mtx_lock(&nvme->lock);
    list_add_tail(&nvme->pending_txns, &txn->node);
    mtx_unlock(&nvme->lock);

    sync_completion_signal(&nvme->io_signal);
}

static void nvme_query(void* ctx, block_info_t* info_out, size_t* block_op_size_out) {
    nvme_device_t* nvme = ctx;
    *info_out = nvme->info;
    *block_op_size_out = sizeof(nvme_txn_t);
}

static zx_status_t nvme_ioctl(void* ctx, uint32_t op, const void* cmd, size_t cmdlen, void* reply,
                              size_t max, size_t* out_actual) {
    nvme_device_t* nvme = ctx;
    switch (op) {
    case IOCTL_BLOCK_GET_INFO: {
        if (max < sizeof(block_info_t)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }
        size_t sz;
        nvme_query(nvme, reply, &sz);
        *out_actual = sizeof(block_info_t);
        return ZX_OK;
    }
    default:
        return ZX_ERR_NOT_SUPPORTED;
    }
}

static zx_off_t nvme_get_size(void* ctx) {
    nvme_device_t* nvme = ctx;
    return nvme->info.block_count * nvme->info.block_size;
}

static zx_status_t nvme_suspend(void* ctx, uint32_t flags) {
    return ZX_OK;
}

static zx_status_t nvme_resume(void* ctx, uint32_t flags) {
    return ZX_OK;
}

static void nvme_release(void* ctx) {
    nvme_device_t* nvme = ctx;
    int r;

    zxlogf(INFO, "nvme: release\n");
    nvme->flags |= FLAG_SHUTDOWN;
    if (nvme->mmio.vmo != ZX_HANDLE_INVALID) {
        pci_enable_bus_master(&nvme->pci, false);
        zx_handle_close(nvme->bti);
        mmio_buffer_release(&nvme->mmio);
        // TODO: risks a handle use-after-close, will be resolved by IRQ api
        // changes coming soon
        zx_handle_close(nvme->irqh);
    }
    if (nvme->flags & FLAG_IRQ_THREAD_STARTED) {
        thrd_join(nvme->irqthread, &r);
    }
    if (nvme->flags & FLAG_IO_THREAD_STARTED) {
        sync_completion_signal(&nvme->io_signal);
        thrd_join(nvme->iothread, &r);
    }

    // error out any pending txns
    mtx_lock(&nvme->lock);
    nvme_txn_t* txn;
    while ((txn = list_remove_head_type(&nvme->active_txns, nvme_txn_t, node)) != NULL) {
        txn_complete(txn, ZX_ERR_PEER_CLOSED);
    }
    while ((txn = list_remove_head_type(&nvme->pending_txns, nvme_txn_t, node)) != NULL) {
        txn_complete(txn, ZX_ERR_PEER_CLOSED);
    }
    mtx_unlock(&nvme->lock);

    io_buffer_release(&nvme->iob);
    free(nvme);
}

static zx_protocol_device_t device_ops = {
    .version = DEVICE_OPS_VERSION,

    .ioctl = nvme_ioctl,
    .get_size = nvme_get_size,

    .suspend = nvme_suspend,
    .resume = nvme_resume,
    .release = nvme_release,
};

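// Log a fixed-width, space-padded identify string (model / serial /
// firmware) as a trimmed, NUL-terminated C string.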
static void infostring(const char* prefix, uint8_t* str, size_t len) {
    char tmp[len + 1];
    size_t i;
    for (i = 0; i < len; i++) {
        uint8_t c = str[i];
        if (c == 0) {
            break;
        }
        if ((c < ' ') || (c > 127)) {
            c = ' ';
        }
        tmp[i] = c;
    }
    tmp[i] = 0;
    while (i > 0) {
        i--;
        if (tmp[i] == ' ') {
            tmp[i] = 0;
        } else {
            break;
        }
    }
    zxlogf(INFO, "nvme: %s'%s'\n", prefix, tmp);
}

// Convenience accessors for BAR0 registers
#define rd32(r) readl(nvme->mmio.vaddr + NVME_REG_##r)
#define rd64(r) readll(nvme->mmio.vaddr + NVME_REG_##r)
#define wr32(v,r) writel(v, nvme->mmio.vaddr + NVME_REG_##r)
#define wr64(v,r) writell(v, nvme->mmio.vaddr + NVME_REG_##r)

// dedicated pages from the page pool
#define IDX_ADMIN_SQ  0
#define IDX_ADMIN_CQ  1
#define IDX_IO_SQ     2
#define IDX_IO_CQ     3
#define IDX_SCRATCH   4
#define IDX_UTXN_POOL 5 // this must always be last

#define IO_PAGE_COUNT (IDX_UTXN_POOL + UTXN_COUNT)

static inline uint64_t U64(uint8_t* x) {
    return *((uint64_t*) (void*) x);
}
static inline uint32_t U32(uint8_t* x) {
    return *((uint32_t*) (void*) x);
}
static inline uint32_t U16(uint8_t* x) {
    return *((uint16_t*) (void*) x);
}

#define WAIT_MS 5000

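// Bring the controller up: reset it, set up the admin queues, enable it,
// identify the controller and namespace 1, create one io submission /
// completion queue pair, and derive the transfer limits reported via
// block_info_t.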
static zx_status_t nvme_init(nvme_device_t* nvme) {
    uint32_t n = rd32(VS);
    uint64_t cap = rd64(CAP);

    zxlogf(INFO, "nvme: version %d.%d.%d\n", n >> 16, (n >> 8) & 0xFF, n & 0xFF);
    zxlogf(INFO, "nvme: page size: (MPSMIN): %u (MPSMAX): %u\n",
           (unsigned) (1 << NVME_CAP_MPSMIN(cap)),
           (unsigned) (1 << NVME_CAP_MPSMAX(cap)));
    zxlogf(INFO, "nvme: doorbell stride: %u\n", (unsigned) (1 << NVME_CAP_DSTRD(cap)));
    zxlogf(INFO, "nvme: timeout: %u ms\n", (unsigned) (1 << NVME_CAP_TO(cap)));
    zxlogf(INFO, "nvme: boot partition support (BPS): %c\n", NVME_CAP_BPS(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: supports NVM command set (CSS:NVM): %c\n", NVME_CAP_CSS_NVM(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: subsystem reset supported (NSSRS): %c\n", NVME_CAP_NSSRS(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: weighted-round-robin (AMS:WRR): %c\n", NVME_CAP_AMS_WRR(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: vendor-specific arbitration (AMS:VS): %c\n", NVME_CAP_AMS_VS(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: contiguous queues required (CQR): %c\n", NVME_CAP_CQR(cap) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: maximum queue entries supported (MQES): %u\n", ((unsigned) NVME_CAP_MQES(cap)) + 1);

    if ((1 << NVME_CAP_MPSMIN(cap)) > PAGE_SIZE) {
        zxlogf(ERROR, "nvme: minimum page size larger than platform page size\n");
        return ZX_ERR_NOT_SUPPORTED;
    }
    // allocate pages for various queues and the utxn scatter lists
    // TODO: these should all be RO to hardware apart from the scratch io page(s)
    if (io_buffer_init(&nvme->iob, nvme->bti, PAGE_SIZE * IO_PAGE_COUNT, IO_BUFFER_RW) ||
        io_buffer_physmap(&nvme->iob)) {
        zxlogf(ERROR, "nvme: could not allocate io buffers\n");
        return ZX_ERR_NO_MEMORY;
    }

    // initialize the microtransaction pool
    nvme->utxn_avail = 0x7FFFFFFFFFFFFFFFULL;
    for (unsigned n = 0; n < UTXN_COUNT; n++) {
        nvme->utxn[n].id = n;
        nvme->utxn[n].phys = nvme->iob.phys_list[IDX_UTXN_POOL + n];
        nvme->utxn[n].virt = nvme->iob.virt + (IDX_UTXN_POOL + n) * PAGE_SIZE;
    }

    if (rd32(CSTS) & NVME_CSTS_RDY) {
        zxlogf(INFO, "nvme: controller is active. resetting...\n");
        wr32(rd32(CC) & ~NVME_CC_EN, CC); // disable
    }

    // ensure previous shutdown (by us or bootloader) has completed
    unsigned ms_remain = WAIT_MS;
    while (rd32(CSTS) & NVME_CSTS_RDY) {
        if (--ms_remain == 0) {
            zxlogf(ERROR, "nvme: timed out waiting for CSTS ~RDY\n");
            return ZX_ERR_INTERNAL;
        }
        zx_nanosleep(zx_deadline_after(ZX_MSEC(1)));
    }

    zxlogf(INFO, "nvme: controller inactive. (after %u ms)\n", WAIT_MS - ms_remain);

    // configure admin submission and completion queues
    wr64(nvme->iob.phys_list[IDX_ADMIN_SQ], ASQ);
    wr64(nvme->iob.phys_list[IDX_ADMIN_CQ], ACQ);
    wr32(NVME_AQA_ASQS(SQMAX - 1) | NVME_AQA_ACQS(CQMAX - 1), AQA);

    zxlogf(INFO, "nvme: enabling\n");
    wr32(NVME_CC_EN | NVME_CC_AMS_RR | NVME_CC_MPS(0) |
         NVME_CC_IOCQES(NVME_CPL_SHIFT) |
         NVME_CC_IOSQES(NVME_CMD_SHIFT), CC);

    ms_remain = WAIT_MS;
    while (!(rd32(CSTS) & NVME_CSTS_RDY)) {
        if (--ms_remain == 0) {
            zxlogf(ERROR, "nvme: timed out waiting for CSTS RDY\n");
            return ZX_ERR_INTERNAL;
        }
        zx_nanosleep(zx_deadline_after(ZX_MSEC(1)));
    }
    zxlogf(INFO, "nvme: controller ready. (after %u ms)\n", WAIT_MS - ms_remain);

    // registers and buffers for admin queues
    nvme->io_admin_sq_tail_db = nvme->mmio.vaddr + NVME_REG_SQnTDBL(0, cap);
    nvme->io_admin_cq_head_db = nvme->mmio.vaddr + NVME_REG_CQnHDBL(0, cap);

    nvme->admin_sq = nvme->iob.virt + PAGE_SIZE * IDX_ADMIN_SQ;
    nvme->admin_sq_head = 0;
    nvme->admin_sq_tail = 0;

    nvme->admin_cq = nvme->iob.virt + PAGE_SIZE * IDX_ADMIN_CQ;
    nvme->admin_cq_head = 0;
    nvme->admin_cq_toggle = 1;

    // registers and buffers for IO queues
    nvme->io_sq_tail_db = nvme->mmio.vaddr + NVME_REG_SQnTDBL(1, cap);
    nvme->io_cq_head_db = nvme->mmio.vaddr + NVME_REG_CQnHDBL(1, cap);

    nvme->io_sq = nvme->iob.virt + PAGE_SIZE * IDX_IO_SQ;
    nvme->io_sq_head = 0;
    nvme->io_sq_tail = 0;

    nvme->io_cq = nvme->iob.virt + PAGE_SIZE * IDX_IO_CQ;
    nvme->io_cq_head = 0;
    nvme->io_cq_toggle = 1;

    // scratch page for admin ops
    void* scratch = nvme->iob.virt + PAGE_SIZE * IDX_SCRATCH;

    if (thrd_create_with_name(&nvme->irqthread, irq_thread, nvme, "nvme-irq-thread")) {
        zxlogf(ERROR, "nvme: cannot create irq thread\n");
        return ZX_ERR_INTERNAL;
    }
    nvme->flags |= FLAG_IRQ_THREAD_STARTED;

    if (thrd_create_with_name(&nvme->iothread, io_thread, nvme, "nvme-io-thread")) {
        zxlogf(ERROR, "nvme: cannot create io thread\n");
        return ZX_ERR_INTERNAL;
    }
    nvme->flags |= FLAG_IO_THREAD_STARTED;

    nvme_cmd_t cmd;

    // identify device
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_IDENTIFY);
    cmd.nsid = 0;
    cmd.reserved = 0;
    cmd.mptr = 0;
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_SCRATCH];
    cmd.dptr.prp[1] = 0;
    cmd.u.raw[0] = 1; // CNS 01

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: device identify op failed\n");
        return ZX_ERR_INTERNAL;
    }

    nvme_identify_t* ci = scratch;
    infostring("model: ", ci->MN, sizeof(ci->MN));
    infostring("serial number: ", ci->SN, sizeof(ci->SN));
    infostring("firmware: ", ci->FR, sizeof(ci->FR));

    if ((ci->SQES & 0xF) != NVME_CMD_SHIFT) {
        zxlogf(ERROR, "nvme: SQES minimum is not %ub\n", NVME_CMD_SIZE);
        return ZX_ERR_NOT_SUPPORTED;
    }
    if ((ci->CQES & 0xF) != NVME_CPL_SHIFT) {
        zxlogf(ERROR, "nvme: CQES minimum is not %ub\n", NVME_CPL_SIZE);
        return ZX_ERR_NOT_SUPPORTED;
    }
    zxlogf(INFO, "nvme: max outstanding commands: %u\n", ci->MAXCMD);

    uint32_t nscount = ci->NN;
    zxlogf(INFO, "nvme: max namespaces: %u\n", nscount);
    zxlogf(INFO, "nvme: scatter gather lists (SGL): %c %08x\n",
           (ci->SGLS & 3) ? 'Y' : 'N', ci->SGLS);

    // Maximum transfer is in units of 2^n * PAGESIZE, n == 0 means "infinite"
    nvme->max_xfer = 0xFFFFFFFF;
    if ((ci->MDTS != 0) && (ci->MDTS < (31 - PAGE_SHIFT))) {
        nvme->max_xfer = (1 << ci->MDTS) * PAGE_SIZE;
    }

    zxlogf(INFO, "nvme: max data transfer: %u bytes\n", nvme->max_xfer);
    zxlogf(INFO, "nvme: sanitize caps: %u\n", ci->SANICAP & 3);

    zxlogf(INFO, "nvme: abort command limit (ACL): %u\n", ci->ACL + 1);
    zxlogf(INFO, "nvme: asynch event req limit (AERL): %u\n", ci->AERL + 1);
    zxlogf(INFO, "nvme: firmware: slots: %u reset: %c slot1ro: %c\n", (ci->FRMW >> 1) & 3,
           (ci->FRMW & (1 << 4)) ? 'N' : 'Y', (ci->FRMW & 1) ? 'Y' : 'N');
    zxlogf(INFO, "nvme: host buffer: min/preferred: %u/%u pages\n", ci->HMMIN, ci->HMPRE);
    zxlogf(INFO, "nvme: capacity: total/unalloc: %zu/%zu\n", ci->TNVMCAP_LO, ci->UNVMCAP_LO);

    if (ci->VWC & 1) {
        nvme->flags |= FLAG_HAS_VWC;
    }
    uint32_t awun = ci->AWUN + 1;
    uint32_t awupf = ci->AWUPF + 1;
    zxlogf(INFO, "nvme: volatile write cache (VWC): %s\n", nvme->flags & FLAG_HAS_VWC ? "Y" : "N");
    zxlogf(INFO, "nvme: atomic write unit (AWUN)/(AWUPF): %u/%u blks\n", awun, awupf);

#define FEATURE(a,b) if (ci->a & a##_##b) zxlogf(INFO, "nvme: feature: %s\n", #b)
    FEATURE(OACS, DOORBELL_BUFFER_CONFIG);
    FEATURE(OACS, VIRTUALIZATION_MANAGEMENT);
    FEATURE(OACS, NVME_MI_SEND_RECV);
    FEATURE(OACS, DIRECTIVE_SEND_RECV);
    FEATURE(OACS, DEVICE_SELF_TEST);
    FEATURE(OACS, NAMESPACE_MANAGEMENT);
    FEATURE(OACS, FIRMWARE_DOWNLOAD_COMMIT);
    FEATURE(OACS, FORMAT_NVM);
    FEATURE(OACS, SECURITY_SEND_RECV);
    FEATURE(ONCS, TIMESTAMP);
    FEATURE(ONCS, RESERVATIONS);
    FEATURE(ONCS, SAVE_SELECT_NONZERO);
    FEATURE(ONCS, WRITE_UNCORRECTABLE);
    FEATURE(ONCS, COMPARE);

    // set feature (number of queues) to 1 iosq and 1 iocq
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_SET_FEATURE);
    cmd.u.raw[0] = NVME_FEATURE_NUMBER_OF_QUEUES;
    cmd.u.raw[1] = 0;

    nvme_cpl_t cpl;
    if (nvme_admin_txn(nvme, &cmd, &cpl) != ZX_OK) {
        zxlogf(ERROR, "nvme: set feature (number queues) op failed\n");
        return ZX_ERR_INTERNAL;
    }
    zxlogf(INFO, "cpl.cmd %08x\n", cpl.cmd);

    // create the IO completion queue
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_CREATE_IOCQ);
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_IO_CQ];
    cmd.u.raw[0] = ((CQMAX - 1) << 16) | 1; // queue size, queue id
    cmd.u.raw[1] = (0 << 16) | 2 | 1;       // irq vector, irq enable, phys contig

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: completion queue creation op failed\n");
        return ZX_ERR_INTERNAL;
    }

    // create the IO submit queue
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_CREATE_IOSQ);
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_IO_SQ];
    cmd.u.raw[0] = ((SQMAX - 1) << 16) | 1; // queue size, queue id
    cmd.u.raw[1] = (1 << 16) | 0 | 1;       // cqid, qprio, phys contig

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: submit queue creation op failed\n");
        return ZX_ERR_INTERNAL;
    }

    // identify namespace 1
    memset(&cmd, 0, sizeof(cmd));
    cmd.cmd = NVME_CMD_CID(0) | NVME_CMD_PRP | NVME_CMD_NORMAL | NVME_CMD_OPC(NVME_ADMIN_OP_IDENTIFY);
    cmd.nsid = 1;
    cmd.dptr.prp[0] = nvme->iob.phys_list[IDX_SCRATCH];

    if (nvme_admin_txn(nvme, &cmd, NULL) != ZX_OK) {
        zxlogf(ERROR, "nvme: namespace identify op failed\n");
        return ZX_ERR_INTERNAL;
    }

    nvme_identify_ns_t* ni = scratch;

    uint32_t nawun = (ni->NSFEAT & NSFEAT_LOCAL_ATOMIC_SIZES) ? (ni->NAWUN + 1U) : awun;
    uint32_t nawupf = (ni->NSFEAT & NSFEAT_LOCAL_ATOMIC_SIZES) ? (ni->NAWUPF + 1U) : awupf;
    zxlogf(INFO, "nvme: ns: atomic write unit (AWUN)/(AWUPF): %u/%u blks\n", nawun, nawupf);
    zxlogf(INFO, "nvme: ns: NABSN/NABO/NABSPF/NOIOB: %u/%u/%u/%u\n",
           ni->NABSN, ni->NABO, ni->NABSPF, ni->NOIOB);

    // table of block formats
    for (unsigned i = 0; i < 16; i++) {
        if (ni->LBAF[i]) {
            zxlogf(INFO, "nvme: ns: LBA FMT %02d: RP=%u LBADS=2^%ub MS=%ub\n",
                   i, NVME_LBAFMT_RP(ni->LBAF[i]), NVME_LBAFMT_LBADS(ni->LBAF[i]),
                   NVME_LBAFMT_MS(ni->LBAF[i]));
        }
    }

    zxlogf(INFO, "nvme: ns: LBA FMT #%u active\n", ni->FLBAS & 0xF);
    zxlogf(INFO, "nvme: ns: data protection: caps/set: 0x%02x/%u\n",
           ni->DPC & 0x3F, ni->DPS & 3);

    uint32_t fmt = ni->LBAF[ni->FLBAS & 0xF];

    zxlogf(INFO, "nvme: ns: size/cap/util: %zu/%zu/%zu blks\n", ni->NSSZ, ni->NCAP, ni->NUSE);

    nvme->info.block_count = ni->NSSZ;
    nvme->info.block_size = 1 << NVME_LBAFMT_LBADS(fmt);
    nvme->info.max_transfer_size = BLOCK_MAX_TRANSFER_UNBOUNDED;

    if (NVME_LBAFMT_MS(fmt)) {
        zxlogf(ERROR, "nvme: cannot handle LBA format with metadata\n");
        return ZX_ERR_NOT_SUPPORTED;
    }
    if ((nvme->info.block_size < 512) || (nvme->info.block_size > 32768)) {
        zxlogf(ERROR, "nvme: cannot handle LBA size of %u\n", nvme->info.block_size);
        return ZX_ERR_NOT_SUPPORTED;
    }

    // NVME r/w commands operate in block units, maximum of 64K:
    size_t max_bytes_per_cmd = ((size_t) nvme->info.block_size) * ((size_t) 65536);

    if (nvme->max_xfer > max_bytes_per_cmd) {
        nvme->max_xfer = max_bytes_per_cmd;
    }

    // The device may allow transfers larger than we are prepared
    // to handle. Clip to our limit.
    if (nvme->max_xfer > MAX_XFER) {
        nvme->max_xfer = MAX_XFER;
    }

    // convert to block units
    nvme->max_xfer /= nvme->info.block_size;
    zxlogf(INFO, "nvme: max transfer per r/w op: %u blocks (%u bytes)\n",
           nvme->max_xfer, nvme->max_xfer * nvme->info.block_size);

    device_make_visible(nvme->zxdev);
    return ZX_OK;
}

block_impl_protocol_ops_t block_ops = {
    .query = nvme_query,
    .queue = nvme_queue,
};

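// Driver entry point: map BAR0, configure a single interrupt (MSI-X, MSI, or
// legacy, in that order of preference), enable bus mastering, obtain a BTI,
// and add the (initially invisible) block device before running nvme_init().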
static zx_status_t nvme_bind(void* ctx, zx_device_t* dev) {
    nvme_device_t* nvme;
    if ((nvme = calloc(1, sizeof(nvme_device_t))) == NULL) {
        return ZX_ERR_NO_MEMORY;
    }
    list_initialize(&nvme->pending_txns);
    list_initialize(&nvme->active_txns);
    mtx_init(&nvme->lock, mtx_plain);
    mtx_init(&nvme->admin_lock, mtx_plain);

    if (device_get_protocol(dev, ZX_PROTOCOL_PCI, &nvme->pci)) {
        goto fail;
    }

    if (pci_map_bar_buffer(&nvme->pci, 0u, ZX_CACHE_POLICY_UNCACHED_DEVICE, &nvme->mmio)) {
        zxlogf(ERROR, "nvme: cannot map registers\n");
        goto fail;
    }

    uint32_t modes[3] = {
        ZX_PCIE_IRQ_MODE_MSI_X, ZX_PCIE_IRQ_MODE_MSI, ZX_PCIE_IRQ_MODE_LEGACY,
    };
    uint32_t nirq = 0;
    for (unsigned n = 0; n < countof(modes); n++) {
        if ((pci_query_irq_mode(&nvme->pci, modes[n], &nirq) == ZX_OK) &&
            (pci_set_irq_mode(&nvme->pci, modes[n], 1) == ZX_OK)) {
            zxlogf(INFO, "nvme: irq mode %u, irq count %u (#%u)\n", modes[n], nirq, n);
            goto irq_configured;
        }
    }
    zxlogf(ERROR, "nvme: could not configure irqs\n");
    goto fail;

irq_configured:
    if (pci_map_interrupt(&nvme->pci, 0, &nvme->irqh) != ZX_OK) {
        zxlogf(ERROR, "nvme: could not map irq\n");
        goto fail;
    }
    if (pci_enable_bus_master(&nvme->pci, true)) {
        zxlogf(ERROR, "nvme: cannot enable bus mastering\n");
        goto fail;
    }
    if (pci_get_bti(&nvme->pci, 0, &nvme->bti) != ZX_OK) {
        zxlogf(ERROR, "nvme: cannot obtain bti handle\n");
        goto fail;
    }

    device_add_args_t args = {
        .version = DEVICE_ADD_ARGS_VERSION,
        .name = "nvme",
        .ctx = nvme,
        .ops = &device_ops,
        .flags = DEVICE_ADD_INVISIBLE,
        .proto_id = ZX_PROTOCOL_BLOCK_IMPL,
        .proto_ops = &block_ops,
    };

    if (device_add(dev, &args, &nvme->zxdev)) {
        goto fail;
    }

    if (nvme_init(nvme) != ZX_OK) {
        zxlogf(ERROR, "nvme: init failed\n");
        device_remove(nvme->zxdev);
        return ZX_ERR_INTERNAL;
    }

    return ZX_OK;

fail:
    nvme_release(nvme);
    return ZX_ERR_NOT_SUPPORTED;
}

static zx_driver_ops_t driver_ops = {
    .version = DRIVER_OPS_VERSION,
    .bind = nvme_bind,
};

ZIRCON_DRIVER_BEGIN(nvme, driver_ops, "zircon", "0.1", 4)
    BI_ABORT_IF(NE, BIND_PROTOCOL, ZX_PROTOCOL_PCI),
    BI_ABORT_IF(NE, BIND_PCI_CLASS, 1),     // Mass Storage
    BI_ABORT_IF(NE, BIND_PCI_SUBCLASS, 8),  // NVM
    BI_MATCH_IF(EQ, BIND_PCI_INTERFACE, 2), // NVMHCI
ZIRCON_DRIVER_END(nvme)