1 /*
2 * Copyright (c) 2006-2023, RT-Thread Development Team
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Change Logs:
7 * Date Author Notes
8 * 2023-02-25 GuEe-GUI the first version
9 */
10
11 #include <rthw.h>
12 #include <rtthread.h>
13 #include <rtdevice.h>
14
15 #define DBG_TAG "rtdm.nvme"
16 #define DBG_LVL DBG_INFO
17 #include <rtdbg.h>
18
19 static struct rt_dm_ida nvme_controller_ida = RT_DM_IDA_INIT(CUSTOM);
20 static struct rt_dm_ida nvme_ida = RT_DM_IDA_INIT(NVME);
21
22 static RT_DEFINE_SPINLOCK(nvme_lock);
23 static rt_list_t nvme_nodes = RT_LIST_OBJECT_INIT(nvme_nodes);
24
25 rt_inline rt_uint32_t nvme_readl(struct rt_nvme_controller *nvme, int offset)
26 {
27 return HWREG32(nvme->regs + offset);
28 }
29
30 rt_inline void nvme_writel(struct rt_nvme_controller *nvme, int offset, rt_uint32_t value)
31 {
32 HWREG32(nvme->regs + offset) = value;
33 }
34
35 rt_inline rt_uint64_t nvme_readq(struct rt_nvme_controller *nvme, int offset)
36 {
37 rt_uint32_t lo32, hi32;
38
39 lo32 = HWREG32(nvme->regs + offset);
40 hi32 = HWREG32(nvme->regs + offset + 4);
41
42 return ((rt_uint64_t)hi32 << 32) + lo32;
43 }
44
45 rt_inline void nvme_writeq(struct rt_nvme_controller *nvme, int offset, rt_uint64_t value)
46 {
47 nvme_writel(nvme, offset, (rt_uint32_t)(value & 0xffffffff));
48 nvme_writel(nvme, offset + 4, (rt_uint32_t)(value >> 32));
49 }
50
51 static rt_err_t nvme_poll_csts(struct rt_nvme_controller *nvme,
52 rt_uint32_t mask, rt_uint32_t value)
53 {
54 rt_tick_t timeout;
55
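/* CAP.TO is reported in 500 ms units, so CSTS is polled for up to CAP.TO * 500 ms. */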
56 timeout = rt_tick_from_millisecond(RT_NVME_CAP_TIMEOUT(nvme->cap) * 500);
57 timeout += rt_tick_get();
58
59 do {
60 if ((nvme_readl(nvme, RT_NVME_REG_CSTS) & mask) == value)
61 {
62 return RT_EOK;
63 }
64
65 rt_hw_cpu_relax();
66 } while (rt_tick_get() < timeout);
67
68 return -RT_ETIMEOUT;
69 }
70
71 static rt_err_t nvme_enable_ctrl(struct rt_nvme_controller *nvme)
72 {
73 nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
74 nvme->ctrl_config |= RT_NVME_CC_ENABLE;
75 nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);
76
77 return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, RT_NVME_CSTS_RDY);
78 }
79
80 static rt_err_t nvme_disable_ctrl(struct rt_nvme_controller *nvme)
81 {
82 nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
83 nvme->ctrl_config &= ~RT_NVME_CC_ENABLE;
84 nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);
85
86 return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, 0);
87 }
88
89 static rt_err_t nvme_shutdown_ctrl(struct rt_nvme_controller *nvme)
90 {
91 nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
92 nvme->ctrl_config |= RT_NVME_CC_SHN_NORMAL;
93 nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);
94
95 return nvme_poll_csts(nvme, RT_NVME_CSTS_SHST_MASK, RT_NVME_CSTS_SHST_CMPLT);
96 }
97
98 rt_inline rt_le16_t nvme_next_cmdid(struct rt_nvme_controller *nvme)
99 {
100 return rt_cpu_to_le16((rt_uint16_t)rt_atomic_add(&nvme->cmdid, 1));
101 }
102
103 static rt_err_t nvme_submit_cmd(struct rt_nvme_queue *queue,
104 struct rt_nvme_command *cmd)
105 {
106 rt_ubase_t level;
107 rt_err_t err = RT_EOK;
108 rt_uint16_t tail, head;
109 struct rt_nvme_controller *nvme = queue->nvme;
110
111 _retry:
112 level = rt_spin_lock_irqsave(&queue->lock);
113
114 tail = queue->sq_tail;
115 head = queue->cq_head;
116
117 if (tail + 1 == head)
118 {
119 /* IO queue is full, waiting for the last IO command to complete. */
120 rt_spin_unlock_irqrestore(&queue->lock, level);
121
122 rt_thread_yield();
123
124 goto _retry;
125 }
126
127 cmd->common.cmdid = nvme_next_cmdid(nvme);
128 rt_memcpy(&queue->sq_cmds[tail], cmd, sizeof(*cmd));
129
130 if (nvme->ops->submit_cmd)
131 {
132 if ((err = nvme->ops->submit_cmd(queue, cmd)))
133 {
/* Don't leak the queue lock on the error path */
rt_spin_unlock_irqrestore(&queue->lock, level);
134 return err;
135 }
136 }
137
138 if (++tail == queue->depth)
139 {
140 tail = 0;
141 }
142 HWREG32(queue->doorbell) = tail;
143 queue->sq_tail = tail;
144
145 queue->cmd = cmd;
146 queue->err = RT_EOK;
147
148 rt_spin_unlock_irqrestore(&queue->lock, level);
149
150 err = rt_completion_wait(&queue->done,
151 rt_tick_from_millisecond(queue->qid != 0 ? RT_WAITING_FOREVER : 60));
152
153 return err ? : queue->err;
154 }
155
156 static rt_err_t nvme_set_features_simple(struct rt_nvme_controller *nvme,
157 rt_uint32_t fid, rt_uint32_t dword11)
158 {
159 struct rt_nvme_command cmd;
160
161 rt_memset(&cmd, 0, sizeof(cmd));
162 cmd.features.opcode = RT_NVME_ADMIN_OPCODE_SET_FEATURES;
163 cmd.features.fid = rt_cpu_to_le32(fid);
164 cmd.features.dword11 = rt_cpu_to_le32(dword11);
165
166 return nvme_submit_cmd(&nvme->admin_queue, &cmd);
167 }
168
169 static rt_err_t nvme_submit_io_cmd(struct rt_nvme_controller *nvme,
170 struct rt_nvme_command *cmd)
171 {
172 rt_uint16_t qid;
173
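/*
 * Pick an I/O queue round-robin per CPU: each CPU advances its own ioqid
 * counter in steps of RT_CPUS_NR, so concurrent submitters from different
 * CPUs tend to land on different I/O queues.
 */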
174 qid = rt_atomic_add(&nvme->ioqid[rt_hw_cpu_id()], RT_CPUS_NR);
175 qid %= nvme->io_queue_max;
176
177 return nvme_submit_cmd(&nvme->io_queues[qid], cmd);
178 }
179
180 /*
181 * PRP Mode:
182 *
183 * |63 n+1|n 0|
184 * +----------------------------------------+----------+---+---+
185 * | Page Base Address | Offset | 0 | 0 |
186 * +----------------------------------------+----------+---+---+
187 * |
188 * v
189 * Host Physical Pages
190 * +----------------------------+
191 * +--------------+----------+ | Page k |
192 * | PRP Entry1 | Offset +---------->+----------------------------+
193 * +--------------+----------+ | Page k + 1 |
194 * +----------------------------+
195 * ...
196 * +----------------------------+
197 * +--------------+----------+ | Page k + m |
198 * | PRP Entry2 | 0 +---------->+----------------------------+
199 * +--------------+----------+ | Page k + m + 1 |
200 * +----------------------------+
201 * PRP List (In PRP Entry2):
202 *
203 * |63 n+1|n 0|
204 * +----------------------------------------+------------------+
205 * | Page Base Address k | 0h |
206 * +----------------------------------------+------------------+
207 * | Page Base Address k + 1 | 0h |
208 * +----------------------------------------+------------------+
209 * | ... |
210 * +----------------------------------------+------------------+
211 * | Page Base Address k + m | 0h |
212 * +----------------------------------------+------------------+
213 * | Page Base Address k + m + 1 | 0h |
214 * +----------------------------------------+------------------+
215 *
216 * SGL Mode:
217 * +----- Non-transport
218 * LBA /
219 * +---------------+---------------+-------/-------+---------------+
220 * | 3KB | 4KB | 2KB | 4KB |
221 * +-------+-------+-------+-------+---------------+--------+------+
222 * | +-------------------------+ |
223 * | | |
224 * | +--------------------|------+
225 * | | |
226 * +-------v-------+ +-------v-------+ +-------v-------+
227 * | A MEM BLOCK | | B MEM BLOCK | | C MEM BLOCK |
228 * +-------^-------+ +-------^-------+ +-------^-------+
229 * | | |
230 * +----------------+ | |
231 * | | |
232 * Segment(0) | | |
233 * +----------+----------+ | | |
234 * | Address: A +--+ | |
235 * +----------+----------+ | |
236 * | Type: 0h | Len: 3KB | | |
237 * +----------+----------+ | |
238 * | Address: Segment(1) +--+ | |
239 * +----------+----------+ | | |
240 * | Type: 2h | Len: 48 | | | |
241 * +----------+----------+ | | |
242 * | | |
243 * +------------------------+ | |
244 * | | |
245 * v | |
246 * Segment(1) | |
247 * +----------+----------+ | |
248 * | Address: B +------+ |
249 * +----------+----------+ |
250 * | Type: 0h | Len: 4KB | |
251 * +----------+----------+ |
252 * | Address: <NULL> | |
253 * +----------+----------+ |
254 * | Type: 1h | Len: 2KB | |
255 * +----------+----------+ |
256 * | Address: Segment(2) +--+ |
257 * +----------+----------+ | |
258 * | Type: 0h | Len: 16 | | |
259 * +----------+----------+ | |
260 * | |
261 * +------------------------+ |
262 * | |
263 * v |
264 * Segment(2) |
265 * +----------+----------+ |
266 * | Address: C +---------------------------+
267 * +----------+----------+
268 * | Type: 0h | Len: 4KB |
269 * +----------+----------+
270 */
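/*
 * A sizing sketch for the PRP path below, assuming a 4 KiB controller page:
 * a page-aligned 4 MiB transfer uses PRP1 for the first 4 KiB and a PRP list
 * for the remaining 1023 pages. One list page holds 4096 / 8 = 512 entries,
 * the last of which chains to the next list page (511 data entries per page),
 * so pages = RT_DIV_ROUND_UP(1023 - 1, 511) = 2 list pages are allocated.
 */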
271
272 static rt_ssize_t nvme_blk_rw(struct rt_nvme_device *ndev, rt_off_t slba,
273 rt_ubase_t buffer_dma, rt_size_t lbas, rt_uint8_t opcode)
274 {
275 rt_err_t err;
276 rt_uint16_t max_lbas;
277 rt_uint32_t lba_shift;
278 rt_size_t tlbas;
279 rt_ssize_t data_length;
280 struct rt_nvme_command cmd;
281 struct rt_nvme_controller *nvme = ndev->ctrl;
282
283 rt_memset(&cmd, 0, sizeof(cmd));
284 cmd.rw.opcode = opcode;
285 cmd.rw.flags = nvme->sgl_mode << RT_NVME_CMD_FLAGS_PSDT_SHIFT;
286 cmd.rw.nsid = rt_cpu_to_le32(ndev->nsid);
287
288 tlbas = lbas;
289 lba_shift = ndev->lba_shift;
290 max_lbas = 1 << (nvme->max_transfer_shift - lba_shift);
291
292 if (nvme->sgl_mode)
293 {
294 while ((rt_ssize_t)lbas > 0)
295 {
296 if (lbas < max_lbas)
297 {
298 max_lbas = (rt_uint16_t)lbas;
299 }
300
301 data_length = max_lbas << lba_shift;
302
303 cmd.rw.sgl.adddress = rt_cpu_to_le64(buffer_dma);
304 cmd.rw.sgl.length = rt_cpu_to_le32(data_length);
305 cmd.rw.sgl.sgl_identify = SGL_DESC_TYPE_DATA_BLOCK;
306 cmd.rw.slba = rt_cpu_to_le64(slba);
307 cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);
308
309 if ((err = nvme_submit_io_cmd(nvme, &cmd)))
310 {
311 tlbas -= lbas;
312 break;
313 }
314
315 lbas -= max_lbas;
316 slba += max_lbas;
317 buffer_dma += data_length;
318 }
319 }
320 else
321 {
322 void *prp_list = RT_NULL;
323 rt_size_t prp_list_size = 0, page_size;
324
325 page_size = nvme->page_size;
326
327 while ((rt_ssize_t)lbas > 0)
328 {
329 rt_uint64_t prp2_addr, dma_addr;
330 rt_ssize_t remain_length, page_offset;
331
332 if (lbas < max_lbas)
333 {
334 max_lbas = (rt_uint16_t)lbas;
335 }
336
337 /*
338 * PRP transfer:
339 * 1. data_length <= 4KB:
340 * prp1 = buffer_dma
341 * prp2 = 0
342 *
343 * 2. 4KB < data_length <= 8KB:
344 * prp1 = buffer_dma
345 * prp2 = buffer_dma + 4KB
346 *
347 * 3. 8KB < data_length:
348 * prp1 = buffer_dma (first 4KB)
349 * prp2 = PRP list describing buffer_dma + 4KB onwards
350 */
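/*
 * Example of the offset handling below (assuming 4 KiB pages): for a buffer
 * starting at page offset 0x200, PRP1 covers the first 4096 - 0x200 bytes
 * and remain_length is what is left for PRP2 or the PRP list.
 */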
351 dma_addr = buffer_dma;
352 page_offset = buffer_dma & (page_size - 1);
353 data_length = max_lbas << lba_shift;
354 remain_length = data_length - (page_size - page_offset);
355
356 do {
357 rt_size_t prps_per_page, prps, pages;
358 rt_uint64_t *prp_list_ptr, prp_list_dma;
359
360 if (remain_length <= 0)
361 {
362 prp2_addr = 0;
363 break;
364 }
365
366 if (remain_length)
367 {
368 dma_addr += (page_size - page_offset);
369 }
370
371 if (remain_length <= page_size)
372 {
373 prp2_addr = dma_addr;
374 break;
375 }
376
377 prps_per_page = page_size / sizeof(rt_uint64_t);
378 prps = RT_DIV_ROUND_UP(remain_length, page_size);
379 pages = RT_DIV_ROUND_UP(prps - 1, prps_per_page - 1);
380
381 if (prps > prp_list_size)
382 {
383 if (prp_list)
384 {
385 rt_free_align(prp_list);
386 }
387
388 prp_list = rt_malloc_align(pages * page_size, page_size);
389
390 if (!prp_list)
391 {
392 LOG_D("No memory to create a PRP List");
393 /* Ask user to try again */
394 return tlbas - lbas;
395 }
396
397 prp_list_size = pages * (prps_per_page - 1) + 1;
398 }
399 prp_list_ptr = prp_list;
400 prp_list_dma = (rt_uint64_t)rt_kmem_v2p(prp_list_ptr);
401
402 prp2_addr = prp_list_dma;
403
404 for (int i = 0; prps; --prps, ++i)
405 {
406 /* Last slot of a list page: chain to the next list page if entries remain */
407 if ((i == (prps_per_page - 1)) && prps > 1)
408 {
409 prp_list_dma += page_size;
410 *prp_list_ptr++ = rt_cpu_to_le64(prp_list_dma);
411
412 /* Start to fill the next PRP list page */
413 i = 0;
414 }
415
416 *prp_list_ptr++ = rt_cpu_to_le64(dma_addr);
417 dma_addr += page_size;
418 }
419
420 rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, prp_list, pages * page_size);
421 } while (0);
422
423 cmd.rw.prp1 = rt_cpu_to_le64(buffer_dma);
424 cmd.rw.prp2 = rt_cpu_to_le64(prp2_addr);
425 cmd.rw.slba = rt_cpu_to_le64(slba);
426 cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);
427
428 if ((err = nvme_submit_io_cmd(nvme, &cmd)))
429 {
430 tlbas -= lbas;
431 break;
432 }
433
434 lbas -= max_lbas;
435 slba += max_lbas;
436 buffer_dma += data_length;
437 }
438
439 if (prp_list)
440 {
441 rt_free_align(prp_list);
442 }
443 }
444
445 return tlbas;
446 }
447
448 static rt_ssize_t nvme_blk_read(struct rt_blk_disk *disk, rt_off_t sector,
449 void *buffer, rt_size_t sector_count)
450 {
451 rt_ssize_t res;
452 rt_uint32_t page_bits;
453 rt_size_t buffer_size;
454 rt_ubase_t buffer_dma;
455 void *temp_buffer = RT_NULL;
456 struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
457 struct rt_nvme_controller *nvme = ndev->ctrl;
458
459 buffer_size = (1 << ndev->lba_shift) * sector_count;
460 buffer_dma = (rt_ubase_t)rt_kmem_v2p(buffer);
461
462 if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
463 (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
464 {
465 LOG_D("DMA PRP direct %s buffer MUST 4-bytes or page aligned", "read");
466
467 page_bits = rt_page_bits(buffer_size);
468 temp_buffer = rt_pages_alloc(page_bits);
469
470 if (!temp_buffer)
471 {
472 return -RT_ENOMEM;
473 }
474
475 buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);
476 }
477
478 res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_READ);
479
480 if (res > 0)
481 {
482 if (res != sector_count)
483 {
484 /*
485 * Only recompute the size when the transfer came up short; the equality
486 * check above is cheaper than always doing the multiplication.
487 */
488 buffer_size = res * (1 << ndev->lba_shift);
489 }
490
491 if (temp_buffer)
492 {
493 rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, temp_buffer, buffer_size);
494 rt_memcpy(buffer, temp_buffer, buffer_size);
495 }
496 else
497 {
498 rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, buffer, buffer_size);
499 }
500 }
501
502 if (temp_buffer)
503 {
504 rt_pages_free(temp_buffer, page_bits);
505 }
506
507 return res;
508 }
509
510 static rt_ssize_t nvme_blk_write(struct rt_blk_disk *disk, rt_off_t sector,
511 const void *buffer, rt_size_t sector_count)
512 {
513 rt_ssize_t res;
514 rt_uint32_t page_bits;
515 rt_size_t buffer_size;
516 rt_ubase_t buffer_dma;
517 void *temp_buffer = RT_NULL;
518 struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
519 struct rt_nvme_controller *nvme = ndev->ctrl;
520
521 buffer_size = (1 << ndev->lba_shift) * sector_count;
522 buffer_dma = (rt_ubase_t)rt_kmem_v2p((void *)buffer);
523
524 if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
525 (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
526 {
527 LOG_D("DMA PRP direct %s buffer MUST 4-bytes or page aligned", "write");
528
529 page_bits = rt_page_bits(buffer_size);
530 temp_buffer = rt_pages_alloc(page_bits);
531
532 if (!temp_buffer)
533 {
534 return -RT_ENOMEM;
535 }
536
537 buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);
538
539 rt_memcpy(temp_buffer, buffer, buffer_size);
540 buffer = temp_buffer;
541 }
542
543 rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)buffer, buffer_size);
544
545 res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_WRITE);
546
547 if (temp_buffer)
548 {
549 rt_pages_free(temp_buffer, page_bits);
550 }
551
552 return res;
553 }
554
555 static rt_err_t nvme_blk_getgeome(struct rt_blk_disk *disk,
556 struct rt_device_blk_geometry *geometry)
557 {
558 struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
559
560 geometry->bytes_per_sector = 1 << ndev->lba_shift;
561 geometry->block_size = 1 << ndev->lba_shift;
562 geometry->sector_count = rt_le64_to_cpu(ndev->id.nsze);
563
564 return RT_EOK;
565 }
566
567 static rt_err_t nvme_blk_sync(struct rt_blk_disk *disk)
568 {
569 struct rt_nvme_command cmd;
570 struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
571
572 rt_memset(&cmd, 0, sizeof(cmd));
573 cmd.common.opcode = RT_NVME_CMD_FLUSH;
574 cmd.common.nsid = rt_cpu_to_le32(ndev->nsid);
575
576 return nvme_submit_io_cmd(ndev->ctrl, &cmd);
577 }
578
579 static rt_err_t nvme_blk_erase(struct rt_blk_disk *disk)
580 {
581 rt_err_t err = RT_EOK;
582 rt_ssize_t slba, lbas, max_lbas;
583 struct rt_nvme_command cmd;
584 struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
585 struct rt_nvme_controller *nvme = ndev->ctrl;
586
587 if (!nvme->write_zeroes)
588 {
589 return -RT_ENOSYS;
590 }
591
592 rt_memset(&cmd, 0, sizeof(cmd));
593 cmd.write_zeroes.opcode = RT_NVME_CMD_WRITE_ZEROES;
594 cmd.write_zeroes.nsid = rt_cpu_to_le32(ndev->nsid);
595
596 slba = 0;
597 lbas = rt_le64_to_cpu(ndev->id.nsze);
598 max_lbas = 1 << (nvme->max_transfer_shift - ndev->lba_shift);
599
600 while ((rt_ssize_t)lbas > 0)
601 {
602 if (lbas < max_lbas)
603 {
604 max_lbas = (rt_uint16_t)lbas;
605 }
606
607 cmd.write_zeroes.slba = rt_cpu_to_le64(slba);
608 cmd.write_zeroes.length = rt_cpu_to_le16(max_lbas - 1);
609
610 if ((err = nvme_submit_io_cmd(nvme, &cmd)))
611 {
612 break;
613 }
614
615 lbas -= max_lbas;
616 slba += max_lbas;
617 }
618
619 return err;
620 }
621
622 static rt_err_t nvme_blk_autorefresh(struct rt_blk_disk *disk, rt_bool_t is_auto)
623 {
624 struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
625 struct rt_nvme_controller *nvme = ndev->ctrl;
626
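/*
 * Per the NVMe spec, the Volatile Write Cache feature only exists when the
 * controller reports a VWC; bit 0 of Dword 11 enables or disables the cache,
 * hence the !!is_auto passed below.
 */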
627 if (nvme->volatile_write_cache & RT_NVME_CTRL_VWC_PRESENT)
628 {
629 return nvme_set_features_simple(nvme, RT_NVME_FEAT_VOLATILE_WC, !!is_auto);
630 }
631 else if (!is_auto)
632 {
633 return RT_EOK;
634 }
635
636 return -RT_ENOSYS;
637 }
638
639 static const struct rt_blk_disk_ops nvme_blk_ops =
640 {
641 .read = nvme_blk_read,
642 .write = nvme_blk_write,
643 .getgeome = nvme_blk_getgeome,
644 .sync = nvme_blk_sync,
645 .erase = nvme_blk_erase,
646 .autorefresh = nvme_blk_autorefresh,
647 };
648
649 static void nvme_queue_isr(int irqno, void *param)
650 {
651 rt_ubase_t level;
652 rt_uint16_t head, phase, status;
653 struct rt_nvme_queue *queue = param;
654 struct rt_nvme_controller *nvme = queue->nvme;
655
656 level = rt_spin_lock_irqsave(&queue->lock);
657
658 head = queue->cq_head;
659 phase = queue->cq_phase;
660 status = HWREG16(&queue->cq_entry[head].status);
661 status = rt_le16_to_cpu(status);
662
663 if ((status & 0x01) == phase)
664 {
665 if ((status >> 1))
666 {
667 queue->err = -RT_EIO;
668 goto _end_cmd;
669 }
670
671 if (nvme->ops->complete_cmd)
672 {
673 nvme->ops->complete_cmd(queue, queue->cmd);
674 }
675
676 _end_cmd:
677 if (++head == queue->depth)
678 {
679 head = 0;
680 phase = !phase;
681 }
682
683 HWREG32(queue->doorbell + nvme->doorbell_stride) = head;
684 queue->cq_head = head;
685 queue->cq_phase = phase;
686
687 rt_completion_done(&queue->done);
688 }
689
690 rt_spin_unlock_irqrestore(&queue->lock, level);
691 }
692
693 static rt_err_t nvme_identify(struct rt_nvme_controller *nvme,
694 rt_uint32_t nsid, rt_uint32_t cns, void *data)
695 {
696 rt_err_t err;
697 rt_uint32_t page_size = nvme->page_size;
698 rt_ubase_t data_phy = (rt_ubase_t)rt_kmem_v2p(data);
699 int offset = data_phy & (page_size - 1);
700 struct rt_nvme_command cmd;
701
702 rt_memset(&cmd, 0, sizeof(cmd));
703 cmd.identify.opcode = RT_NVME_ADMIN_OPCODE_IDENTIFY;
704 cmd.identify.nsid = rt_cpu_to_le32(nsid);
705 cmd.identify.prp1 = rt_cpu_to_le64(data_phy);
706
707 if (sizeof(struct rt_nvme_id_ctrl) <= page_size - offset)
708 {
709 cmd.identify.prp2 = 0;
710 }
711 else
712 {
713 data_phy += (page_size - offset);
714 cmd.identify.prp2 = rt_cpu_to_le64(data_phy);
715 }
716 cmd.identify.cns = rt_cpu_to_le32(cns);
717
718 rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, data, sizeof(struct rt_nvme_id_ctrl));
719
720 if (!(err = nvme_submit_cmd(&nvme->admin_queue, &cmd)))
721 {
722 rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, data, sizeof(struct rt_nvme_id_ctrl));
723 }
724
725 return err;
726 }
727
728 static rt_err_t nvme_attach_queue(struct rt_nvme_queue *queue, rt_uint8_t opcode)
729 {
730 struct rt_nvme_command cmd;
731 struct rt_nvme_controller *nvme = queue->nvme;
732 rt_uint16_t flags = RT_NVME_QUEUE_PHYS_CONTIG;
733
734 rt_memset(&cmd, 0, sizeof(cmd));
735
736 if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_CQ)
737 {
738 cmd.create_cq.opcode = opcode;
739 cmd.create_cq.prp1 = rt_cpu_to_le64(queue->cq_entry_phy);
740 cmd.create_cq.cqid = rt_cpu_to_le16(queue->qid);
741 cmd.create_cq.qsize = rt_cpu_to_le16(queue->depth - 1);
742 cmd.create_cq.cq_flags = rt_cpu_to_le16(flags | RT_NVME_CQ_IRQ_ENABLED);
743 cmd.create_cq.irq_vector = rt_cpu_to_le16(nvme->irqs_nr > 1 ? queue->qid : 0);
744 }
745 else if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_SQ)
746 {
747 cmd.create_sq.opcode = opcode;
748 cmd.create_sq.prp1 = rt_cpu_to_le64(queue->sq_cmds_phy);
749 cmd.create_sq.sqid = rt_cpu_to_le16(queue->qid);
750 cmd.create_sq.qsize = rt_cpu_to_le16(queue->depth - 1);
751 cmd.create_sq.sq_flags = rt_cpu_to_le16(flags | RT_NVME_SQ_PRIO_MEDIUM);
752 cmd.create_sq.cqid = rt_cpu_to_le16(queue->qid);
753 }
754 else
755 {
756 LOG_E("What the fuck opcode = %x", opcode);
757 RT_ASSERT(0);
758 }
759
760 return nvme_submit_cmd(&nvme->admin_queue, &cmd);
761 }
762
763 rt_inline rt_err_t nvme_attach_queue_sq(struct rt_nvme_queue *queue)
764 {
765 return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_SQ);
766 }
767
768 rt_inline rt_err_t nvme_attach_queue_cq(struct rt_nvme_queue *queue)
769 {
770 return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_CQ);
771 }
772
773 static rt_err_t nvme_detach_queue(struct rt_nvme_queue *queue,
774 rt_uint8_t opcode)
775 {
776 struct rt_nvme_command cmd;
777 struct rt_nvme_controller *nvme = queue->nvme;
778
779 rt_memset(&cmd, 0, sizeof(cmd));
780 cmd.delete_queue.opcode = opcode;
781 cmd.delete_queue.qid = rt_cpu_to_le16(queue->qid);
782
783 return nvme_submit_cmd(&nvme->admin_queue, &cmd);
784 }
785
786 rt_inline rt_ubase_t nvme_queue_dma_flags(void)
787 {
788 return RT_DMA_F_NOCACHE | RT_DMA_F_LINEAR;
789 }
790
791 static void nvme_free_queue(struct rt_nvme_queue *queue)
792 {
793 rt_ubase_t dma_flags;
794 struct rt_nvme_controller *nvme = queue->nvme;
795
796 if (nvme->ops->cleanup_queue)
797 {
798 rt_err_t err;
799
800 if ((err = nvme->ops->cleanup_queue(queue)))
801 {
802 LOG_W("Cleanup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));
803 }
804 }
805
806 dma_flags = nvme_queue_dma_flags();
807
808 if (queue->sq_cmds)
809 {
810 rt_dma_free(nvme->dev, sizeof(*queue->sq_cmds) * queue->depth,
811 queue->sq_cmds, queue->sq_cmds_phy, dma_flags);
812 }
813
814 if (queue->cq_entry)
815 {
816 rt_dma_free(nvme->dev, sizeof(*queue->cq_entry) * queue->depth,
817 queue->cq_entry, queue->cq_entry_phy, dma_flags);
818 }
819 }
820
821 static struct rt_nvme_queue *nvme_alloc_queue(struct rt_nvme_controller *nvme,
822 int qid, int depth)
823 {
824 rt_err_t err;
825 rt_ubase_t dma_flags;
826 struct rt_nvme_queue *queue = &nvme->queue[qid];
827
828 rt_memset(queue, 0, sizeof(*queue));
829
830 queue->nvme = nvme;
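/*
 * Doorbell layout per the NVMe spec: the SQ y tail doorbell sits at stride
 * slot 2*y and the CQ y head doorbell at slot 2*y + 1, each slot being
 * 4 << CAP.DSTRD bytes wide. Since doorbell_tbl is a 32-bit register array,
 * indexing by qid * 2 * doorbell_stride lands on SQyTDBL, and the ISR adds
 * doorbell_stride to reach CQyHDBL.
 */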
831 queue->doorbell = &nvme->doorbell_tbl[qid * 2 * nvme->doorbell_stride];
832 queue->qid = qid;
833 queue->depth = depth;
834 queue->cq_head = 0;
835 queue->cq_phase = 1;
836 rt_completion_init(&queue->done);
837 rt_spin_lock_init(&queue->lock);
838
839 dma_flags = nvme_queue_dma_flags();
840
841 /* struct rt_nvme_command */
842 queue->sq_cmds = rt_dma_alloc(nvme->dev,
843 sizeof(*queue->sq_cmds) * depth, &queue->sq_cmds_phy, dma_flags);
844
845 if (!queue->sq_cmds)
846 {
847 err = -RT_ENOMEM;
848 goto _fail;
849 }
850
851 /* struct rt_nvme_completion */
852 queue->cq_entry = rt_dma_alloc(nvme->dev,
853 sizeof(*queue->cq_entry) * depth, &queue->cq_entry_phy, dma_flags);
854
855 if (!queue->cq_entry)
856 {
857 err = -RT_ENOMEM;
858 goto _fail;
859 }
860
861 rt_memset(queue->sq_cmds, 0, sizeof(struct rt_nvme_command) * depth);
862 rt_memset(queue->cq_entry, 0, sizeof(struct rt_nvme_completion) * depth);
863
864 if (nvme->ops->setup_queue)
865 {
866 if ((err = nvme->ops->setup_queue(queue)))
867 {
868 LOG_E("Setup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));
869
870 goto _fail;
871 }
872 }
873
874 return queue;
875
876 _fail:
877 nvme_free_queue(queue);
878
879 return rt_err_ptr(err);
880 }
881
882 static rt_err_t nvme_configure_admin_queue(struct rt_nvme_controller *nvme)
883 {
884 rt_err_t err;
885 int irq;
886 char name[RT_NAME_MAX];
887 rt_uint32_t aqa;
888 rt_uint32_t page_shift = ARCH_PAGE_SHIFT;
889 rt_uint32_t page_min = RT_NVME_CAP_MPSMIN(nvme->cap) + 12;
890 rt_uint32_t page_max = RT_NVME_CAP_MPSMAX(nvme->cap) + 12;
891 struct rt_nvme_queue *admin_queue;
892
893 if (page_shift < page_min)
894 {
895 LOG_E("Device %s page size (%u) %s than host (%u)",
896 "minimum", 1 << page_min, "larger", 1 << page_shift);
897 return -RT_EINVAL;
898 }
899
900 if (page_shift > page_max)
901 {
902 LOG_W("Device %s page size (%u) %s than host (%u)",
903 "maximum", 1 << page_max, "smaller", 1 << page_shift);
904 page_shift = page_max;
905 }
906
907 if ((err = nvme_disable_ctrl(nvme)))
908 {
909 return err;
910 }
911
912 admin_queue = nvme_alloc_queue(nvme, 0, RT_NVME_AQ_DEPTH);
913
914 if (rt_is_err(admin_queue))
915 {
916 return rt_ptr_err(admin_queue);
917 }
918
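/*
 * AQA carries the 0-based admin submission/completion queue sizes in its
 * low and high halves, so the same (depth - 1) value is mirrored into both.
 */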
919 aqa = admin_queue->depth - 1;
920 aqa |= aqa << 16;
921
922 nvme->page_shift = page_shift;
923 nvme->page_size = 1U << page_shift;
924
925 nvme->ctrl_config = RT_NVME_CC_CSS_NVM;
926 nvme->ctrl_config |= (page_shift - 12) << RT_NVME_CC_MPS_SHIFT;
927 nvme->ctrl_config |= RT_NVME_CC_ARB_RR | RT_NVME_CC_SHN_NONE;
928 nvme->ctrl_config |= RT_NVME_CC_IOSQES | RT_NVME_CC_IOCQES;
929
930 nvme_writel(nvme, RT_NVME_REG_AQA, aqa);
931 nvme_writeq(nvme, RT_NVME_REG_ASQ, admin_queue->sq_cmds_phy);
932 nvme_writeq(nvme, RT_NVME_REG_ACQ, admin_queue->cq_entry_phy);
933
934 if ((err = nvme_enable_ctrl(nvme)))
935 {
936 nvme_free_queue(admin_queue);
937
938 return err;
939 }
940
941 irq = nvme->irqs[0];
942
943 rt_snprintf(name, RT_NAME_MAX, "%s-admin-queue", nvme->name);
944
945 rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->admin_queue, name);
946 rt_hw_interrupt_umask(irq);
947
948 return RT_EOK;
949 }
950
951 static rt_err_t nvme_setup_io_queues(struct rt_nvme_controller *nvme)
952 {
953 rt_err_t err;
954 rt_uint32_t value;
955 int irq, cpuid = 0;
956 char name[RT_NAME_MAX];
957 rt_bool_t affinity_fixup = RT_FALSE;
958 RT_IRQ_AFFINITY_DECLARE(affinity) = { 0 };
959 struct rt_nvme_queue *queue;
960
961 nvme->io_queue_max = nvme->irqs_nr > 1 ? nvme->irqs_nr - 1 : 1;
962 value = (nvme->io_queue_max - 1) | ((nvme->io_queue_max - 1) << 16);
963
964 if ((err = nvme_set_features_simple(nvme, RT_NVME_FEAT_NUM_QUEUES, value)))
965 {
966 return err;
967 }
968
969 for (int i = 0, q_idx = 1; i < nvme->io_queue_max; ++i, ++q_idx)
970 {
971 queue = nvme_alloc_queue(nvme, q_idx, nvme->queue_depth);
972
973 if (rt_is_err(queue))
974 {
975 return rt_ptr_err(queue);
976 }
977
978 if ((err = nvme_attach_queue_cq(queue)) ||
979 (err = nvme_attach_queue_sq(queue)))
980 {
981 return err;
982 }
983 }
984
985 for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
986 {
987 irq = nvme->irqs[irq_idx % nvme->irqs_nr];
988
989 rt_snprintf(name, RT_NAME_MAX, "%s-io-queue%d", nvme->name, i);
990
991 if (!affinity_fixup)
992 {
993 RT_IRQ_AFFINITY_SET(affinity, cpuid % RT_CPUS_NR);
994 if (rt_pic_irq_set_affinity(irq, affinity))
995 {
996 /* Fixup in secondary CPU startup */
997 affinity_fixup = RT_TRUE;
998 }
999 RT_IRQ_AFFINITY_CLEAR(affinity, cpuid++ % RT_CPUS_NR);
1000 }
1001
1002 rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->io_queues[i], name);
1003 rt_hw_interrupt_umask(irq);
1004 }
1005
1006 return RT_EOK;
1007 }
1008
1009 static void nvme_remove_io_queues(struct rt_nvme_controller *nvme)
1010 {
1011 int irq;
1012 struct rt_nvme_queue *queue;
1013
1014 for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
1015 {
1016 queue = &nvme->io_queues[i];
1017
1018 nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_SQ);
1019 nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_CQ);
1020 nvme_free_queue(queue);
1021
1022 irq = nvme->irqs[irq_idx % nvme->irqs_nr];
1023
1024 rt_hw_interrupt_mask(irq);
1025 rt_pic_detach_irq(irq, queue);
1026 }
1027 }
1028
1029 static void nvme_remove_admin_queues(struct rt_nvme_controller *nvme)
1030 {
1031 int irq = nvme->irqs[0];
1032
1033 rt_hw_interrupt_mask(irq);
1034 rt_pic_detach_irq(irq, &nvme->admin_queue);
1035
1036 nvme_free_queue(&nvme->admin_queue);
1037 }
1038
1039 static void nvme_remove_devices(struct rt_nvme_controller *nvme)
1040 {
1041 struct rt_nvme_device *ndev, *next_ndev;
1042
1043 rt_list_for_each_entry_safe(ndev, next_ndev, &nvme->ns_nodes, list)
1044 {
1045 rt_list_remove(&ndev->list);
1046
1047 rt_hw_blk_disk_unregister(&ndev->parent);
1048 rt_free(ndev);
1049 }
1050 }
1051
1052 static rt_err_t nvme_scan_device(struct rt_nvme_controller *nvme,
1053 rt_size_t number_of_ns)
1054 {
1055 rt_err_t err = RT_EOK;
1056 rt_uint32_t lbaf;
1057 struct rt_nvme_id_ns *id = RT_NULL;
1058
1059 if (!(id = rt_malloc_align(sizeof(*id), nvme->page_size)))
1060 {
1061 return -RT_ENOMEM;
1062 }
1063
1064 /* NVMe namespace IDs start at 1 */
1065 for (rt_uint32_t nsid = 1; nsid <= number_of_ns; ++nsid)
1066 {
1067 struct rt_nvme_device *ndev = rt_calloc(1, sizeof(*ndev));
1068
1069 if (!ndev)
1070 {
1071 err = -RT_ENOMEM;
1072 goto _free_res;
1073 }
1074
1075 rt_memset(id, 0, sizeof(*id));
1076 if ((err = nvme_identify(nvme, nsid, 0, id)))
1077 {
1078 goto _free_res;
1079 }
1080
1081 if (!id->nsze)
1082 {
1083 continue;
1084 }
1085
1086 ndev->ctrl = nvme;
1087
1088 rt_memcpy(&ndev->id, id, sizeof(ndev->id));
1089 lbaf = id->flbas & RT_NVME_NS_FLBAS_LBA_MASK;
1090 lbaf |= ((id->flbas & RT_NVME_NS_FLBAS_LBA_UMASK) >> RT_NVME_NS_FLBAS_LBA_SHIFT);
1091
1092 ndev->nsid = nsid;
1093 ndev->lba_shift = id->lbaf[lbaf].ds;
1094
1095 ndev->parent.ida = &nvme_ida;
1096 ndev->parent.parallel_io = RT_TRUE;
1097 ndev->parent.ops = &nvme_blk_ops;
1098 ndev->parent.max_partitions = RT_BLK_PARTITION_MAX;
1099 rt_dm_dev_set_name(&ndev->parent.parent, "%sn%u", nvme->name, nsid);
1100
1101 if ((err = rt_hw_blk_disk_register(&ndev->parent)))
1102 {
1103 goto _free_res;
1104 }
1105
1106 rt_list_init(&ndev->list);
1107 rt_list_insert_before(&nvme->ns_nodes, &ndev->list);
1108 }
1109
1110 _free_res:
1111 rt_free_align(id);
1112
1113 return err;
1114 }
1115
1116 rt_inline rt_size_t strip_len(const char *str, rt_size_t max_len)
1117 {
1118 rt_size_t size = 0;
1119
1120 for (int i = 0; *str && i < max_len; ++i, ++str)
1121 {
1122 if (*str != ' ')
1123 {
1124 size = i + 1;
1125 }
1126 }
1127
1128 return size;
1129 }
1130
1131 rt_err_t rt_nvme_controller_register(struct rt_nvme_controller *nvme)
1132 {
1133 rt_err_t err;
1134 struct rt_nvme_id_ctrl *ctrl = RT_NULL;
1135
1136 if (!nvme || !nvme->ops)
1137 {
1138 return -RT_EINVAL;
1139 }
1140
1141 if (nvme_readl(nvme, RT_NVME_REG_CSTS) == (rt_uint32_t)-1)
1142 {
1143 LOG_E("Out of memory");
1144
1145 return -RT_EINVAL;
1146 }
1147
1148 if ((nvme->nvme_id = rt_dm_ida_alloc(&nvme_controller_ida)) < 0)
1149 {
1150 return -RT_EFULL;
1151 }
1152
1153 rt_snprintf(nvme->name, RT_NAME_MAX, "nvme%u", nvme->nvme_id);
1154
1155 nvme->cap = nvme_readq(nvme, RT_NVME_REG_CAP);
1156 nvme->queue_depth = RT_NVME_CAP_MQES(nvme->cap) + 1;
1157 nvme->doorbell_stride = 1 << RT_NVME_CAP_STRIDE(nvme->cap);
1158 nvme->doorbell_tbl = nvme->regs + RT_NVME_REG_DBS;
1159
1160 if ((err = nvme_configure_admin_queue(nvme)))
1161 {
1162 LOG_E("Configure admin queue error = %s", rt_strerror(err));
1163 goto _free_admin_queue;
1164 }
1165
1166 if ((err = nvme_setup_io_queues(nvme)))
1167 {
1168 LOG_E("Unable to setup I/O queues error = %s", rt_strerror(err));
1169 goto _free_admin_queue;
1170 }
1171
1172 if (!(ctrl = rt_malloc_align(sizeof(*ctrl), nvme->page_size)))
1173 {
1174 err = -RT_ENOMEM;
1175 goto _fail;
1176 }
1177
1178 if ((err = nvme_identify(nvme, 0, 1, ctrl)))
1179 {
1180 goto _fail;
1181 }
1182
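/*
 * MDTS is a power of two in units of the minimum memory page size
 * (2^(12 + CAP.MPSMIN) bytes), so the limit is 1 << (mdts + MPSMIN + 12)
 * bytes; an MDTS of 0 means the controller reports no limit.
 */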
1183 if (ctrl->mdts)
1184 {
1185 nvme->max_transfer_shift = ctrl->mdts + (RT_NVME_CAP_MPSMIN(nvme->cap) + 12);
1186 }
1187 else
1188 {
1189 /* 1MB is recommended. */
1190 nvme->max_transfer_shift = 20;
1191 }
1192 nvme->volatile_write_cache = ctrl->vwc;
1193 nvme->write_zeroes = !!(rt_le64_to_cpu(ctrl->oncs) & RT_NVME_CTRL_ONCS_WRITE_ZEROES);
1194
1195 if ((rt_le32_to_cpu(ctrl->sgls) & RT_NVME_ID_SGL_SUPPORT_MASK))
1196 {
1197 nvme->sgl_mode = RT_NVME_PSDT_SGL_MPTR_SGL;
1198 }
1199
1200 LOG_I("NVM Express v%d.%d (%s, %-*.s, %-*.s)",
1201 nvme_readl(nvme, RT_NVME_REG_VS) >> 16,
1202 nvme_readl(nvme, RT_NVME_REG_VS) & 0xff,
1203 nvme->ops->name,
1204 strip_len(ctrl->mn, sizeof(ctrl->mn)), ctrl->mn,
1205 strip_len(ctrl->fr, sizeof(ctrl->fr)), ctrl->fr);
1206
1207 rt_list_init(&nvme->ns_nodes);
1208 if ((err = nvme_scan_device(nvme, rt_le32_to_cpu(ctrl->nn))))
1209 {
1210 goto _fail;
1211 }
1212
1213 rt_free_align(ctrl);
1214
1215 rt_spin_lock(&nvme_lock);
1216 rt_list_insert_after(&nvme_nodes, &nvme->list);
1217 rt_spin_unlock(&nvme_lock);
1218
1219 return RT_EOK;
1220
1221 _fail:
1222 if (ctrl)
1223 {
1224 rt_free_align(ctrl);
1225 }
1226 nvme_remove_devices(nvme);
1227 nvme_remove_io_queues(nvme);
1228 _free_admin_queue:
1229 nvme_remove_admin_queues(nvme);
1230
1231 rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);
1232
1233 return err;
1234 }
1235
1236 rt_err_t rt_nvme_controller_unregister(struct rt_nvme_controller *nvme)
1237 {
1238 rt_err_t err;
1239
1240 if (!nvme)
1241 {
1242 return -RT_EINVAL;
1243 }
1244
1245 rt_spin_lock(&nvme_lock);
1246 rt_list_remove(&nvme->list);
1247 rt_spin_unlock(&nvme_lock);
1248
1249 nvme_remove_devices(nvme);
1250 nvme_remove_io_queues(nvme);
1251 nvme_remove_admin_queues(nvme);
1252
1253 rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);
1254
1255 if (!(err = nvme_shutdown_ctrl(nvme)))
1256 {
1257 err = nvme_disable_ctrl(nvme);
1258 }
1259 else
1260 {
1261 LOG_E("%s: shutdown error = %s", nvme->name, rt_strerror(err));
1262 }
1263
1264 return err;
1265 }
1266
1267 /*
1268 * NVMe I/O queues should be per-CPU. Fix up the IRQ affinity after the
1269 * secondary CPUs have started, when the affinity setting is most likely to succeed.
1270 */
1271 static int nvme_queue_affinify_fixup(void)
1272 {
1273 int cpuid = rt_hw_cpu_id();
1274 struct rt_nvme_controller *nvme;
1275 RT_IRQ_AFFINITY_DECLARE(affinity) = { 0 };
1276 RT_IRQ_AFFINITY_DECLARE(current_affinity) = { 0 };
1277
1278 RT_IRQ_AFFINITY_SET(affinity, cpuid);
1279
1280 rt_hw_spin_lock(&nvme_lock.lock);
1281 rt_list_for_each_entry(nvme, &nvme_nodes, list)
1282 {
1283 for (int i = cpuid % RT_CPUS_NR; i < nvme->io_queue_max; i += RT_CPUS_NR)
1284 {
1285 int irq = nvme->irqs[i];
1286
1287 if (!rt_pic_irq_get_affinity(irq, current_affinity) &&
1288 !rt_bitmap_test_bit(current_affinity, cpuid))
1289 {
1290 rt_ubase_t level = rt_hw_interrupt_disable();
1291
1292 rt_pic_irq_set_affinity(irq, affinity);
1293
1294 rt_hw_interrupt_enable(level);
1295 }
1296 }
1297 }
1298 rt_hw_spin_unlock(&nvme_lock.lock);
1299
1300 return 0;
1301 }
1302 INIT_SECONDARY_CPU_EXPORT(nvme_queue_affinify_fixup);
1303