// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)

#include <linux/aer.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/nvme.h>
#include <linux/pci.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>

#include "fun_queue.h"
#include "fun_dev.h"

#define FUN_ADMIN_CMD_TO_MS 3000

enum {
	AQA_ASQS_SHIFT = 0,
	AQA_ACQS_SHIFT = 16,
	AQA_MIN_QUEUE_SIZE = 2,
	AQA_MAX_QUEUE_SIZE = 4096
};
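
/* Note: the AQA fields hold 0-based queue sizes, so a hypothetical 64-entry
 * admin SQ paired with a 64-entry admin CQ would be programmed as
 * (63 << AQA_ASQS_SHIFT) | (63 << AQA_ACQS_SHIFT); see the writel() in
 * fun_enable_admin_queue().
 */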

/* context for admin commands */
struct fun_cmd_ctx {
	fun_admin_callback_t cb;  /* callback to invoke on completion */
	void *cb_data;            /* user data provided to callback */
	int cpu;                  /* CPU where the cmd's tag was allocated */
};

/* Context for synchronous admin commands. */
struct fun_sync_cmd_ctx {
	struct completion compl;
	u8 *rsp_buf;              /* caller provided response buffer */
	unsigned int rsp_len;     /* response buffer size */
	u8 rsp_status;            /* command response status */
};

/* Wait for the CSTS.RDY bit to match @enabled. */
static int fun_wait_ready(struct fun_dev *fdev, bool enabled)
{
	unsigned int cap_to = NVME_CAP_TIMEOUT(fdev->cap_reg);
	u32 bit = enabled ? NVME_CSTS_RDY : 0;
	unsigned long deadline;

	deadline = ((cap_to + 1) * HZ / 2) + jiffies; /* CAP.TO is in 500ms */

	for (;;) {
		u32 csts = readl(fdev->bar + NVME_REG_CSTS);

		if (csts == ~0) {
			dev_err(fdev->dev, "CSTS register read %#x\n", csts);
			return -EIO;
		}

		if ((csts & NVME_CSTS_RDY) == bit)
			return 0;

		if (time_is_before_jiffies(deadline))
			break;

		msleep(100);
	}

	dev_err(fdev->dev,
		"Timed out waiting for device to indicate RDY %u; aborting %s\n",
		enabled, enabled ? "initialization" : "reset");
	return -ETIMEDOUT;
}

/* Check CSTS and return an error if it is unreadable or has an unexpected
 * RDY value.
 */
static int fun_check_csts_rdy(struct fun_dev *fdev, unsigned int expected_rdy)
{
	u32 csts = readl(fdev->bar + NVME_REG_CSTS);
	u32 actual_rdy = csts & NVME_CSTS_RDY;

	if (csts == ~0) {
		dev_err(fdev->dev, "CSTS register read %#x\n", csts);
		return -EIO;
	}
	if (actual_rdy != expected_rdy) {
		dev_err(fdev->dev, "Unexpected CSTS RDY %u\n", actual_rdy);
		return -EINVAL;
	}
	return 0;
}

/* Check that CSTS RDY has the expected value. Then write a new value to the CC
 * register and wait for CSTS RDY to match the new CC ENABLE state.
 */
static int fun_update_cc_enable(struct fun_dev *fdev, unsigned int initial_rdy)
{
	int rc = fun_check_csts_rdy(fdev, initial_rdy);

	if (rc)
		return rc;
	writel(fdev->cc_reg, fdev->bar + NVME_REG_CC);
	return fun_wait_ready(fdev, !!(fdev->cc_reg & NVME_CC_ENABLE));
}

static int fun_disable_ctrl(struct fun_dev *fdev)
{
	fdev->cc_reg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);
	return fun_update_cc_enable(fdev, 1);
}

static int fun_enable_ctrl(struct fun_dev *fdev, u32 admin_cqesz_log2,
			   u32 admin_sqesz_log2)
{
	fdev->cc_reg = (admin_cqesz_log2 << NVME_CC_IOCQES_SHIFT) |
		       (admin_sqesz_log2 << NVME_CC_IOSQES_SHIFT) |
		       ((PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT) |
		       NVME_CC_ENABLE;

	return fun_update_cc_enable(fdev, 0);
}

static int fun_map_bars(struct fun_dev *fdev, const char *name)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);
	int err;

	err = pci_request_mem_regions(pdev, name);
	if (err) {
		dev_err(&pdev->dev,
			"Couldn't get PCI memory resources, err %d\n", err);
		return err;
	}

	fdev->bar = pci_ioremap_bar(pdev, 0);
	if (!fdev->bar) {
		dev_err(&pdev->dev, "Couldn't map BAR 0\n");
		pci_release_mem_regions(pdev);
		return -ENOMEM;
	}

	return 0;
}

static void fun_unmap_bars(struct fun_dev *fdev)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);

	if (fdev->bar) {
		iounmap(fdev->bar);
		fdev->bar = NULL;
		pci_release_mem_regions(pdev);
	}
}

static int fun_set_dma_masks(struct device *dev)
{
	int err;

	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
	if (err)
		dev_err(dev, "DMA mask configuration failed, err %d\n", err);
	return err;
}

static irqreturn_t fun_admin_irq(int irq, void *data)
{
	struct fun_queue *funq = data;

	return fun_process_cq(funq, 0) ? IRQ_HANDLED : IRQ_NONE;
}

static void fun_complete_admin_cmd(struct fun_queue *funq, void *data,
				   void *entry, const struct fun_cqe_info *info)
{
	const struct fun_admin_rsp_common *rsp_common = entry;
	struct fun_dev *fdev = funq->fdev;
	struct fun_cmd_ctx *cmd_ctx;
	int cpu;
	u16 cid;

	if (info->sqhd == cpu_to_be16(0xffff)) {
		dev_dbg(fdev->dev, "adminq event");
		if (fdev->adminq_cb)
			fdev->adminq_cb(fdev, entry);
		return;
	}

	cid = be16_to_cpu(rsp_common->cid);
	dev_dbg(fdev->dev, "admin CQE cid %u, op %u, ret %u\n", cid,
		rsp_common->op, rsp_common->ret);

	cmd_ctx = &fdev->cmd_ctx[cid];
	if (cmd_ctx->cpu < 0) {
		dev_err(fdev->dev,
			"admin CQE with CID=%u, op=%u does not match a pending command\n",
			cid, rsp_common->op);
		return;
	}

	if (cmd_ctx->cb)
		cmd_ctx->cb(fdev, entry, xchg(&cmd_ctx->cb_data, NULL));

	cpu = cmd_ctx->cpu;
	cmd_ctx->cpu = -1;
	sbitmap_queue_clear(&fdev->admin_sbq, cid, cpu);
}

static int fun_init_cmd_ctx(struct fun_dev *fdev, unsigned int ntags)
{
	unsigned int i;

	fdev->cmd_ctx = kvcalloc(ntags, sizeof(*fdev->cmd_ctx), GFP_KERNEL);
	if (!fdev->cmd_ctx)
		return -ENOMEM;

	for (i = 0; i < ntags; i++)
		fdev->cmd_ctx[i].cpu = -1;

	return 0;
}

/* Allocate and enable an admin queue and assign it the first IRQ vector. */
static int fun_enable_admin_queue(struct fun_dev *fdev,
				  const struct fun_dev_params *areq)
{
	struct fun_queue_alloc_req qreq = {
		.cqe_size_log2 = areq->cqe_size_log2,
		.sqe_size_log2 = areq->sqe_size_log2,
		.cq_depth = areq->cq_depth,
		.sq_depth = areq->sq_depth,
		.rq_depth = areq->rq_depth,
	};
	unsigned int ntags = areq->sq_depth - 1;
	struct fun_queue *funq;
	int rc;

	if (fdev->admin_q)
		return -EEXIST;

	if (areq->sq_depth < AQA_MIN_QUEUE_SIZE ||
	    areq->sq_depth > AQA_MAX_QUEUE_SIZE ||
	    areq->cq_depth < AQA_MIN_QUEUE_SIZE ||
	    areq->cq_depth > AQA_MAX_QUEUE_SIZE)
		return -EINVAL;

	fdev->admin_q = fun_alloc_queue(fdev, 0, &qreq);
	if (!fdev->admin_q)
		return -ENOMEM;

	rc = fun_init_cmd_ctx(fdev, ntags);
	if (rc)
		goto free_q;

	rc = sbitmap_queue_init_node(&fdev->admin_sbq, ntags, -1, false,
				     GFP_KERNEL, dev_to_node(fdev->dev));
	if (rc)
		goto free_cmd_ctx;

	funq = fdev->admin_q;
	funq->cq_vector = 0;
	rc = fun_request_irq(funq, dev_name(fdev->dev), fun_admin_irq, funq);
	if (rc)
		goto free_sbq;

	fun_set_cq_callback(funq, fun_complete_admin_cmd, NULL);
	fdev->adminq_cb = areq->event_cb;

	writel((funq->sq_depth - 1) << AQA_ASQS_SHIFT |
	       (funq->cq_depth - 1) << AQA_ACQS_SHIFT,
	       fdev->bar + NVME_REG_AQA);

	writeq(funq->sq_dma_addr, fdev->bar + NVME_REG_ASQ);
	writeq(funq->cq_dma_addr, fdev->bar + NVME_REG_ACQ);

	rc = fun_enable_ctrl(fdev, areq->cqe_size_log2, areq->sqe_size_log2);
	if (rc)
		goto free_irq;

	if (areq->rq_depth) {
		rc = fun_create_rq(funq);
		if (rc)
			goto disable_ctrl;

		funq_rq_post(funq);
	}

	return 0;

disable_ctrl:
	fun_disable_ctrl(fdev);
free_irq:
	fun_free_irq(funq);
free_sbq:
	sbitmap_queue_free(&fdev->admin_sbq);
free_cmd_ctx:
	kvfree(fdev->cmd_ctx);
	fdev->cmd_ctx = NULL;
free_q:
	fun_free_queue(fdev->admin_q);
	fdev->admin_q = NULL;
	return rc;
}

static void fun_disable_admin_queue(struct fun_dev *fdev)
{
	struct fun_queue *admq = fdev->admin_q;

	if (!admq)
		return;

	fun_disable_ctrl(fdev);

	fun_free_irq(admq);
	__fun_process_cq(admq, 0);

	sbitmap_queue_free(&fdev->admin_sbq);

	kvfree(fdev->cmd_ctx);
	fdev->cmd_ctx = NULL;

	fun_free_queue(admq);
	fdev->admin_q = NULL;
}

/* Return %true if the admin queue has stopped servicing commands as can be
 * detected through registers. This isn't exhaustive and may provide false
 * negatives.
 */
static bool fun_adminq_stopped(struct fun_dev *fdev)
{
	u32 csts = readl(fdev->bar + NVME_REG_CSTS);

	return (csts & (NVME_CSTS_CFS | NVME_CSTS_RDY)) != NVME_CSTS_RDY;
}

static int fun_wait_for_tag(struct fun_dev *fdev, int *cpup)
{
	struct sbitmap_queue *sbq = &fdev->admin_sbq;
	struct sbq_wait_state *ws = &sbq->ws[0];
	DEFINE_SBQ_WAIT(wait);
	int tag;

	for (;;) {
		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE);
		if (fdev->suppress_cmds) {
			tag = -ESHUTDOWN;
			break;
		}
		tag = sbitmap_queue_get(sbq, cpup);
		if (tag >= 0)
			break;
		schedule();
	}

	sbitmap_finish_wait(sbq, ws, &wait);
	return tag;
}

/* Submit an asynchronous admin command. Caller is responsible for implementing
 * any waiting or timeout. Upon command completion the callback @cb is called.
 */
int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd,
			 fun_admin_callback_t cb, void *cb_data, bool wait_ok)
{
	struct fun_queue *funq = fdev->admin_q;
	unsigned int cmdsize = cmd->len8 * 8;
	struct fun_cmd_ctx *cmd_ctx;
	int tag, cpu, rc = 0;

	if (WARN_ON(cmdsize > (1 << funq->sqe_size_log2)))
		return -EMSGSIZE;

	tag = sbitmap_queue_get(&fdev->admin_sbq, &cpu);
	if (tag < 0) {
		if (!wait_ok)
			return -EAGAIN;
		tag = fun_wait_for_tag(fdev, &cpu);
		if (tag < 0)
			return tag;
	}

	cmd->cid = cpu_to_be16(tag);

	cmd_ctx = &fdev->cmd_ctx[tag];
	cmd_ctx->cb = cb;
	cmd_ctx->cb_data = cb_data;

	spin_lock(&funq->sq_lock);

	if (unlikely(fdev->suppress_cmds)) {
		rc = -ESHUTDOWN;
		sbitmap_queue_clear(&fdev->admin_sbq, tag, cpu);
	} else {
		cmd_ctx->cpu = cpu;
		memcpy(fun_sqe_at(funq, funq->sq_tail), cmd, cmdsize);

		dev_dbg(fdev->dev, "admin cmd @ %u: %8ph\n", funq->sq_tail,
			cmd);

		if (++funq->sq_tail == funq->sq_depth)
			funq->sq_tail = 0;
		writel(funq->sq_tail, funq->sq_db);
	}
	spin_unlock(&funq->sq_lock);
	return rc;
}
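
/* A minimal usage sketch (illustrative only): a caller that fires and forgets
 * a request, assuming a hypothetical my_req built elsewhere and a handler
 * my_cb matching fun_admin_callback_t:
 *
 *	err = fun_submit_admin_cmd(fdev, &my_req.common, my_cb, my_data, true);
 *	if (err)
 *		return err;	// never queued; my_cb will not run
 *
 * On success, my_cb(fdev, rsp, my_data) runs later from CQ processing.
 */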

/* Abandon a pending admin command by clearing the issuer's callback data.
 * Failure indicates that the command either has already completed or its
 * completion is racing with this call.
 */
static bool fun_abandon_admin_cmd(struct fun_dev *fd,
				  const struct fun_admin_req_common *cmd,
				  void *cb_data)
{
	u16 cid = be16_to_cpu(cmd->cid);
	struct fun_cmd_ctx *cmd_ctx = &fd->cmd_ctx[cid];

	return cmpxchg(&cmd_ctx->cb_data, cb_data, NULL) == cb_data;
}
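
/* Note on the race: fun_complete_admin_cmd() consumes cb_data with xchg(), so
 * exactly one of the completer and the abandoner observes the original
 * pointer. If the cmpxchg() above fails, completion has won (or is winning)
 * and the issuer's context must stay valid until the callback finishes, which
 * is why fun_submit_admin_sync_cmd() falls back to wait_for_completion().
 */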

/* Stop submission of new admin commands and wake up any processes waiting for
 * tags. Already submitted commands are left to complete or time out.
 */
static void fun_admin_stop(struct fun_dev *fdev)
{
	spin_lock(&fdev->admin_q->sq_lock);
	fdev->suppress_cmds = true;
	spin_unlock(&fdev->admin_q->sq_lock);
	sbitmap_queue_wake_all(&fdev->admin_sbq);
}

/* The callback for synchronous execution of admin commands. It copies the
 * command response to the caller's buffer and signals completion.
 */
static void fun_admin_cmd_sync_cb(struct fun_dev *fd, void *rsp, void *cb_data)
{
	const struct fun_admin_rsp_common *rsp_common = rsp;
	struct fun_sync_cmd_ctx *ctx = cb_data;

	if (!ctx)
		return;	/* command issuer timed out and left */
	if (ctx->rsp_buf) {
		unsigned int rsp_len = rsp_common->len8 * 8;

		if (unlikely(rsp_len > ctx->rsp_len)) {
			dev_err(fd->dev,
				"response for op %u is %uB > response buffer %uB\n",
				rsp_common->op, rsp_len, ctx->rsp_len);
			rsp_len = ctx->rsp_len;
		}
		memcpy(ctx->rsp_buf, rsp, rsp_len);
	}
	ctx->rsp_status = rsp_common->ret;
	complete(&ctx->compl);
}

/* Submit a synchronous admin command. */
int fun_submit_admin_sync_cmd(struct fun_dev *fdev,
			      struct fun_admin_req_common *cmd, void *rsp,
			      size_t rspsize, unsigned int timeout)
{
	struct fun_sync_cmd_ctx ctx = {
		.compl = COMPLETION_INITIALIZER_ONSTACK(ctx.compl),
		.rsp_buf = rsp,
		.rsp_len = rspsize,
	};
	unsigned int cmdlen = cmd->len8 * 8;
	unsigned long jiffies_left;
	int ret;

	ret = fun_submit_admin_cmd(fdev, cmd, fun_admin_cmd_sync_cb, &ctx,
				   true);
	if (ret)
		return ret;

	if (!timeout)
		timeout = FUN_ADMIN_CMD_TO_MS;

	jiffies_left = wait_for_completion_timeout(&ctx.compl,
						   msecs_to_jiffies(timeout));
	if (!jiffies_left) {
		/* The command timed out. Attempt to cancel it so we can return.
		 * But if the command is in the process of completing we'll
		 * wait for it.
		 */
		if (fun_abandon_admin_cmd(fdev, cmd, &ctx)) {
			dev_err(fdev->dev, "admin command timed out: %*ph\n",
				cmdlen, cmd);
			fun_admin_stop(fdev);
			/* see if the timeout was due to a queue failure */
			if (fun_adminq_stopped(fdev))
				dev_err(fdev->dev,
					"device does not accept admin commands\n");

			return -ETIMEDOUT;
		}
		wait_for_completion(&ctx.compl);
	}

	if (ctx.rsp_status) {
		dev_err(fdev->dev, "admin command failed, err %d: %*ph\n",
			ctx.rsp_status, cmdlen, cmd);
	}

	return -ctx.rsp_status;
}
EXPORT_SYMBOL_GPL(fun_submit_admin_sync_cmd);
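
/* Call-pattern notes (fun_get_res_count() below is a real in-file example):
 * the request and response may share storage, since the SQE is copied out of
 * @cmd at submission and the response is copied into @rsp only on completion,
 * and a @timeout of 0 applies the FUN_ADMIN_CMD_TO_MS (3s) default.
 */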

/* Return the number of device resources of the requested type. */
int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res)
{
	union {
		struct fun_admin_res_count_req req;
		struct fun_admin_res_count_rsp rsp;
	} cmd;
	int rc;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(cmd.req));
	cmd.req.count = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_RES_COUNT,
						    0, 0);

	rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, &cmd.rsp,
				       sizeof(cmd), 0);
	return rc ? rc : be32_to_cpu(cmd.rsp.count.data);
}
EXPORT_SYMBOL_GPL(fun_get_res_count);
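
/* Example (mirrors fun_get_dev_limits() below): a caller can query
 * completion-queue capacity with
 *
 *	int n = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ);
 *
 * where a negative return is an errno and a non-negative one is the count.
 */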

/* Request that the instance of resource @res with the given id be deleted. */
int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res,
		    unsigned int flags, u32 id)
{
	struct fun_admin_generic_destroy_req req = {
		.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(req)),
		.destroy = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_DESTROY,
						       flags, id)
	};

	return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
}
EXPORT_SYMBOL_GPL(fun_res_destroy);

/* Bind two entities of the given types and IDs. */
int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0,
	     unsigned int id0, enum fun_admin_bind_type type1,
	     unsigned int id1)
{
	struct {
		struct fun_admin_bind_req req;
		struct fun_admin_bind_entry entry[2];
	} cmd = {
		.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND,
							 sizeof(cmd)),
		.entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0),
		.entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1),
	};

	return fun_submit_admin_sync_cmd(fdev, &cmd.req.common, NULL, 0, 0);
}
EXPORT_SYMBOL_GPL(fun_bind);

static int fun_get_dev_limits(struct fun_dev *fdev)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);
	unsigned int cq_count, sq_count, num_dbs;
	int rc;

	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ);
	if (rc < 0)
		return rc;
	cq_count = rc;

	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPSQ);
	if (rc < 0)
		return rc;
	sq_count = rc;

	/* The admin queue consumes 1 CQ and at least 1 SQ. To be usable the
	 * device must provide additional queues.
	 */
	if (cq_count < 2 || sq_count < 2 + !!fdev->admin_q->rq_depth)
		return -EINVAL;

	/* Calculate the max QID based on SQ/CQ/doorbell counts.
	 * SQ/CQ doorbells alternate.
	 */
	num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) >>
		  (2 + NVME_CAP_STRIDE(fdev->cap_reg));
	fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1;
	fdev->kern_end_qid = fdev->max_qid + 1;
	return 0;
}
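
/* Worked example of the doorbell math above (hypothetical numbers): with a
 * 128 KiB BAR0, doorbells starting at NVME_REG_DBS (0x1000), and
 * CAP.DSTRD = 0, num_dbs = (0x20000 - 0x1000) >> 2 = 31744, i.e. 15872 SQ/CQ
 * doorbell pairs; max_qid is then capped by the smaller of that and the
 * EPSQ/EPCQ counts.
 */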

/* Allocate all MSI-X vectors available on a function, requiring at least
 * @min_vecs.
 */
static int fun_alloc_irqs(struct pci_dev *pdev, unsigned int min_vecs)
{
	int vecs, num_msix = pci_msix_vec_count(pdev);

	if (num_msix < 0)
		return num_msix;
	if (min_vecs > num_msix)
		return -ERANGE;

	vecs = pci_alloc_irq_vectors(pdev, min_vecs, num_msix, PCI_IRQ_MSIX);
	if (vecs > 0) {
		dev_info(&pdev->dev,
			 "Allocated %d IRQ vectors of %d requested\n",
			 vecs, num_msix);
	} else {
		dev_err(&pdev->dev,
			"Unable to allocate at least %u IRQ vectors\n",
			min_vecs);
	}
	return vecs;
}

/* Allocate and initialize the IRQ manager state. */
static int fun_alloc_irq_mgr(struct fun_dev *fdev)
{
	fdev->irq_map = bitmap_zalloc(fdev->num_irqs, GFP_KERNEL);
	if (!fdev->irq_map)
		return -ENOMEM;

	spin_lock_init(&fdev->irqmgr_lock);
	/* mark IRQ 0 allocated, it is used by the admin queue */
	__set_bit(0, fdev->irq_map);
	fdev->irqs_avail = fdev->num_irqs - 1;
	return 0;
}

/* Reserve @nirqs of the currently available IRQs and return their indices. */
int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, u16 *irq_indices)
{
	unsigned int b, n = 0;
	int err = -ENOSPC;

	if (!nirqs)
		return 0;

	spin_lock(&fdev->irqmgr_lock);
	if (nirqs > fdev->irqs_avail)
		goto unlock;

	for_each_clear_bit(b, fdev->irq_map, fdev->num_irqs) {
		__set_bit(b, fdev->irq_map);
		irq_indices[n++] = b;
		if (n >= nirqs)
			break;
	}

	WARN_ON(n < nirqs);
	fdev->irqs_avail -= n;
	err = n;
unlock:
	spin_unlock(&fdev->irqmgr_lock);
	return err;
}
EXPORT_SYMBOL(fun_reserve_irqs);

/* Release @nirqs previously allocated IRQs with the supplied indices. */
void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs,
		      u16 *irq_indices)
{
	unsigned int i;

	spin_lock(&fdev->irqmgr_lock);
	for (i = 0; i < nirqs; i++)
		__clear_bit(irq_indices[i], fdev->irq_map);
	fdev->irqs_avail += nirqs;
	spin_unlock(&fdev->irqmgr_lock);
}
EXPORT_SYMBOL(fun_release_irqs);
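
/* Sketch of the expected pairing (illustrative, hypothetical sizes): a user
 * that needs 4 vectors would do
 *
 *	u16 idx[4];
 *	int n = fun_reserve_irqs(fdev, 4, idx);
 *
 *	if (n < 0)
 *		return n;		// -ENOSPC if too few are available
 *	...
 *	fun_release_irqs(fdev, n, idx);	// give them back on teardown
 *
 * Index 0 is never handed out here since it is reserved for the admin queue.
 */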

static void fun_serv_handler(struct work_struct *work)
{
	struct fun_dev *fd = container_of(work, struct fun_dev, service_task);

	if (test_bit(FUN_SERV_DISABLED, &fd->service_flags))
		return;
	if (fd->serv_cb)
		fd->serv_cb(fd);
}

void fun_serv_stop(struct fun_dev *fd)
{
	set_bit(FUN_SERV_DISABLED, &fd->service_flags);
	cancel_work_sync(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_stop);

void fun_serv_restart(struct fun_dev *fd)
{
	clear_bit(FUN_SERV_DISABLED, &fd->service_flags);
	if (fd->service_flags)
		schedule_work(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_restart);

void fun_serv_sched(struct fun_dev *fd)
{
	if (!test_bit(FUN_SERV_DISABLED, &fd->service_flags))
		schedule_work(&fd->service_task);
}
EXPORT_SYMBOL_GPL(fun_serv_sched);
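
/* Typical service-task flow (a sketch of intent, not an API contract): an
 * event source raises a bit in fd->service_flags and calls fun_serv_sched()
 * to run fd->serv_cb in process context. Around a reset, callers bracket the
 * critical section with fun_serv_stop()/fun_serv_restart(); the latter
 * reschedules the task if any flags were raised while it was disabled.
 */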

/* Check and try to get the device into a proper state for initialization,
 * i.e., CSTS.RDY = CC.EN = 0.
 */
static int sanitize_dev(struct fun_dev *fdev)
{
	int rc;

	fdev->cap_reg = readq(fdev->bar + NVME_REG_CAP);
	fdev->cc_reg = readl(fdev->bar + NVME_REG_CC);

	/* First get RDY to agree with the current EN. Give RDY the opportunity
	 * to complete a potential recent EN change.
	 */
	rc = fun_wait_ready(fdev, fdev->cc_reg & NVME_CC_ENABLE);
	if (rc)
		return rc;

	/* Next, reset the device if EN is currently 1. */
	if (fdev->cc_reg & NVME_CC_ENABLE)
		rc = fun_disable_ctrl(fdev);

	return rc;
}

/* Undo the device initialization of fun_dev_enable(). */
void fun_dev_disable(struct fun_dev *fdev)
{
	struct pci_dev *pdev = to_pci_dev(fdev->dev);

	pci_set_drvdata(pdev, NULL);

	if (fdev->fw_handle != FUN_HCI_ID_INVALID) {
		fun_res_destroy(fdev, FUN_ADMIN_OP_SWUPGRADE, 0,
				fdev->fw_handle);
		fdev->fw_handle = FUN_HCI_ID_INVALID;
	}

	fun_disable_admin_queue(fdev);

	bitmap_free(fdev->irq_map);
	pci_free_irq_vectors(pdev);

	pci_clear_master(pdev);
	pci_disable_pcie_error_reporting(pdev);
	pci_disable_device(pdev);

	fun_unmap_bars(fdev);
}
EXPORT_SYMBOL(fun_dev_disable);

/* Perform basic initialization of a device, including
 * - PCI config space setup and BAR0 mapping
 * - interrupt management initialization
 * - 1 admin queue setup
 * - determination of some device limits, such as number of queues.
 */
int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev,
		   const struct fun_dev_params *areq, const char *name)
{
	int rc;

	fdev->dev = &pdev->dev;
	rc = fun_map_bars(fdev, name);
	if (rc)
		return rc;

	rc = fun_set_dma_masks(fdev->dev);
	if (rc)
		goto unmap;

	rc = pci_enable_device_mem(pdev);
	if (rc) {
		dev_err(&pdev->dev, "Couldn't enable device, err %d\n", rc);
		goto unmap;
	}

	pci_enable_pcie_error_reporting(pdev);

	rc = sanitize_dev(fdev);
	if (rc)
		goto disable_dev;

	fdev->fw_handle = FUN_HCI_ID_INVALID;
	fdev->q_depth = NVME_CAP_MQES(fdev->cap_reg) + 1;
	fdev->db_stride = 1 << NVME_CAP_STRIDE(fdev->cap_reg);
	fdev->dbs = fdev->bar + NVME_REG_DBS;

	INIT_WORK(&fdev->service_task, fun_serv_handler);
	fdev->service_flags = FUN_SERV_DISABLED;
	fdev->serv_cb = areq->serv_cb;

	rc = fun_alloc_irqs(pdev, areq->min_msix + 1); /* +1 for admin CQ */
	if (rc < 0)
		goto disable_dev;
	fdev->num_irqs = rc;

	rc = fun_alloc_irq_mgr(fdev);
	if (rc)
		goto free_irqs;

	pci_set_master(pdev);
	rc = fun_enable_admin_queue(fdev, areq);
	if (rc)
		goto free_irq_mgr;

	rc = fun_get_dev_limits(fdev);
	if (rc < 0)
		goto disable_admin;

	pci_save_state(pdev);
	pci_set_drvdata(pdev, fdev);
	pcie_print_link_status(pdev);
	dev_dbg(fdev->dev, "q_depth %u, db_stride %u, max qid %d kern_end_qid %d\n",
		fdev->q_depth, fdev->db_stride, fdev->max_qid,
		fdev->kern_end_qid);
	return 0;

disable_admin:
	fun_disable_admin_queue(fdev);
free_irq_mgr:
	pci_clear_master(pdev);
	bitmap_free(fdev->irq_map);
free_irqs:
	pci_free_irq_vectors(pdev);
disable_dev:
	pci_disable_pcie_error_reporting(pdev);
	pci_disable_device(pdev);
unmap:
	fun_unmap_bars(fdev);
	return rc;
}
EXPORT_SYMBOL(fun_dev_enable);
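
/* Probe-time sketch (illustrative only): a PCI driver fills a struct
 * fun_dev_params (queue depths, SQE/CQE sizes, min_msix, event/service
 * callbacks) and calls
 *
 *	err = fun_dev_enable(fdev, pdev, &areq, KBUILD_MODNAME);
 *
 * from probe(); remove() then calls fun_dev_disable(fdev), which unwinds
 * everything fun_dev_enable() set up.
 */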

MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
MODULE_DESCRIPTION("Core services driver for Fungible devices");
MODULE_LICENSE("Dual BSD/GPL");