1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <linux/smp.h>
5 #include "dr_types.h"
6 
7 #define QUEUE_SIZE 128
8 #define SIGNAL_PER_DIV_QUEUE 16
9 #define TH_NUMS_TO_DRAIN 2
10 #define DR_SEND_INFO_POOL_SIZE 1000
11 
12 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
13 
14 struct dr_data_seg {
15 	u64 addr;
16 	u32 length;
17 	u32 lkey;
18 	unsigned int send_flags;
19 };
20 
21 struct postsend_info {
22 	struct dr_data_seg write;
23 	struct dr_data_seg read;
24 	u64 remote_addr;
25 	u32 rkey;
26 };
27 
28 struct dr_qp_rtr_attr {
29 	struct mlx5dr_cmd_gid_attr dgid_attr;
30 	enum ib_mtu mtu;
31 	u32 qp_num;
32 	u16 port_num;
33 	u8 min_rnr_timer;
34 	u8 sgid_index;
35 	u16 udp_src_port;
36 	u8 fl:1;
37 };
38 
39 struct dr_qp_rts_attr {
40 	u8 timeout;
41 	u8 retry_cnt;
42 	u8 rnr_retry;
43 };
44 
45 struct dr_qp_init_attr {
46 	u32 cqn;
47 	u32 pdn;
48 	u32 max_send_wr;
49 	struct mlx5_uars_page *uar;
50 	u8 isolate_vl_tc:1;
51 };
52 
53 struct mlx5dr_send_info_pool_obj {
54 	struct mlx5dr_ste_send_info ste_send_info;
55 	struct mlx5dr_send_info_pool *pool;
56 	struct list_head list_node;
57 };
58 
59 struct mlx5dr_send_info_pool {
60 	struct list_head free_list;
61 };
62 
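/* Pre-allocate a batch of send info objects and add them to the pool's
 * free list. On allocation failure, the objects that were already added
 * are freed and -ENOMEM is returned.
 */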
static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
64 {
65 	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
66 	int i;
67 
68 	for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
69 		pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
70 		if (!pool_obj)
71 			goto clean_pool;
72 
73 		pool_obj->pool = pool;
74 		list_add_tail(&pool_obj->list_node, &pool->free_list);
75 	}
76 
77 	return 0;
78 
79 clean_pool:
80 	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
81 		list_del(&pool_obj->list_node);
82 		kfree(pool_obj);
83 	}
84 
85 	return -ENOMEM;
86 }
87 
static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
89 {
90 	struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
91 
92 	list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
93 		list_del(&pool_obj->list_node);
94 		kfree(pool_obj);
95 	}
96 
97 	kfree(pool);
98 }
99 
void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
101 {
102 	dr_send_info_pool_destroy(dmn->send_info_pool_tx);
103 	dr_send_info_pool_destroy(dmn->send_info_pool_rx);
104 }
105 
static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
107 {
108 	struct mlx5dr_send_info_pool *pool;
109 	int ret;
110 
111 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
112 	if (!pool)
113 		return NULL;
114 
115 	INIT_LIST_HEAD(&pool->free_list);
116 
117 	ret = dr_send_info_pool_fill(pool);
118 	if (ret) {
119 		kfree(pool);
120 		return NULL;
121 	}
122 
123 	return pool;
124 }
125 
int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
127 {
128 	dmn->send_info_pool_rx = dr_send_info_pool_create();
129 	if (!dmn->send_info_pool_rx)
130 		return -ENOMEM;
131 
132 	dmn->send_info_pool_tx = dr_send_info_pool_create();
133 	if (!dmn->send_info_pool_tx) {
134 		dr_send_info_pool_destroy(dmn->send_info_pool_rx);
135 		return -ENOMEM;
136 	}
137 
138 	return 0;
139 }
140 
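/* Take a ste_send_info object from the domain's RX or TX pool, refilling
 * the pool first if it has run empty. Returns NULL on failure.
 */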
141 struct mlx5dr_ste_send_info
*mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
143 			enum mlx5dr_domain_nic_type nic_type)
144 {
145 	struct mlx5dr_send_info_pool_obj *pool_obj;
146 	struct mlx5dr_send_info_pool *pool;
147 	int ret;
148 
149 	pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
150 						   dmn->send_info_pool_tx;
151 
152 	if (unlikely(list_empty(&pool->free_list))) {
153 		ret = dr_send_info_pool_fill(pool);
154 		if (ret)
155 			return NULL;
156 	}
157 
158 	pool_obj = list_first_entry_or_null(&pool->free_list,
159 					    struct mlx5dr_send_info_pool_obj,
160 					    list_node);
161 
162 	if (likely(pool_obj)) {
163 		list_del_init(&pool_obj->list_node);
164 	} else {
165 		WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
166 		return NULL;
167 	}
168 
169 	return &pool_obj->ste_send_info;
170 }
171 
void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
173 {
174 	struct mlx5dr_send_info_pool_obj *pool_obj;
175 
176 	pool_obj = container_of(ste_send_info,
177 				struct mlx5dr_send_info_pool_obj,
178 				ste_send_info);
179 
180 	list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
181 }
182 
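/* Advance the SQ consumer counter according to the WQE reported by the CQE.
 * Returns CQ_OK for a good completion, CQ_POLL_ERR for requester/responder
 * errors.
 */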
static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
184 {
185 	unsigned int idx;
186 	u8 opcode;
187 
188 	opcode = get_cqe_opcode(cqe64);
189 	if (opcode == MLX5_CQE_REQ_ERR) {
190 		idx = be16_to_cpu(cqe64->wqe_counter) &
191 			(dr_cq->qp->sq.wqe_cnt - 1);
192 		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
193 	} else if (opcode == MLX5_CQE_RESP_ERR) {
194 		++dr_cq->qp->sq.cc;
195 	} else {
196 		idx = be16_to_cpu(cqe64->wqe_counter) &
197 			(dr_cq->qp->sq.wqe_cnt - 1);
198 		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
199 
200 		return CQ_OK;
201 	}
202 
203 	return CQ_POLL_ERR;
204 }
205 
static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
207 {
208 	struct mlx5_cqe64 *cqe64;
209 	int err;
210 
211 	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
212 	if (!cqe64) {
213 		if (unlikely(dr_cq->mdev->state ==
214 			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
215 			mlx5_core_dbg_once(dr_cq->mdev,
216 					   "Polling CQ while device is shutting down\n");
217 			return CQ_POLL_ERR;
218 		}
219 		return CQ_EMPTY;
220 	}
221 
222 	mlx5_cqwq_pop(&dr_cq->wq);
223 	err = dr_parse_cqe(dr_cq, cqe64);
224 	mlx5_cqwq_update_db_record(&dr_cq->wq);
225 
226 	return err;
227 }
228 
static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
230 {
231 	int npolled;
232 	int err = 0;
233 
234 	for (npolled = 0; npolled < ne; ++npolled) {
235 		err = dr_cq_poll_one(dr_cq);
236 		if (err != CQ_OK)
237 			break;
238 	}
239 
240 	return err == CQ_POLL_ERR ? err : npolled;
241 }
242 
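/* Create the loopback RC QP used for posting RDMA writes/reads towards ICM.
 * The same CQ is used for both send and receive completions, and the
 * reserved lkey (rlky) is enabled so that callers can post local buffers
 * with lkey 0.
 */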
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
244 					 struct dr_qp_init_attr *attr)
245 {
246 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
247 	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
248 	struct mlx5_wq_param wqp;
249 	struct mlx5dr_qp *dr_qp;
250 	int inlen;
251 	void *qpc;
252 	void *in;
253 	int err;
254 
255 	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
256 	if (!dr_qp)
257 		return NULL;
258 
259 	wqp.buf_numa_node = mdev->priv.numa_node;
260 	wqp.db_numa_node = mdev->priv.numa_node;
261 
262 	dr_qp->rq.pc = 0;
263 	dr_qp->rq.cc = 0;
264 	dr_qp->rq.wqe_cnt = 4;
265 	dr_qp->sq.pc = 0;
266 	dr_qp->sq.cc = 0;
267 	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
268 
269 	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
270 	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
271 	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
272 	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
273 				&dr_qp->wq_ctrl);
274 	if (err) {
275 		mlx5_core_warn(mdev, "Can't create QP WQ\n");
276 		goto err_wq;
277 	}
278 
279 	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
280 				     sizeof(dr_qp->sq.wqe_head[0]),
281 				     GFP_KERNEL);
282 
283 	if (!dr_qp->sq.wqe_head) {
284 		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
285 		goto err_wqe_head;
286 	}
287 
288 	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
289 		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
290 		dr_qp->wq_ctrl.buf.npages;
291 	in = kvzalloc(inlen, GFP_KERNEL);
292 	if (!in) {
293 		err = -ENOMEM;
294 		goto err_in;
295 	}
296 
297 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
298 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
299 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
300 	MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
301 	MLX5_SET(qpc, qpc, pd, attr->pdn);
302 	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
303 	MLX5_SET(qpc, qpc, log_page_size,
304 		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
305 	MLX5_SET(qpc, qpc, fre, 1);
306 	MLX5_SET(qpc, qpc, rlky, 1);
307 	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
308 	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
309 	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
310 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
311 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
312 	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
313 	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
314 	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
315 	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
316 		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
317 	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
318 				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
319 							 in, pas));
320 
321 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
322 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
323 	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
324 	kvfree(in);
325 	if (err)
326 		goto err_in;
327 	dr_qp->uar = attr->uar;
328 
329 	return dr_qp;
330 
331 err_in:
332 	kfree(dr_qp->sq.wqe_head);
333 err_wqe_head:
334 	mlx5_wq_destroy(&dr_qp->wq_ctrl);
335 err_wq:
336 	kfree(dr_qp);
337 	return NULL;
338 }
339 
static void dr_destroy_qp(struct mlx5_core_dev *mdev,
341 			  struct mlx5dr_qp *dr_qp)
342 {
343 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
344 
345 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
346 	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
347 	mlx5_cmd_exec_in(mdev, destroy_qp, in);
348 
349 	kfree(dr_qp->sq.wqe_head);
350 	mlx5_wq_destroy(&dr_qp->wq_ctrl);
351 	kfree(dr_qp);
352 }
353 
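/* Update the SQ doorbell record and ring the doorbell through the UAR
 * BlueFlame offset so the HW starts processing the newly posted WQEs.
 */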
static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
355 {
356 	dma_wmb();
357 	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);
358 
	/* After the wmb() the hw is aware of the new work */
360 	wmb();
361 
362 	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
363 }
364 
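/* Build a single RDMA WQE (ctrl + remote address + data segment) at the
 * current SQ producer index and, if requested, notify the HW about it.
 */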
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
366 			     u32 rkey, struct dr_data_seg *data_seg,
367 			     u32 opcode, bool notify_hw)
368 {
369 	struct mlx5_wqe_raddr_seg *wq_raddr;
370 	struct mlx5_wqe_ctrl_seg *wq_ctrl;
371 	struct mlx5_wqe_data_seg *wq_dseg;
372 	unsigned int size;
373 	unsigned int idx;
374 
375 	size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
376 		sizeof(*wq_raddr) / 16;
377 
378 	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
379 
380 	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
381 	wq_ctrl->imm = 0;
382 	wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
383 		MLX5_WQE_CTRL_CQ_UPDATE : 0;
384 	wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
385 						opcode);
386 	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
387 	wq_raddr = (void *)(wq_ctrl + 1);
388 	wq_raddr->raddr = cpu_to_be64(remote_addr);
389 	wq_raddr->rkey = cpu_to_be32(rkey);
390 	wq_raddr->reserved = 0;
391 
392 	wq_dseg = (void *)(wq_raddr + 1);
393 	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
394 	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
395 	wq_dseg->addr = cpu_to_be64(data_seg->addr);
396 
397 	dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
398 
399 	if (notify_hw)
400 		dr_cmd_notify_hw(dr_qp, wq_ctrl);
401 }
402 
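/* Post an RDMA write followed by an RDMA read into the same buffer; only
 * the read rings the doorbell, and since an RC QP executes work requests
 * in order, the read completion also indicates that the write was done.
 */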
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
404 {
405 	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
406 			 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
407 	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
408 			 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
409 }
410 
411 /**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with the send_list parameters:
 *
 *     @ste:       The STE that this data is attached to
 *     @size:      Size of the data to write
 *     @offset:    Offset of the data from the start of the hw_ste entry
 *     @data:      The data to write
 *     @ste_info:  STE info descriptor to be sent with send_list
 *     @send_list: The list to append ste_info to
 *     @copy_data: If true, the data is copied and kept here, since it is
 *                 not backed up anywhere else (e.g. during re-hash).
 *                 If false, the data may still be updated after it has
 *                 been added to the list.
425  */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
427 					       u16 offset, u8 *data,
428 					       struct mlx5dr_ste_send_info *ste_info,
429 					       struct list_head *send_list,
430 					       bool copy_data)
431 {
432 	ste_info->size = size;
433 	ste_info->ste = ste;
434 	ste_info->offset = offset;
435 
436 	if (copy_data) {
437 		memcpy(ste_info->data_cont, data, size);
438 		ste_info->data = ste_info->data_cont;
439 	} else {
440 		ste_info->data = data;
441 	}
442 
443 	list_add_tail(&ste_info->send_list, send_list);
444 }
445 
/* The function tries to consume one wc each time, unless the queue is full.
 * In that case, which means that the hw is behind the sw by a full queue
 * length, the function will drain the cq until it is empty.
449  */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
451 				struct mlx5dr_send_ring *send_ring)
452 {
453 	bool is_drain = false;
454 	int ne;
455 
456 	if (send_ring->pending_wqe < send_ring->signal_th)
457 		return 0;
458 
	/* Queue is full, start draining it */
460 	if (send_ring->pending_wqe >=
461 	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
462 		is_drain = true;
463 
464 	do {
465 		ne = dr_poll_cq(send_ring->cq, 1);
466 		if (unlikely(ne < 0)) {
467 			mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
468 					    send_ring->qp->qpn);
469 			send_ring->err_state = true;
470 			return ne;
471 		} else if (ne == 1) {
472 			send_ring->pending_wqe -= send_ring->signal_th;
473 		}
474 	} while (is_drain && send_ring->pending_wqe);
475 
476 	return 0;
477 }
478 
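/* Account for the write and read WQEs that will be posted for this
 * send_info, and request a signaled completion once every signal_th WQEs.
 * The read targets the same address as the write and uses the ring MR.
 */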
static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
480 			      struct postsend_info *send_info)
481 {
482 	send_ring->pending_wqe++;
483 
484 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
485 		send_info->write.send_flags |= IB_SEND_SIGNALED;
486 
487 	send_ring->pending_wqe++;
488 	send_info->read.length = send_info->write.length;
489 	/* Read into the same write area */
490 	send_info->read.addr = (uintptr_t)send_info->write.addr;
491 	send_info->read.lkey = send_ring->mr->mkey;
492 
493 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
494 		send_info->read.send_flags = IB_SEND_SIGNALED;
495 	else
496 		send_info->read.send_flags = 0;
497 }
498 
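/* Post one write+read pair towards ICM. Payloads larger than the max
 * inline size are first copied into the registered ring buffer. The post
 * is silently skipped when the device or the QP is in an error state.
 */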
static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
500 				struct postsend_info *send_info)
501 {
502 	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
503 	u32 buff_offset;
504 	int ret;
505 
506 	if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
507 		     send_ring->err_state)) {
508 		mlx5_core_dbg_once(dmn->mdev,
509 				   "Skipping post send: QP err state: %d, device state: %d\n",
510 				   send_ring->err_state, dmn->mdev->state);
511 		return 0;
512 	}
513 
514 	spin_lock(&send_ring->lock);
515 
516 	ret = dr_handle_pending_wc(dmn, send_ring);
517 	if (ret)
518 		goto out_unlock;
519 
520 	if (send_info->write.length > dmn->info.max_inline_size) {
521 		buff_offset = (send_ring->tx_head &
522 			       (dmn->send_ring->signal_th - 1)) *
523 			send_ring->max_post_send_size;
524 		/* Copy to ring mr */
525 		memcpy(send_ring->buf + buff_offset,
526 		       (void *)(uintptr_t)send_info->write.addr,
527 		       send_info->write.length);
528 		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
529 		send_info->write.lkey = send_ring->mr->mkey;
530 	}
531 
532 	send_ring->tx_head++;
533 	dr_fill_data_segs(send_ring, send_info);
534 	dr_post_send(send_ring->qp, send_info);
535 
536 out_unlock:
537 	spin_unlock(&send_ring->lock);
538 	return ret;
539 }
540 
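/* Determine how to copy an ICM chunk: split it into 'iterations' posts of
 * at most max_post_send_size bytes each, and allocate a staging buffer of
 * the per-iteration size.
 */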
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
542 				   struct mlx5dr_ste_htbl *htbl,
543 				   u8 **data,
544 				   u32 *byte_size,
545 				   int *iterations,
546 				   int *num_stes)
547 {
548 	u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
549 	int alloc_size;
550 
551 	if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
552 		*iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
553 		*byte_size = dmn->send_ring->max_post_send_size;
554 		alloc_size = *byte_size;
555 		*num_stes = *byte_size / DR_STE_SIZE;
556 	} else {
557 		*iterations = 1;
558 		*num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
559 		alloc_size = *num_stes * DR_STE_SIZE;
560 	}
561 
562 	*data = kvzalloc(alloc_size, GFP_KERNEL);
563 	if (!*data)
564 		return -ENOMEM;
565 
566 	return 0;
567 }
568 
569 /**
 * mlx5dr_send_postsend_ste: write size bytes at offset into the hw icm.
 *
 *     @dmn:    Domain
 *     @ste:    The STE struct that the data belongs to (at
 *              least part of it)
 *     @data:   The actual data to send
 *     @size:   Number of bytes to write
 *     @offset: The offset from the ICM mapped address of the STE at
 *              which to start writing; allows writing only part of
 *              the buffer.
580  *
581  * Return: 0 on success.
582  */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
584 			     u8 *data, u16 size, u16 offset)
585 {
586 	struct postsend_info send_info = {};
587 
588 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
589 
590 	send_info.write.addr = (uintptr_t)data;
591 	send_info.write.length = size;
592 	send_info.write.lkey = 0;
593 	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
594 	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
595 
596 	return dr_postsend_icm_data(dmn, &send_info);
597 }
598 
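/* Write a whole hash table to ICM: used entries keep their reduced STE data
 * plus the bit mask, while unused entries are written as the default
 * formatted STE.
 */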
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
600 			      struct mlx5dr_ste_htbl *htbl,
601 			      u8 *formatted_ste, u8 *mask)
602 {
603 	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
604 	int num_stes_per_iter;
605 	int iterations;
606 	u8 *data;
607 	int ret;
608 	int i;
609 	int j;
610 
611 	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
612 				      &iterations, &num_stes_per_iter);
613 	if (ret)
614 		return ret;
615 
616 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);
617 
	/* Send the data 'iterations' times */
619 	for (i = 0; i < iterations; i++) {
620 		u32 ste_index = i * (byte_size / DR_STE_SIZE);
621 		struct postsend_info send_info = {};
622 
		/* Copy all STEs into the data buffer;
		 * the bit_mask still needs to be added
625 		 */
626 		for (j = 0; j < num_stes_per_iter; j++) {
627 			struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
628 			u32 ste_off = j * DR_STE_SIZE;
629 
630 			if (mlx5dr_ste_is_not_used(ste)) {
631 				memcpy(data + ste_off,
632 				       formatted_ste, DR_STE_SIZE);
633 			} else {
634 				/* Copy data */
635 				memcpy(data + ste_off,
636 				       htbl->chunk->hw_ste_arr +
637 				       DR_STE_SIZE_REDUCED * (ste_index + j),
638 				       DR_STE_SIZE_REDUCED);
639 				/* Copy bit_mask */
640 				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
641 				       mask, DR_STE_SIZE_MASK);
				/* Only when we have a mask do we need to re-arrange the STE */
643 				mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
644 								data + (j * DR_STE_SIZE),
645 								DR_STE_SIZE);
646 			}
647 		}
648 
649 		send_info.write.addr = (uintptr_t)data;
650 		send_info.write.length = byte_size;
651 		send_info.write.lkey = 0;
652 		send_info.remote_addr =
653 			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
654 		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
655 
656 		ret = dr_postsend_icm_data(dmn, &send_info);
657 		if (ret)
658 			goto out_free;
659 	}
660 
661 out_free:
662 	kvfree(data);
663 	return ret;
664 }
665 
/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
668 					struct mlx5dr_ste_htbl *htbl,
669 					u8 *ste_init_data,
670 					bool update_hw_ste)
671 {
672 	u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
673 	int iterations;
674 	int num_stes;
675 	u8 *copy_dst;
676 	u8 *data;
677 	int ret;
678 	int i;
679 
680 	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
681 				      &iterations, &num_stes);
682 	if (ret)
683 		return ret;
684 
685 	if (update_hw_ste) {
686 		/* Copy the reduced STE to hash table ste_arr */
687 		for (i = 0; i < num_stes; i++) {
688 			copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
689 			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
690 		}
691 	}
692 
693 	mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
694 
	/* Copy the same STE onto the data buffer */
696 	for (i = 0; i < num_stes; i++) {
697 		copy_dst = data + i * DR_STE_SIZE;
698 		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
699 	}
700 
	/* Send the data 'iterations' times */
702 	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
704 		struct postsend_info send_info = {};
705 
706 		send_info.write.addr = (uintptr_t)data;
707 		send_info.write.length = byte_size;
708 		send_info.write.lkey = 0;
709 		send_info.remote_addr =
710 			mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
711 		send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
712 
713 		ret = dr_postsend_icm_data(dmn, &send_info);
714 		if (ret)
715 			goto out_free;
716 	}
717 
718 out_free:
719 	kvfree(data);
720 	return ret;
721 }
722 
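/* Write an action's rewrite data (e.g. for modify-header actions) to its
 * ICM chunk.
 */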
int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
724 				struct mlx5dr_action *action)
725 {
726 	struct postsend_info send_info = {};
727 
728 	send_info.write.addr = (uintptr_t)action->rewrite->data;
729 	send_info.write.length = action->rewrite->num_of_actions *
730 				 DR_MODIFY_ACTION_SIZE;
731 	send_info.write.lkey = 0;
732 	send_info.remote_addr =
733 		mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
734 	send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
735 
736 	return dr_postsend_icm_data(dmn, &send_info);
737 }
738 
static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
740 				 struct mlx5dr_qp *dr_qp,
741 				 int port)
742 {
743 	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
744 	void *qpc;
745 
746 	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
747 
748 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
749 	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
750 	MLX5_SET(qpc, qpc, rre, 1);
751 	MLX5_SET(qpc, qpc, rwe, 1);
752 
753 	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
754 	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
755 
756 	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
757 }
758 
static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
760 				    struct mlx5dr_qp *dr_qp,
761 				    struct dr_qp_rts_attr *attr)
762 {
763 	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
764 	void *qpc;
765 
766 	qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
767 
768 	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
769 
770 	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
771 	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
772 	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
773 
774 	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
775 	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
776 
777 	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
778 }
779 
static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
781 				     struct mlx5dr_qp *dr_qp,
782 				     struct dr_qp_rtr_attr *attr)
783 {
784 	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
785 	void *qpc;
786 
787 	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
788 
789 	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
790 
791 	MLX5_SET(qpc, qpc, mtu, attr->mtu);
792 	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
793 	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
794 	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
795 	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
796 	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
797 	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
798 	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
799 		 attr->sgid_index);
800 
801 	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
802 		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
803 			 attr->udp_src_port);
804 
805 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
806 	MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
807 	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
808 
809 	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
810 	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
811 
812 	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
813 }
814 
static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
816 {
817 	/* Check whether RC RoCE QP creation with force loopback is allowed.
818 	 * There are two separate capability bits for this:
819 	 *  - force loopback when RoCE is enabled
820 	 *  - force loopback when RoCE is disabled
821 	 */
822 	return ((caps->roce_caps.roce_en &&
823 		 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
824 		(!caps->roce_caps.roce_en &&
825 		 caps->roce_caps.fl_rc_qp_when_roce_disabled));
826 }
827 
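/* Move the send QP through the RST -> INIT -> RTR -> RTS transitions,
 * using force loopback when the device allows it and otherwise resolving
 * the GID to use for the loopback connection.
 */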
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
829 {
830 	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
831 	struct dr_qp_rts_attr rts_attr = {};
832 	struct dr_qp_rtr_attr rtr_attr = {};
833 	enum ib_mtu mtu = IB_MTU_1024;
834 	u16 gid_index = 0;
835 	int port = 1;
836 	int ret;
837 
838 	/* Init */
839 	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
840 	if (ret) {
841 		mlx5dr_err(dmn, "Failed modify QP rst2init\n");
842 		return ret;
843 	}
844 
845 	/* RTR */
846 	rtr_attr.mtu		= mtu;
847 	rtr_attr.qp_num		= dr_qp->qpn;
848 	rtr_attr.min_rnr_timer	= 12;
849 	rtr_attr.port_num	= port;
850 	rtr_attr.udp_src_port	= dmn->info.caps.roce_min_src_udp;
851 
852 	/* If QP creation with force loopback is allowed, then there
853 	 * is no need for GID index when creating the QP.
854 	 * Otherwise we query GID attributes and use GID index.
855 	 */
856 	rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
857 	if (!rtr_attr.fl) {
858 		ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
859 					   &rtr_attr.dgid_attr);
860 		if (ret)
861 			return ret;
862 
863 		rtr_attr.sgid_index = gid_index;
864 	}
865 
866 	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
867 	if (ret) {
868 		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
869 		return ret;
870 	}
871 
872 	/* RTS */
873 	rts_attr.timeout	= 14;
874 	rts_attr.retry_cnt	= 7;
875 	rts_attr.rnr_retry	= 7;
876 
877 	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
878 	if (ret) {
879 		mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
880 		return ret;
881 	}
882 
883 	return 0;
884 }
885 
static void dr_cq_complete(struct mlx5_core_cq *mcq,
887 			   struct mlx5_eqe *eqe)
888 {
889 	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
890 }
891 
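/* Create a CQ that is used in polling mode only: it is never armed, and
 * its CQEs are initialized as invalid and HW-owned.
 */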
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
893 				      struct mlx5_uars_page *uar,
894 				      size_t ncqe)
895 {
896 	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
897 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
898 	struct mlx5_wq_param wqp;
899 	struct mlx5_cqe64 *cqe;
900 	struct mlx5dr_cq *cq;
901 	int inlen, err, eqn;
902 	void *cqc, *in;
903 	__be64 *pas;
904 	int vector;
905 	u32 i;
906 
907 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
908 	if (!cq)
909 		return NULL;
910 
911 	ncqe = roundup_pow_of_two(ncqe);
912 	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
913 
914 	wqp.buf_numa_node = mdev->priv.numa_node;
915 	wqp.db_numa_node = mdev->priv.numa_node;
916 
917 	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
918 			       &cq->wq_ctrl);
919 	if (err)
920 		goto out;
921 
922 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
923 		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
924 		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
925 	}
926 
927 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
928 		sizeof(u64) * cq->wq_ctrl.buf.npages;
929 	in = kvzalloc(inlen, GFP_KERNEL);
930 	if (!in)
931 		goto err_cqwq;
932 
933 	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
934 	err = mlx5_vector2eqn(mdev, vector, &eqn);
935 	if (err) {
936 		kvfree(in);
937 		goto err_cqwq;
938 	}
939 
940 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
941 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
942 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
943 	MLX5_SET(cqc, cqc, uar_page, uar->index);
944 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
945 		 MLX5_ADAPTER_PAGE_SHIFT);
946 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
947 
948 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
949 	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
950 
951 	cq->mcq.comp  = dr_cq_complete;
952 
953 	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
954 	kvfree(in);
955 
956 	if (err)
957 		goto err_cqwq;
958 
959 	cq->mcq.cqe_sz = 64;
960 	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
961 	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
962 	*cq->mcq.set_ci_db = 0;
963 
	/* Set a non-zero value in order to prevent the HW from running
	 * db-recovery on a CQ that is used in polling mode.
966 	 */
967 	*cq->mcq.arm_db = cpu_to_be32(2 << 28);
968 
969 	cq->mcq.vector = 0;
970 	cq->mcq.uar = uar;
971 	cq->mdev = mdev;
972 
973 	return cq;
974 
975 err_cqwq:
976 	mlx5_wq_destroy(&cq->wq_ctrl);
977 out:
978 	kfree(cq);
979 	return NULL;
980 }
981 
static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
983 {
984 	mlx5_core_destroy_cq(mdev, &cq->mcq);
985 	mlx5_wq_destroy(&cq->wq_ctrl);
986 	kfree(cq);
987 }
988 
static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
990 {
991 	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
992 	void *mkc;
993 
994 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
995 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
996 	MLX5_SET(mkc, mkc, a, 1);
997 	MLX5_SET(mkc, mkc, rw, 1);
998 	MLX5_SET(mkc, mkc, rr, 1);
999 	MLX5_SET(mkc, mkc, lw, 1);
1000 	MLX5_SET(mkc, mkc, lr, 1);
1001 
1002 	MLX5_SET(mkc, mkc, pd, pdn);
1003 	MLX5_SET(mkc, mkc, length64, 1);
1004 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1005 
1006 	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
1007 }
1008 
static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
1010 				   u32 pdn, void *buf, size_t size)
1011 {
1012 	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1013 	struct device *dma_device;
1014 	dma_addr_t dma_addr;
1015 	int err;
1016 
1017 	if (!mr)
1018 		return NULL;
1019 
1020 	dma_device = mlx5_core_dma_dev(mdev);
1021 	dma_addr = dma_map_single(dma_device, buf, size,
1022 				  DMA_BIDIRECTIONAL);
1023 	err = dma_mapping_error(dma_device, dma_addr);
1024 	if (err) {
1025 		mlx5_core_warn(mdev, "Can't dma buf\n");
1026 		kfree(mr);
1027 		return NULL;
1028 	}
1029 
1030 	err = dr_create_mkey(mdev, pdn, &mr->mkey);
1031 	if (err) {
1032 		mlx5_core_warn(mdev, "Can't create mkey\n");
1033 		dma_unmap_single(dma_device, dma_addr, size,
1034 				 DMA_BIDIRECTIONAL);
1035 		kfree(mr);
1036 		return NULL;
1037 	}
1038 
1039 	mr->dma_addr = dma_addr;
1040 	mr->size = size;
1041 	mr->addr = buf;
1042 
1043 	return mr;
1044 }
1045 
static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
1047 {
1048 	mlx5_core_destroy_mkey(mdev, mr->mkey);
1049 	dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
1050 			 DMA_BIDIRECTIONAL);
1051 	kfree(mr);
1052 }
1053 
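/* Set up the domain's send ring: a polling-mode CQ, a loopback RC QP moved
 * to RTS, a bounce buffer large enough for signal_th maximum-sized posts,
 * and the MRs used for writing data and for read synchronization.
 */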
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
1055 {
1056 	struct dr_qp_init_attr init_attr = {};
1057 	int cq_size;
1058 	int size;
1059 	int ret;
1060 
1061 	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
1062 	if (!dmn->send_ring)
1063 		return -ENOMEM;
1064 
1065 	cq_size = QUEUE_SIZE + 1;
1066 	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
1067 	if (!dmn->send_ring->cq) {
1068 		mlx5dr_err(dmn, "Failed creating CQ\n");
1069 		ret = -ENOMEM;
1070 		goto free_send_ring;
1071 	}
1072 
1073 	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
1074 	init_attr.pdn = dmn->pdn;
1075 	init_attr.uar = dmn->uar;
1076 	init_attr.max_send_wr = QUEUE_SIZE;
1077 
1078 	/* Isolated VL is applicable only if force loopback is supported */
1079 	if (dr_send_allow_fl(&dmn->info.caps))
1080 		init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
1081 
1082 	spin_lock_init(&dmn->send_ring->lock);
1083 
1084 	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
1085 	if (!dmn->send_ring->qp)  {
1086 		mlx5dr_err(dmn, "Failed creating QP\n");
1087 		ret = -ENOMEM;
1088 		goto clean_cq;
1089 	}
1090 
1091 	dmn->send_ring->cq->qp = dmn->send_ring->qp;
1092 
1093 	dmn->info.max_send_wr = QUEUE_SIZE;
1094 	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
1095 					DR_STE_SIZE);
1096 
1097 	dmn->send_ring->signal_th = dmn->info.max_send_wr /
1098 		SIGNAL_PER_DIV_QUEUE;
1099 
1100 	/* Prepare qp to be used */
1101 	ret = dr_prepare_qp_to_rts(dmn);
1102 	if (ret)
1103 		goto clean_qp;
1104 
1105 	dmn->send_ring->max_post_send_size =
1106 		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
1107 						   DR_ICM_TYPE_STE);
1108 
	/* Allocate the max size as a buffer for writing */
1110 	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
1111 	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
1112 	if (!dmn->send_ring->buf) {
1113 		ret = -ENOMEM;
1114 		goto clean_qp;
1115 	}
1116 
1117 	dmn->send_ring->buf_size = size;
1118 
1119 	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
1120 				       dmn->pdn, dmn->send_ring->buf, size);
1121 	if (!dmn->send_ring->mr) {
1122 		ret = -ENOMEM;
1123 		goto free_mem;
1124 	}
1125 
1126 	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
1127 					    dmn->pdn, dmn->send_ring->sync_buff,
1128 					    MIN_READ_SYNC);
1129 	if (!dmn->send_ring->sync_mr) {
1130 		ret = -ENOMEM;
1131 		goto clean_mr;
1132 	}
1133 
1134 	return 0;
1135 
1136 clean_mr:
1137 	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
1138 free_mem:
1139 	kfree(dmn->send_ring->buf);
1140 clean_qp:
1141 	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
1142 clean_cq:
1143 	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
1144 free_send_ring:
1145 	kfree(dmn->send_ring);
1146 
1147 	return ret;
1148 }
1149 
void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
1151 			   struct mlx5dr_send_ring *send_ring)
1152 {
1153 	dr_destroy_qp(dmn->mdev, send_ring->qp);
1154 	dr_destroy_cq(dmn->mdev, send_ring->cq);
1155 	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
1156 	dr_dereg_mr(dmn->mdev, send_ring->mr);
1157 	kfree(send_ring->buf);
1158 	kfree(send_ring);
1159 }
1160 
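/* Drain the send ring by posting enough dummy write/read requests to cross
 * the drain threshold, then polling the CQ until all pending work has
 * completed.
 */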
int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
1162 {
1163 	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
1164 	struct postsend_info send_info = {};
1165 	u8 data[DR_STE_SIZE];
1166 	int num_of_sends_req;
1167 	int ret;
1168 	int i;
1169 
	/* Sending this number of requests makes sure the queue gets drained */
1171 	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
1172 
1173 	/* Send fake requests forcing the last to be signaled */
1174 	send_info.write.addr = (uintptr_t)data;
1175 	send_info.write.length = DR_STE_SIZE;
1176 	send_info.write.lkey = 0;
1177 	/* Using the sync_mr in order to write/read */
1178 	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
1179 	send_info.rkey = send_ring->sync_mr->mkey;
1180 
1181 	for (i = 0; i < num_of_sends_req; i++) {
1182 		ret = dr_postsend_icm_data(dmn, &send_info);
1183 		if (ret)
1184 			return ret;
1185 	}
1186 
1187 	spin_lock(&send_ring->lock);
1188 	ret = dr_handle_pending_wc(dmn, send_ring);
1189 	spin_unlock(&send_ring->lock);
1190 
1191 	return ret;
1192 }
1193