// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for networking device.
 *
 * Copyright (c) 2020, Red Hat Inc. All rights reserved.
 * Author: Jason Wang <jasowang@redhat.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <net/netlink.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/vdpa.h>

#include "vdpa_sim.h"

#define DRV_VERSION "0.1"
#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
#define DRV_DESC "vDPA Device Simulator for networking device"
#define DRV_LICENSE "GPL v2"

#define VDPASIM_NET_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_NET_F_MAC) | \
				 (1ULL << VIRTIO_NET_F_STATUS) | \
				 (1ULL << VIRTIO_NET_F_MTU) | \
				 (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
				 (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR))

/* 3 virtqueues, 2 address spaces, 2 virtqueue groups */
#define VDPASIM_NET_VQ_NUM	3
#define VDPASIM_NET_AS_NUM	2
#define VDPASIM_NET_GROUP_NUM	2

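/*
 * Per-queue statistics, updated by the datapath work and read locklessly
 * through the u64_stats_sync seqcount in vdpasim_net_get_stats().
 */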
struct vdpasim_dataq_stats {
	struct u64_stats_sync syncp;
	u64 pkts;
	u64 bytes;
	u64 drops;
	u64 errors;
	u64 overruns;
};

struct vdpasim_cq_stats {
	struct u64_stats_sync syncp;
	u64 requests;
	u64 successes;
	u64 errors;
};

struct vdpasim_net {
	struct vdpasim vdpasim;
	struct vdpasim_dataq_stats tx_stats;
	struct vdpasim_dataq_stats rx_stats;
	struct vdpasim_cq_stats cq_stats;
};

static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim)
{
	return container_of(vdpasim, struct vdpasim_net, vdpasim);
}

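/*
 * Complete the descriptor at @vq->head, reporting @len bytes written, and
 * notify the driver if the ring requires a notification.
 */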
static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len)
{
	/* Make sure data is written before advancing the index */
	smp_wmb();

	vringh_complete_iotlb(&vq->vring, vq->head, len);

	/* Make sure the used ring is visible before raising the interrupt. */
	smp_wmb();

	local_bh_disable();
	if (vringh_need_notify_iotlb(&vq->vring) > 0)
		vringh_notify(&vq->vring);
	local_bh_enable();
}

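/*
 * Decide whether the frame in vdpasim->buffer should be delivered to the
 * RX queue: accept broadcast and multicast destinations as well as frames
 * whose destination MAC matches the device config.
 */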
static bool receive_filter(struct vdpasim *vdpasim, size_t len)
{
	bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1);
	size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) :
				  sizeof(struct virtio_net_hdr);
	struct virtio_net_config *vio_config = vdpasim->config;

	if (len < ETH_ALEN + hdr_len)
		return false;

	if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) ||
	    is_multicast_ether_addr(vdpasim->buffer + hdr_len))
		return true;
	/* Compare the raw 6-byte MAC; strncmp() would stop at a zero byte. */
	if (ether_addr_equal(vdpasim->buffer + hdr_len, vio_config->mac))
		return true;

	return false;
}

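/*
 * Handle a VIRTIO_NET_CTRL_MAC class command from the control virtqueue.
 * Only VIRTIO_NET_CTRL_MAC_ADDR_SET is supported: it copies the new MAC
 * address into the device config space.
 */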
static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim,
						   u8 cmd)
{
	struct virtio_net_config *vio_config = vdpasim->config;
	struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	size_t read;

	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov,
					     vio_config->mac, ETH_ALEN);
		if (read == ETH_ALEN)
			status = VIRTIO_NET_OK;
		break;
	default:
		break;
	}

	return status;
}

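/*
 * Drain the control virtqueue: dispatch each request, push the ack status
 * back to the driver and account the result in the cvq statistics.
 */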
static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
{
	struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
	struct vdpasim_net *net = sim_to_net(vdpasim);
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct virtio_net_ctrl_hdr ctrl;
	size_t read, write;
	u64 requests = 0, errors = 0, successes = 0;
	int err;

	if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
		return;

	if (!cvq->ready)
		return;

	while (true) {
		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov,
					   &cvq->out_iov,
					   &cvq->head, GFP_ATOMIC);
		if (err <= 0)
			break;

		++requests;
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl,
					     sizeof(ctrl));
		if (read != sizeof(ctrl)) {
			++errors;
			break;
		}

		switch (ctrl.class) {
		case VIRTIO_NET_CTRL_MAC:
			status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd);
			break;
		default:
			break;
		}

		if (status == VIRTIO_NET_OK)
			++successes;
		else
			++errors;

		/* Make sure data is written before advancing the index */
		smp_wmb();

		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov,
					      &status, sizeof(status));
		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
		vringh_kiov_cleanup(&cvq->in_iov);
		vringh_kiov_cleanup(&cvq->out_iov);

		/* Make sure the used ring is visible before raising the interrupt. */
		smp_wmb();

		local_bh_disable();
		if (cvq->cb)
			cvq->cb(cvq->private);
		local_bh_enable();
	}

	u64_stats_update_begin(&net->cq_stats.syncp);
	net->cq_stats.requests += requests;
	net->cq_stats.errors += errors;
	net->cq_stats.successes += successes;
	u64_stats_update_end(&net->cq_stats.syncp);
}

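/*
 * Simulated datapath: loop each buffer posted on the TX queue back to the
 * RX queue if it passes the receive filter. The work reschedules itself
 * after a small batch of packets so a busy driver cannot monopolize the CPU.
 */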
static void vdpasim_net_work(struct work_struct *work)
{
	struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
	struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
	struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
	struct vdpasim_net *net = sim_to_net(vdpasim);
	ssize_t read, write;
	u64 tx_pkts = 0, rx_pkts = 0, tx_bytes = 0, rx_bytes = 0;
	u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0;
	int err;

	spin_lock(&vdpasim->lock);

	if (!vdpasim->running)
		goto out;

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	vdpasim_handle_cvq(vdpasim);

	if (!txq->ready || !rxq->ready)
		goto out;

	while (true) {
		err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
					   &txq->head, GFP_ATOMIC);
		if (err <= 0) {
			if (err)
				++tx_errors;
			break;
		}

		++tx_pkts;
		read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
					     vdpasim->buffer,
					     PAGE_SIZE);

		tx_bytes += read;

		if (!receive_filter(vdpasim, read)) {
			++rx_drops;
			vdpasim_net_complete(txq, 0);
			continue;
		}

		err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
					   &rxq->head, GFP_ATOMIC);
		if (err <= 0) {
			++rx_overruns;
			vdpasim_net_complete(txq, 0);
			break;
		}

		write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
					      vdpasim->buffer, read);
		if (write <= 0) {
			++rx_errors;
			break;
		}

		++rx_pkts;
		rx_bytes += write;

		vdpasim_net_complete(txq, 0);
		vdpasim_net_complete(rxq, write);

		if (tx_pkts > 4) {
			schedule_work(&vdpasim->work);
			goto out;
		}
	}

out:
	spin_unlock(&vdpasim->lock);

	u64_stats_update_begin(&net->tx_stats.syncp);
	net->tx_stats.pkts += tx_pkts;
	net->tx_stats.bytes += tx_bytes;
	net->tx_stats.errors += tx_errors;
	u64_stats_update_end(&net->tx_stats.syncp);

	u64_stats_update_begin(&net->rx_stats.syncp);
	net->rx_stats.pkts += rx_pkts;
	net->rx_stats.bytes += rx_bytes;
	net->rx_stats.drops += rx_drops;
	net->rx_stats.errors += rx_errors;
	net->rx_stats.overruns += rx_overruns;
	u64_stats_update_end(&net->rx_stats.syncp);
}

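/*
 * Report vendor statistics for the queue selected by @idx via netlink:
 * idx 0 is the RX queue, idx 1 the TX queue and idx 2 the control queue.
 */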
static int vdpasim_net_get_stats(struct vdpasim *vdpasim, u16 idx,
				 struct sk_buff *msg,
				 struct netlink_ext_ack *extack)
{
	struct vdpasim_net *net = sim_to_net(vdpasim);
	u64 rx_pkts, rx_bytes, rx_errors, rx_overruns, rx_drops;
	u64 tx_pkts, tx_bytes, tx_errors, tx_drops;
	u64 cq_requests, cq_successes, cq_errors;
	unsigned int start;
	int err = -EMSGSIZE;

	switch (idx) {
	case 0:
		do {
			start = u64_stats_fetch_begin(&net->rx_stats.syncp);
			rx_pkts = net->rx_stats.pkts;
			rx_bytes = net->rx_stats.bytes;
			rx_errors = net->rx_stats.errors;
			rx_overruns = net->rx_stats.overruns;
			rx_drops = net->rx_stats.drops;
		} while (u64_stats_fetch_retry(&net->rx_stats.syncp, start));

		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx packets"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_pkts, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx bytes"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_bytes, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx errors"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_errors, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx overruns"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_overruns, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "rx drops"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      rx_drops, VDPA_ATTR_PAD))
			break;
		err = 0;
		break;
	case 1:
		do {
			start = u64_stats_fetch_begin(&net->tx_stats.syncp);
			tx_pkts = net->tx_stats.pkts;
			tx_bytes = net->tx_stats.bytes;
			tx_errors = net->tx_stats.errors;
			tx_drops = net->tx_stats.drops;
		} while (u64_stats_fetch_retry(&net->tx_stats.syncp, start));

		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx packets"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_pkts, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx bytes"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_bytes, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx errors"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_errors, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "tx drops"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      tx_drops, VDPA_ATTR_PAD))
			break;
		err = 0;
		break;
	case 2:
		do {
			start = u64_stats_fetch_begin(&net->cq_stats.syncp);
			cq_requests = net->cq_stats.requests;
			cq_successes = net->cq_stats.successes;
			cq_errors = net->cq_stats.errors;
		} while (u64_stats_fetch_retry(&net->cq_stats.syncp, start));

		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "cvq requests"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      cq_requests, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "cvq successes"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      cq_successes, VDPA_ATTR_PAD))
			break;
		if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
				   "cvq errors"))
			break;
		if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
				      cq_errors, VDPA_ATTR_PAD))
			break;
		err = 0;
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_net_config *net_config = config;

	net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
}

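/*
 * Apply the MAC address and MTU provided through the management interface;
 * fall back to an MTU of 1500 if none was given.
 */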
static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
				     const struct vdpa_dev_set_config *config)
{
	struct virtio_net_config *vio_config = vdpasim->config;

	if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR))
		memcpy(vio_config->mac, config->net.mac, ETH_ALEN);
	if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
		vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu);
	else
		/* Set up the default MTU of 1500 */
		vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
}

static void vdpasim_net_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_net_mgmtdev = {
	.init_name = "vdpasim_net",
	.release = vdpasim_net_mgmtdev_release,
};

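/*
 * Create a new simulated net device: allocate the simulator instance, apply
 * the initial configuration, register it on the vDPA bus and initialize the
 * statistics.
 */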
static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim_net *net;
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_NET;
	dev_attr.supported_features = VDPASIM_NET_FEATURES;
	dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
	dev_attr.ngroups = VDPASIM_NET_GROUP_NUM;
	dev_attr.nas = VDPASIM_NET_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim_net);
	dev_attr.config_size = sizeof(struct virtio_net_config);
	dev_attr.get_config = vdpasim_net_get_config;
	dev_attr.work_fn = vdpasim_net_work;
	dev_attr.get_stats = vdpasim_net_get_stats;
	dev_attr.buffer_size = PAGE_SIZE;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	vdpasim_net_setup_config(simdev, config);

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
	if (ret)
		goto reg_err;

	net = sim_to_net(simdev);

	u64_stats_init(&net->tx_stats.syncp);
	u64_stats_init(&net->rx_stats.syncp);
	u64_stats_init(&net->cq_stats.syncp);

	return 0;

reg_err:
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = {
	.dev_add = vdpasim_net_dev_add,
	.dev_del = vdpasim_net_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_net_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_net_mgmtdev_ops,
	.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR |
			     1 << VDPA_ATTR_DEV_NET_CFG_MTU |
			     1 << VDPA_ATTR_DEV_FEATURES),
	.max_supported_vqs = VDPASIM_NET_VQ_NUM,
	.supported_features = VDPASIM_NET_FEATURES,
};

static int __init vdpasim_net_init(void)
{
	int ret;

	ret = device_register(&vdpasim_net_mgmtdev);
	if (ret) {
		put_device(&vdpasim_net_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;
	return 0;

parent_err:
	device_unregister(&vdpasim_net_mgmtdev);
	return ret;
}

static void __exit vdpasim_net_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_net_mgmtdev);
}

module_init(vdpasim_net_init);
module_exit(vdpasim_net_exit);

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);