1 /*
2 * Copyright (c) 2015 Travis Geiselbrecht
3 *
4 * Use of this source code is governed by a MIT-style
5 * license that can be found in the LICENSE file or at
6 * https://opensource.org/licenses/MIT
7 */
8 #include <dev/virtio/net.h>
9
10 #include <stdlib.h>
11 #include <inttypes.h>
12 #include <lk/debug.h>
13 #include <assert.h>
14 #include <lk/trace.h>
15 #include <lk/compiler.h>
16 #include <lk/list.h>
17 #include <string.h>
18 #include <lk/err.h>
19 #include <kernel/thread.h>
20 #include <kernel/event.h>
21 #include <kernel/spinlock.h>
22 #include <lib/pktbuf.h>
23 #include <lib/minip.h>
24
25 #define LOCAL_TRACE 0
26
/* virtio-net device-specific config space layout.
 * Mirrors the device registers at dev->config_ptr; fields beyond `mac` are
 * only valid if the corresponding feature bit was negotiated
 * (e.g. `status` requires VIRTIO_NET_F_STATUS, `mtu` requires VIRTIO_NET_F_MTU).
 */
struct virtio_net_config {
    uint8_t mac[6];                              /* station MAC address (VIRTIO_NET_F_MAC) */
    uint16_t status;                             /* VIRTIO_NET_S_* link bits (VIRTIO_NET_F_STATUS) */
    uint16_t max_virtqueue_pairs;                /* (VIRTIO_NET_F_MQ / RSS) */
    uint16_t mtu;                                /* (VIRTIO_NET_F_MTU) */
    uint32_t speed;                              /* link speed in Mbps (VIRTIO_NET_F_SPEED_DUPLEX) */
    uint8_t duplex;                              /* (VIRTIO_NET_F_SPEED_DUPLEX) */
    uint8_t rss_max_key_size;                    /* (VIRTIO_NET_F_RSS / HASH_REPORT) */
    uint16_t rss_max_indirection_table_length;   /* (VIRTIO_NET_F_RSS) */
    uint32_t supported_hash_types;               /* (VIRTIO_NET_F_RSS / HASH_REPORT) */
    uint32_t supported_tunnel_types;             /* (VIRTIO_NET_F_HASH_TUNNEL) */
};
/* layout must match the device exactly; no implicit padding allowed */
STATIC_ASSERT(sizeof(struct virtio_net_config) == 28);
40
/* Per-packet header that prefixes every buffer on both the tx and rx rings.
 * Note: this driver transfers sizeof(struct virtio_net_hdr) - 2 bytes on the
 * wire, i.e. the legacy layout without the trailing `num_buffers` field
 * (used only when VIRTIO_NET_F_MRG_RXBUF is negotiated, which it is not here).
 */
struct virtio_net_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1
#define VIRTIO_NET_HDR_F_DATA_VALID 2
#define VIRTIO_NET_HDR_F_RSC_INFO 4
    uint8_t flags;       /* VIRTIO_NET_HDR_F_* checksum/RSC flags */
#define VIRTIO_NET_HDR_GSO_NONE 0
#define VIRTIO_NET_HDR_GSO_TCPV4 1
#define VIRTIO_NET_HDR_GSO_UDP 3
#define VIRTIO_NET_HDR_GSO_TCPV6 4
#define VIRTIO_NET_HDR_GSO_UDP_L4 5
#define VIRTIO_NET_HDR_GSO_ECN 0x80
    uint8_t gso_type;    /* VIRTIO_NET_HDR_GSO_* segmentation type */
    uint16_t hdr_len;    /* length of headers to copy when segmenting */
    uint16_t gso_size;   /* maximum segment size for GSO */
    uint16_t csum_start; /* offset where checksumming starts */
    uint16_t csum_offset;/* offset (from csum_start) to store the checksum */
    uint16_t num_buffers; // unused in tx

    // Only if VIRTIO_NET_HASH_REPORT negotiated
    //uint32_t hash_value;
    //uint16_t hash_report;
    //uint16_t padding_reserved;
};
STATIC_ASSERT(sizeof(struct virtio_net_hdr) == 12);
65
/* virtio-net feature bits, as advertised by the host in the device feature
 * words (low 32 bits in word 0, high bits in word 1). */
#define VIRTIO_NET_F_CSUM (1<<0)
#define VIRTIO_NET_F_GUEST_CSUM (1<<1)
#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS (1<<2)
#define VIRTIO_NET_F_MTU (1<<3)
#define VIRTIO_NET_F_MAC (1<<5)
#define VIRTIO_NET_F_GSO (1<<6) // removed in v1.3
#define VIRTIO_NET_F_GUEST_TSO4 (1<<7)
#define VIRTIO_NET_F_GUEST_TSO6 (1<<8)
#define VIRTIO_NET_F_GUEST_ECN (1<<9)
#define VIRTIO_NET_F_GUEST_UFO (1<<10)
#define VIRTIO_NET_F_HOST_TSO4 (1<<11)
#define VIRTIO_NET_F_HOST_TSO6 (1<<12)
#define VIRTIO_NET_F_HOST_ECN (1<<13)
#define VIRTIO_NET_F_HOST_UFO (1<<14)
#define VIRTIO_NET_F_MRG_RXBUF (1<<15)
#define VIRTIO_NET_F_STATUS (1<<16)
#define VIRTIO_NET_F_CTRL_VQ (1<<17)
#define VIRTIO_NET_F_CTRL_RX (1<<18)
#define VIRTIO_NET_F_CTRL_VLAN (1<<19)
#define VIRTIO_NET_F_GUEST_ANNOUNCE (1<<21)
#define VIRTIO_NET_F_MQ (1<<22)
#define VIRTIO_NET_F_CTRL_MAC_ADDR (1<<23)
/* bits >= 32 live in the second feature word; these need the ULL suffix */
#define VIRTIO_NET_F_HASH_TUNNEL (1ULL<<51)
#define VIRTIO_NET_F_VQ_NOTF_COAL (1ULL<<52)
#define VIRTIO_NET_F_NOTF_COAL (1ULL<<53)
#define VIRTIO_NET_F_GUEST_USO4 (1ULL<<54)
#define VIRTIO_NET_F_GUEST_USO6 (1ULL<<55)
#define VIRTIO_NET_F_HOST_USO (1ULL<<56)
#define VIRTIO_NET_F_HASH_REPORT (1ULL<<57)
#define VIRTIO_NET_F_GUEST_HDRLEN (1ULL<<59)
#define VIRTIO_NET_F_RSS (1ULL<<60)
#define VIRTIO_NET_F_RSC_EXT (1ULL<<61)
#define VIRTIO_NET_F_STANDBY (1ULL<<62)
#define VIRTIO_NET_F_SPEED_DUPLEX (1ULL<<63)

/* bits in virtio_net_config.status */
#define VIRTIO_NET_S_LINK_UP (1<<0)
#define VIRTIO_NET_S_ANNOUNCE (1<<1)

/* number of descriptors in each vring (must be a power of two) */
#define TX_RING_SIZE 16
#define RX_RING_SIZE 16

/* virtqueue indices: receiveq is 0, transmitq is 1 */
#define RING_RX 0
#define RING_TX 1

/* largest ethernet frame we accept: 1500-byte MTU + 14-byte ethernet header */
#define VIRTIO_NET_MSS 1514
111
/* per-device driver state, hung off virtio_device.priv */
struct virtio_net_dev {
    struct virtio_device *dev;   /* underlying virtio bus device */
    bool started;                /* set once virtio_net_start() has run */

    struct virtio_net_config *config;   /* points into device config space */

    spin_lock_t lock;            /* protects rings, pending arrays, completed_rx_queue */
    event_t rx_event;            /* signaled from irq context when rx packets complete */

    /* list of active tx/rx packets to be freed at irq time */
    pktbuf_t *pending_tx_packet[TX_RING_SIZE];   /* indexed by tx descriptor index */
    pktbuf_t *pending_rx_packet[RX_RING_SIZE];   /* indexed by rx descriptor index */

    uint tx_pending_count;              /* tx descriptors currently in flight (2 per packet) */
    struct list_node completed_rx_queue;   /* filled rx pktbufs awaiting the worker thread */
};
128
/* forward declarations */
static enum handler_return virtio_net_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
static int virtio_net_rx_worker(void *arg);
static status_t virtio_net_queue_rx(struct virtio_net_dev *ndev, pktbuf_t *p);

/* single global instance; limits the driver to one NIC */
// XXX remove need for this
static struct virtio_net_dev *the_ndev;
135
dump_feature_bits(uint64_t feature)136 static void dump_feature_bits(uint64_t feature) {
137 printf("virtio-net host features (%#" PRIx64 "):", feature);
138 if (feature & VIRTIO_NET_F_CSUM) printf(" CSUM");
139 if (feature & VIRTIO_NET_F_GUEST_CSUM) printf(" GUEST_CSUM");
140 if (feature & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) printf(" CTRL_GUEST_OFFLOADS");
141 if (feature & VIRTIO_NET_F_MTU) printf(" MTU");
142 if (feature & VIRTIO_NET_F_MAC) printf(" MAC");
143 if (feature & VIRTIO_NET_F_GSO) printf(" GSO");
144 if (feature & VIRTIO_NET_F_GUEST_TSO4) printf(" GUEST_TSO4");
145 if (feature & VIRTIO_NET_F_GUEST_TSO6) printf(" GUEST_TSO6");
146 if (feature & VIRTIO_NET_F_GUEST_ECN) printf(" GUEST_ECN");
147 if (feature & VIRTIO_NET_F_GUEST_UFO) printf(" GUEST_UFO");
148 if (feature & VIRTIO_NET_F_HOST_TSO4) printf(" HOST_TSO4");
149 if (feature & VIRTIO_NET_F_HOST_TSO6) printf(" HOST_TSO6");
150 if (feature & VIRTIO_NET_F_HOST_ECN) printf(" HOST_ECN");
151 if (feature & VIRTIO_NET_F_HOST_UFO) printf(" HOST_UFO");
152 if (feature & VIRTIO_NET_F_MRG_RXBUF) printf(" MRG_RXBUF");
153 if (feature & VIRTIO_NET_F_STATUS) printf(" STATUS");
154 if (feature & VIRTIO_NET_F_CTRL_VQ) printf(" CTRL_VQ");
155 if (feature & VIRTIO_NET_F_CTRL_RX) printf(" CTRL_RX");
156 if (feature & VIRTIO_NET_F_CTRL_VLAN) printf(" CTRL_VLAN");
157 if (feature & VIRTIO_NET_F_GUEST_ANNOUNCE) printf(" GUEST_ANNOUNCE");
158 if (feature & VIRTIO_NET_F_MQ) printf(" MQ");
159 if (feature & VIRTIO_NET_F_CTRL_MAC_ADDR) printf(" CTRL_MAC_ADDR");
160 if (feature & VIRTIO_NET_F_HASH_TUNNEL) printf(" HASH_TUNNEL");
161 if (feature & VIRTIO_NET_F_VQ_NOTF_COAL) printf(" VQ_NOTF_COAL");
162 if (feature & VIRTIO_NET_F_NOTF_COAL) printf(" NOTF_COAL");
163 if (feature & VIRTIO_NET_F_GUEST_USO4) printf(" GUEST_USO4");
164 if (feature & VIRTIO_NET_F_GUEST_USO6) printf(" GUEST_USO6");
165 if (feature & VIRTIO_NET_F_HOST_USO) printf(" HOST_USO");
166 if (feature & VIRTIO_NET_F_HASH_REPORT) printf(" HASH_REPORT");
167 if (feature & VIRTIO_NET_F_GUEST_HDRLEN) printf(" GUEST_HDRLEN");
168 if (feature & VIRTIO_NET_F_RSS) printf(" RSS");
169 if (feature & VIRTIO_NET_F_RSC_EXT) printf(" RSC_EXT");
170 if (feature & VIRTIO_NET_F_STANDBY) printf(" STANDBY");
171 if (feature & VIRTIO_NET_F_SPEED_DUPLEX) printf(" SPEED_DUPLEX");
172 printf("\n");
173 }
174
virtio_net_init(struct virtio_device * dev)175 status_t virtio_net_init(struct virtio_device *dev) {
176 LTRACEF("dev %p\n", dev);
177
178 /* allocate a new net device */
179 struct virtio_net_dev *ndev = calloc(1, sizeof(struct virtio_net_dev));
180 if (!ndev)
181 return ERR_NO_MEMORY;
182
183 ndev->dev = dev;
184 dev->priv = ndev;
185 ndev->started = false;
186
187 ndev->lock = SPIN_LOCK_INITIAL_VALUE;
188 event_init(&ndev->rx_event, false, EVENT_FLAG_AUTOUNSIGNAL);
189 list_initialize(&ndev->completed_rx_queue);
190
191 ndev->config = (struct virtio_net_config *)dev->config_ptr;
192
193 /* ack and set the driver status bit */
194 virtio_status_acknowledge_driver(dev);
195
196 // XXX check features bits and ack/nak them
197 uint64_t host_features = virtio_read_host_feature_word(dev, 0) | (uint64_t)virtio_read_host_feature_word(dev, 1) << 32;
198 dump_feature_bits(host_features);
199
200 /* set our irq handler */
201 dev->irq_driver_callback = &virtio_net_irq_driver_callback;
202
203 /* set DRIVER_OK */
204 virtio_status_driver_ok(dev);
205
206 /* allocate a pair of virtio rings */
207 virtio_alloc_ring(dev, RING_RX, RX_RING_SIZE); // rx
208 virtio_alloc_ring(dev, RING_TX, TX_RING_SIZE); // tx
209
210 the_ndev = ndev;
211
212 return NO_ERROR;
213 }
214
virtio_net_start(void)215 status_t virtio_net_start(void) {
216 if (the_ndev->started)
217 return ERR_ALREADY_STARTED;
218
219 the_ndev->started = true;
220
221 /* start the rx worker thread */
222 thread_resume(thread_create("virtio_net_rx", &virtio_net_rx_worker, (void *)the_ndev, HIGH_PRIORITY, DEFAULT_STACK_SIZE));
223
224 /* queue up a bunch of rxes */
225 for (uint i = 0; i < RX_RING_SIZE - 1; i++) {
226 pktbuf_t *p = pktbuf_alloc();
227 if (p) {
228 virtio_net_queue_rx(the_ndev, p);
229 }
230 }
231
232 return NO_ERROR;
233 }
234
virtio_net_queue_tx_pktbuf(struct virtio_net_dev * ndev,pktbuf_t * p2)235 static status_t virtio_net_queue_tx_pktbuf(struct virtio_net_dev *ndev, pktbuf_t *p2) {
236 struct virtio_device *vdev = ndev->dev;
237
238 uint16_t i;
239 pktbuf_t *p;
240
241 DEBUG_ASSERT(ndev);
242
243 p = pktbuf_alloc();
244 if (!p)
245 return ERR_NO_MEMORY;
246
247 /* point our header to the base of the first pktbuf */
248 struct virtio_net_hdr *hdr = pktbuf_append(p, sizeof(struct virtio_net_hdr) - 2);
249 memset(hdr, 0, p->dlen);
250
251 spin_lock_saved_state_t state;
252 spin_lock_irqsave(&ndev->lock, state);
253
254 /* only queue if we have enough tx descriptors */
255 if (ndev->tx_pending_count + 2 > TX_RING_SIZE)
256 goto nodesc;
257
258 /* allocate a chain of descriptors for our transfer */
259 struct vring_desc *desc = virtio_alloc_desc_chain(vdev, RING_TX, 2, &i);
260 if (!desc) {
261 spin_unlock_irqrestore(&ndev->lock, state);
262
263 nodesc:
264 TRACEF("out of virtio tx descriptors, tx_pending_count %u\n", ndev->tx_pending_count);
265 pktbuf_free(p, true);
266
267 return ERR_NO_MEMORY;
268 }
269
270 ndev->tx_pending_count += 2;
271
272 /* save a pointer to our pktbufs for the irq handler to free */
273 LTRACEF("saving pointer to pkt in index %u and %u\n", i, desc->next);
274 DEBUG_ASSERT(ndev->pending_tx_packet[i] == NULL);
275 DEBUG_ASSERT(ndev->pending_tx_packet[desc->next] == NULL);
276 ndev->pending_tx_packet[i] = p;
277 ndev->pending_tx_packet[desc->next] = p2;
278
279 /* set up the descriptor pointing to the header */
280 desc->addr = pktbuf_data_phys(p);
281 desc->len = p->dlen;
282 desc->flags |= VRING_DESC_F_NEXT;
283
284 /* set up the descriptor pointing to the buffer */
285 desc = virtio_desc_index_to_desc(vdev, RING_TX, desc->next);
286 desc->addr = pktbuf_data_phys(p2);
287 desc->len = p2->dlen;
288 desc->flags = 0;
289
290 /* submit the transfer */
291 virtio_submit_chain(vdev, RING_TX, i);
292
293 /* kick it off */
294 virtio_kick(vdev, RING_TX);
295
296 spin_unlock_irqrestore(&ndev->lock, state);
297
298 return NO_ERROR;
299 }
300
301 /* variant of the above function that copies the buffer into a pktbuf before sending */
virtio_net_queue_tx(struct virtio_net_dev * ndev,const void * buf,size_t len)302 static status_t virtio_net_queue_tx(struct virtio_net_dev *ndev, const void *buf, size_t len) {
303 DEBUG_ASSERT(ndev);
304 DEBUG_ASSERT(buf);
305
306 pktbuf_t *p = pktbuf_alloc();
307 if (!p)
308 return ERR_NO_MEMORY;
309
310 /* copy the outgoing packet into the pktbuf */
311 p->data = p->buffer;
312 p->dlen = len;
313 memcpy(p->data, buf, len);
314
315 /* call through to the variant of the function that takes a pre-populated pktbuf */
316 status_t err = virtio_net_queue_tx_pktbuf(ndev, p);
317 if (err < 0) {
318 pktbuf_free(p, true);
319 }
320
321 return err;
322 }
323
/* Post an empty pktbuf to the rx ring for the device to fill.
 * The buffer covers the legacy virtio-net header (sizeof - 2, no
 * num_buffers field) plus up to VIRTIO_NET_MSS bytes of frame data.
 * Ownership of p passes to the driver; the irq callback moves it to
 * completed_rx_queue when the device fills it.
 */
static status_t virtio_net_queue_rx(struct virtio_net_dev *ndev, pktbuf_t *p) {
    struct virtio_device *vdev = ndev->dev;

    DEBUG_ASSERT(ndev);
    DEBUG_ASSERT(p);

    /* point our header to the base of the pktbuf */
    p->data = p->buffer;
    struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)p->data;
    memset(hdr, 0, sizeof(struct virtio_net_hdr) - 2);

    p->dlen = sizeof(struct virtio_net_hdr) - 2 + VIRTIO_NET_MSS;

    spin_lock_saved_state_t state;
    spin_lock_irqsave(&ndev->lock, state);

    /* allocate a chain of descriptors for our transfer */
    uint16_t i;
    struct vring_desc *desc = virtio_alloc_desc_chain(vdev, RING_RX, 1, &i);
    DEBUG_ASSERT(desc); /* shouldn't be possible not to have a descriptor ready */

    /* save a pointer to our pktbufs for the irq handler to use */
    DEBUG_ASSERT(ndev->pending_rx_packet[i] == NULL);
    ndev->pending_rx_packet[i] = p;

    /* set up the descriptor pointing to the header;
     * WRITE flag marks the buffer as device-writable */
    desc->addr = pktbuf_data_phys(p);
    desc->len = p->dlen;
    desc->flags = VRING_DESC_F_WRITE;

    /* submit the transfer */
    virtio_submit_chain(vdev, RING_RX, i);

    /* kick it off */
    virtio_kick(vdev, RING_RX);

    spin_unlock_irqrestore(&ndev->lock, state);

    return NO_ERROR;
}
364
/* IRQ-time completion handler, called by the virtio bus layer once per
 * used-ring element. Walks the completed descriptor chain, recycling the
 * descriptors and the pktbufs attached to them:
 *  - rx: trims the pktbuf to the received length and queues it for the
 *    rx worker thread (rx_event is signaled after the lock is dropped);
 *  - tx: frees both pktbufs of the 2-descriptor chain (header + payload).
 * Runs with interrupts disabled; uses spin_lock() (not irqsave) for that
 * reason. Returns INT_RESCHEDULE so a woken worker runs promptly.
 */
static enum handler_return virtio_net_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e) {
    struct virtio_net_dev *ndev = (struct virtio_net_dev *)dev->priv;

    LTRACEF("dev %p, ring %u, e %p, id %u, len %u\n", dev, ring, e, e->id, e->len);

    spin_lock(&ndev->lock);

    /* parse our descriptor chain, add back to the free queue */
    uint16_t i = e->id;
    for (;;) {
        int next;
        struct vring_desc *desc = virtio_desc_index_to_desc(dev, ring, i);

        if (desc->flags & VRING_DESC_F_NEXT) {
            next = desc->next;
        } else {
            /* end of chain */
            next = -1;
        }

        virtio_free_desc(dev, ring, i);

        if (ring == RING_RX) {
            /* put the freed rx buffer in a queue */
            pktbuf_t *p = ndev->pending_rx_packet[i];
            ndev->pending_rx_packet[i] = NULL;

            DEBUG_ASSERT(p);
            LTRACEF("rx pktbuf %p filled\n", p);

            /* trim the pktbuf according to the written length in the used
             * element descriptor; a length beyond what we posted indicates
             * a misbehaving device, so drop the payload */
            if (e->len > (sizeof(struct virtio_net_hdr) - 2 + VIRTIO_NET_MSS)) {
                TRACEF("bad used len on RX %u\n", e->len);
                p->dlen = 0;
            } else {
                p->dlen = e->len;
            }

            list_add_tail(&ndev->completed_rx_queue, &p->list);
        } else { // ring == RING_TX
            /* free the pktbuf associated with the tx packet we just consumed */
            pktbuf_t *p = ndev->pending_tx_packet[i];
            ndev->pending_tx_packet[i] = NULL;
            ndev->tx_pending_count--;

            DEBUG_ASSERT(p);
            LTRACEF("freeing pktbuf %p\n", p);

            pktbuf_free(p, false);
        }

        if (next < 0)
            break;
        i = next;
    }

    spin_unlock(&ndev->lock);

    /* if rx ring, signal our event (was a magic `ring == 0`) */
    if (ring == RING_RX) {
        event_signal(&ndev->rx_event, false);
    }

    return INT_RESCHEDULE;
}
430
virtio_net_rx_worker(void * arg)431 static int virtio_net_rx_worker(void *arg) {
432 struct virtio_net_dev *ndev = (struct virtio_net_dev *)arg;
433
434 for (;;) {
435 event_wait(&ndev->rx_event);
436
437 /* pull some packets from the received queue */
438 for (;;) {
439 spin_lock_saved_state_t state;
440 spin_lock_irqsave(&ndev->lock, state);
441
442 pktbuf_t *p = list_remove_head_type(&ndev->completed_rx_queue, pktbuf_t, list);
443
444 spin_unlock_irqrestore(&ndev->lock, state);
445
446 if (!p)
447 break; /* nothing left in the queue, go back to waiting */
448
449 LTRACEF("got packet len %u\n", p->dlen);
450
451 /* process our packet */
452 struct virtio_net_hdr *hdr = pktbuf_consume(p, sizeof(struct virtio_net_hdr) - 2);
453 if (hdr) {
454 /* call up into the stack */
455 minip_rx_driver_callback(p);
456 }
457
458 /* requeue the pktbuf in the rx queue */
459 virtio_net_queue_rx(ndev, p);
460 }
461 }
462 return 0;
463 }
464
virtio_net_found(void)465 int virtio_net_found(void) {
466 return the_ndev ? 1 : 0;
467 }
468
virtio_net_get_mac_addr(uint8_t mac_addr[6])469 status_t virtio_net_get_mac_addr(uint8_t mac_addr[6]) {
470 if (!the_ndev)
471 return ERR_NOT_FOUND;
472
473 memcpy(mac_addr, the_ndev->config->mac, 6);
474
475 return NO_ERROR;
476 }
477
/* minip tx hook: transmit a single-fragment pktbuf.
 * On success the driver owns p (freed at tx-complete irq time); on failure
 * p is freed here before returning the error. `arg` is unused.
 * Multi-fragment packets (no PKTBUF_FLAG_EOF) are not supported and panic.
 */
status_t virtio_net_send_minip_pkt(void *arg, pktbuf_t *p) {
    LTRACEF("p %p, dlen %u, flags 0x%x\n", p, p->dlen, p->flags);

    DEBUG_ASSERT(p && p->dlen);

    if ((p->flags & PKTBUF_FLAG_EOF) == 0) {
        /* can't handle multi part packets yet */
        PANIC_UNIMPLEMENTED;

        /* unreachable; kept for builds where PANIC_UNIMPLEMENTED returns */
        return ERR_NOT_IMPLEMENTED;
    }

    /* hand the pktbuf off to the nic, it owns the pktbuf from now on out unless it fails */
    status_t err = virtio_net_queue_tx_pktbuf(the_ndev, p);
    if (err < 0) {
        pktbuf_free(p, true);
    }

    return err;
}
498
499