/*
 * Copyright (c) 2015 Travis Geiselbrecht
 *
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/MIT
 */
8 #include <dev/virtio/net.h>
9 
10 #include <stdlib.h>
11 #include <inttypes.h>
12 #include <lk/debug.h>
13 #include <assert.h>
14 #include <lk/trace.h>
15 #include <lk/compiler.h>
16 #include <lk/list.h>
17 #include <string.h>
18 #include <lk/err.h>
19 #include <kernel/thread.h>
20 #include <kernel/event.h>
21 #include <kernel/spinlock.h>
22 #include <lib/pktbuf.h>
23 #include <lib/minip.h>
24 
25 #define LOCAL_TRACE 0
26 
/* Device configuration space layout (virtio spec, network device section).
 * Overlaid directly on dev->config_ptr in virtio_net_init; fields past
 * 'status' are only meaningful when the matching feature bits were
 * negotiated. This driver currently reads only 'mac'. */
struct virtio_net_config {
    uint8_t mac[6];        // valid iff VIRTIO_NET_F_MAC negotiated
    uint16_t status;       // VIRTIO_NET_S_* bits, iff VIRTIO_NET_F_STATUS
    uint16_t max_virtqueue_pairs;  // iff VIRTIO_NET_F_MQ / F_RSS
    uint16_t mtu;          // iff VIRTIO_NET_F_MTU
    uint32_t speed;        // iff VIRTIO_NET_F_SPEED_DUPLEX
    uint8_t  duplex;       // iff VIRTIO_NET_F_SPEED_DUPLEX
    uint8_t  rss_max_key_size;                  // iff VIRTIO_NET_F_RSS / F_HASH_REPORT
    uint16_t rss_max_indirection_table_length;  // iff VIRTIO_NET_F_RSS
    uint32_t supported_hash_types;              // iff VIRTIO_NET_F_RSS / F_HASH_REPORT
    uint32_t supported_tunnel_types;            // iff VIRTIO_NET_F_HASH_TUNNEL
};
/* guard against accidental padding changing the config-space overlay */
STATIC_ASSERT(sizeof(struct virtio_net_config) == 28);
40 
/* Header that precedes every frame in both directions on the virtqueues.
 * This driver zeroes the header on tx and rx (no offloads are used), and
 * only the first 10 bytes go on the ring: the legacy layout omits the
 * trailing num_buffers field, hence the "sizeof(...) - 2" arithmetic
 * used throughout this file. */
struct virtio_net_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1
#define VIRTIO_NET_HDR_F_DATA_VALID 2
#define VIRTIO_NET_HDR_F_RSC_INFO   4
    uint8_t  flags;     // VIRTIO_NET_HDR_F_* bits; always 0 here
#define VIRTIO_NET_HDR_GSO_NONE     0
#define VIRTIO_NET_HDR_GSO_TCPV4    1
#define VIRTIO_NET_HDR_GSO_UDP      3
#define VIRTIO_NET_HDR_GSO_TCPV6    4
#define VIRTIO_NET_HDR_GSO_UDP_L4   5
#define VIRTIO_NET_HDR_GSO_ECN      0x80
    uint8_t  gso_type;  // VIRTIO_NET_HDR_GSO_*; always GSO_NONE here
    uint16_t hdr_len;
    uint16_t gso_size;
    uint16_t csum_start;
    uint16_t csum_offset;
    uint16_t num_buffers; // unused in tx; not placed on the ring by this driver

    // Only if VIRTIO_NET_HASH_REPORT negotiated
    //uint32_t hash_value;
    //uint16_t hash_report;
    //uint16_t padding_reserved;
};
/* full (modern) header size; the legacy form used here is this minus 2 */
STATIC_ASSERT(sizeof(struct virtio_net_hdr) == 12);
65 
66 #define VIRTIO_NET_F_CSUM                   (1<<0)
67 #define VIRTIO_NET_F_GUEST_CSUM             (1<<1)
68 #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS    (1<<2)
69 #define VIRTIO_NET_F_MTU                    (1<<3)
70 #define VIRTIO_NET_F_MAC                    (1<<5)
71 #define VIRTIO_NET_F_GSO                    (1<<6) // removed in v1.3
72 #define VIRTIO_NET_F_GUEST_TSO4             (1<<7)
73 #define VIRTIO_NET_F_GUEST_TSO6             (1<<8)
74 #define VIRTIO_NET_F_GUEST_ECN              (1<<9)
75 #define VIRTIO_NET_F_GUEST_UFO              (1<<10)
76 #define VIRTIO_NET_F_HOST_TSO4              (1<<11)
77 #define VIRTIO_NET_F_HOST_TSO6              (1<<12)
78 #define VIRTIO_NET_F_HOST_ECN               (1<<13)
79 #define VIRTIO_NET_F_HOST_UFO               (1<<14)
80 #define VIRTIO_NET_F_MRG_RXBUF              (1<<15)
81 #define VIRTIO_NET_F_STATUS                 (1<<16)
82 #define VIRTIO_NET_F_CTRL_VQ                (1<<17)
83 #define VIRTIO_NET_F_CTRL_RX                (1<<18)
84 #define VIRTIO_NET_F_CTRL_VLAN              (1<<19)
85 #define VIRTIO_NET_F_GUEST_ANNOUNCE         (1<<21)
86 #define VIRTIO_NET_F_MQ                     (1<<22)
87 #define VIRTIO_NET_F_CTRL_MAC_ADDR          (1<<23)
88 #define VIRTIO_NET_F_HASH_TUNNEL            (1ULL<<51)
89 #define VIRTIO_NET_F_VQ_NOTF_COAL           (1ULL<<52)
90 #define VIRTIO_NET_F_NOTF_COAL              (1ULL<<53)
91 #define VIRTIO_NET_F_GUEST_USO4             (1ULL<<54)
92 #define VIRTIO_NET_F_GUEST_USO6             (1ULL<<55)
93 #define VIRTIO_NET_F_HOST_USO               (1ULL<<56)
94 #define VIRTIO_NET_F_HASH_REPORT            (1ULL<<57)
95 #define VIRTIO_NET_F_GUEST_HDRLEN           (1ULL<<59)
96 #define VIRTIO_NET_F_RSS                    (1ULL<<60)
97 #define VIRTIO_NET_F_RSC_EXT                (1ULL<<61)
98 #define VIRTIO_NET_F_STANDBY                (1ULL<<62)
99 #define VIRTIO_NET_F_SPEED_DUPLEX           (1ULL<<63)
100 
101 #define VIRTIO_NET_S_LINK_UP                (1<<0)
102 #define VIRTIO_NET_S_ANNOUNCE               (1<<1)
103 
104 #define TX_RING_SIZE 16
105 #define RX_RING_SIZE 16
106 
107 #define RING_RX 0
108 #define RING_TX 1
109 
110 #define VIRTIO_NET_MSS 1514
111 
/* Per-device driver state. Allocated in virtio_net_init; a single global
 * instance is tracked via the_ndev. */
struct virtio_net_dev {
    struct virtio_device *dev;  // underlying virtio bus device
    bool started;               // set once virtio_net_start has run

    struct virtio_net_config *config;  // overlaid on dev->config_ptr

    spin_lock_t lock;   // protects the pending arrays, tx count, and rx queue
    event_t rx_event;   // autounsignal; set by irq handler, waited on by rx worker

    /* list of active tx/rx packets to be freed at irq time,
     * indexed by vring descriptor index */
    pktbuf_t *pending_tx_packet[TX_RING_SIZE];
    pktbuf_t *pending_rx_packet[RX_RING_SIZE];

    uint tx_pending_count;                // tx descriptors currently in flight
    struct list_node completed_rx_queue;  // filled rx pktbufs awaiting the worker
};
128 
129 static enum handler_return virtio_net_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
130 static int virtio_net_rx_worker(void *arg);
131 static status_t virtio_net_queue_rx(struct virtio_net_dev *ndev, pktbuf_t *p);
132 
133 // XXX remove need for this
134 static struct virtio_net_dev *the_ndev;
135 
dump_feature_bits(uint64_t feature)136 static void dump_feature_bits(uint64_t feature) {
137     printf("virtio-net host features (%#" PRIx64 "):", feature);
138     if (feature & VIRTIO_NET_F_CSUM) printf(" CSUM");
139     if (feature & VIRTIO_NET_F_GUEST_CSUM) printf(" GUEST_CSUM");
140     if (feature & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) printf(" CTRL_GUEST_OFFLOADS");
141     if (feature & VIRTIO_NET_F_MTU) printf(" MTU");
142     if (feature & VIRTIO_NET_F_MAC) printf(" MAC");
143     if (feature & VIRTIO_NET_F_GSO) printf(" GSO");
144     if (feature & VIRTIO_NET_F_GUEST_TSO4) printf(" GUEST_TSO4");
145     if (feature & VIRTIO_NET_F_GUEST_TSO6) printf(" GUEST_TSO6");
146     if (feature & VIRTIO_NET_F_GUEST_ECN) printf(" GUEST_ECN");
147     if (feature & VIRTIO_NET_F_GUEST_UFO) printf(" GUEST_UFO");
148     if (feature & VIRTIO_NET_F_HOST_TSO4) printf(" HOST_TSO4");
149     if (feature & VIRTIO_NET_F_HOST_TSO6) printf(" HOST_TSO6");
150     if (feature & VIRTIO_NET_F_HOST_ECN) printf(" HOST_ECN");
151     if (feature & VIRTIO_NET_F_HOST_UFO) printf(" HOST_UFO");
152     if (feature & VIRTIO_NET_F_MRG_RXBUF) printf(" MRG_RXBUF");
153     if (feature & VIRTIO_NET_F_STATUS) printf(" STATUS");
154     if (feature & VIRTIO_NET_F_CTRL_VQ) printf(" CTRL_VQ");
155     if (feature & VIRTIO_NET_F_CTRL_RX) printf(" CTRL_RX");
156     if (feature & VIRTIO_NET_F_CTRL_VLAN) printf(" CTRL_VLAN");
157     if (feature & VIRTIO_NET_F_GUEST_ANNOUNCE) printf(" GUEST_ANNOUNCE");
158     if (feature & VIRTIO_NET_F_MQ) printf(" MQ");
159     if (feature & VIRTIO_NET_F_CTRL_MAC_ADDR) printf(" CTRL_MAC_ADDR");
160     if (feature & VIRTIO_NET_F_HASH_TUNNEL) printf(" HASH_TUNNEL");
161     if (feature & VIRTIO_NET_F_VQ_NOTF_COAL) printf(" VQ_NOTF_COAL");
162     if (feature & VIRTIO_NET_F_NOTF_COAL) printf(" NOTF_COAL");
163     if (feature & VIRTIO_NET_F_GUEST_USO4) printf(" GUEST_USO4");
164     if (feature & VIRTIO_NET_F_GUEST_USO6) printf(" GUEST_USO6");
165     if (feature & VIRTIO_NET_F_HOST_USO) printf(" HOST_USO");
166     if (feature & VIRTIO_NET_F_HASH_REPORT) printf(" HASH_REPORT");
167     if (feature & VIRTIO_NET_F_GUEST_HDRLEN) printf(" GUEST_HDRLEN");
168     if (feature & VIRTIO_NET_F_RSS) printf(" RSS");
169     if (feature & VIRTIO_NET_F_RSC_EXT) printf(" RSC_EXT");
170     if (feature & VIRTIO_NET_F_STANDBY) printf(" STANDBY");
171     if (feature & VIRTIO_NET_F_SPEED_DUPLEX) printf(" SPEED_DUPLEX");
172     printf("\n");
173 }
174 
/* Per-device initialization, called by the virtio bus layer.
 * Allocates driver state, hooks up the config space and irq callback,
 * and allocates the rx/tx vrings. Records the device in the single
 * global the_ndev (only one instance is supported; see XXX above).
 * Returns NO_ERROR on success or ERR_NO_MEMORY. */
status_t virtio_net_init(struct virtio_device *dev) {
    LTRACEF("dev %p\n", dev);

    /* allocate a new net device */
    struct virtio_net_dev *ndev = calloc(1, sizeof(struct virtio_net_dev));
    if (!ndev)
        return ERR_NO_MEMORY;

    /* cross-link the bus device and our private state */
    ndev->dev = dev;
    dev->priv = ndev;
    ndev->started = false;

    ndev->lock = SPIN_LOCK_INITIAL_VALUE;
    /* autounsignal: each signal wakes the rx worker exactly once */
    event_init(&ndev->rx_event, false, EVENT_FLAG_AUTOUNSIGNAL);
    list_initialize(&ndev->completed_rx_queue);

    /* config space is mapped by the bus layer; overlay our struct on it */
    ndev->config = (struct virtio_net_config *)dev->config_ptr;

    /* ack and set the driver status bit */
    virtio_status_acknowledge_driver(dev);

    // XXX check features bits and ack/nak them
    uint64_t host_features = virtio_read_host_feature_word(dev, 0) | (uint64_t)virtio_read_host_feature_word(dev, 1) << 32;
    dump_feature_bits(host_features);

    /* set our irq handler */
    dev->irq_driver_callback = &virtio_net_irq_driver_callback;

    /* set DRIVER_OK */
    /* NOTE(review): the virtio spec orders DRIVER_OK after virtqueue setup,
     * but here it precedes virtio_alloc_ring -- confirm the bus layer
     * tolerates this ordering. */
    virtio_status_driver_ok(dev);

    /* allocate a pair of virtio rings */
    virtio_alloc_ring(dev, RING_RX, RX_RING_SIZE); // rx
    virtio_alloc_ring(dev, RING_TX, TX_RING_SIZE); // tx

    the_ndev = ndev;

    return NO_ERROR;
}
214 
virtio_net_start(void)215 status_t virtio_net_start(void) {
216     if (the_ndev->started)
217         return ERR_ALREADY_STARTED;
218 
219     the_ndev->started = true;
220 
221     /* start the rx worker thread */
222     thread_resume(thread_create("virtio_net_rx", &virtio_net_rx_worker, (void *)the_ndev, HIGH_PRIORITY, DEFAULT_STACK_SIZE));
223 
224     /* queue up a bunch of rxes */
225     for (uint i = 0; i < RX_RING_SIZE - 1; i++) {
226         pktbuf_t *p = pktbuf_alloc();
227         if (p) {
228             virtio_net_queue_rx(the_ndev, p);
229         }
230     }
231 
232     return NO_ERROR;
233 }
234 
virtio_net_queue_tx_pktbuf(struct virtio_net_dev * ndev,pktbuf_t * p2)235 static status_t virtio_net_queue_tx_pktbuf(struct virtio_net_dev *ndev, pktbuf_t *p2) {
236     struct virtio_device *vdev = ndev->dev;
237 
238     uint16_t i;
239     pktbuf_t *p;
240 
241     DEBUG_ASSERT(ndev);
242 
243     p = pktbuf_alloc();
244     if (!p)
245         return ERR_NO_MEMORY;
246 
247     /* point our header to the base of the first pktbuf */
248     struct virtio_net_hdr *hdr = pktbuf_append(p, sizeof(struct virtio_net_hdr) - 2);
249     memset(hdr, 0, p->dlen);
250 
251     spin_lock_saved_state_t state;
252     spin_lock_irqsave(&ndev->lock, state);
253 
254     /* only queue if we have enough tx descriptors */
255     if (ndev->tx_pending_count + 2 > TX_RING_SIZE)
256         goto nodesc;
257 
258     /* allocate a chain of descriptors for our transfer */
259     struct vring_desc *desc = virtio_alloc_desc_chain(vdev, RING_TX, 2, &i);
260     if (!desc) {
261         spin_unlock_irqrestore(&ndev->lock, state);
262 
263 nodesc:
264         TRACEF("out of virtio tx descriptors, tx_pending_count %u\n", ndev->tx_pending_count);
265         pktbuf_free(p, true);
266 
267         return ERR_NO_MEMORY;
268     }
269 
270     ndev->tx_pending_count += 2;
271 
272     /* save a pointer to our pktbufs for the irq handler to free */
273     LTRACEF("saving pointer to pkt in index %u and %u\n", i, desc->next);
274     DEBUG_ASSERT(ndev->pending_tx_packet[i] == NULL);
275     DEBUG_ASSERT(ndev->pending_tx_packet[desc->next] == NULL);
276     ndev->pending_tx_packet[i] = p;
277     ndev->pending_tx_packet[desc->next] = p2;
278 
279     /* set up the descriptor pointing to the header */
280     desc->addr = pktbuf_data_phys(p);
281     desc->len = p->dlen;
282     desc->flags |= VRING_DESC_F_NEXT;
283 
284     /* set up the descriptor pointing to the buffer */
285     desc = virtio_desc_index_to_desc(vdev, RING_TX, desc->next);
286     desc->addr = pktbuf_data_phys(p2);
287     desc->len = p2->dlen;
288     desc->flags = 0;
289 
290     /* submit the transfer */
291     virtio_submit_chain(vdev, RING_TX, i);
292 
293     /* kick it off */
294     virtio_kick(vdev, RING_TX);
295 
296     spin_unlock_irqrestore(&ndev->lock, state);
297 
298     return NO_ERROR;
299 }
300 
301 /* variant of the above function that copies the buffer into a pktbuf before sending */
virtio_net_queue_tx(struct virtio_net_dev * ndev,const void * buf,size_t len)302 static status_t virtio_net_queue_tx(struct virtio_net_dev *ndev, const void *buf, size_t len) {
303     DEBUG_ASSERT(ndev);
304     DEBUG_ASSERT(buf);
305 
306     pktbuf_t *p = pktbuf_alloc();
307     if (!p)
308         return ERR_NO_MEMORY;
309 
310     /* copy the outgoing packet into the pktbuf */
311     p->data = p->buffer;
312     p->dlen = len;
313     memcpy(p->data, buf, len);
314 
315     /* call through to the variant of the function that takes a pre-populated pktbuf */
316     status_t err = virtio_net_queue_tx_pktbuf(ndev, p);
317     if (err < 0) {
318         pktbuf_free(p, true);
319     }
320 
321     return err;
322 }
323 
/* Post a single receive buffer to the rx ring.
 * The pktbuf is reset so the device DMAs a legacy (10-byte) virtio-net
 * header followed by up to VIRTIO_NET_MSS bytes of frame data into it.
 * The pktbuf pointer is stashed in pending_rx_packet[] so the irq
 * handler can recover it on completion. Always returns NO_ERROR. */
static status_t virtio_net_queue_rx(struct virtio_net_dev *ndev, pktbuf_t *p) {
    struct virtio_device *vdev = ndev->dev;

    DEBUG_ASSERT(ndev);
    DEBUG_ASSERT(p);

    /* point our header to the base of the pktbuf */
    p->data = p->buffer;
    struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)p->data;
    /* -2: the legacy header layout omits the trailing num_buffers field */
    memset(hdr, 0, sizeof(struct virtio_net_hdr) - 2);

    /* total DMA length: legacy header plus a full MSS of payload */
    p->dlen = sizeof(struct virtio_net_hdr) - 2 + VIRTIO_NET_MSS;

    spin_lock_saved_state_t state;
    spin_lock_irqsave(&ndev->lock, state);

    /* allocate a chain of descriptors for our transfer */
    uint16_t i;
    struct vring_desc *desc = virtio_alloc_desc_chain(vdev, RING_RX, 1, &i);
    /* NOTE(review): compiled out in release builds; a NULL desc would be
     * dereferenced below -- callers never post more than RX_RING_SIZE-1 */
    DEBUG_ASSERT(desc); /* shouldn't be possible not to have a descriptor ready */

    /* save a pointer to our pktbufs for the irq handler to use */
    DEBUG_ASSERT(ndev->pending_rx_packet[i] == NULL);
    ndev->pending_rx_packet[i] = p;

    /* set up the descriptor pointing to the header */
    desc->addr = pktbuf_data_phys(p);
    desc->len = p->dlen;
    desc->flags = VRING_DESC_F_WRITE; /* device writes, driver reads */

    /* submit the transfer */
    virtio_submit_chain(vdev, RING_RX, i);

    /* kick it off */
    virtio_kick(vdev, RING_RX);

    spin_unlock_irqrestore(&ndev->lock, state);

    return NO_ERROR;
}
364 
/* Irq-time completion handler for both rings.
 *
 * Walks the used descriptor chain starting at e->id, freeing each
 * descriptor back to the ring. For rx completions, the filled pktbuf is
 * trimmed to the used length and moved to completed_rx_queue for the
 * worker thread; for tx completions, the pktbufs are freed immediately.
 *
 * Runs in irq context; takes ndev->lock with the plain (non-irqsave)
 * variant since interrupts are already disabled. Returns INT_RESCHEDULE
 * so a woken rx worker can be scheduled promptly.
 */
static enum handler_return virtio_net_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e) {
    struct virtio_net_dev *ndev = (struct virtio_net_dev *)dev->priv;

    LTRACEF("dev %p, ring %u, e %p, id %u, len %u\n", dev, ring, e, e->id, e->len);

    spin_lock(&ndev->lock);

    /* parse our descriptor chain, add back to the free queue */
    uint16_t i = e->id;
    for (;;) {
        int next;
        struct vring_desc *desc = virtio_desc_index_to_desc(dev, ring, i);

        if (desc->flags & VRING_DESC_F_NEXT) {
            next = desc->next;
        } else {
            /* end of chain */
            next = -1;
        }

        virtio_free_desc(dev, ring, i);

        if (ring == RING_RX) {
            /* put the freed rx buffer in a queue */
            pktbuf_t *p = ndev->pending_rx_packet[i];
            ndev->pending_rx_packet[i] = NULL;

            DEBUG_ASSERT(p);
            LTRACEF("rx pktbuf %p filled\n", p);

            /* trim the pktbuf according to the written length in the used
             * element descriptor; a device-reported length larger than what
             * we posted is bogus, so drop the payload */
            if (e->len > (sizeof(struct virtio_net_hdr) - 2 + VIRTIO_NET_MSS)) {
                TRACEF("bad used len on RX %u\n", e->len);
                p->dlen = 0;
            } else {
                p->dlen = e->len;
            }

            list_add_tail(&ndev->completed_rx_queue, &p->list);
        } else { // ring == RING_TX
            /* free the pktbuf associated with the tx packet we just consumed */
            pktbuf_t *p = ndev->pending_tx_packet[i];
            ndev->pending_tx_packet[i] = NULL;
            ndev->tx_pending_count--;

            DEBUG_ASSERT(p);
            LTRACEF("freeing pktbuf %p\n", p);

            pktbuf_free(p, false);
        }

        if (next < 0)
            break;
        i = next;
    }

    spin_unlock(&ndev->lock);

    /* if rx ring, signal our event (use RING_RX, not a magic 0, for
     * consistency with the checks above) */
    if (ring == RING_RX) {
        event_signal(&ndev->rx_event, false);
    }

    return INT_RESCHEDULE;
}
430 
virtio_net_rx_worker(void * arg)431 static int virtio_net_rx_worker(void *arg) {
432     struct virtio_net_dev *ndev = (struct virtio_net_dev *)arg;
433 
434     for (;;) {
435         event_wait(&ndev->rx_event);
436 
437         /* pull some packets from the received queue */
438         for (;;) {
439             spin_lock_saved_state_t state;
440             spin_lock_irqsave(&ndev->lock, state);
441 
442             pktbuf_t *p = list_remove_head_type(&ndev->completed_rx_queue, pktbuf_t, list);
443 
444             spin_unlock_irqrestore(&ndev->lock, state);
445 
446             if (!p)
447                 break; /* nothing left in the queue, go back to waiting */
448 
449             LTRACEF("got packet len %u\n", p->dlen);
450 
451             /* process our packet */
452             struct virtio_net_hdr *hdr = pktbuf_consume(p, sizeof(struct virtio_net_hdr) - 2);
453             if (hdr) {
454                 /* call up into the stack */
455                 minip_rx_driver_callback(p);
456             }
457 
458             /* requeue the pktbuf in the rx queue */
459             virtio_net_queue_rx(ndev, p);
460         }
461     }
462     return 0;
463 }
464 
virtio_net_found(void)465 int virtio_net_found(void) {
466     return the_ndev ? 1 : 0;
467 }
468 
virtio_net_get_mac_addr(uint8_t mac_addr[6])469 status_t virtio_net_get_mac_addr(uint8_t mac_addr[6]) {
470     if (!the_ndev)
471         return ERR_NOT_FOUND;
472 
473     memcpy(mac_addr, the_ndev->config->mac, 6);
474 
475     return NO_ERROR;
476 }
477 
virtio_net_send_minip_pkt(void * arg,pktbuf_t * p)478 status_t virtio_net_send_minip_pkt(void *arg, pktbuf_t *p) {
479     LTRACEF("p %p, dlen %u, flags 0x%x\n", p, p->dlen, p->flags);
480 
481     DEBUG_ASSERT(p && p->dlen);
482 
483     if ((p->flags & PKTBUF_FLAG_EOF) == 0) {
484         /* can't handle multi part packets yet */
485         PANIC_UNIMPLEMENTED;
486 
487         return ERR_NOT_IMPLEMENTED;
488     }
489 
490     /* hand the pktbuf off to the nic, it owns the pktbuf from now on out unless it fails */
491     status_t err = virtio_net_queue_tx_pktbuf(the_ndev, p);
492     if (err < 0) {
493         pktbuf_free(p, true);
494     }
495 
496     return err;
497 }
498 
499