//
// Copyright (c) 2021 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <arch/atomic.h>
#include <lk/init.h>
#include <lk/err.h>
#include <lk/cpp.h>
#include <lk/trace.h>
#include <lk/list.h>
#include <dev/bus/pci.h>
#include <kernel/event.h>
#include <kernel/thread.h>
#include <kernel/vm.h>
#include <lib/minip.h>
#include <lib/pktbuf.h>
#include <string.h>
#include <platform/interrupts.h>
#include <type_traits>

#include "e1000_hw.h"

#define LOCAL_TRACE 0

class e1000;
static e1000 *the_e; // XXX hack to remember the first e1000 seen and use for minip

// list of known 8086:x e1000 devices to match against
struct e1000_id_features {
    uint16_t id;
    bool e1000e;
};

const e1000_id_features e1000_ids[] = {
    { 0x100c, false }, // 82544GC QEMU 'e1000-82544gc'
    { 0x100e, false }, // 82540EM QEMU 'e1000'
    { 0x100f, false }, // 82545EM QEMU 'e1000-82545em'
    { 0x10d3, true },  // 82574L QEMU 'e1000e'
    { 0x1533, true },  // i210
};

// i210 ids
// 0x1533
// 0x1536
// 0x1537
// 0x1538

// i219 ids
// 0x156f
// 0x1570
// soc integrated versions?
// 0x1a1c // i219-LM (17)
// 0x1a1d // i219-V (17)
// 0x1a1e // i219-LM (16)
// 0x1a1f // i219-V (16)


class e1000 {
public:
    e1000();
    ~e1000();

    status_t init_device(pci_location_t loc, const e1000_id_features *id);

    int tx(pktbuf_t *p);

    bool is_e1000e() const { return id_feat_->e1000e; }

    const uint8_t *mac_addr() const { return mac_addr_; }

private:
    static const size_t rxring_len = 64;
    static const size_t txring_len = 64;
    static const size_t rxbuffer_len = 2048;

    uint32_t read_reg(e1000_reg reg);
    void write_reg(e1000_reg reg, uint32_t val);
    uint16_t read_eeprom(uint8_t offset);

    handler_return irq_handler();

    void add_pktbuf_to_rxring(pktbuf_t *pkt);
    void add_pktbuf_to_rxring_locked(pktbuf_t *pkt);

    // counter of configured devices
    static volatile int global_count_;
    int unit_ = 0;

    // main spinlock
    spin_lock_t lock_ = SPIN_LOCK_INITIAL_VALUE;

    // configuration
    pci_location_t loc_ = {};
    void *bar0_regs_ = nullptr;
    uint8_t mac_addr_[6] = {};
    const e1000_id_features *id_feat_ = nullptr;

    // rx ring
    rdesc *rxring_ = nullptr;
    uint32_t rx_last_head_ = 0;
    uint32_t rx_tail_ = 0;
    pktbuf_t *rx_pktbuf_[rxring_len] = {};
    uint8_t *rx_buf_ = nullptr; // rxbuffer_len * rxring_len byte buffer that rx_pktbuf_[] points to

    // rx worker thread
    list_node rx_queue_ = LIST_INITIAL_VALUE(rx_queue_);
    event_t rx_event_ = EVENT_INITIAL_VALUE(rx_event_, 0, EVENT_FLAG_AUTOUNSIGNAL);
    thread_t *rx_worker_thread_ = nullptr;
    int rx_worker_routine();

    // tx ring
    tdesc *txring_ = nullptr;
    uint32_t tx_last_head_ = 0;
    uint32_t tx_tail_ = 0;
    pktbuf_t *tx_pktbuf_[txring_len] = {};
};

uint32_t e1000::read_reg(e1000_reg reg) {
    volatile uint32_t *r = (volatile uint32_t *)((uintptr_t)bar0_regs_ + (size_t)reg);

    return *r;
}

void e1000::write_reg(e1000_reg reg, uint32_t val) {
    volatile uint32_t *r = (volatile uint32_t *)((uintptr_t)bar0_regs_ + (size_t)reg);

    *r = val;
}

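// Read one 16-bit word from the NVM/EEPROM via the EERD register.
// The bit layouts assumed here (see the Intel 8254x and 82574/i210 datasheets
// for the authoritative definitions):
//   legacy 8254x: START = bit 0, DONE = bit 4, word address in bits 8..15
//   e1000e parts: START = bit 0, DONE = bit 1, word address in bits 2..15
// In both cases the data word comes back in bits 16..31.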
uint16_t e1000::read_eeprom(uint8_t offset) {
    // 8257x+ seems to have a different EERD layout
    uint32_t val;
    if (is_e1000e()) {
        write_reg(e1000_reg::EERD, (offset << 2) | 0x1); // address + start bit

        // spin while bit 1 (DONE) is clear
        while (((val = read_reg(e1000_reg::EERD)) & (1<<1)) == 0)
            ;
    } else {
        write_reg(e1000_reg::EERD, (offset << 8) | 0x1); // address + start bit

        // spin while bit 4 (DONE) is clear
        while (((val = read_reg(e1000_reg::EERD)) & (1<<4)) == 0)
            ;
    }
    return val >> 16;
}

volatile int e1000::global_count_ = 0;

e1000::e1000() = default;
e1000::~e1000() {
    // TODO: free resources
}

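// Interrupt handler.
//
// Receive model (a sketch of the usual e1000 descriptor ring scheme this
// driver follows): the hardware fills descriptors and advances RDH; software
// consumes completed descriptors from rx_last_head_ up to RDH, hands good
// packets to the rx worker thread, and returns fresh buffers to the ring by
// writing new descriptors and advancing RDT.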
handler_return e1000::irq_handler() {
    // read the interrupt cause register, which also auto clears all bits
    auto icr = read_reg(e1000_reg::ICR);
    if (!icr) {
        return INT_NO_RESCHEDULE;
    }

    LTRACEF("icr %#x\n", icr);

    AutoSpinLockNoIrqSave guard(&lock_);

    handler_return ret = INT_NO_RESCHEDULE;

    if (icr & (1<<0)) { // TXDW - transmit descriptor written back
        PANIC_UNIMPLEMENTED;
    }
    if (icr & (1<<1)) { // TXQE - transmit queue empty
        //PANIC_UNIMPLEMENTED;
        // nothing to really do here
    }
    if (icr & (1<<6)) { // RXO - receiver overrun
        printf("e1000: RX OVERRUN\n");
    }
    if (icr & (1<<7)) { // RXTO - rx timer interrupt
        // rx timer fired, packets are probably ready
        auto rdh = read_reg(e1000_reg::RDH);
        auto rdt = read_reg(e1000_reg::RDT);

        while (rx_last_head_ != rdh) {
            // copy the current rx descriptor locally for better cache performance
            rdesc rxd;
            copy(&rxd, rxring_ + rx_last_head_);

            LTRACEF("last_head %#x RDH %#x RDT %#x\n", rx_last_head_, rdh, rdt);
            if (LOCAL_TRACE) rxd.dump();

            // recover the pktbuf we queued in this spot
            DEBUG_ASSERT(rx_pktbuf_[rx_last_head_]);
            DEBUG_ASSERT(pktbuf_data_phys(rx_pktbuf_[rx_last_head_]) == rxd.addr);
            pktbuf_t *pkt = rx_pktbuf_[rx_last_head_];

            bool consumed_pkt = false;
            if (rxd.status & (1<<0)) { // descriptor done, we own it now
                if (rxd.status & (1<<1)) { // end of packet
                    if (rxd.errors == 0) {
                        // good packet, trim data len according to the rx descriptor
                        pkt->dlen = rxd.length;
                        pkt->flags |= PKTBUF_FLAG_EOF; // just to make sure

                        // queue it in the rx queue
                        list_add_tail(&rx_queue_, &pkt->list);

                        // wake up the rx worker
                        event_signal(&rx_event_, false);
                        ret = INT_RESCHEDULE;
                        consumed_pkt = true;
                    }
                }
            }
            if (!consumed_pkt) {
                // not consumed; put the pktbuf back on the rx ring
                add_pktbuf_to_rxring_locked(pkt);
            }

            rx_last_head_ = (rx_last_head_ + 1) % rxring_len;
        }
    }
    return ret;
}

int e1000::rx_worker_routine() {
    for (;;) {
        event_wait(&rx_event_);

        // pull some packets from the received queue
        for (;;) {
            pktbuf_t *p;

            {
                AutoSpinLock guard(&lock_);

                p = list_remove_head_type(&rx_queue_, pktbuf_t, list);
            }

            if (!p) {
                break; // nothing left in the queue, go back to waiting
            }

            if (LOCAL_TRACE) {
                LTRACEF("got packet: ");
                pktbuf_dump(p);
            }

            // push it up the stack
            minip_rx_driver_callback(p);

            // we own the pktbuf again

            // set the data pointer to the start of the buffer and set dlen to 0
            pktbuf_reset(p, 0);

            // add it back to the rx ring at the current tail
            add_pktbuf_to_rxring(p);
        }
    }

    return 0;
}

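// Queue a single packet for transmit.
//
// Note that the descriptor is queued with only the EOP command bit set (no
// RS/report-status), so the device is not asked to write back completion
// status; tx_pktbuf_[] remembers the buffer, but nothing reclaims it yet,
// which matches the unimplemented TXDW branch in the irq handler above.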
int e1000::tx(pktbuf_t *p) {
    LTRACE;
    if (LOCAL_TRACE) {
        pktbuf_dump(p);
    }

    // build a tx descriptor and stuff it in the tx ring
    tdesc td = {};
    td.addr = pktbuf_data_phys(p);
    td.length = p->dlen;
    td.cmd = (1<<0); // end of packet (EOP)
    copy(&txring_[tx_tail_], &td);

    // save a copy of the pktbuf in our list
    tx_pktbuf_[tx_tail_] = p;

    // bump tail forward
    tx_tail_ = (tx_tail_ + 1) % txring_len;
    write_reg(e1000_reg::TDT, tx_tail_);

    LTRACEF("TDH %#x TDT %#x\n", read_reg(e1000_reg::TDH), read_reg(e1000_reg::TDT));

    return NO_ERROR;
}

void e1000::add_pktbuf_to_rxring_locked(pktbuf_t *p) {
    DEBUG_ASSERT(p);
    DEBUG_ASSERT(p->dlen == 0);
    DEBUG_ASSERT(p->blen == rxbuffer_len);

    // add it to the next rxring entry at the tail
    rdesc rd = {};
    rd.addr = pktbuf_data_phys(p);
    copy(&rxring_[rx_tail_], &rd);

    // save a copy of the pktbuf in our list
    rx_pktbuf_[rx_tail_] = p;

    // bump tail forward
    rx_tail_ = (rx_tail_ + 1) % rxring_len;
    write_reg(e1000_reg::RDT, rx_tail_);

    LTRACEF("after RDH %#x RDT %#x\n", read_reg(e1000_reg::RDH), read_reg(e1000_reg::RDT));
}

void e1000::add_pktbuf_to_rxring(pktbuf_t *pkt) {
    AutoSpinLock guard(&lock_);

    add_pktbuf_to_rxring_locked(pkt);
}

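// Bring up a single device: map BAR0, read the MAC address from the EEPROM,
// allocate the rx/tx descriptor rings and rx buffers, hook up an MSI (or
// legacy) interrupt, then program and enable the receive and transmit paths.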
status_t e1000::init_device(pci_location_t loc, const e1000_id_features *id) {
    loc_ = loc;
    id_feat_ = id;
    char str[32];

    LTRACEF("pci location %s\n", pci_loc_string(loc_, str));

    pci_bar_t bars[6];
    status_t err = pci_bus_mgr_read_bars(loc_, bars);
    if (err != NO_ERROR) return err;

    LTRACEF("e1000 BARS:\n");
    if (LOCAL_TRACE) pci_dump_bars(bars, 6);

    if (!bars[0].valid || bars[0].addr == 0) {
        return ERR_NOT_FOUND;
    }

    // allocate a unit number
    unit_ = atomic_add(&global_count_, 1);

    // map bar 0, main memory mapped register interface, 128KB
    snprintf(str, sizeof(str), "e1000 %d bar0", unit_);
    err = vmm_alloc_physical(vmm_get_kernel_aspace(), str, 128*1024, &bar0_regs_, 0,
                             bars[0].addr, /* vmm_flags */ 0, ARCH_MMU_FLAG_UNCACHED_DEVICE);
    if (err != NO_ERROR) {
        return ERR_NOT_FOUND;
    }

    LTRACEF("bar 0 regs mapped to %p\n", bar0_regs_);

    pci_bus_mgr_enable_device(loc_);

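    // the MAC address is assumed to live in the first three 16-bit EEPROM
    // words, stored low byte first (word 0 holds bytes 0 and 1, and so on)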
    // read the mac address out of the eeprom
    uint16_t tmp;
    tmp = read_eeprom(0);
    mac_addr_[0] = tmp & 0xff;
    mac_addr_[1] = tmp >> 8;
    tmp = read_eeprom(1);
    mac_addr_[2] = tmp & 0xff;
    mac_addr_[3] = tmp >> 8;
    tmp = read_eeprom(2);
    mac_addr_[4] = tmp & 0xff;
    mac_addr_[5] = tmp >> 8;

    printf("e1000 %d: mac address %02x:%02x:%02x:%02x:%02x:%02x\n", unit_, mac_addr_[0], mac_addr_[1], mac_addr_[2],
           mac_addr_[3], mac_addr_[4], mac_addr_[5]);

    // allocate and map space for the rx and tx ring
    snprintf(str, sizeof(str), "e1000 %d rxring", unit_);
    err = vmm_alloc_contiguous(vmm_get_kernel_aspace(), str, rxring_len * sizeof(rdesc), (void **)&rxring_, 0, 0, ARCH_MMU_FLAG_UNCACHED);
    if (err != NO_ERROR) {
        return ERR_NOT_FOUND;
    }
    memset(rxring_, 0, rxring_len * sizeof(rdesc));

    paddr_t rxring_phys = vaddr_to_paddr(rxring_);
    LTRACEF("rx ring at %p, physical %#lx\n", rxring_, rxring_phys);

    snprintf(str, sizeof(str), "e1000 %d txring", unit_);
    err = vmm_alloc_contiguous(vmm_get_kernel_aspace(), str, txring_len * sizeof(tdesc), (void **)&txring_, 0, 0, ARCH_MMU_FLAG_UNCACHED);
    if (err != NO_ERROR) {
        return ERR_NOT_FOUND;
    }
    memset(txring_, 0, txring_len * sizeof(tdesc));

    paddr_t txring_phys = vaddr_to_paddr(txring_);
    LTRACEF("tx ring at %p, physical %#lx\n", txring_, txring_phys);

    // allocate a large array of contiguous buffers to receive into
    snprintf(str, sizeof(str), "e1000 %d rx buffers", unit_);
    err = vmm_alloc_contiguous(vmm_get_kernel_aspace(), str, rxring_len * rxbuffer_len, (void **)&rx_buf_, 0, 0, 0);
    if (err != NO_ERROR) {
        return ERR_NOT_FOUND;
    }

    // mask all IRQs
    write_reg(e1000_reg::IMC, 0xffff);

    // QEMU's 82574 emulation seems to want IAME set in order to auto-clear ICR bits.
    if (is_e1000e()) {
        auto ctrl_ext = read_reg(e1000_reg::CTL_EXT);
        write_reg(e1000_reg::CTL_EXT, ctrl_ext | (1<<27)); // IAME - interrupt ack auto-mask
        write_reg(e1000_reg::IAM, 0); // set such that no IMS bits are auto cleared
    }

    // set the interrupt throttling register
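    // ITR/EITR hold the minimum gap between interrupts in 256ns units (per the
    // 8254x/82574 datasheets), so (1000000 / irq_rate) * 4 units is roughly
    // 1/irq_rate seconds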
    const uint32_t irq_rate = 10000; // max 10k irqs/sec
    write_reg(e1000_reg::ITR, 1000000 / irq_rate * 4);
    if (is_e1000e()) {
        write_reg(e1000_reg::EITR0, 1000000 / irq_rate * 4);
        write_reg(e1000_reg::EITR1, 1000000 / irq_rate * 4);
        write_reg(e1000_reg::EITR2, 1000000 / irq_rate * 4);
        write_reg(e1000_reg::EITR3, 1000000 / irq_rate * 4);
        write_reg(e1000_reg::EITR4, 1000000 / irq_rate * 4);
    }

    // disable tx and rx
    write_reg(e1000_reg::RCTL, 0);
    write_reg(e1000_reg::TCTL, 0);

    // irq handler lambda to get to inner method
    auto irq_handler_wrapper = [](void *arg) -> handler_return {
        e1000 *e = (e1000 *)arg;
        return e->irq_handler();
    };

    // allocate a MSI interrupt
    uint irq_base;
    err = pci_bus_mgr_allocate_msi(loc_, 1, &irq_base);
    if (err != NO_ERROR) {
        // fall back to regular IRQs
        err = pci_bus_mgr_allocate_irq(loc_, &irq_base);
        if (err != NO_ERROR) {
            printf("e1000: unable to allocate IRQ\n");
            return err;
        }
        register_int_handler(irq_base, irq_handler_wrapper, this);
    } else {
        register_int_handler_msi(irq_base, irq_handler_wrapper, this, true);
    }
    LTRACEF("IRQ number %#x\n", irq_base);

    unmask_interrupt(irq_base);

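    // (RDBAL/RDBAH take the 64-bit physical base of the descriptor ring, RDLEN
    // its size in bytes, and RDH/RDT the head and tail indices that the
    // hardware and the driver advance, respectively)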
    // set up the rx ring
    write_reg(e1000_reg::RDBAL, rxring_phys & 0xffffffff);
#if __INTPTR_WIDTH__ == 64
    write_reg(e1000_reg::RDBAH, rxring_phys >> 32);
#else
    write_reg(e1000_reg::RDBAH, 0);
#endif
    write_reg(e1000_reg::RDLEN, rxring_len * sizeof(rdesc));
    // set head and tail to 0
    write_reg(e1000_reg::RDH, 0);
    write_reg(e1000_reg::RDT, 0);

    // disable receive delay timer and absolute delay timer
    write_reg(e1000_reg::RDTR, 0);
    write_reg(e1000_reg::RADV, 0);
    // disable small packet detect
    write_reg(e1000_reg::RSRPD, 0);

    // set up the flow control thresholds
    write_reg(e1000_reg::FCRTL, 0);
    write_reg(e1000_reg::FCRTH, 0);

    // fill the rx ring with pktbufs
    rx_last_head_ = read_reg(e1000_reg::RDH);
    rx_tail_ = read_reg(e1000_reg::RDT);
    for (size_t i = 0; i < rxring_len - 1; i++) {
        // construct a 2K pktbuf, pointing into our rx_buf_ block of memory
        auto *pkt = pktbuf_alloc_empty();
        if (!pkt) {
            break;
        }
        pktbuf_add_buffer(pkt, rx_buf_ + i * rxbuffer_len, rxbuffer_len, 0, 0, nullptr, nullptr);

        add_pktbuf_to_rxring_locked(pkt);
    }
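    // only rxring_len - 1 buffers are queued so that the tail never wraps onto
    // the head (the usual keep-one-slot-free ring convention)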
    //hexdump(rxring_, rxring_len * sizeof(rdesc));

    // start rx worker thread
    auto wrapper_lambda = [](void *arg) -> int {
        e1000 *e = (e1000 *)arg;
        return e->rx_worker_routine();
    };
    snprintf(str, sizeof(str), "e1000 %d rx worker", unit_);
    rx_worker_thread_ = thread_create(str, wrapper_lambda, this, HIGH_PRIORITY, DEFAULT_STACK_SIZE);
    thread_resume(rx_worker_thread_);

    // start receiver
    // enable RX, unicast promiscuous, multicast promiscuous, broadcast accept, BSIZE 2048
    write_reg(e1000_reg::RCTL, (1<<1) | (1<<3) | (1<<4) | (1<<15) | (0<<16));

    // unmask receive irq
    auto ims = read_reg(e1000_reg::IMS);
    write_reg(e1000_reg::IMS, ims | (1<<7) | (1<<6)); // RXO, RXTO

    // set up the tx path
    write_reg(e1000_reg::TDH, 0);
    write_reg(e1000_reg::TDT, 0);
    tx_last_head_ = 0;
    tx_tail_ = 0;

    // set up the tx ring
    write_reg(e1000_reg::TDBAL, txring_phys & 0xffffffff);
#if __INTPTR_WIDTH__ == 64
    write_reg(e1000_reg::TDBAH, txring_phys >> 32);
#else
    write_reg(e1000_reg::TDBAH, 0);
#endif
    write_reg(e1000_reg::TDLEN, txring_len * sizeof(tdesc));

    // enable the transmitter and appropriate irqs
    write_reg(e1000_reg::TCTL, (1<<3) | (1<<1)); // short packet pad, tx enable

    // unmask tx irq
    ims = read_reg(e1000_reg::IMS);
    write_reg(e1000_reg::IMS, ims | (1<<1) | (1<<0)); // transmit queue empty, tx descriptor write back

    return NO_ERROR;
}

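// Hook the first discovered device up to the minip stack. Presumably called
// from platform or app code some time after the init hook below
// (LK_INIT_LEVEL_PLATFORM + 1) has had a chance to populate the_e.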
extern "C"
status_t e1000_register_with_minip() {
    auto tx_routine = [](void *arg, pktbuf_t *p) {
        auto *e = static_cast<e1000 *>(arg);
        return e->tx(p);
    };

    if (the_e) {
        minip_set_eth(tx_routine, the_e, the_e->mac_addr());
        return NO_ERROR;
    }

    return ERR_NOT_FOUND;
}

static void e1000_init(uint level) {
    LTRACE_ENTRY;

    auto ac = lk::make_auto_call([]() { LTRACE_EXIT; });

    // probe pci to find a device
    for (auto id: e1000_ids) {
        for (size_t i = 0; ; i++) {
            pci_location_t loc;
            status_t err = pci_bus_mgr_find_device(&loc, id.id, 0x8086, i);
            if (err != NO_ERROR) {
                break;
            }

            // we may have found one, create a new device and initialize it
            auto e = new e1000;
            err = e->init_device(loc, &id);
            if (err != NO_ERROR) {
                char str[14];
                printf("e1000: device at %s failed to initialize\n", pci_loc_string(loc, str));
                delete e;
                continue;
            }

            // XXX first e1000 found is remembered
            the_e = e;
        }
    }
}

LK_INIT_HOOK(e1000, &e1000_init, LK_INIT_LEVEL_PLATFORM + 1);