1/*
2 * Copyright (c) 2006-2019, RT-Thread Development Team
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Origin Authors: Loongson Technology Corporation Limited,
7 * caogos <1207280597@qq.com>, Jiaxun Yang <jiaxun.yang@flygoat.com>,
8 *
9 * Also thanks to Liu Shiwei <liushiwei@gmail.com> and other Loongson
10 * Community developers.
11 *
12 * Change Logs:
13 * Date           Author       Notes
14 * 2019-12-04     Jiaxun Yang  Initial version
15 */
16
17#include <rtconfig.h>
18
19#ifdef RT_USING_SELF_BOOT
20
21#ifndef __ASSEMBLY__
22#define __ASSEMBLY__
23#endif
24
25#include <mips.h>
26#include "selfboot.h"
27#include "ls1c.h"
28#include "cache.h"
29
/*
 *   Register usage:
 *
 *	s0	link versus load offset, used to relocate absolute addresses.
 *	s1	tmpsize (see the #define below); also used as scratch by exc_common
 *	s2	memory size
 *	s3	free
 *	s4	bonito (see the #define below)
 *	s5	dbg
 *	s6	sdCfg
 *	s7	rasave
 *	s8	free
 */
43
/* Symbolic names for the saved registers listed in the usage table above. */
#define tmpsize		s1
#define msize		s2
#define bonito		s4
#define dbg			s5
#define sdCfg		s6


/* Macros */

/*
 * PRINTSTR(str)
 *
 * With RT_SELF_BOOT_DEBUG defined: stores str as a NUL-terminated string in
 * the .selfboot_data section, loads its address into a0 and calls
 * stringserial (the trailing nop fills the branch delay slot).
 * Clobbers a0 and ra, plus everything stringserial itself clobbers.
 *
 * Without RT_SELF_BOOT_DEBUG the macro expands to nothing.
 */
#ifdef RT_SELF_BOOT_DEBUG
#define	PRINTSTR(str) \
    .pushsection .selfboot_data; .align 4; 98: .asciz str; .popsection; la a0, 98b; bal stringserial; nop
#else
#define PRINTSTR(x)
#endif

/*
 * DELAY(count)
 *
 * Busy-wait: loads count into v0 and loops until v0 reaches zero.
 * When expanded under ".set noreorder" the addiu sits in the branch delay
 * slot, so v0 is decremented on every pass. Clobbers v0.
 */
#define	DELAY(count)	\
    li v0, count;	\
99:			\
    bnez	v0, 99b;\
    addiu	v0, -1
64
/*
 * _start: first code of the ".selfboot" section (reset/NMI entry).
 * Clears Status and Cause, selects the boot exception vectors (BEV), speeds up
 * SPI flash reads and then branches over the exception vector area to
 * "locate". The whole file is assembled with ".set noreorder", so every
 * branch is followed by an explicit delay-slot instruction.
 */
.section ".selfboot", "ax"
.set	noreorder
.set	mips32
.globl	_start
.extern	start
_start:

/* NMI/Reset vector starts here*/
  mtc0  zero, CP0_STATUS /* clear the CP0 Status register */
  mtc0  zero, CP0_CAUSE /* clear the CP0 Cause register */

  li  t0, ST0_BEV /* keep the exception vectors at their in-flash location */
  mtc0 t0, CP0_STATUS

  /* Speed up SPI reading */
  li  t0, 0xbfe80000 /* t0 = SPI0 controller base address */
  li  t1, 0x17 /* t1 = sfc_param value: "div 4, fast_read + burst_en + memory_en,
                * double I/O"; not all flash chips support this mode */
  sb  t1, 0x4(t0) /* set sfc_param register */
  li  t1, 0x05
  sb  t1, 0x6(t0) /* set sfc_timing register */

  bal	locate			/* branch out of the vector area; ra receives the current address */
    nop
89
/* in-flash exception vectors start here */
/*
 * Each vector loads its exception type code into a0 and branches to
 * exc_common, which prints a PANIC message. The .org offsets are relative to
 * the start of the .selfboot section; the absolute addresses in the comments
 * assume the section is placed at 0xbfc00000.
 */
#define EXC_TLB_REFILL 0x0
#define EXC_CACHE_ERR	0x1
#define EXC_GEN	0x2
#define EXC_INT	0x3

.org 0x200 /* 0xbfc00200 TLB_REFILL exception */
  li a0, EXC_TLB_REFILL
  b exc_common
  nop

.org 0x300 /* 0xbfc00300 Cache Error exception */
  li a0, EXC_CACHE_ERR
  b exc_common
  nop

.org 0x380 /* 0xbfc00380 General exception */
  li	a0,EXC_GEN
  b exc_common
  nop

.org 0x400 /* 0xbfc00400 Interrupt exception */
  li a0, EXC_INT
  b exc_common
  nop

1: /* not reachable by fall-through (the branch above is unconditional); spin if entered */
  b 1b
  nop
120
/*
 * exc_common - early exception reporter.
 *
 * In:   a0 = exception type code (EXC_TLB_REFILL / EXC_CACHE_ERR /
 *            EXC_GEN / EXC_INT) set by the vector stub.
 * Uses: s1 as the comparison scratch register; a0 and ra are overwritten by
 *       the print helpers.
 *
 * Prints a type-specific banner (only when PRINTSTR is enabled), then falls
 * into print_cause, which dumps Cause, Status, ErrorEPC, EPC and BadVAddr
 * through hexserial and finally spins forever. Never returns.
 */
exc_common:
  /* --- TLB refill? --- */
  li    s1, EXC_TLB_REFILL
  bne   a0, s1, .Lexc_check_cache_err
  nop
  PRINTSTR("\r\nEARLY_PANIC: Exception TLB Refill")
  b     print_cause
  nop

.Lexc_check_cache_err:
  /* --- cache error? also dump the CacheErr register --- */
  li    s1, EXC_CACHE_ERR
  bne   a0, s1, .Lexc_check_general
  nop
  PRINTSTR("\r\nEARLY_PANIC: CACHE Error: ")
  mfc0  a0, CP0_CACHEERR
  bal   hexserial
  nop
  b     print_cause
  nop

.Lexc_check_general:
  /* --- general exception? --- */
  li    s1, EXC_GEN
  bne   a0, s1, .Lexc_check_interrupt
  nop
  PRINTSTR("\r\nEARLY_PANIC: General Exception")
  b     print_cause
  nop

.Lexc_check_interrupt:
  /* --- interrupt? any unknown code skips straight to the register dump --- */
  li    s1, EXC_INT
  bne   a0, s1, print_cause
  nop
  PRINTSTR("\r\nEARLY_PANIC: Interrupt Exception")

print_cause:
  /* Dump the CP0 registers that describe the fault, one per line. */
  PRINTSTR("\r\nCAUSE=")
  mfc0  a0, CP0_CAUSE
  bal   hexserial
  nop

  PRINTSTR("\r\nSTATUS=")
  mfc0  a0, CP0_STATUS
  bal   hexserial
  nop

  PRINTSTR("\r\nERRORPC=")
  mfc0  a0, CP0_ERROREPC
  bal   hexserial
  nop

  PRINTSTR("\r\nEPC=")
  mfc0  a0, CP0_EPC
  bal   hexserial
  nop

  PRINTSTR("\r\nBADADDR=")
  mfc0  a0, CP0_BADVADDR
  bal   hexserial
  nop

  PRINTSTR("\r\nEARLY: LOOP! Noting to do")

.Lexc_halt:
  /* Nothing more can be done this early: spin until the board is reset. */
  b     .Lexc_halt
  nop
181
/* locate here, continue the start progress */

/*
 * locate: reached via "bal locate" from _start, so ra holds the run-time
 * address of the instruction after that branch's delay slot.
 * Computes s0 (run-time minus link-time offset, truncated to a 64 KiB
 * boundary) and programs the PLL / clock divider registers.
 */
locate:
  /* fix the absolute address by ra */
  la  s0, start /* s0 = link-time address of start */
  subu  s0, ra, s0 /* s0 = ra - s0 */
  and s0, 0xffff0000 /* keep only the upper 16 bits: s0 = load-vs-link offset */

  li	t0, 0xbfe78030 /* load PLL/SDRAM freq config register base to t0 */
  li	t2, (0x80000008 | (PLL_MULT << 8) | (0x3 << 2) | SDRAM_DIV) /* Set PLL
                                                                  * MULT and PLL DIV */
  li	t3, (0x00008003 | (CPU_DIV << 8)) /* set CPU_DIV */

  li	t1, 0x2
    sw	t1, 0x4(t0) /* disable CPU_DIV_VALID firstly for adjustment */
    sw	t2, 0x0(t0) /* write START_FREQ */
    sw	t3, 0x4(t0) /* write CLK_DIV_PARAM */
199
/*
 * Initialize the debug UART (UART2).
 *
 * Steps: reset/enable the FIFOs, open the divisor latch, route the UART2 pins,
 * derive the UART input clock from START_FREQ / CLK_DIV_PARAM, program the
 * baud divisor for EARLY_DEBUG_BAUD, select 8 data bits and mask all UART
 * interrupts.
 *
 * Registers: v0 = UART2 base (live for the whole block), v1/a0/a1/a2 scratch.
 */
  la	v0, LS1C_UART2_BASE /* v0 = UART2 base; only UART2 can be the debug port */
    li	v1, FIFO_ENABLE|FIFO_RCV_RST|FIFO_XMT_RST|FIFO_TRIGGER_4 /* enable FIFOs, reset
                                                                * Rx/Tx, trigger level 4 */
    sb	v1, LS1C_UART_FCR_OFFSET(v0) /* write FCR (FIFO control register) */
    li	v1, CFCR_DLAB /* open the divisor latch */
    sb	v1, LS1C_UART_LCR_OFFSET(v0) /* write LCR (Line control register) */

    /* Set UART2 reuse with GPIO36,37 */
    li		a0, LS1C_CBUS_FIRST1 /* a0 = CBUS_FIRST1 address */
    lw		a1, 0x10(a0) /* a1 = CBUS_SECOND1 */
    ori		a1, 0x30 /* a1 |= 0x30: GPIO36,37 as secondary function */
    sw		a1, 0x10(a0) /* write back CBUS_SECOND1 */

    /* Calculate PLL output and from it the UART clock */
    li		a0, 0xbfe78030 /* a0 = START_FREQ register address */
    lw		a1, 0(a0) /* a1 = START_FREQ */
    srl		a1, 8
    andi	a1, 0xff /* a1 = PLL_MULT field (bits 15:8) */
    li		a2, APB_CLK /* a2 = APB_CLK (external clock frequency) */
    srl		a2, 2 /* a2 = APB_CLK / 4 */
    multu	a1, a2 /* hi:lo = PLL_MULT * APB_CLK / 4 */
    mflo	v1 /* v1 = low 32 bits of the product = PLL frequency */

    /* Determine whether the clock is divided */
    lw		a1, 4(a0) /* a1 = CLK_DIV_PARAM */
    andi	a2, a1, DIV_CPU_SEL /* CPU_SEL = 1: divided clock, 0: bypass */
    bnez	a2, .Luart_clk_divided
    nop
    li		v1, APB_CLK /* bypass: clock is APB_CLK itself */
    b		.Luart_set_baud
    nop

.Luart_clk_divided: /* Determine whether CPU_DIV is valid */
    andi	a2, a1, DIV_CPU_EN
    bnez	a2, .Luart_cpu_div_valid
    nop
    srl		v1, 1 /* divider not enabled: use PLL / 2 */
    b		.Luart_set_baud
    nop

.Luart_cpu_div_valid: /* calculate CPU frequency = PLL / CPU_DIV */
    andi	a1, DIV_CPU /* isolate the CPU_DIV field */
    srl		a1, DIV_CPU_SHIFT
    divu	v1, a1 /* lo = v1 / a1, hi = v1 % a1 */
    mflo	v1 /* v1 = CPU frequency */

.Luart_set_baud:
    li		a1, (16 * EARLY_DEBUG_BAUD) /* a1 = 16 * baud rate */
    divu	v1, v1, a1 /* v1 = clock / (16 * baud) */
    srl     v1, 1  /* halve the result; NOTE(review): presumably the UART runs at
                    * half of the clock computed above - confirm */
    sb	v1, LS1C_UART_LSB_OFFSET(v0) /* low 8 bits of the divisor -> LSB */
    srl	v1, 8
    sb	v1, LS1C_UART_MSB_OFFSET(v0) /* next 8 bits of the divisor -> MSB */

    li	v1, CFCR_8BITS /* 8n1, no parity; also closes the divisor latch */
    sb	v1, LS1C_UART_LCR_OFFSET(v0) /* write LCR (Line Control Register) */

#ifdef EARLY_DEBUG_UART_FLOW_CTRL
    li	v1, MCR_DTR|MCR_RTS /* assert DTR and RTS */
    sb	v1, LS1C_UART_MCR_OFFSET(v0) /* write MCR (MODEM Control Register) */
#endif

    li	v1, 0x0 /* disable all UART interrupts */
    sb	v1, LS1C_UART_IER_OFFSET(v0) /* write IER (Interrupt Enable Register) */

    /*
     * The banner used to be written with the undefined escape "\I". GNU as
     * silently emits a plain 'I' for it, so "\rINFO" yields the same bytes
     * without depending on that assembler-specific behaviour.
     */
    PRINTSTR("\rINFO: Loongson 1C300 Starting :) \r\n")
263
264  /* disable all GPIOs for conflict functions */
265  li a0,0xbfd00000
266  sw zero,0x10c0(a0)	/* disable GPIO 0-31 */
267  sw zero,0x10c4(a0)	/* disable GPIO 32-63 */
268  sw zero,0x10c8(a0)	/* disable GPIO 64-95 */
269  sw zero,0x10cc(a0)
270
271  li t0, 0xffffffff
272  sw t0, 0x10d0(a0)
273  sw t0, 0x10d4(a0)
274  sw t0, 0x10d8(a0)
275  sw t0, 0x10dc(a0)
276
277  sw t0, 0x10f0(a0)
278  sw t0, 0x10f4(a0)
279  sw t0, 0x10f8(a0)
280  sw t0, 0x10fc(a0)
281
282  PRINTSTR("\r\INFO: All GPIOs are disabled\r\n")
283
284  /* SDRAM initialize starts here */
285
286  li msize, MEM_SIZE
287
288#ifdef EJTAG_SEL_AS_SDRAM_CS1
289  li		a0, 0xbfd011c0
290  lw		a1, 0x40(a0)
291  ori	a1, 0x01
292  sw		a1, 0x40(a0)
293  PRINTSTR("\r\INFO: EJTAG_SEL PIN as SDRAM_CS1\r\n")
294#endif
295
296/*
297* recommanded by user manual, we should write SD_CONFIG[31:0] first, then
298* write SD_CONFIG[63:32]. Repeat writing for three times, valid the config in
299* the last time.
300*/
301
302/* write first time */
303li  	t1, 0xbfd00410 /* load SD_CONFIG[31:0] address to t1 */
304li		a1, SD_PARA0 /* get the memory config from macro SD_PARA0 */
305sw		a1, 0x0(t1) /* write to SD_CONFIG[31:0] */
306li		a1, SD_PARA1
307sw		a1, 0x4(t1) /* write to SD_CONFIG[63:32] with offset */
308PRINTSTR("\r\INFO: SDRAM Config Pass1\r\n")
309
310/* write second time,the same */
311li		a1, SD_PARA0
312sw		a1, 0x0(t1)
313li		a1, SD_PARA1
314sw		a1, 0x4(t1)
315PRINTSTR("\r\INFO: SDRAM Config Pass2\r\n")
316
317/* write third time, enable controller this time */
318li		a1, SD_PARA0
319sw		a1, 0x0(t1)
320li		a1, SD_PARA1_EN /* enable it */
321sw		a1, 0x4(t1)
322PRINTSTR("\r\INFO: SDRAM initialize compeleted\r\n")
323
324
/* initialize cache: cache_init saves ra in t1 and returns through it */
bal     cache_init /* branch to cache_init */
nop

/*
 * enable cache: clear bit 12 and the low three bits of CP0 Config, then set
 * the low field to 2.
 * NOTE(review): in the MIPS32 architecture the low three bits are the K0
 * (kseg0 cacheability) field, where 2 means uncached and 3 cacheable - confirm
 * that 2 is the intended "cached" encoding on this core.
 */
mfc0   a0, CP0_CONFIG /* load cp0 config to a0 */
and    a0, a0, ~((1<<12) | 7) /* a0 = a0 & ~((1<<12) | 7) */
or     a0, a0, 2 /* a0 |= 2 */
mtc0   a0, CP0_CONFIG /* write back to CP0 config */
334
/*
* relocate, stage 1: copy the small copy routine (text_copy_start ..
* text_copy_end) from its load address to its link address in memory, then
* jump to the copy in memory. The bulk copy of the image is done from there
* so that it runs faster than it would from flash.
*
* Registers: t0 = source (link address + s0, i.e. the load address),
*            t2 = destination (link address), t1 = text_copy_end, v0 = data.
*/

/*
* "\I" is not a defined escape; GNU as emits a plain 'I' for it, so the string
* is spelled "\rINFO" here - same bytes, no undefined escape.
*/
PRINTSTR("\rINFO: Relocating")
la		t0, text_copy_start /* link address of the copy routine */
move		t2, t0 /* t2 = destination */
addu		t0, s0 /* t0 = source: address of the routine as currently loaded */
la		t1, text_copy_end /* t1 = last destination address to copy (inclusive) */

selfboot_copy_loop:
lw		v0, (t0) /* read one word from the source address in t0 */
sw		v0, (t2) /* store it to the destination address in t2 */
addiu		t0, 0x4 /* advance the source by 4 bytes */
addiu		t2, 0x4 /* advance the destination by 4 bytes */
ble		t2, t1, selfboot_copy_loop /* continue while t2 <= t1, so the word at
                                       * text_copy_end itself is copied too */
nop

la		t0, text_copy_start /* link address of the routine just copied */
jr		t0 /* continue at text_copy_start in memory */
nop
357
/*
 * text_copy_start - relocate, stage 2.
 *
 * Runs from the copy made by selfboot_copy_loop. Copies the image
 * [start, _edata) word by word from its load address (link address + s0) to
 * its link address, then calls _rtthread_entry with a0 = memory size in MiB.
 *
 * Registers: a0 = start (link address), a1 = start + s0 (load address),
 *            a2 = _edata (link address);
 *            t0 = destination cursor, t1 = source cursor, t2 = destination
 *            end, t3 = data / loop condition.
 *
 * Changes from the previous version:
 *  - a dead "subu t1, a2, a0" (t1 was overwritten before use) and a branch
 *    whose taken and not-taken paths reached the same label were removed;
 *  - the loop now ends when the destination cursor is no longer below _edata
 *    (unsigned compare) instead of on exact equality, so an image size that is
 *    not a multiple of 4 can no longer make the copy run past the end.
 */
text_copy_start:
/* Copy code to memory */
la		a0, start /* a0 = link address of start */
addu		a1, a0, s0 /* a1 = address of start as currently loaded */
la		a2, _edata /* a2 = link address of _edata */

move	t0, a0 /* destination cursor (link address) */
move	t1, a1 /* source cursor (load address) */
move	t2, a2 /* destination end (_edata, exclusive) */

/* copy loop: at least one word is always copied */
1:
lw		t3, 0(t1) /* read 4 bytes from the source */
nop
sw		t3, 0(t0) /* write 4 bytes to the destination */
addu	t0, 4 /* advance destination by 4 bytes */
addu	t1, 4 /* advance source by 4 bytes */
sltu	t3, t0, t2 /* t3 = 1 while the destination is still below _edata */
bnez	t3, 1b
nop
/* copy done. */

move	a0, msize  /* a0 = msize, passed to _rtthread_entry */
srl	a0, 20 /* a0 >>= 20: bytes -> MiB */

/* hand over to the C world */
la		v0, _rtthread_entry /* v0 = address of _rtthread_entry */
jalr	v0 /* all low-level setup is done */
nop
text_copy_end: /* end of the region copied by selfboot_copy_loop */

loop: /* only reached if _rtthread_entry returns: spin forever */
b	loop
nop
397
398/* functions here */
399
/*
 * stringserial - send a NUL-terminated string to the debug UART.
 *
 * In:       a0 = link-time address of the string; s0 = load-vs-link offset,
 *           added to a0 to obtain the address to read from.
 * Clobbers: a0, a1, a2 (holds the return address), plus v0/v1 and ra through
 *           tgt_putchar.
 */
LEAF(stringserial)
    nop
    move    a2, ra                  /* ra is overwritten by the bal below */
    addu    a1, a0, s0              /* a1 = address of the first character */
    lbu     a0, 0(a1)               /* a0 = first character */
.Lstringserial_next:
    beqz    a0, .Lstringserial_done /* stop at the terminating NUL */
    nop
    bal     tgt_putchar             /* send the character in a0 */
    addiu   a1, a1, 1               /* delay slot: step to the next character */
    b       .Lstringserial_next
    lbu     a0, 0(a1)               /* delay slot: fetch the next character */
.Lstringserial_done:
    jr      a2                      /* return through the saved address */
    nop
END(stringserial)
417
418
/* Digit lookup table used by hexserial; lives in the boot data section. */
.pushsection .selfboot_data
.align	4
hexchar:
        .ascii	"0123456789abcdef"
.popsection

/*
 * hexserial - print the 32-bit value in a0 as eight lowercase hex digits,
 * most significant digit first.
 *
 * In:       a0 = value; s0 = load-vs-link offset, added to the address of
 *           hexchar to obtain the address to read from.
 * Clobbers: a0, a1 (rotating copy of the value), a2 (return address),
 *           a3 (digit counter), v0, plus v1 and ra through tgt_putchar.
 */
LEAF(hexserial)
    nop
    move    a2, ra                  /* ra is overwritten by the bal below */
    move    a1, a0                  /* a1 = value still to be printed */
    li      a3, 7                   /* counter: 7 down to 0 gives 8 digits */
.Lhexserial_digit:
    rol     a0, a1, 4               /* bring the top nibble down to bits 3:0 */
    move    a1, a0                  /* keep the rotated value for the next pass */
    and     a0, a0, 0xf             /* a0 = current nibble */
    la      v0, hexchar             /* reloaded each pass: tgt_putchar overwrites v0 */

/* Alignment directive kept at its original place in the instruction stream. */
.align	4

    addu    v0, v0, s0              /* relocate the table address */
    addu    v0, v0, a0              /* v0 = &hexchar[nibble] */
    bal     tgt_putchar
    lbu     a0, 0(v0)               /* delay slot: a0 = digit character */

    bnez    a3, .Lhexserial_digit
    addiu   a3, a3, -1              /* delay slot: one digit fewer to go */

    jr      a2                      /* return through the saved address */
    nop
END(hexserial)
448
/*
 * tgt_putchar - send one character over the debug UART (UART2).
 *
 * In:       a0 = character to send.
 * Clobbers: v0 (UART base), v1 (line status).
 *
 * Polls the line status register until LSR_TXRDY is set, then writes the
 * character to the data register.
 */
LEAF(tgt_putchar)
    la      v0, LS1C_UART2_BASE             /* v0 = UART2 register base */
    lbu     v1, LS1C_UART_LSR_OFFSET(v0)    /* v1 = line status */
.Ltgt_putchar_wait:
    and     v1, v1, LSR_TXRDY               /* isolate the transmitter-ready bit */
    beqz    v1, .Ltgt_putchar_wait          /* not ready yet: poll again */
    lbu     v1, LS1C_UART_LSR_OFFSET(v0)    /* delay slot: re-read line status */

    sb      a0, LS1C_UART_DAT_OFFSET(v0)    /* transmit the character */
    jr      ra
    nop
END(tgt_putchar)
461
/*
 * CPU_SetSR - modify the CP0 Status register.
 *
 * In:       a0 = bits to set, a1 = bits to clear.
 * Out:      v0 = Status value read before the modification.
 * Clobbers: v1.
 *
 * New Status = (old Status & ~a1) | a0. Eight nops follow the mtc0 before
 * the function returns.
 */
LEAF(CPU_SetSR)
    mfc0    v0, CP0_STATUS          /* v0 = current Status */
    nor     v1, a1, zero            /* v1 = ~a1 */
    and     v1, v1, v0              /* drop the bits requested in a1 */
    or      v1, v1, a0              /* add the bits requested in a0 */
    mtc0    v1, CP0_STATUS
    .rept   8                       /* padding after the CP0 write */
    nop
    .endr
    jr      ra
    nop
END(CPU_SetSR)
479
/*
 * cache_init - size both primary caches from CP0 Config1 and initialize them.
 *
 * Called with bal; the return address is kept in t1 and used by the final jr.
 * Clobbers: t1, t3-t7, v0, v1, a0-a3.
 *
 * Sequence:
 *   1. read Config1 and compute the I-cache size (t5) and D-cache size (t6);
 *   2. store zero tags over the D-cache range (Index_Store_Tag_D);
 *   3. invalidate the I-cache range (Index_Invalidate_I);
 *   4. write back + invalidate the D-cache range (Index_Writeback_Inv_D).
 * Every loop walks from 0x80000000 in steps of 0x20 bytes.
 */
cache_init:
    move t1, ra /* keep the return address; ra is not restored, jr t1 is used */
####part 2####
cache_detect_4way:
    mfc0	t4, CP0_CONFIG,1 /* t4 = CP0 Config, select 1 */
    lui		v0, 0x7 /* v0 = 0x7 << 16 */
    and		v0, t4, v0 /* v0 = t4 & v0 */
    srl		t3, v0, 16 /* t3 = bits 18:16: I-cache associativity field (IA) */

    li		t5, 0x800 		//32*64
    srl		v1, t4,22		//v1 = t4 >> 22
    andi	v1, 7			//I-cache sets per way: 64 x 2^S (IS field)
    sll		t5, v1			//InstCacheSetSize
    sll		t5, t3			//t5 InstCacheSize
                        // NOTE(review): this shifts by the raw IA value; if IA
                        // encodes (ways - 1), the result exceeds the real size
                        // for more than 2 ways - confirm


    andi	v0, t4, 0x0380
    srl		t7, v0, 7		//DA: bits 9:7, D-cache associativity field

    li		t6, 0x800       // 32*64
    srl		v1, t4,13
    andi	v1, 7			//DS: bits 15:13, D-cache sets-per-way field
    sll		t6, v1          // DataCacheSetSize
    sll		t6, t7          // t6 DataCacheSize

####part 3####
    lui		a0, 0x8000			//a0 = 0x8000 << 16
    addu	a1, $0, t5          // a1 = I-cache size
    addu	a2, $0, t6          // a2 = D-cache size
cache_init_d2way:
/* a0=0x80000000, a1=icache_size, a2=dcache_size */
/* a3, v0 and v1 used as local registers */
    mtc0	$0, CP0_TAGHI
    addu	v0, $0, a0 /* v0 = 0 + a0: loop cursor */
    addu	v1, a0, a2 /* v1 = a0 + a2: loop end */
1:	slt		a3, v0, v1 /* a3 = v0 < v1 ? 1 : 0 */
    beq		a3, $0, 1f /* if (a3 == 0) goto 1f */
    nop
    mtc0	$0, CP0_TAGLO
    cache	Index_Store_Tag_D, 0x0(v0) /* 1 way */
4:	beq		$0, $0, 1b /* unconditional branch back to the loop test */
    addiu	v0, v0, 0x20 /* delay slot: advance the cursor by 0x20 bytes */
1:
cache_flush_i2way:
    addu	v0, $0, a0 /* cursor = a0 */
    addu	v1, a0, a1 /* end = a0 + I-cache size */
1:
    slt		a3, v0, v1
    beq		a3, $0, 1f /* leave the loop once cursor >= end */
    nop
    cache	Index_Invalidate_I, 0x0(v0) /* 1 way */
4:
    beq		$0, $0, 1b
    addiu	v0, v0, 0x20 /* delay slot: advance the cursor by 0x20 bytes */
1:
cache_flush_d2way:
    addu	v0, $0, a0 /* cursor = a0 */
    addu	v1, a0, a2 /* end = a0 + D-cache size */
1:	slt		a3, v0, v1
    beq		a3, $0, 1f /* leave the loop once cursor >= end */
    nop
    cache	Index_Writeback_Inv_D, 0x0(v0) /* 1 way */
4:	beq		$0, $0, 1b
    addiu	v0, v0, 0x20 /* delay slot: advance the cursor by 0x20 bytes */

1:
cache_init_finish:
    jr	t1 /* return through the address saved on entry */
    nop
549#endif
550