/* $Id: memset.S,v 1.1 2000/04/14 16:49:01 mjd Exp $
 *
 * "memset" implementation of SuperH
 *
 * Copyright (C) 1999 Niibe Yutaka
 *
 * Copyright (c) 2009 STMicroelectronics Ltd
 *   Optimised using 64bit data transfer (via FPU) and the movca.l inst.
 *   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */

/*
 * void *memset(void *s, int c, size_t n);
 *
 * Register roles, as used by the code in this file:
 *   r4  in:  s.  Immediately advanced to s + n; every store uses
 *            pre-decrement addressing, so the buffer is filled from its
 *            end towards its start and r4 finishes equal to s.
 *   r5  in:  c.  Widened to the 32-bit pattern VVVV on the word paths.
 *   r6  in:  n.  Holds the number of bytes still to be written.
 *   r0  out: s (copied from r4 in the delay slot of the final rts).
 *
 * Scratch: r0, r1, r2, r3 and the T bit.  When MEMSET_USES_FPU is defined,
 * fpul, fr0 and fr1 are overwritten as well, and fpscr is changed for the
 * duration of the block loop and then put back.
 *
 * Layout convention: an instruction indented by one extra space sits in
 * the delay slot of the branch directly above it.
 */

#include <sysdep.h>

#if defined (__LITTLE_ENDIAN__) && defined (__SH_FPU_ANY__)
#define MEMSET_USES_FPU
/*
 * Use paired single precision load or store mode for 64-bit transferring.
 * FPSCR.SZ=1,FPSCR.SZ=0 is well defined on both SH4-200 and SH4-300.
 * Currently it has been only implemented and tested for little endian mode.
 */

/*
 * FPU_SET_PAIRED_PREC: copy the current FPSCR into r3, then load FPSCR
 * with 0x10 << 16 (PR=0, SZ=1).  Overwrites r1 and r3; r3 has to stay
 * intact until RESTORE_FPSCR is expanded.
 */
.macro FPU_SET_PAIRED_PREC
	sts	fpscr, r3
	mov	#0x10, r1		! becomes PR=0 SZ=1 after the shift
	shll16	r1
	lds	r1, fpscr
.endm

/* RESTORE_FPSCR: reload FPSCR from the copy FPU_SET_PAIRED_PREC kept in r3. */
.macro RESTORE_FPSCR
	lds	r3, fpscr
.endm
#endif

ENTRY(memset)
	mov	#12,r0
	add	r6,r4			! r4 = s + n
	cmp/gt	r6,r0			! T = (12 > n)
	bt/s	.Lfill_bytes		! too short for word stores: bytes only
	 mov	r4,r0

	/* Bring the end pointer down to a 4-byte boundary, one byte at a time. */
	and	#3,r0			! r0 = (s + n) & 3
	cmp/eq	#0,r0
	bt/s	.Lword_aligned		! nothing to trim
	 sub	r0,r6			! account for the r0 bytes stored below
.Lalign_end_byte:
	dt	r0
	bf/s	.Lalign_end_byte
	 mov.b	r5,@-r4

.Lword_aligned:
	/* Replicate the fill byte into all four bytes of r5. */
	extu.b	r5,r5			! 000V
	swap.b	r5,r0			! 00V0
	or	r0,r5			! 00VV
	swap.w	r5,r0			! VV00
	or	r0,r5			! VVVV

	/* Fewer than 64 bytes left: skip the block loop, use 8-byte stores. */
	mov	#64, r0			! (MT)
	cmp/gt	r6,r0			! (MT)  T = (64 > remaining)

	bt/s	.Lfill_dwords
	 mov	r6,r0

	/*
	 * Store single words until r4 sits on a 32-byte (cache block)
	 * boundary.  r4 is already 4-byte aligned here, so the distance is a
	 * multiple of four.
	 */
	mov	r4, r3
	mov	#~(0x1f), r1

	and	r3, r1			! r1 = r4 rounded down to 32 bytes
	cmp/eq	r3, r1

	bt/s	.Lblock_aligned		! no rounding was needed
	 sub	r1, r3			! r3 = r4 & 0x1f
	shlr2	r3			! r3 = number of words to store

.Lalign_block_word:
	mov.l	r5,@-r4
	dt	r3
	bf/s	.Lalign_block_word
	 add	#-4, r6

.Lblock_aligned:
	/* r4 is 32-byte aligned; write whole 32-byte blocks. */
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r2			! r2 = remaining >> 5 = block count

	add	#-32, r4		! r4 = base of the highest block
	mov	r5, r0			! movca.l takes its data from r0

#ifdef MEMSET_USES_FPU
	lds	r5, fpul		! (CO)
	fsts	fpul, fr0		! dr0 (fr0:fr1) = VVVVVVVV
	fsts	fpul, fr1

	FPU_SET_PAIRED_PREC
.Lfill_block:
	/*
	 * One block per pass: bytes 0-3 via movca.l, bytes 4-7 via mov.l,
	 * then three 8-byte fmov stores at offsets 24, 16 and 8.
	 */
	movca.l	r0, @r4
	mov.l	r5, @(4, r4)
	add	#32, r4			! point just past the block
	fmov	dr0, @-r4		! offset 24
	fmov	dr0, @-r4		! offset 16
	add	#-32, r6
	fmov	dr0, @-r4		! offset 8
	dt	r2
	bf/s	.Lfill_block
	 add	#-40, r4		! from offset 8 down to the next lower block

	RESTORE_FPSCR
#else
.Lfill_block:
	/* One block per pass: movca.l for the first word, mov.l for the rest. */
	movca.l	r0,@r4
	mov.l	r5,@(4, r4)
	mov.l	r5,@(8, r4)
	mov.l	r5,@(12,r4)
	mov.l	r5,@(16,r4)
	mov.l	r5,@(20,r4)
	add	#-32, r6
	mov.l	r5,@(24,r4)
	dt	r2
	mov.l	r5,@(28,r4)
	bf/s	.Lfill_block
	 add	#-32, r4		! step down to the next lower block

#endif
	add	#32, r4			! undo the last step: r4 = lowest byte written
	mov	#8, r0
	cmp/ge	r0, r6			! T = (remaining >= 8)
	bf	.Lfill_bytes

	mov	r6,r0
.Lfill_dwords:
	/* r0 = remaining byte count (at least 8 on every path into here). */
	shlr2	r0
	shlr	r0			! r0 = remaining >> 3
.Lfill_dword_loop:
	dt	r0
	mov.l	r5,@-r4			! two words = 8 bytes per pass
	bf/s	.Lfill_dword_loop
	 mov.l	r5,@-r4

	mov	#7,r0
	and	r0,r6			! 0..7 bytes are left

.Lfill_bytes:
	/* Byte-wise tail; the count may be zero. */
	tst	r6,r6
	bt	.Ldone
.Lfill_byte_loop:
	dt	r6
	bf/s	.Lfill_byte_loop
	 mov.b	r5,@-r4
.Ldone:
	rts
	 mov	r4,r0			! r4 has walked back down to s
END(memset)
libc_hidden_def (memset)