/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
/* Modified by SuperH, Inc. September 2003 */
!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
! Copyright 2002 SuperH Ltd.
!

#include <features.h>
#include <endian.h>

/*
 * Endian-dependent 64-bit register shifts.
 *
 * SHHI is the shift used below to build the byte mask for short fills.
 * NOTE(review): the names suggest "shift towards higher / lower memory
 * addresses" (left on little-endian, right on big-endian) - confirm
 * against the SHmedia manual.  SHLO is defined alongside it but is not
 * referenced by the code in this file.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define SHHI shlld
#define SHLO shlrd
#else
#define SHHI shlrd
#define SHLO shlld
#endif

	.section .text..SHmedia32,"ax"
	.globl	memset
	.type	memset, @function

	.align 5

/*
 * memset - fill a block of memory with one byte value (SHmedia code).
 *
 * Register usage, as established by the code below:
 *
 *   In:   r2  = destination address
 *         r3  = fill value (replicated into all eight bytes of r3)
 *         r4  = number of bytes to fill
 *         r18 = return address (loaded into tr2)
 *   Out:  r2  is never written, so it still holds the destination
 *         address on return.
 *
 *   r22 = r2 & 7            offset of the destination inside its
 *                           8-byte-aligned quadword
 *   r23 = r4 + r22          distance from the aligned start to the end
 *   r24 = r23 >> 3          number of quadword steps (multiquad path)
 *   r25 = r2 - r22          destination rounded down to 8 bytes
 *   r5  = r2 + r4           end address (one past the last byte)
 *   r20 = r5 & -8           end address rounded down to 8 bytes
 *   r8  = mask (short path) / loop bound r20 - 56 (multiquad path)
 *   r7, r9                  scratch (old memory contents, constant 8)
 *   tr0, tr1, tr2           branch targets
 *
 * Three paths:
 *   1. r23 <= 8: the fill stays inside one aligned quadword.  The old
 *      contents are loaded and merged with the pattern under a mask,
 *      then written back with a single partial store.
 *   2. 8 < r23 < 64: a straight-line sequence that stores quadwords
 *      from both ends towards the middle.
 *   3. r23 >= 64: a loop storing 32 bytes per iteration, followed by a
 *      fixed five-quadword tail.
 * The first and last (possibly partial) quadwords are written with
 * stlo.q / sthi.q.  NOTE(review): per the SHmedia misaligned-access
 * instructions these touch only the bytes from the address to the end
 * of its quadword (stlo) or from the start of the quadword up to the
 * address (sthi) - confirm against the architecture manual.
 */
memset:
	pta/l	multiquad, tr0
	andi	r2, 7, r22		/* r22 = misalignment of dest */
	ptabs	r18, tr2		/* tr2 = return address */
	mshflo.b r3,r3,r3
	add	r4, r22, r23		/* r23 = count + misalignment */
	mperm.w	r3, r63, r3	/* Fill pattern now in every byte of r3 */

	movi	8, r9
	bgtu/u	r23, r9, tr0	/* multiquad */

	/* Short path: everything lies within one aligned quadword. */
	beqi/u	r4, 0, tr2	/* Return with size 0 - ensures no mem accesses */
	ldlo.q	r2, 0, r7		/* r7 = current memory contents */
	shlli	r4, 2, r4		/* r4 = 4 * count */
	movi	-1, r8
	/*
	 * Shift the all-ones mask by 8 * count bits in total, as two shifts
	 * of 4 * count bits each.  NOTE(review): presumably split in two so
	 * that count == 8 yields a full 64-bit shift, which a single shift
	 * could not express - confirm.
	 */
	SHHI	r8, r4, r8
	SHHI	r8, r4, r8
	mcmv	r7, r8, r3		/* keep old bytes where the mask is set */
	stlo.q	r2, 0, r3
	blink	tr2, r63

multiquad:
	pta/l	lastquad, tr0
	stlo.q	r2, 0, r3		/* leading (possibly partial) quadword */
	shlri	r23, 3, r24		/* r24 = quadword steps to the end */
	add	r2, r4, r5		/* r5 = end address */
	beqi/u	r24, 1, tr0	/* lastquad */
	pta/l	loop, tr1
	sub	r2, r22, r25		/* r25 = dest rounded down to 8 */
	andi	r5, -8, r20	/* calculate end address and */
	addi	r20, -7*8, r8	/* loop end address; This might overflow, so we need
				   to use a different test before we start the loop
				 */
	bge/u	r24, r9, tr1	/* loop */
	/* Fewer than 8 quadwords: store pairs from both ends inwards. */
	st.q	r25, 8, r3
	st.q	r20, -8, r3
	shlri	r24, 1, r24		/* r24 = number of pairs needed */
	beqi/u	r24, 1, tr0	/* lastquad */
	st.q	r25, 16, r3
	st.q	r20, -16, r3
	beqi/u	r24, 2, tr0	/* lastquad */
	st.q	r25, 24, r3
	st.q	r20, -24, r3
lastquad:
	sthi.q	r5, -1, r3		/* trailing (possibly partial) quadword */
	blink	tr2,r63

loop:
	/*
	 * QQQ The alloco below is commented out as a short-term fix to
	 * QQQ SHUK #3895.  Commenting it out is logically correct, but
	 * QQQ sub-optimal.
	 * QQQ Sean McGoogan - 4th April 2003.
	 */
!!!	alloco	r25, 32
	st.q	r25, 8, r3
	st.q	r25, 16, r3
	st.q	r25, 24, r3
	st.q	r25, 32, r3
	addi	r25, 32, r25
	bgeu/l	r8, r25, tr1	/* loop */

	/* Fixed tail: the last five aligned quadwords below r20 ... */
	st.q	r20, -40, r3
	st.q	r20, -32, r3
	st.q	r20, -24, r3
	st.q	r20, -16, r3
	st.q	r20, -8, r3
	/* ... and the trailing (possibly partial) quadword. */
	sthi.q	r5, -1, r3
	blink	tr2,r63

	.size memset,.-memset

libc_hidden_def(memset)