Optimize bzero() by unrolling the aligned quadword loop and moving the length
manipulation away from the length comparison.  Measurements on beast.cdrom.com
show >3X improvement over the original code on large block sizes, putting the
performance on par with the optimized assembly code in libc.
This commit is contained in:
Alexander Langer 1998-11-02 00:14:50 +00:00
parent e0b7bc571a
commit 9184fb847b
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=40814

View File

@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: machdep.c,v 1.18 1998/10/15 22:00:54 dfr Exp $
* $Id: machdep.c,v 1.19 1998/10/30 05:41:07 msmith Exp $
*/
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
@ -1029,10 +1029,22 @@ bzero(void *buf, size_t len)
*p++ = 0;
len--;
}
while (len >= sizeof(u_long) * 8) {
*(u_long*) p = 0;
*((u_long*) p + 1) = 0;
*((u_long*) p + 2) = 0;
*((u_long*) p + 3) = 0;
len -= sizeof(u_long) * 8;
*((u_long*) p + 4) = 0;
*((u_long*) p + 5) = 0;
*((u_long*) p + 6) = 0;
*((u_long*) p + 7) = 0;
p += sizeof(u_long) * 8;
}
while (len >= sizeof(u_long)) {
*(u_long*) p = 0;
p += sizeof(u_long);
len -= sizeof(u_long);
p += sizeof(u_long);
}
while (len) {
*p++ = 0;