Patch from Joakim Tjernlund <joakim.tjernlund@lumentis.se>

Here is another update (against BK curr) for crc32(). A kind soul pointed out
the optimizations below.

lib/crc32defs.h:
 - Make it possible to define new values for CRC_LE_BITS/CRC_BE_BITS without
   modifying the source.

lib/crc32.c:
 - Eliminate the need for ENDIAN_SHIFT. Saves a 24-bit shift in the byte
   loops.

 - Swap the XOR expression in DO_CRC. gcc for x86 cannot do that simple
   optimization itself (gcc 3.2.2 and RH gcc 2.96 tested). Will improve
   performance by 20-25% on x86.
 
           Joakim Tjernlund



 dev/null    |    0 
 crc32.c     |   40 ++++++++++++++++------------------------
 crc32defs.h |    8 ++++++--
 3 files changed, 22 insertions(+), 26 deletions(-)

diff -puN lib/crc32.c~crc32-speedup-2 lib/crc32.c
--- 25/lib/crc32.c~crc32-speedup-2	2003-02-18 02:02:17.000000000 -0800
+++ 25-akpm/lib/crc32.c	2003-02-18 02:02:17.000000000 -0800
@@ -90,19 +90,16 @@ u32 attribute((pure)) crc32_le(u32 crc, 
 	const u32      *tab = crc32table_le;
 
 # ifdef __LITTLE_ENDIAN
-#  define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
-#  define ENDIAN_SHIFT 0
+#  define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
 # else
-#  define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
-#  define ENDIAN_SHIFT 24
+#  define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
 # endif
 
 	crc = __cpu_to_le32(crc);
 	/* Align it */
 	if(unlikely(((long)b)&3 && len)){
 		do {
-			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
-			DO_CRC;
+			DO_CRC(*((u8 *)b)++);
 		} while ((--len) && ((long)b)&3 );
 	}
 	if(likely(len >= 4)){
@@ -112,10 +109,10 @@ u32 attribute((pure)) crc32_le(u32 crc, 
 		--b; /* use pre increment below(*++b) for speed */
 		do {
 			crc ^= *++b;
-			DO_CRC;
-			DO_CRC;
-			DO_CRC;
-			DO_CRC;
+			DO_CRC(0);
+			DO_CRC(0);
+			DO_CRC(0);
+			DO_CRC(0);
 		} while (--len);
 		b++; /* point to next byte(s) */
 		len = save_len;
@@ -123,8 +120,7 @@ u32 attribute((pure)) crc32_le(u32 crc, 
 	/* And the last few bytes */
 	if(len){
 		do {
-			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
-			DO_CRC;
+			DO_CRC(*((u8 *)b)++);
 		} while (--len);
 	}
 
@@ -195,19 +191,16 @@ u32 attribute((pure)) crc32_be(u32 crc, 
 	const u32      *tab = crc32table_be;
 
 # ifdef __LITTLE_ENDIAN
-#  define DO_CRC crc = (crc>>8) ^ tab[ crc & 255 ]
-#  define ENDIAN_SHIFT 24
+#  define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
 # else
-#  define DO_CRC crc = (crc<<8) ^ tab[ crc >> 24 ]
-#  define ENDIAN_SHIFT 0
+#  define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
 # endif
 
 	crc = __cpu_to_be32(crc);
 	/* Align it */
 	if(unlikely(((long)b)&3 && len)){
 		do {
-			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
-			DO_CRC;
+			DO_CRC(*((u8 *)b)++);
 		} while ((--len) && ((long)b)&3 );
 	}
 	if(likely(len >= 4)){
@@ -217,10 +210,10 @@ u32 attribute((pure)) crc32_be(u32 crc, 
 		--b; /* use pre increment below(*++b) for speed */
 		do {
 			crc ^= *++b;
-			DO_CRC;
-			DO_CRC;
-			DO_CRC;
-			DO_CRC;
+			DO_CRC(0);
+			DO_CRC(0);
+			DO_CRC(0);
+			DO_CRC(0);
 		} while (--len);
 		b++; /* point to next byte(s) */
 		len = save_len;
@@ -228,8 +221,7 @@ u32 attribute((pure)) crc32_be(u32 crc, 
 	/* And the last few bytes */
 	if(len){
 		do {
-			crc ^= *((u8 *)b)++ << ENDIAN_SHIFT;
-			DO_CRC;
+			DO_CRC(*((u8 *)b)++);
 		} while (--len);
 	}
 	return __be32_to_cpu(crc);
diff -puN lib/crc32defs.h~crc32-speedup-2 lib/crc32defs.h
--- 25/lib/crc32defs.h~crc32-speedup-2	2003-02-18 02:02:17.000000000 -0800
+++ 25-akpm/lib/crc32defs.h	2003-02-18 02:02:17.000000000 -0800
@@ -8,8 +8,12 @@
 
 /* How many bits at a time to use.  Requires a table of 4<<CRC_xx_BITS bytes. */
 /* For less performance-sensitive, use 4 */
-#define CRC_LE_BITS 8
-#define CRC_BE_BITS 8
+#ifndef CRC_LE_BITS 
+# define CRC_LE_BITS 8
+#endif
+#ifndef CRC_BE_BITS
+# define CRC_BE_BITS 8
+#endif
 
 /*
  * Little-endian CRC computation.  Used with serial bit streams sent
diff -puN -L lib/new.crc32.c /dev/null /dev/null
diff -puN -L lib/new.crc32defs.h /dev/null /dev/null

_