Mercurial > cgi-bin > hgwebdir.cgi > VMS > C_Libraries > Hash_impl
diff jenkinsHash_lookup8.c @ 25:b398837ef4aa
added jenkins hash files
| author | Sean Halle <seanhalle@yahoo.com> |
|---|---|
| date | Tue, 26 Jun 2012 03:09:05 -0700 |
| parents | |
| children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/jenkinsHash_lookup8.c Tue Jun 26 03:09:05 2012 -0700 1.3 @@ -0,0 +1,451 @@ 1.4 +/* 1.5 +-------------------------------------------------------------------- 1.6 +lookup8.c, by Bob Jenkins, January 4 1997, Public Domain. 1.7 +hash(), hash2(), hash3, and mix() are externally useful functions. 1.8 +Routines to test the hash are included if SELF_TEST is defined. 1.9 +You can use this free for any purpose. It has no warranty. 1.10 + 1.11 +2009: This is obsolete. I recently timed lookup3.c as being faster 1.12 +at producing 64-bit results. 1.13 +-------------------------------------------------------------------- 1.14 +*/ 1.15 +//#define SELF_TEST 1.16 + 1.17 +#include <stdio.h> 1.18 +#include <stddef.h> 1.19 +#include <stdlib.h> 1.20 +typedef unsigned long long ub8; /* unsigned 8-byte quantities */ 1.21 +typedef unsigned long int ub4; /* unsigned 4-byte quantities */ 1.22 +typedef unsigned char ub1; 1.23 + 1.24 +#define hashsize(n) ((ub8)1<<(n)) 1.25 +#define hashmask(n) (hashsize(n)-1) 1.26 + 1.27 +/* 1.28 +-------------------------------------------------------------------- 1.29 +mix -- mix 3 64-bit values reversibly. 1.30 +mix() takes 48 machine instructions, but only 24 cycles on a superscalar 1.31 + machine (like Intel's new MMX architecture). It requires 4 64-bit 1.32 + registers for 4::2 parallelism. 1.33 +All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of 1.34 + (a,b,c), and all deltas of bottom bits were tested. All deltas were 1.35 + tested both on random keys and on keys that were nearly all zero. 1.36 + These deltas all cause every bit of c to change between 1/3 and 2/3 1.37 + of the time (well, only 113/400 to 287/400 of the time for some 1.38 + 2-bit delta). These deltas all cause at least 80 bits to change 1.39 + among (a,b,c) when the mix is run either forward or backward (yes it 1.40 + is reversible). 1.41 +This implies that a hash using mix64 has no funnels. There may be 1.42 + characteristics with 3-bit deltas or bigger, I didn't test for 1.43 + those. 1.44 +-------------------------------------------------------------------- 1.45 +*/ 1.46 +#define mix64(a,b,c) \ 1.47 +{ \ 1.48 + a -= b; a -= c; a ^= (c>>43); \ 1.49 + b -= c; b -= a; b ^= (a<<9); \ 1.50 + c -= a; c -= b; c ^= (b>>8); \ 1.51 + a -= b; a -= c; a ^= (c>>38); \ 1.52 + b -= c; b -= a; b ^= (a<<23); \ 1.53 + c -= a; c -= b; c ^= (b>>5); \ 1.54 + a -= b; a -= c; a ^= (c>>35); \ 1.55 + b -= c; b -= a; b ^= (a<<49); \ 1.56 + c -= a; c -= b; c ^= (b>>11); \ 1.57 + a -= b; a -= c; a ^= (c>>12); \ 1.58 + b -= c; b -= a; b ^= (a<<18); \ 1.59 + c -= a; c -= b; c ^= (b>>22); \ 1.60 +} 1.61 + 1.62 +/* 1.63 +-------------------------------------------------------------------- 1.64 +hash() -- hash a variable-length key into a 64-bit value 1.65 + k : the key (the unaligned variable-length array of bytes) 1.66 + len : the length of the key, counting by bytes 1.67 + level : can be any 8-byte value 1.68 +Returns a 64-bit value. Every bit of the key affects every bit of 1.69 +the return value. No funnels. Every 1-bit and 2-bit delta achieves 1.70 +avalanche. About 41+5len instructions. 1.71 + 1.72 +The best hash table sizes are powers of 2. There is no need to do 1.73 +mod a prime (mod is sooo slow!). If you need less than 64 bits, 1.74 +use a bitmask. For example, if you need only 10 bits, do 1.75 + h = (h & hashmask(10)); 1.76 +In which case, the hash table should have hashsize(10) elements. 1.77 + 1.78 +If you are hashing n strings (ub1 **)k, do it like this: 1.79 + for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); 1.80 + 1.81 +By Bob Jenkins, Jan 4 1997. bob_jenkins@burtleburtle.net. You may 1.82 +use this code any way you wish, private, educational, or commercial, 1.83 +but I would appreciate if you give me credit. 1.84 + 1.85 +See http://burtleburtle.net/bob/hash/evahash.html 1.86 +Use for hash table lookup, or anything where one collision in 2^^64 1.87 +is acceptable. Do NOT use for cryptographic purposes. 1.88 +-------------------------------------------------------------------- 1.89 +*/ 1.90 + 1.91 +ub8 hash( k, length, level) 1.92 +register ub1 *k; /* the key */ 1.93 +register ub8 length; /* the length of the key */ 1.94 +register ub8 level; /* the previous hash, or an arbitrary value */ 1.95 +{ 1.96 + register ub8 a,b,c,len; 1.97 + 1.98 + /* Set up the internal state */ 1.99 + len = length; 1.100 + a = b = level; /* the previous hash value */ 1.101 + c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */ 1.102 + 1.103 + /*---------------------------------------- handle most of the key */ 1.104 + while (len >= 24) 1.105 + { 1.106 + a += (k[0] +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24) 1.107 + +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56)); 1.108 + b += (k[8] +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24) 1.109 + +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56)); 1.110 + c += (k[16] +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24) 1.111 + +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56)); 1.112 + mix64(a,b,c); 1.113 + k += 24; len -= 24; 1.114 + } 1.115 + 1.116 + /*------------------------------------- handle the last 23 bytes */ 1.117 + c += length; 1.118 + switch(len) /* all the case statements fall through */ 1.119 + { 1.120 + case 23: c+=((ub8)k[22]<<56); 1.121 + case 22: c+=((ub8)k[21]<<48); 1.122 + case 21: c+=((ub8)k[20]<<40); 1.123 + case 20: c+=((ub8)k[19]<<32); 1.124 + case 19: c+=((ub8)k[18]<<24); 1.125 + case 18: c+=((ub8)k[17]<<16); 1.126 + case 17: c+=((ub8)k[16]<<8); 1.127 + /* the first byte of c is reserved for the length */ 1.128 + case 16: b+=((ub8)k[15]<<56); 1.129 + case 15: b+=((ub8)k[14]<<48); 1.130 + case 14: b+=((ub8)k[13]<<40); 1.131 + case 13: b+=((ub8)k[12]<<32); 1.132 + case 12: b+=((ub8)k[11]<<24); 1.133 + case 11: b+=((ub8)k[10]<<16); 1.134 + case 10: b+=((ub8)k[ 9]<<8); 1.135 + case 9: b+=((ub8)k[ 8]); 1.136 + case 8: a+=((ub8)k[ 7]<<56); 1.137 + case 7: a+=((ub8)k[ 6]<<48); 1.138 + case 6: a+=((ub8)k[ 5]<<40); 1.139 + case 5: a+=((ub8)k[ 4]<<32); 1.140 + case 4: a+=((ub8)k[ 3]<<24); 1.141 + case 3: a+=((ub8)k[ 2]<<16); 1.142 + case 2: a+=((ub8)k[ 1]<<8); 1.143 + case 1: a+=((ub8)k[ 0]); 1.144 + /* case 0: nothing left to add */ 1.145 + } 1.146 + mix64(a,b,c); 1.147 + /*-------------------------------------------- report the result */ 1.148 + return c; 1.149 +} 1.150 + 1.151 +/* 1.152 +-------------------------------------------------------------------- 1.153 + This works on all machines, is identical to hash() on little-endian 1.154 + machines, and it is much faster than hash(), but it requires 1.155 + -- that the key be an array of ub8's, and 1.156 + -- that all your machines have the same endianness, and 1.157 + -- that the length be the number of ub8's in the key 1.158 +-------------------------------------------------------------------- 1.159 +*/ 1.160 +ub8 hash2( k, length, level) 1.161 +register ub8 *k; /* the key */ 1.162 +register ub8 length; /* the length of the key */ 1.163 +register ub8 level; /* the previous hash, or an arbitrary value */ 1.164 +{ 1.165 + register ub8 a,b,c,len; 1.166 + 1.167 + /* Set up the internal state */ 1.168 + len = length; 1.169 + a = b = level; /* the previous hash value */ 1.170 + c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */ 1.171 + 1.172 + /*---------------------------------------- handle most of the key */ 1.173 + while (len >= 3) 1.174 + { 1.175 + a += k[0]; 1.176 + b += k[1]; 1.177 + c += k[2]; 1.178 + mix64(a,b,c); 1.179 + k += 3; len -= 3; 1.180 + } 1.181 + 1.182 + /*-------------------------------------- handle the last 2 ub8's */ 1.183 + c += (length<<3); 1.184 + switch(len) /* all the case statements fall through */ 1.185 + { 1.186 + /* c is reserved for the length */ 1.187 + case 2: b+=k[1]; 1.188 + case 1: a+=k[0]; 1.189 + /* case 0: nothing left to add */ 1.190 + } 1.191 + mix64(a,b,c); 1.192 + /*-------------------------------------------- report the result */ 1.193 + return c; 1.194 +} 1.195 + 1.196 +/* 1.197 +-------------------------------------------------------------------- 1.198 + This is identical to hash() on little-endian machines, and it is much 1.199 + faster than hash(), but a little slower than hash2(), and it requires 1.200 + -- that all your machines be little-endian, for example all Intel x86 1.201 + chips or all VAXen. It gives wrong results on big-endian machines. 1.202 +-------------------------------------------------------------------- 1.203 +*/ 1.204 + 1.205 +ub8 hash3( k, length, level) 1.206 +register ub1 *k; /* the key */ 1.207 +register ub8 length; /* the length of the key */ 1.208 +register ub8 level; /* the previous hash, or an arbitrary value */ 1.209 +{ 1.210 + register ub8 a,b,c,len; 1.211 + 1.212 + /* Set up the internal state */ 1.213 + len = length; 1.214 + a = b = level; /* the previous hash value */ 1.215 + c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */ 1.216 + 1.217 + /*---------------------------------------- handle most of the key */ 1.218 + if (((size_t)k)&7) 1.219 + { 1.220 + while (len >= 24) 1.221 + { 1.222 + a += (k[0] +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24) 1.223 + +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56)); 1.224 + b += (k[8] +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24) 1.225 + +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56)); 1.226 + c += (k[16] +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24) 1.227 + +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56)); 1.228 + mix64(a,b,c); 1.229 + k += 24; len -= 24; 1.230 + } 1.231 + } 1.232 + else 1.233 + { 1.234 + while (len >= 24) /* aligned */ 1.235 + { 1.236 + a += *(ub8 *)(k+0); 1.237 + b += *(ub8 *)(k+8); 1.238 + c += *(ub8 *)(k+16); 1.239 + mix64(a,b,c); 1.240 + k += 24; len -= 24; 1.241 + } 1.242 + } 1.243 + 1.244 + /*------------------------------------- handle the last 23 bytes */ 1.245 + c += length; 1.246 + switch(len) /* all the case statements fall through */ 1.247 + { 1.248 + case 23: c+=((ub8)k[22]<<56); 1.249 + case 22: c+=((ub8)k[21]<<48); 1.250 + case 21: c+=((ub8)k[20]<<40); 1.251 + case 20: c+=((ub8)k[19]<<32); 1.252 + case 19: c+=((ub8)k[18]<<24); 1.253 + case 18: c+=((ub8)k[17]<<16); 1.254 + case 17: c+=((ub8)k[16]<<8); 1.255 + /* the first byte of c is reserved for the length */ 1.256 + case 16: b+=((ub8)k[15]<<56); 1.257 + case 15: b+=((ub8)k[14]<<48); 1.258 + case 14: b+=((ub8)k[13]<<40); 1.259 + case 13: b+=((ub8)k[12]<<32); 1.260 + case 12: b+=((ub8)k[11]<<24); 1.261 + case 11: b+=((ub8)k[10]<<16); 1.262 + case 10: b+=((ub8)k[ 9]<<8); 1.263 + case 9: b+=((ub8)k[ 8]); 1.264 + case 8: a+=((ub8)k[ 7]<<56); 1.265 + case 7: a+=((ub8)k[ 6]<<48); 1.266 + case 6: a+=((ub8)k[ 5]<<40); 1.267 + case 5: a+=((ub8)k[ 4]<<32); 1.268 + case 4: a+=((ub8)k[ 3]<<24); 1.269 + case 3: a+=((ub8)k[ 2]<<16); 1.270 + case 2: a+=((ub8)k[ 1]<<8); 1.271 + case 1: a+=((ub8)k[ 0]); 1.272 + /* case 0: nothing left to add */ 1.273 + } 1.274 + mix64(a,b,c); 1.275 + /*-------------------------------------------- report the result */ 1.276 + return c; 1.277 +} 1.278 + 1.279 +#ifdef SELF_TEST 1.280 + 1.281 +/* used for timings */ 1.282 +void driver1() 1.283 +{ 1.284 + ub8 buf[256]; 1.285 + ub8 i; 1.286 + ub8 h=0; 1.287 + 1.288 + for (i=0; i<256; ++i) 1.289 + { 1.290 + h = hash(buf,i,h); 1.291 + } 1.292 +} 1.293 + 1.294 +/* check that every input bit changes every output bit half the time */ 1.295 +#define HASHSTATE 1 1.296 +#define HASHLEN 1 1.297 +#define MAXPAIR 80 1.298 +#define MAXLEN 5 1.299 +void driver2() 1.300 +{ 1.301 + ub1 qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1]; 1.302 + ub8 c[HASHSTATE], d[HASHSTATE], i, j=0, k, l, m, z; 1.303 + ub8 e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE]; 1.304 + ub8 x[HASHSTATE],y[HASHSTATE]; 1.305 + ub8 hlen; 1.306 + 1.307 + printf("No more than %d trials should ever be needed \n",MAXPAIR/2); 1.308 + for (hlen=0; hlen < MAXLEN; ++hlen) 1.309 + { 1.310 + z=0; 1.311 + for (i=0; i<hlen; ++i) /*----------------------- for each byte, */ 1.312 + { 1.313 + for (j=0; j<8; ++j) /*------------------------ for each bit, */ 1.314 + { 1.315 + for (m=0; m<8; ++m) /*-------- for serveral possible levels, */ 1.316 + { 1.317 + for (l=0; l<HASHSTATE; ++l) e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((ub8)0); 1.318 + 1.319 + /*---- check that every input bit affects every output bit */ 1.320 + for (k=0; k<MAXPAIR; k+=2) 1.321 + { 1.322 + ub8 finished=1; 1.323 + /* keys have one bit different */ 1.324 + for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (ub1)0;} 1.325 + /* have a and b be two keys differing in only one bit */ 1.326 + a[i] ^= (k<<j); 1.327 + a[i] ^= (k>>(8-j)); 1.328 + c[0] = hash(a, hlen, m); 1.329 + b[i] ^= ((k+1)<<j); 1.330 + b[i] ^= ((k+1)>>(8-j)); 1.331 + d[0] = hash(b, hlen, m); 1.332 + /* check every bit is 1, 0, set, and not set at least once */ 1.333 + for (l=0; l<HASHSTATE; ++l) 1.334 + { 1.335 + e[l] &= (c[l]^d[l]); 1.336 + f[l] &= ~(c[l]^d[l]); 1.337 + g[l] &= c[l]; 1.338 + h[l] &= ~c[l]; 1.339 + x[l] &= d[l]; 1.340 + y[l] &= ~d[l]; 1.341 + if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0; 1.342 + } 1.343 + if (finished) break; 1.344 + } 1.345 + if (k>z) z=k; 1.346 + if (k==MAXPAIR) 1.347 + { 1.348 + 1.349 + printf("Some bit didn't change: "); 1.350 + printf("%.8lx %.8lx %.8lx %.8lx %.8lx %.8lx ", 1.351 + e[0],f[0],g[0],h[0],x[0],y[0]); 1.352 + printf("i %ld j %ld m %ld len %ld\n", 1.353 + (ub4)i,(ub4)j,(ub4)m,(ub4)hlen); 1.354 + 1.355 + } 1.356 + if (z==MAXPAIR) goto done; 1.357 + } 1.358 + } 1.359 + } 1.360 + done: 1.361 + if (z < MAXPAIR) 1.362 + { 1.363 + printf("Mix success %2ld bytes %2ld levels ",(ub4)i,(ub4)m); 1.364 + printf("required %ld trials\n",(ub4)(z/2)); 1.365 + } 1.366 + } 1.367 + printf("\n"); 1.368 +} 1.369 + 1.370 +/* Check for reading beyond the end of the buffer and alignment problems */ 1.371 +void driver3() 1.372 +{ 1.373 + ub1 buf[MAXLEN+20], *b; 1.374 + ub8 len; 1.375 + ub1 q[] = "This is the time for all good men to come to the aid of their country"; 1.376 + ub1 qq[] = "xThis is the time for all good men to come to the aid of their country"; 1.377 + ub1 qqq[] = "xxThis is the time for all good men to come to the aid of their country"; 1.378 + ub1 qqqq[] = "xxxThis is the time for all good men to come to the aid of their country"; 1.379 + ub1 o[] = "xxxxThis is the time for all good men to come to the aid of their country"; 1.380 + ub1 oo[] = "xxxxxThis is the time for all good men to come to the aid of their country"; 1.381 + ub1 ooo[] = "xxxxxxThis is the time for all good men to come to the aid of their country"; 1.382 + ub1 oooo[] = "xxxxxxxThis is the time for all good men to come to the aid of their country"; 1.383 + ub8 h,i,j,ref,x,y; 1.384 + 1.385 + printf("Endianness. These should all be the same:\n"); 1.386 + h = hash(q+0, (ub8)(sizeof(q)-1), (ub8)0); 1.387 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.388 + h = hash(qq+1, (ub8)(sizeof(q)-1), (ub8)0); 1.389 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.390 + h = hash(qqq+2, (ub8)(sizeof(q)-1), (ub8)0); 1.391 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.392 + h = hash(qqqq+3, (ub8)(sizeof(q)-1), (ub8)0); 1.393 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.394 + h = hash(o+4, (ub8)(sizeof(q)-1), (ub8)0); 1.395 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.396 + h = hash(oo+5, (ub8)(sizeof(q)-1), (ub8)0); 1.397 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.398 + h = hash(ooo+6, (ub8)(sizeof(q)-1), (ub8)0); 1.399 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.400 + h = hash(oooo+7, (ub8)(sizeof(q)-1), (ub8)0); 1.401 + printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32)); 1.402 + printf("\n"); 1.403 + for (h=0, b=buf+1; h<8; ++h, ++b) 1.404 + { 1.405 + for (i=0; i<MAXLEN; ++i) 1.406 + { 1.407 + len = i; 1.408 + for (j=0; j<i; ++j) *(b+j)=0; 1.409 + 1.410 + /* these should all be equal */ 1.411 + ref = hash(b, len, (ub8)1); 1.412 + *(b+i)=(ub1)~0; 1.413 + *(b-1)=(ub1)~0; 1.414 + x = hash(b, len, (ub8)1); 1.415 + y = hash(b, len, (ub8)1); 1.416 + if ((ref != x) || (ref != y)) 1.417 + { 1.418 + 1.419 + printf("alignment error: %.8lx %.8lx %.8lx %ld %ld\n",ref,x,y,h,i); 1.420 + 1.421 + } 1.422 + } 1.423 + } 1.424 +} 1.425 + 1.426 +/* check for problems with nulls */ 1.427 + void driver4() 1.428 +{ 1.429 + ub1 buf[1]; 1.430 + ub8 h,i,state[HASHSTATE]; 1.431 + 1.432 + 1.433 + buf[0] = ~0; 1.434 + for (i=0; i<HASHSTATE; ++i) state[i] = 1; 1.435 + printf("These should all be different\n"); 1.436 + for (i=0, h=0; i<8; ++i) 1.437 + { 1.438 + h = hash(buf, (ub8)0, h); 1.439 + printf("%2ld 0-byte strings, hash is %.8lx%.8lx\n", (ub4)i, 1.440 + (ub4)h,(ub4)(h>>32)); 1.441 + } 1.442 +} 1.443 + 1.444 + 1.445 +int test_lookup8() 1.446 +{ 1.447 + driver1(); /* test that the key is hashed: used for timings */ 1.448 + driver2(); /* test that whole key is hashed thoroughly */ 1.449 + driver3(); /* test that nothing but the key is hashed */ 1.450 + driver4(); /* test hashing multiple buffers (all buffers are null) */ 1.451 + return 1; 1.452 +} 1.453 + 1.454 +#endif /* SELF_TEST */
