Mercurial > cgi-bin > hgwebdir.cgi > VMS > C_Libraries > Hash_impl
view jenkinsHash_lookup8.c @ 31:1d42a512f482
Renamed VMS to PR, in new branch
| author | Sean Halle <seanhalle@yahoo.com> |
|---|---|
| date | Fri, 08 Mar 2013 05:44:24 -0800 |
| parents | |
| children |
line source
1 /*
2 --------------------------------------------------------------------
3 lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
4 hash(), hash2(), hash3, and mix() are externally useful functions.
5 Routines to test the hash are included if SELF_TEST is defined.
6 You can use this free for any purpose. It has no warranty.
8 2009: This is obsolete. I recently timed lookup3.c as being faster
9 at producing 64-bit results.
10 --------------------------------------------------------------------
11 */
12 //#define SELF_TEST
14 #include <stdio.h>
15 #include <stddef.h>
16 #include <stdlib.h>
17 typedef unsigned long long ub8; /* unsigned 8-byte quantities */
18 typedef unsigned long int ub4; /* unsigned 4-byte quantities */
19 typedef unsigned char ub1;
21 #define hashsize(n) ((ub8)1<<(n))
22 #define hashmask(n) (hashsize(n)-1)
24 /*
25 --------------------------------------------------------------------
26 mix -- mix 3 64-bit values reversibly.
27 mix() takes 48 machine instructions, but only 24 cycles on a superscalar
28 machine (like Intel's new MMX architecture). It requires 4 64-bit
29 registers for 4::2 parallelism.
30 All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
31 (a,b,c), and all deltas of bottom bits were tested. All deltas were
32 tested both on random keys and on keys that were nearly all zero.
33 These deltas all cause every bit of c to change between 1/3 and 2/3
34 of the time (well, only 113/400 to 287/400 of the time for some
35 2-bit delta). These deltas all cause at least 80 bits to change
36 among (a,b,c) when the mix is run either forward or backward (yes it
37 is reversible).
38 This implies that a hash using mix64 has no funnels. There may be
39 characteristics with 3-bit deltas or bigger, I didn't test for
40 those.
41 --------------------------------------------------------------------
42 */
43 #define mix64(a,b,c) \
44 { \
45 a -= b; a -= c; a ^= (c>>43); \
46 b -= c; b -= a; b ^= (a<<9); \
47 c -= a; c -= b; c ^= (b>>8); \
48 a -= b; a -= c; a ^= (c>>38); \
49 b -= c; b -= a; b ^= (a<<23); \
50 c -= a; c -= b; c ^= (b>>5); \
51 a -= b; a -= c; a ^= (c>>35); \
52 b -= c; b -= a; b ^= (a<<49); \
53 c -= a; c -= b; c ^= (b>>11); \
54 a -= b; a -= c; a ^= (c>>12); \
55 b -= c; b -= a; b ^= (a<<18); \
56 c -= a; c -= b; c ^= (b>>22); \
57 }
59 /*
60 --------------------------------------------------------------------
61 hash() -- hash a variable-length key into a 64-bit value
62 k : the key (the unaligned variable-length array of bytes)
63 len : the length of the key, counting by bytes
64 level : can be any 8-byte value
65 Returns a 64-bit value. Every bit of the key affects every bit of
66 the return value. No funnels. Every 1-bit and 2-bit delta achieves
67 avalanche. About 41+5len instructions.
69 The best hash table sizes are powers of 2. There is no need to do
70 mod a prime (mod is sooo slow!). If you need less than 64 bits,
71 use a bitmask. For example, if you need only 10 bits, do
72 h = (h & hashmask(10));
73 In which case, the hash table should have hashsize(10) elements.
75 If you are hashing n strings (ub1 **)k, do it like this:
76 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
78 By Bob Jenkins, Jan 4 1997. bob_jenkins@burtleburtle.net. You may
79 use this code any way you wish, private, educational, or commercial,
80 but I would appreciate if you give me credit.
82 See http://burtleburtle.net/bob/hash/evahash.html
83 Use for hash table lookup, or anything where one collision in 2^^64
84 is acceptable. Do NOT use for cryptographic purposes.
85 --------------------------------------------------------------------
86 */
88 ub8 hash( k, length, level)
89 register ub1 *k; /* the key */
90 register ub8 length; /* the length of the key */
91 register ub8 level; /* the previous hash, or an arbitrary value */
92 {
93 register ub8 a,b,c,len;
95 /* Set up the internal state */
96 len = length;
97 a = b = level; /* the previous hash value */
98 c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
100 /*---------------------------------------- handle most of the key */
101 while (len >= 24)
102 {
103 a += (k[0] +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24)
104 +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56));
105 b += (k[8] +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24)
106 +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56));
107 c += (k[16] +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24)
108 +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56));
109 mix64(a,b,c);
110 k += 24; len -= 24;
111 }
113 /*------------------------------------- handle the last 23 bytes */
114 c += length;
115 switch(len) /* all the case statements fall through */
116 {
117 case 23: c+=((ub8)k[22]<<56);
118 case 22: c+=((ub8)k[21]<<48);
119 case 21: c+=((ub8)k[20]<<40);
120 case 20: c+=((ub8)k[19]<<32);
121 case 19: c+=((ub8)k[18]<<24);
122 case 18: c+=((ub8)k[17]<<16);
123 case 17: c+=((ub8)k[16]<<8);
124 /* the first byte of c is reserved for the length */
125 case 16: b+=((ub8)k[15]<<56);
126 case 15: b+=((ub8)k[14]<<48);
127 case 14: b+=((ub8)k[13]<<40);
128 case 13: b+=((ub8)k[12]<<32);
129 case 12: b+=((ub8)k[11]<<24);
130 case 11: b+=((ub8)k[10]<<16);
131 case 10: b+=((ub8)k[ 9]<<8);
132 case 9: b+=((ub8)k[ 8]);
133 case 8: a+=((ub8)k[ 7]<<56);
134 case 7: a+=((ub8)k[ 6]<<48);
135 case 6: a+=((ub8)k[ 5]<<40);
136 case 5: a+=((ub8)k[ 4]<<32);
137 case 4: a+=((ub8)k[ 3]<<24);
138 case 3: a+=((ub8)k[ 2]<<16);
139 case 2: a+=((ub8)k[ 1]<<8);
140 case 1: a+=((ub8)k[ 0]);
141 /* case 0: nothing left to add */
142 }
143 mix64(a,b,c);
144 /*-------------------------------------------- report the result */
145 return c;
146 }
148 /*
149 --------------------------------------------------------------------
150 This works on all machines, is identical to hash() on little-endian
151 machines, and it is much faster than hash(), but it requires
152 -- that the key be an array of ub8's, and
153 -- that all your machines have the same endianness, and
154 -- that the length be the number of ub8's in the key
155 --------------------------------------------------------------------
156 */
157 ub8 hash2( k, length, level)
158 register ub8 *k; /* the key */
159 register ub8 length; /* the length of the key */
160 register ub8 level; /* the previous hash, or an arbitrary value */
161 {
162 register ub8 a,b,c,len;
164 /* Set up the internal state */
165 len = length;
166 a = b = level; /* the previous hash value */
167 c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
169 /*---------------------------------------- handle most of the key */
170 while (len >= 3)
171 {
172 a += k[0];
173 b += k[1];
174 c += k[2];
175 mix64(a,b,c);
176 k += 3; len -= 3;
177 }
179 /*-------------------------------------- handle the last 2 ub8's */
180 c += (length<<3);
181 switch(len) /* all the case statements fall through */
182 {
183 /* c is reserved for the length */
184 case 2: b+=k[1];
185 case 1: a+=k[0];
186 /* case 0: nothing left to add */
187 }
188 mix64(a,b,c);
189 /*-------------------------------------------- report the result */
190 return c;
191 }
193 /*
194 --------------------------------------------------------------------
195 This is identical to hash() on little-endian machines, and it is much
196 faster than hash(), but a little slower than hash2(), and it requires
197 -- that all your machines be little-endian, for example all Intel x86
198 chips or all VAXen. It gives wrong results on big-endian machines.
199 --------------------------------------------------------------------
200 */
202 ub8 hash3( k, length, level)
203 register ub1 *k; /* the key */
204 register ub8 length; /* the length of the key */
205 register ub8 level; /* the previous hash, or an arbitrary value */
206 {
207 register ub8 a,b,c,len;
209 /* Set up the internal state */
210 len = length;
211 a = b = level; /* the previous hash value */
212 c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
214 /*---------------------------------------- handle most of the key */
215 if (((size_t)k)&7)
216 {
217 while (len >= 24)
218 {
219 a += (k[0] +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24)
220 +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56));
221 b += (k[8] +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24)
222 +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56));
223 c += (k[16] +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24)
224 +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56));
225 mix64(a,b,c);
226 k += 24; len -= 24;
227 }
228 }
229 else
230 {
231 while (len >= 24) /* aligned */
232 {
233 a += *(ub8 *)(k+0);
234 b += *(ub8 *)(k+8);
235 c += *(ub8 *)(k+16);
236 mix64(a,b,c);
237 k += 24; len -= 24;
238 }
239 }
241 /*------------------------------------- handle the last 23 bytes */
242 c += length;
243 switch(len) /* all the case statements fall through */
244 {
245 case 23: c+=((ub8)k[22]<<56);
246 case 22: c+=((ub8)k[21]<<48);
247 case 21: c+=((ub8)k[20]<<40);
248 case 20: c+=((ub8)k[19]<<32);
249 case 19: c+=((ub8)k[18]<<24);
250 case 18: c+=((ub8)k[17]<<16);
251 case 17: c+=((ub8)k[16]<<8);
252 /* the first byte of c is reserved for the length */
253 case 16: b+=((ub8)k[15]<<56);
254 case 15: b+=((ub8)k[14]<<48);
255 case 14: b+=((ub8)k[13]<<40);
256 case 13: b+=((ub8)k[12]<<32);
257 case 12: b+=((ub8)k[11]<<24);
258 case 11: b+=((ub8)k[10]<<16);
259 case 10: b+=((ub8)k[ 9]<<8);
260 case 9: b+=((ub8)k[ 8]);
261 case 8: a+=((ub8)k[ 7]<<56);
262 case 7: a+=((ub8)k[ 6]<<48);
263 case 6: a+=((ub8)k[ 5]<<40);
264 case 5: a+=((ub8)k[ 4]<<32);
265 case 4: a+=((ub8)k[ 3]<<24);
266 case 3: a+=((ub8)k[ 2]<<16);
267 case 2: a+=((ub8)k[ 1]<<8);
268 case 1: a+=((ub8)k[ 0]);
269 /* case 0: nothing left to add */
270 }
271 mix64(a,b,c);
272 /*-------------------------------------------- report the result */
273 return c;
274 }
276 #ifdef SELF_TEST
278 /* used for timings */
279 void driver1()
280 {
281 ub8 buf[256];
282 ub8 i;
283 ub8 h=0;
285 for (i=0; i<256; ++i)
286 {
287 h = hash(buf,i,h);
288 }
289 }
291 /* check that every input bit changes every output bit half the time */
292 #define HASHSTATE 1
293 #define HASHLEN 1
294 #define MAXPAIR 80
295 #define MAXLEN 5
296 void driver2()
297 {
298 ub1 qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
299 ub8 c[HASHSTATE], d[HASHSTATE], i, j=0, k, l, m, z;
300 ub8 e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
301 ub8 x[HASHSTATE],y[HASHSTATE];
302 ub8 hlen;
304 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
305 for (hlen=0; hlen < MAXLEN; ++hlen)
306 {
307 z=0;
308 for (i=0; i<hlen; ++i) /*----------------------- for each byte, */
309 {
310 for (j=0; j<8; ++j) /*------------------------ for each bit, */
311 {
312 for (m=0; m<8; ++m) /*-------- for serveral possible levels, */
313 {
314 for (l=0; l<HASHSTATE; ++l) e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((ub8)0);
316 /*---- check that every input bit affects every output bit */
317 for (k=0; k<MAXPAIR; k+=2)
318 {
319 ub8 finished=1;
320 /* keys have one bit different */
321 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (ub1)0;}
322 /* have a and b be two keys differing in only one bit */
323 a[i] ^= (k<<j);
324 a[i] ^= (k>>(8-j));
325 c[0] = hash(a, hlen, m);
326 b[i] ^= ((k+1)<<j);
327 b[i] ^= ((k+1)>>(8-j));
328 d[0] = hash(b, hlen, m);
329 /* check every bit is 1, 0, set, and not set at least once */
330 for (l=0; l<HASHSTATE; ++l)
331 {
332 e[l] &= (c[l]^d[l]);
333 f[l] &= ~(c[l]^d[l]);
334 g[l] &= c[l];
335 h[l] &= ~c[l];
336 x[l] &= d[l];
337 y[l] &= ~d[l];
338 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
339 }
340 if (finished) break;
341 }
342 if (k>z) z=k;
343 if (k==MAXPAIR)
344 {
346 printf("Some bit didn't change: ");
347 printf("%.8lx %.8lx %.8lx %.8lx %.8lx %.8lx ",
348 e[0],f[0],g[0],h[0],x[0],y[0]);
349 printf("i %ld j %ld m %ld len %ld\n",
350 (ub4)i,(ub4)j,(ub4)m,(ub4)hlen);
352 }
353 if (z==MAXPAIR) goto done;
354 }
355 }
356 }
357 done:
358 if (z < MAXPAIR)
359 {
360 printf("Mix success %2ld bytes %2ld levels ",(ub4)i,(ub4)m);
361 printf("required %ld trials\n",(ub4)(z/2));
362 }
363 }
364 printf("\n");
365 }
367 /* Check for reading beyond the end of the buffer and alignment problems */
368 void driver3()
369 {
370 ub1 buf[MAXLEN+20], *b;
371 ub8 len;
372 ub1 q[] = "This is the time for all good men to come to the aid of their country";
373 ub1 qq[] = "xThis is the time for all good men to come to the aid of their country";
374 ub1 qqq[] = "xxThis is the time for all good men to come to the aid of their country";
375 ub1 qqqq[] = "xxxThis is the time for all good men to come to the aid of their country";
376 ub1 o[] = "xxxxThis is the time for all good men to come to the aid of their country";
377 ub1 oo[] = "xxxxxThis is the time for all good men to come to the aid of their country";
378 ub1 ooo[] = "xxxxxxThis is the time for all good men to come to the aid of their country";
379 ub1 oooo[] = "xxxxxxxThis is the time for all good men to come to the aid of their country";
380 ub8 h,i,j,ref,x,y;
382 printf("Endianness. These should all be the same:\n");
383 h = hash(q+0, (ub8)(sizeof(q)-1), (ub8)0);
384 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
385 h = hash(qq+1, (ub8)(sizeof(q)-1), (ub8)0);
386 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
387 h = hash(qqq+2, (ub8)(sizeof(q)-1), (ub8)0);
388 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
389 h = hash(qqqq+3, (ub8)(sizeof(q)-1), (ub8)0);
390 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
391 h = hash(o+4, (ub8)(sizeof(q)-1), (ub8)0);
392 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
393 h = hash(oo+5, (ub8)(sizeof(q)-1), (ub8)0);
394 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
395 h = hash(ooo+6, (ub8)(sizeof(q)-1), (ub8)0);
396 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
397 h = hash(oooo+7, (ub8)(sizeof(q)-1), (ub8)0);
398 printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
399 printf("\n");
400 for (h=0, b=buf+1; h<8; ++h, ++b)
401 {
402 for (i=0; i<MAXLEN; ++i)
403 {
404 len = i;
405 for (j=0; j<i; ++j) *(b+j)=0;
407 /* these should all be equal */
408 ref = hash(b, len, (ub8)1);
409 *(b+i)=(ub1)~0;
410 *(b-1)=(ub1)~0;
411 x = hash(b, len, (ub8)1);
412 y = hash(b, len, (ub8)1);
413 if ((ref != x) || (ref != y))
414 {
416 printf("alignment error: %.8lx %.8lx %.8lx %ld %ld\n",ref,x,y,h,i);
418 }
419 }
420 }
421 }
423 /* check for problems with nulls */
424 void driver4()
425 {
426 ub1 buf[1];
427 ub8 h,i,state[HASHSTATE];
430 buf[0] = ~0;
431 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
432 printf("These should all be different\n");
433 for (i=0, h=0; i<8; ++i)
434 {
435 h = hash(buf, (ub8)0, h);
436 printf("%2ld 0-byte strings, hash is %.8lx%.8lx\n", (ub4)i,
437 (ub4)h,(ub4)(h>>32));
438 }
439 }
442 int test_lookup8()
443 {
444 driver1(); /* test that the key is hashed: used for timings */
445 driver2(); /* test that whole key is hashed thoroughly */
446 driver3(); /* test that nothing but the key is hashed */
447 driver4(); /* test hashing multiple buffers (all buffers are null) */
448 return 1;
449 }
451 #endif /* SELF_TEST */
