annotate jenkinsHash_lookup3.c @ 28:18a72865dd78

marked TODO: doubleTableSize corrupts mem
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Wed, 06 Mar 2013 14:35:01 +0100
parents b398837ef4aa
children
rev   line source
seanhalle@25 1 /*
seanhalle@25 2 -------------------------------------------------------------------------------
seanhalle@25 3 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
seanhalle@25 4
seanhalle@25 5 These are functions for producing 32-bit hashes for hash table lookup.
seanhalle@25 6 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
seanhalle@25 7 are externally useful functions. Routines to test the hash are included
seanhalle@25 8 if SELF_TEST is defined. You can use this free for any purpose. It's in
seanhalle@25 9 the public domain. It has no warranty.
seanhalle@25 10
seanhalle@25 11 You probably want to use hashlittle(). hashlittle() and hashbig()
seanhalle@25 12 hash byte arrays. hashlittle() is is faster than hashbig() on
seanhalle@25 13 little-endian machines. Intel and AMD are little-endian machines.
seanhalle@25 14 On second thought, you probably want hashlittle2(), which is identical to
seanhalle@25 15 hashlittle() except it returns two 32-bit hashes for the price of one.
seanhalle@25 16 You could implement hashbig2() if you wanted but I haven't bothered here.
seanhalle@25 17
seanhalle@25 18 If you want to find a hash of, say, exactly 7 integers, do
seanhalle@25 19 a = i1; b = i2; c = i3;
seanhalle@25 20 mix(a,b,c);
seanhalle@25 21 a += i4; b += i5; c += i6;
seanhalle@25 22 mix(a,b,c);
seanhalle@25 23 a += i7;
seanhalle@25 24 final(a,b,c);
seanhalle@25 25 then use c as the hash value. If you have a variable length array of
seanhalle@25 26 4-byte integers to hash, use hashword(). If you have a byte array (like
seanhalle@25 27 a character string), use hashlittle(). If you have several byte arrays, or
seanhalle@25 28 a mix of things, see the comments above hashlittle().
seanhalle@25 29
seanhalle@25 30 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
seanhalle@25 31 then mix those integers. This is fast (you can do a lot more thorough
seanhalle@25 32 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
seanhalle@25 33 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
seanhalle@25 34 -------------------------------------------------------------------------------
seanhalle@25 35 */
seanhalle@25 36 //#define SELF_TEST 1
seanhalle@25 37
seanhalle@25 38 #include <stdio.h> /* defines printf for tests */
seanhalle@25 39 #include <time.h> /* defines time_t for timings in the test */
seanhalle@25 40 #include <stdint.h> /* defines uint32_t etc */
seanhalle@25 41 #include <sys/param.h> /* attempt to define endianness */
seanhalle@25 42 #ifdef linux
seanhalle@25 43 # include <endian.h> /* attempt to define endianness */
seanhalle@25 44 #endif
seanhalle@25 45
seanhalle@25 46 /*
seanhalle@25 47 * My best guess at if you are big-endian or little-endian. This may
seanhalle@25 48 * need adjustment.
seanhalle@25 49 */
seanhalle@25 50 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
seanhalle@25 51 __BYTE_ORDER == __LITTLE_ENDIAN) || \
seanhalle@25 52 (defined(i386) || defined(__i386__) || defined(__i486__) || \
seanhalle@25 53 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
seanhalle@25 54 # define HASH_LITTLE_ENDIAN 1
seanhalle@25 55 # define HASH_BIG_ENDIAN 0
seanhalle@25 56 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
seanhalle@25 57 __BYTE_ORDER == __BIG_ENDIAN) || \
seanhalle@25 58 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
seanhalle@25 59 # define HASH_LITTLE_ENDIAN 0
seanhalle@25 60 # define HASH_BIG_ENDIAN 1
seanhalle@25 61 #else
seanhalle@25 62 # define HASH_LITTLE_ENDIAN 0
seanhalle@25 63 # define HASH_BIG_ENDIAN 0
seanhalle@25 64 #endif
seanhalle@25 65
seanhalle@25 66 #define hashsize(n) ((uint32_t)1<<(n))
seanhalle@25 67 #define hashmask(n) (hashsize(n)-1)
seanhalle@25 68 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
seanhalle@25 69
seanhalle@25 70 /*
seanhalle@25 71 -------------------------------------------------------------------------------
seanhalle@25 72 mix -- mix 3 32-bit values reversibly.
seanhalle@25 73
seanhalle@25 74 This is reversible, so any information in (a,b,c) before mix() is
seanhalle@25 75 still in (a,b,c) after mix().
seanhalle@25 76
seanhalle@25 77 If four pairs of (a,b,c) inputs are run through mix(), or through
seanhalle@25 78 mix() in reverse, there are at least 32 bits of the output that
seanhalle@25 79 are sometimes the same for one pair and different for another pair.
seanhalle@25 80 This was tested for:
seanhalle@25 81 * pairs that differed by one bit, by two bits, in any combination
seanhalle@25 82 of top bits of (a,b,c), or in any combination of bottom bits of
seanhalle@25 83 (a,b,c).
seanhalle@25 84 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
seanhalle@25 85 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
seanhalle@25 86 is commonly produced by subtraction) look like a single 1-bit
seanhalle@25 87 difference.
seanhalle@25 88 * the base values were pseudorandom, all zero but one bit set, or
seanhalle@25 89 all zero plus a counter that starts at zero.
seanhalle@25 90
seanhalle@25 91 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
seanhalle@25 92 satisfy this are
seanhalle@25 93 4 6 8 16 19 4
seanhalle@25 94 9 15 3 18 27 15
seanhalle@25 95 14 9 3 7 17 3
seanhalle@25 96 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
seanhalle@25 97 for "differ" defined as + with a one-bit base and a two-bit delta. I
seanhalle@25 98 used http://burtleburtle.net/bob/hash/avalanche.html to choose
seanhalle@25 99 the operations, constants, and arrangements of the variables.
seanhalle@25 100
seanhalle@25 101 This does not achieve avalanche. There are input bits of (a,b,c)
seanhalle@25 102 that fail to affect some output bits of (a,b,c), especially of a. The
seanhalle@25 103 most thoroughly mixed value is c, but it doesn't really even achieve
seanhalle@25 104 avalanche in c.
seanhalle@25 105
seanhalle@25 106 This allows some parallelism. Read-after-writes are good at doubling
seanhalle@25 107 the number of bits affected, so the goal of mixing pulls in the opposite
seanhalle@25 108 direction as the goal of parallelism. I did what I could. Rotates
seanhalle@25 109 seem to cost as much as shifts on every machine I could lay my hands
seanhalle@25 110 on, and rotates are much kinder to the top and bottom bits, so I used
seanhalle@25 111 rotates.
seanhalle@25 112 -------------------------------------------------------------------------------
seanhalle@25 113 */
seanhalle@25 114 #define mix(a,b,c) \
seanhalle@25 115 { \
seanhalle@25 116 a -= c; a ^= rot(c, 4); c += b; \
seanhalle@25 117 b -= a; b ^= rot(a, 6); a += c; \
seanhalle@25 118 c -= b; c ^= rot(b, 8); b += a; \
seanhalle@25 119 a -= c; a ^= rot(c,16); c += b; \
seanhalle@25 120 b -= a; b ^= rot(a,19); a += c; \
seanhalle@25 121 c -= b; c ^= rot(b, 4); b += a; \
seanhalle@25 122 }
seanhalle@25 123
seanhalle@25 124 /*
seanhalle@25 125 -------------------------------------------------------------------------------
seanhalle@25 126 final -- final mixing of 3 32-bit values (a,b,c) into c
seanhalle@25 127
seanhalle@25 128 Pairs of (a,b,c) values differing in only a few bits will usually
seanhalle@25 129 produce values of c that look totally different. This was tested for
seanhalle@25 130 * pairs that differed by one bit, by two bits, in any combination
seanhalle@25 131 of top bits of (a,b,c), or in any combination of bottom bits of
seanhalle@25 132 (a,b,c).
seanhalle@25 133 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
seanhalle@25 134 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
seanhalle@25 135 is commonly produced by subtraction) look like a single 1-bit
seanhalle@25 136 difference.
seanhalle@25 137 * the base values were pseudorandom, all zero but one bit set, or
seanhalle@25 138 all zero plus a counter that starts at zero.
seanhalle@25 139
seanhalle@25 140 These constants passed:
seanhalle@25 141 14 11 25 16 4 14 24
seanhalle@25 142 12 14 25 16 4 14 24
seanhalle@25 143 and these came close:
seanhalle@25 144 4 8 15 26 3 22 24
seanhalle@25 145 10 8 15 26 3 22 24
seanhalle@25 146 11 8 15 26 3 22 24
seanhalle@25 147 -------------------------------------------------------------------------------
seanhalle@25 148 */
seanhalle@25 149 #define final(a,b,c) \
seanhalle@25 150 { \
seanhalle@25 151 c ^= b; c -= rot(b,14); \
seanhalle@25 152 a ^= c; a -= rot(c,11); \
seanhalle@25 153 b ^= a; b -= rot(a,25); \
seanhalle@25 154 c ^= b; c -= rot(b,16); \
seanhalle@25 155 a ^= c; a -= rot(c,4); \
seanhalle@25 156 b ^= a; b -= rot(a,14); \
seanhalle@25 157 c ^= b; c -= rot(b,24); \
seanhalle@25 158 }
seanhalle@25 159
seanhalle@25 160 /*
seanhalle@25 161 --------------------------------------------------------------------
seanhalle@25 162 This works on all machines. To be useful, it requires
seanhalle@25 163 -- that the key be an array of uint32_t's, and
seanhalle@25 164 -- that the length be the number of uint32_t's in the key
seanhalle@25 165
seanhalle@25 166 The function hashword() is identical to hashlittle() on little-endian
seanhalle@25 167 machines, and identical to hashbig() on big-endian machines,
seanhalle@25 168 except that the length has to be measured in uint32_ts rather than in
seanhalle@25 169 bytes. hashlittle() is more complicated than hashword() only because
seanhalle@25 170 hashlittle() has to dance around fitting the key bytes into registers.
seanhalle@25 171 --------------------------------------------------------------------
seanhalle@25 172 */
nengel@27 173 uint32_t
seanhalle@25 174 jenkHash32(
seanhalle@25 175 const uint32_t *k, /* the key, an array of uint32_t values */
seanhalle@25 176 size_t length) /* the length of the key, in uint32_ts */
seanhalle@25 177 {
seanhalle@25 178 uint32_t a,b,c;
seanhalle@25 179
seanhalle@25 180 /* Set up the internal state */
seanhalle@25 181 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + 0x7291f8a3;//arbitrary
seanhalle@25 182
seanhalle@25 183 /*------------------------------------------------- handle most of the key */
seanhalle@25 184 while (length > 3)
seanhalle@25 185 {
seanhalle@25 186 a += k[0];
seanhalle@25 187 b += k[1];
seanhalle@25 188 c += k[2];
seanhalle@25 189 mix(a,b,c);
seanhalle@25 190 length -= 3;
seanhalle@25 191 k += 3;
seanhalle@25 192 }
seanhalle@25 193
seanhalle@25 194 /*------------------------------------------- handle the last 3 uint32_t's */
seanhalle@25 195 switch(length) /* all the case statements fall through */
seanhalle@25 196 {
seanhalle@25 197 case 3 : c+=k[2];
seanhalle@25 198 case 2 : b+=k[1];
seanhalle@25 199 case 1 : a+=k[0];
seanhalle@25 200 final(a,b,c);
seanhalle@25 201 case 0: /* case 0: nothing left to add */
seanhalle@25 202 break;
seanhalle@25 203 }
seanhalle@25 204 /*------------------------------------------------------ report the result */
seanhalle@25 205 return c;
seanhalle@25 206 }
seanhalle@25 207
seanhalle@25 208
seanhalle@25 209 /*
seanhalle@25 210 --------------------------------------------------------------------
seanhalle@25 211 hashword2() -- same as hashword(), but take two seeds and return two
seanhalle@25 212 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
seanhalle@25 213 both be initialized with seeds. If you pass in (*pb)==0, the output
seanhalle@25 214 (*pc) will be the same as the return value from hashword().
seanhalle@25 215 --------------------------------------------------------------------
seanhalle@25 216 */
seanhalle@25 217 void hashword2 (
seanhalle@25 218 const uint32_t *k, /* the key, an array of uint32_t values */
seanhalle@25 219 size_t length, /* the length of the key, in uint32_ts */
seanhalle@25 220 uint32_t *pc, /* IN: seed OUT: primary hash value */
seanhalle@25 221 uint32_t *pb) /* IN: more seed OUT: secondary hash value */
seanhalle@25 222 {
seanhalle@25 223 uint32_t a,b,c;
seanhalle@25 224
seanhalle@25 225 /* Set up the internal state */
seanhalle@25 226 a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
seanhalle@25 227 c += *pb;
seanhalle@25 228
seanhalle@25 229 /*------------------------------------------------- handle most of the key */
seanhalle@25 230 while (length > 3)
seanhalle@25 231 {
seanhalle@25 232 a += k[0];
seanhalle@25 233 b += k[1];
seanhalle@25 234 c += k[2];
seanhalle@25 235 mix(a,b,c);
seanhalle@25 236 length -= 3;
seanhalle@25 237 k += 3;
seanhalle@25 238 }
seanhalle@25 239
seanhalle@25 240 /*------------------------------------------- handle the last 3 uint32_t's */
seanhalle@25 241 switch(length) /* all the case statements fall through */
seanhalle@25 242 {
seanhalle@25 243 case 3 : c+=k[2];
seanhalle@25 244 case 2 : b+=k[1];
seanhalle@25 245 case 1 : a+=k[0];
seanhalle@25 246 final(a,b,c);
seanhalle@25 247 case 0: /* case 0: nothing left to add */
seanhalle@25 248 break;
seanhalle@25 249 }
seanhalle@25 250 /*------------------------------------------------------ report the result */
seanhalle@25 251 *pc=c; *pb=b;
seanhalle@25 252 }
seanhalle@25 253
seanhalle@25 254
seanhalle@25 255 /*
seanhalle@25 256 -------------------------------------------------------------------------------
seanhalle@25 257 hashlittle() -- hash a variable-length key into a 32-bit value
seanhalle@25 258 k : the key (the unaligned variable-length array of bytes)
seanhalle@25 259 length : the length of the key, counting by bytes
seanhalle@25 260 initval : can be any 4-byte value
seanhalle@25 261 Returns a 32-bit value. Every bit of the key affects every bit of
seanhalle@25 262 the return value. Two keys differing by one or two bits will have
seanhalle@25 263 totally different hash values.
seanhalle@25 264
seanhalle@25 265 The best hash table sizes are powers of 2. There is no need to do
seanhalle@25 266 mod a prime (mod is sooo slow!). If you need less than 32 bits,
seanhalle@25 267 use a bitmask. For example, if you need only 10 bits, do
seanhalle@25 268 h = (h & hashmask(10));
seanhalle@25 269 In which case, the hash table should have hashsize(10) elements.
seanhalle@25 270
seanhalle@25 271 If you are hashing n strings (uint8_t **)k, do it like this:
seanhalle@25 272 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
seanhalle@25 273
seanhalle@25 274 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
seanhalle@25 275 code any way you wish, private, educational, or commercial. It's free.
seanhalle@25 276
seanhalle@25 277 Use for hash table lookup, or anything where one collision in 2^^32 is
seanhalle@25 278 acceptable. Do NOT use for cryptographic purposes.
seanhalle@25 279 -------------------------------------------------------------------------------
seanhalle@25 280 */
seanhalle@25 281
seanhalle@25 282 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
seanhalle@25 283 {
seanhalle@25 284 uint32_t a,b,c; /* internal state */
seanhalle@25 285 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
seanhalle@25 286
seanhalle@25 287 /* Set up the internal state */
seanhalle@25 288 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
seanhalle@25 289
seanhalle@25 290 u.ptr = key;
seanhalle@25 291 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
seanhalle@25 292 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
seanhalle@25 293 const uint8_t *k8;
seanhalle@25 294
seanhalle@25 295 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
seanhalle@25 296 while (length > 12)
seanhalle@25 297 {
seanhalle@25 298 a += k[0];
seanhalle@25 299 b += k[1];
seanhalle@25 300 c += k[2];
seanhalle@25 301 mix(a,b,c);
seanhalle@25 302 length -= 12;
seanhalle@25 303 k += 3;
seanhalle@25 304 }
seanhalle@25 305
seanhalle@25 306 /*----------------------------- handle the last (probably partial) block */
seanhalle@25 307 /*
seanhalle@25 308 * "k[2]&0xffffff" actually reads beyond the end of the string, but
seanhalle@25 309 * then masks off the part it's not allowed to read. Because the
seanhalle@25 310 * string is aligned, the masked-off tail is in the same word as the
seanhalle@25 311 * rest of the string. Every machine with memory protection I've seen
seanhalle@25 312 * does it on word boundaries, so is OK with this. But VALGRIND will
seanhalle@25 313 * still catch it and complain. The masking trick does make the hash
seanhalle@25 314 * noticably faster for short strings (like English words).
seanhalle@25 315 */
seanhalle@25 316 #ifndef VALGRIND
seanhalle@25 317
seanhalle@25 318 switch(length)
seanhalle@25 319 {
seanhalle@25 320 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
seanhalle@25 321 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
seanhalle@25 322 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
seanhalle@25 323 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
seanhalle@25 324 case 8 : b+=k[1]; a+=k[0]; break;
seanhalle@25 325 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
seanhalle@25 326 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
seanhalle@25 327 case 5 : b+=k[1]&0xff; a+=k[0]; break;
seanhalle@25 328 case 4 : a+=k[0]; break;
seanhalle@25 329 case 3 : a+=k[0]&0xffffff; break;
seanhalle@25 330 case 2 : a+=k[0]&0xffff; break;
seanhalle@25 331 case 1 : a+=k[0]&0xff; break;
seanhalle@25 332 case 0 : return c; /* zero length strings require no mixing */
seanhalle@25 333 }
seanhalle@25 334
seanhalle@25 335 #else /* make valgrind happy */
seanhalle@25 336
seanhalle@25 337 k8 = (const uint8_t *)k;
seanhalle@25 338 switch(length)
seanhalle@25 339 {
seanhalle@25 340 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
seanhalle@25 341 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
seanhalle@25 342 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
seanhalle@25 343 case 9 : c+=k8[8]; /* fall through */
seanhalle@25 344 case 8 : b+=k[1]; a+=k[0]; break;
seanhalle@25 345 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
seanhalle@25 346 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
seanhalle@25 347 case 5 : b+=k8[4]; /* fall through */
seanhalle@25 348 case 4 : a+=k[0]; break;
seanhalle@25 349 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
seanhalle@25 350 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
seanhalle@25 351 case 1 : a+=k8[0]; break;
seanhalle@25 352 case 0 : return c;
seanhalle@25 353 }
seanhalle@25 354
seanhalle@25 355 #endif /* !valgrind */
seanhalle@25 356
seanhalle@25 357 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
seanhalle@25 358 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
seanhalle@25 359 const uint8_t *k8;
seanhalle@25 360
seanhalle@25 361 /*--------------- all but last block: aligned reads and different mixing */
seanhalle@25 362 while (length > 12)
seanhalle@25 363 {
seanhalle@25 364 a += k[0] + (((uint32_t)k[1])<<16);
seanhalle@25 365 b += k[2] + (((uint32_t)k[3])<<16);
seanhalle@25 366 c += k[4] + (((uint32_t)k[5])<<16);
seanhalle@25 367 mix(a,b,c);
seanhalle@25 368 length -= 12;
seanhalle@25 369 k += 6;
seanhalle@25 370 }
seanhalle@25 371
seanhalle@25 372 /*----------------------------- handle the last (probably partial) block */
seanhalle@25 373 k8 = (const uint8_t *)k;
seanhalle@25 374 switch(length)
seanhalle@25 375 {
seanhalle@25 376 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
seanhalle@25 377 b+=k[2]+(((uint32_t)k[3])<<16);
seanhalle@25 378 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 379 break;
seanhalle@25 380 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
seanhalle@25 381 case 10: c+=k[4];
seanhalle@25 382 b+=k[2]+(((uint32_t)k[3])<<16);
seanhalle@25 383 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 384 break;
seanhalle@25 385 case 9 : c+=k8[8]; /* fall through */
seanhalle@25 386 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
seanhalle@25 387 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 388 break;
seanhalle@25 389 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
seanhalle@25 390 case 6 : b+=k[2];
seanhalle@25 391 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 392 break;
seanhalle@25 393 case 5 : b+=k8[4]; /* fall through */
seanhalle@25 394 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 395 break;
seanhalle@25 396 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
seanhalle@25 397 case 2 : a+=k[0];
seanhalle@25 398 break;
seanhalle@25 399 case 1 : a+=k8[0];
seanhalle@25 400 break;
seanhalle@25 401 case 0 : return c; /* zero length requires no mixing */
seanhalle@25 402 }
seanhalle@25 403
seanhalle@25 404 } else { /* need to read the key one byte at a time */
seanhalle@25 405 const uint8_t *k = (const uint8_t *)key;
seanhalle@25 406
seanhalle@25 407 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
seanhalle@25 408 while (length > 12)
seanhalle@25 409 {
seanhalle@25 410 a += k[0];
seanhalle@25 411 a += ((uint32_t)k[1])<<8;
seanhalle@25 412 a += ((uint32_t)k[2])<<16;
seanhalle@25 413 a += ((uint32_t)k[3])<<24;
seanhalle@25 414 b += k[4];
seanhalle@25 415 b += ((uint32_t)k[5])<<8;
seanhalle@25 416 b += ((uint32_t)k[6])<<16;
seanhalle@25 417 b += ((uint32_t)k[7])<<24;
seanhalle@25 418 c += k[8];
seanhalle@25 419 c += ((uint32_t)k[9])<<8;
seanhalle@25 420 c += ((uint32_t)k[10])<<16;
seanhalle@25 421 c += ((uint32_t)k[11])<<24;
seanhalle@25 422 mix(a,b,c);
seanhalle@25 423 length -= 12;
seanhalle@25 424 k += 12;
seanhalle@25 425 }
seanhalle@25 426
seanhalle@25 427 /*-------------------------------- last block: affect all 32 bits of (c) */
seanhalle@25 428 switch(length) /* all the case statements fall through */
seanhalle@25 429 {
seanhalle@25 430 case 12: c+=((uint32_t)k[11])<<24;
seanhalle@25 431 case 11: c+=((uint32_t)k[10])<<16;
seanhalle@25 432 case 10: c+=((uint32_t)k[9])<<8;
seanhalle@25 433 case 9 : c+=k[8];
seanhalle@25 434 case 8 : b+=((uint32_t)k[7])<<24;
seanhalle@25 435 case 7 : b+=((uint32_t)k[6])<<16;
seanhalle@25 436 case 6 : b+=((uint32_t)k[5])<<8;
seanhalle@25 437 case 5 : b+=k[4];
seanhalle@25 438 case 4 : a+=((uint32_t)k[3])<<24;
seanhalle@25 439 case 3 : a+=((uint32_t)k[2])<<16;
seanhalle@25 440 case 2 : a+=((uint32_t)k[1])<<8;
seanhalle@25 441 case 1 : a+=k[0];
seanhalle@25 442 break;
seanhalle@25 443 case 0 : return c;
seanhalle@25 444 }
seanhalle@25 445 }
seanhalle@25 446
seanhalle@25 447 final(a,b,c);
seanhalle@25 448 return c;
seanhalle@25 449 }
seanhalle@25 450
seanhalle@25 451
seanhalle@25 452 /*
seanhalle@25 453 * hashlittle2: return 2 32-bit hash values
seanhalle@25 454 *
seanhalle@25 455 * This is identical to hashlittle(), except it returns two 32-bit hash
seanhalle@25 456 * values instead of just one. This is good enough for hash table
seanhalle@25 457 * lookup with 2^^64 buckets, or if you want a second hash if you're not
seanhalle@25 458 * happy with the first, or if you want a probably-unique 64-bit ID for
seanhalle@25 459 * the key. *pc is better mixed than *pb, so use *pc first. If you want
seanhalle@25 460 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
seanhalle@25 461 */
seanhalle@25 462 void hashlittle2(
seanhalle@25 463 const void *key, /* the key to hash */
seanhalle@25 464 size_t length, /* length of the key */
seanhalle@25 465 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
seanhalle@25 466 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
seanhalle@25 467 {
seanhalle@25 468 uint32_t a,b,c; /* internal state */
seanhalle@25 469 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
seanhalle@25 470
seanhalle@25 471 /* Set up the internal state */
seanhalle@25 472 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
seanhalle@25 473 c += *pb;
seanhalle@25 474
seanhalle@25 475 u.ptr = key;
seanhalle@25 476 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
seanhalle@25 477 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
seanhalle@25 478 const uint8_t *k8;
seanhalle@25 479
seanhalle@25 480 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
seanhalle@25 481 while (length > 12)
seanhalle@25 482 {
seanhalle@25 483 a += k[0];
seanhalle@25 484 b += k[1];
seanhalle@25 485 c += k[2];
seanhalle@25 486 mix(a,b,c);
seanhalle@25 487 length -= 12;
seanhalle@25 488 k += 3;
seanhalle@25 489 }
seanhalle@25 490
seanhalle@25 491 /*----------------------------- handle the last (probably partial) block */
seanhalle@25 492 /*
seanhalle@25 493 * "k[2]&0xffffff" actually reads beyond the end of the string, but
seanhalle@25 494 * then masks off the part it's not allowed to read. Because the
seanhalle@25 495 * string is aligned, the masked-off tail is in the same word as the
seanhalle@25 496 * rest of the string. Every machine with memory protection I've seen
seanhalle@25 497 * does it on word boundaries, so is OK with this. But VALGRIND will
seanhalle@25 498 * still catch it and complain. The masking trick does make the hash
seanhalle@25 499 * noticably faster for short strings (like English words).
seanhalle@25 500 */
seanhalle@25 501 #ifndef VALGRIND
seanhalle@25 502
seanhalle@25 503 switch(length)
seanhalle@25 504 {
seanhalle@25 505 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
seanhalle@25 506 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
seanhalle@25 507 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
seanhalle@25 508 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
seanhalle@25 509 case 8 : b+=k[1]; a+=k[0]; break;
seanhalle@25 510 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
seanhalle@25 511 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
seanhalle@25 512 case 5 : b+=k[1]&0xff; a+=k[0]; break;
seanhalle@25 513 case 4 : a+=k[0]; break;
seanhalle@25 514 case 3 : a+=k[0]&0xffffff; break;
seanhalle@25 515 case 2 : a+=k[0]&0xffff; break;
seanhalle@25 516 case 1 : a+=k[0]&0xff; break;
seanhalle@25 517 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
seanhalle@25 518 }
seanhalle@25 519
seanhalle@25 520 #else /* make valgrind happy */
seanhalle@25 521
seanhalle@25 522 k8 = (const uint8_t *)k;
seanhalle@25 523 switch(length)
seanhalle@25 524 {
seanhalle@25 525 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
seanhalle@25 526 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
seanhalle@25 527 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
seanhalle@25 528 case 9 : c+=k8[8]; /* fall through */
seanhalle@25 529 case 8 : b+=k[1]; a+=k[0]; break;
seanhalle@25 530 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
seanhalle@25 531 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
seanhalle@25 532 case 5 : b+=k8[4]; /* fall through */
seanhalle@25 533 case 4 : a+=k[0]; break;
seanhalle@25 534 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
seanhalle@25 535 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
seanhalle@25 536 case 1 : a+=k8[0]; break;
seanhalle@25 537 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
seanhalle@25 538 }
seanhalle@25 539
seanhalle@25 540 #endif /* !valgrind */
seanhalle@25 541
seanhalle@25 542 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
seanhalle@25 543 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
seanhalle@25 544 const uint8_t *k8;
seanhalle@25 545
seanhalle@25 546 /*--------------- all but last block: aligned reads and different mixing */
seanhalle@25 547 while (length > 12)
seanhalle@25 548 {
seanhalle@25 549 a += k[0] + (((uint32_t)k[1])<<16);
seanhalle@25 550 b += k[2] + (((uint32_t)k[3])<<16);
seanhalle@25 551 c += k[4] + (((uint32_t)k[5])<<16);
seanhalle@25 552 mix(a,b,c);
seanhalle@25 553 length -= 12;
seanhalle@25 554 k += 6;
seanhalle@25 555 }
seanhalle@25 556
seanhalle@25 557 /*----------------------------- handle the last (probably partial) block */
seanhalle@25 558 k8 = (const uint8_t *)k;
seanhalle@25 559 switch(length)
seanhalle@25 560 {
seanhalle@25 561 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
seanhalle@25 562 b+=k[2]+(((uint32_t)k[3])<<16);
seanhalle@25 563 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 564 break;
seanhalle@25 565 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
seanhalle@25 566 case 10: c+=k[4];
seanhalle@25 567 b+=k[2]+(((uint32_t)k[3])<<16);
seanhalle@25 568 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 569 break;
seanhalle@25 570 case 9 : c+=k8[8]; /* fall through */
seanhalle@25 571 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
seanhalle@25 572 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 573 break;
seanhalle@25 574 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
seanhalle@25 575 case 6 : b+=k[2];
seanhalle@25 576 a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 577 break;
seanhalle@25 578 case 5 : b+=k8[4]; /* fall through */
seanhalle@25 579 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
seanhalle@25 580 break;
seanhalle@25 581 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
seanhalle@25 582 case 2 : a+=k[0];
seanhalle@25 583 break;
seanhalle@25 584 case 1 : a+=k8[0];
seanhalle@25 585 break;
seanhalle@25 586 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
seanhalle@25 587 }
seanhalle@25 588
seanhalle@25 589 } else { /* need to read the key one byte at a time */
seanhalle@25 590 const uint8_t *k = (const uint8_t *)key;
seanhalle@25 591
seanhalle@25 592 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
seanhalle@25 593 while (length > 12)
seanhalle@25 594 {
seanhalle@25 595 a += k[0];
seanhalle@25 596 a += ((uint32_t)k[1])<<8;
seanhalle@25 597 a += ((uint32_t)k[2])<<16;
seanhalle@25 598 a += ((uint32_t)k[3])<<24;
seanhalle@25 599 b += k[4];
seanhalle@25 600 b += ((uint32_t)k[5])<<8;
seanhalle@25 601 b += ((uint32_t)k[6])<<16;
seanhalle@25 602 b += ((uint32_t)k[7])<<24;
seanhalle@25 603 c += k[8];
seanhalle@25 604 c += ((uint32_t)k[9])<<8;
seanhalle@25 605 c += ((uint32_t)k[10])<<16;
seanhalle@25 606 c += ((uint32_t)k[11])<<24;
seanhalle@25 607 mix(a,b,c);
seanhalle@25 608 length -= 12;
seanhalle@25 609 k += 12;
seanhalle@25 610 }
seanhalle@25 611
seanhalle@25 612 /*-------------------------------- last block: affect all 32 bits of (c) */
seanhalle@25 613 switch(length) /* all the case statements fall through */
seanhalle@25 614 {
seanhalle@25 615 case 12: c+=((uint32_t)k[11])<<24;
seanhalle@25 616 case 11: c+=((uint32_t)k[10])<<16;
seanhalle@25 617 case 10: c+=((uint32_t)k[9])<<8;
seanhalle@25 618 case 9 : c+=k[8];
seanhalle@25 619 case 8 : b+=((uint32_t)k[7])<<24;
seanhalle@25 620 case 7 : b+=((uint32_t)k[6])<<16;
seanhalle@25 621 case 6 : b+=((uint32_t)k[5])<<8;
seanhalle@25 622 case 5 : b+=k[4];
seanhalle@25 623 case 4 : a+=((uint32_t)k[3])<<24;
seanhalle@25 624 case 3 : a+=((uint32_t)k[2])<<16;
seanhalle@25 625 case 2 : a+=((uint32_t)k[1])<<8;
seanhalle@25 626 case 1 : a+=k[0];
seanhalle@25 627 break;
seanhalle@25 628 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
seanhalle@25 629 }
seanhalle@25 630 }
seanhalle@25 631
seanhalle@25 632 final(a,b,c);
seanhalle@25 633 *pc=c; *pb=b;
seanhalle@25 634 }
seanhalle@25 635
seanhalle@25 636
seanhalle@25 637
seanhalle@25 638 /*
seanhalle@25 639 * hashbig():
seanhalle@25 640 * This is the same as hashword() on big-endian machines. It is different
seanhalle@25 641 * from hashlittle() on all machines. hashbig() takes advantage of
seanhalle@25 642 * big-endian byte ordering.
seanhalle@25 643 */
seanhalle@25 644 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
seanhalle@25 645 {
seanhalle@25 646 uint32_t a,b,c;
seanhalle@25 647 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
seanhalle@25 648
seanhalle@25 649 /* Set up the internal state */
seanhalle@25 650 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
seanhalle@25 651
seanhalle@25 652 u.ptr = key;
seanhalle@25 653 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
seanhalle@25 654 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
seanhalle@25 655 const uint8_t *k8;
seanhalle@25 656
seanhalle@25 657 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
seanhalle@25 658 while (length > 12)
seanhalle@25 659 {
seanhalle@25 660 a += k[0];
seanhalle@25 661 b += k[1];
seanhalle@25 662 c += k[2];
seanhalle@25 663 mix(a,b,c);
seanhalle@25 664 length -= 12;
seanhalle@25 665 k += 3;
seanhalle@25 666 }
seanhalle@25 667
seanhalle@25 668 /*----------------------------- handle the last (probably partial) block */
seanhalle@25 669 /*
seanhalle@25 670 * "k[2]<<8" actually reads beyond the end of the string, but
seanhalle@25 671 * then shifts out the part it's not allowed to read. Because the
seanhalle@25 672 * string is aligned, the illegal read is in the same word as the
seanhalle@25 673 * rest of the string. Every machine with memory protection I've seen
seanhalle@25 674 * does it on word boundaries, so is OK with this. But VALGRIND will
seanhalle@25 675 * still catch it and complain. The masking trick does make the hash
seanhalle@25 676 * noticably faster for short strings (like English words).
seanhalle@25 677 */
seanhalle@25 678 #ifndef VALGRIND
seanhalle@25 679
seanhalle@25 680 switch(length)
seanhalle@25 681 {
seanhalle@25 682 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
seanhalle@25 683 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
seanhalle@25 684 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
seanhalle@25 685 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
seanhalle@25 686 case 8 : b+=k[1]; a+=k[0]; break;
seanhalle@25 687 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
seanhalle@25 688 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
seanhalle@25 689 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
seanhalle@25 690 case 4 : a+=k[0]; break;
seanhalle@25 691 case 3 : a+=k[0]&0xffffff00; break;
seanhalle@25 692 case 2 : a+=k[0]&0xffff0000; break;
seanhalle@25 693 case 1 : a+=k[0]&0xff000000; break;
seanhalle@25 694 case 0 : return c; /* zero length strings require no mixing */
seanhalle@25 695 }
seanhalle@25 696
seanhalle@25 697 #else /* make valgrind happy */
seanhalle@25 698
seanhalle@25 699 k8 = (const uint8_t *)k;
seanhalle@25 700 switch(length) /* all the case statements fall through */
seanhalle@25 701 {
seanhalle@25 702 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
seanhalle@25 703 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
seanhalle@25 704 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
seanhalle@25 705 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
seanhalle@25 706 case 8 : b+=k[1]; a+=k[0]; break;
seanhalle@25 707 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
seanhalle@25 708 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
seanhalle@25 709 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
seanhalle@25 710 case 4 : a+=k[0]; break;
seanhalle@25 711 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
seanhalle@25 712 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
seanhalle@25 713 case 1 : a+=((uint32_t)k8[0])<<24; break;
seanhalle@25 714 case 0 : return c;
seanhalle@25 715 }
seanhalle@25 716
seanhalle@25 717 #endif /* !VALGRIND */
seanhalle@25 718
seanhalle@25 719 } else { /* need to read the key one byte at a time */
seanhalle@25 720 const uint8_t *k = (const uint8_t *)key;
seanhalle@25 721
seanhalle@25 722 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
seanhalle@25 723 while (length > 12)
seanhalle@25 724 {
seanhalle@25 725 a += ((uint32_t)k[0])<<24;
seanhalle@25 726 a += ((uint32_t)k[1])<<16;
seanhalle@25 727 a += ((uint32_t)k[2])<<8;
seanhalle@25 728 a += ((uint32_t)k[3]);
seanhalle@25 729 b += ((uint32_t)k[4])<<24;
seanhalle@25 730 b += ((uint32_t)k[5])<<16;
seanhalle@25 731 b += ((uint32_t)k[6])<<8;
seanhalle@25 732 b += ((uint32_t)k[7]);
seanhalle@25 733 c += ((uint32_t)k[8])<<24;
seanhalle@25 734 c += ((uint32_t)k[9])<<16;
seanhalle@25 735 c += ((uint32_t)k[10])<<8;
seanhalle@25 736 c += ((uint32_t)k[11]);
seanhalle@25 737 mix(a,b,c);
seanhalle@25 738 length -= 12;
seanhalle@25 739 k += 12;
seanhalle@25 740 }
seanhalle@25 741
seanhalle@25 742 /*-------------------------------- last block: affect all 32 bits of (c) */
seanhalle@25 743 switch(length) /* all the case statements fall through */
seanhalle@25 744 {
seanhalle@25 745 case 12: c+=k[11];
seanhalle@25 746 case 11: c+=((uint32_t)k[10])<<8;
seanhalle@25 747 case 10: c+=((uint32_t)k[9])<<16;
seanhalle@25 748 case 9 : c+=((uint32_t)k[8])<<24;
seanhalle@25 749 case 8 : b+=k[7];
seanhalle@25 750 case 7 : b+=((uint32_t)k[6])<<8;
seanhalle@25 751 case 6 : b+=((uint32_t)k[5])<<16;
seanhalle@25 752 case 5 : b+=((uint32_t)k[4])<<24;
seanhalle@25 753 case 4 : a+=k[3];
seanhalle@25 754 case 3 : a+=((uint32_t)k[2])<<8;
seanhalle@25 755 case 2 : a+=((uint32_t)k[1])<<16;
seanhalle@25 756 case 1 : a+=((uint32_t)k[0])<<24;
seanhalle@25 757 break;
seanhalle@25 758 case 0 : return c;
seanhalle@25 759 }
seanhalle@25 760 }
seanhalle@25 761
seanhalle@25 762 final(a,b,c);
seanhalle@25 763 return c;
seanhalle@25 764 }
seanhalle@25 765
seanhalle@25 766
seanhalle@25 767 #ifdef SELF_TEST
seanhalle@25 768
seanhalle@25 769 /* used for timings */
seanhalle@25 770 void driver1()
seanhalle@25 771 {
seanhalle@25 772 uint8_t buf[256];
seanhalle@25 773 uint32_t i;
seanhalle@25 774 uint32_t h=0;
seanhalle@25 775 time_t a,z;
seanhalle@25 776
seanhalle@25 777 time(&a);
seanhalle@25 778 for (i=0; i<256; ++i) buf[i] = 'x';
seanhalle@25 779 for (i=0; i<1; ++i)
seanhalle@25 780 {
seanhalle@25 781 h = hashlittle(&buf[0],1,h);
seanhalle@25 782 }
seanhalle@25 783 time(&z);
seanhalle@25 784
seanhalle@25 785 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
seanhalle@25 786
seanhalle@25 787 }
seanhalle@25 788
seanhalle@25 789 /* check that every input bit changes every output bit half the time */
seanhalle@25 790 #define HASHSTATE 1
seanhalle@25 791 #define HASHLEN 1
seanhalle@25 792 #define MAXPAIR 60
seanhalle@25 793 #define MAXLEN 70
seanhalle@25 794 void driver2()
seanhalle@25 795 {
seanhalle@25 796 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
seanhalle@25 797 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
seanhalle@25 798 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
seanhalle@25 799 uint32_t x[HASHSTATE],y[HASHSTATE];
seanhalle@25 800 uint32_t hlen;
seanhalle@25 801
seanhalle@25 802 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
seanhalle@25 803 for (hlen=0; hlen < MAXLEN; ++hlen)
seanhalle@25 804 {
seanhalle@25 805 z=0;
seanhalle@25 806 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
seanhalle@25 807 {
seanhalle@25 808 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
seanhalle@25 809 {
seanhalle@25 810 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
seanhalle@25 811 {
seanhalle@25 812 for (l=0; l<HASHSTATE; ++l)
seanhalle@25 813 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
seanhalle@25 814
seanhalle@25 815 /*---- check that every output bit is affected by that input bit */
seanhalle@25 816 for (k=0; k<MAXPAIR; k+=2)
seanhalle@25 817 {
seanhalle@25 818 uint32_t finished=1;
seanhalle@25 819 /* keys have one bit different */
seanhalle@25 820 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
seanhalle@25 821 /* have a and b be two keys differing in only one bit */
seanhalle@25 822 a[i] ^= (k<<j);
seanhalle@25 823 a[i] ^= (k>>(8-j));
seanhalle@25 824 c[0] = hashlittle(a, hlen, m);
seanhalle@25 825 b[i] ^= ((k+1)<<j);
seanhalle@25 826 b[i] ^= ((k+1)>>(8-j));
seanhalle@25 827 d[0] = hashlittle(b, hlen, m);
seanhalle@25 828 /* check every bit is 1, 0, set, and not set at least once */
seanhalle@25 829 for (l=0; l<HASHSTATE; ++l)
seanhalle@25 830 {
seanhalle@25 831 e[l] &= (c[l]^d[l]);
seanhalle@25 832 f[l] &= ~(c[l]^d[l]);
seanhalle@25 833 g[l] &= c[l];
seanhalle@25 834 h[l] &= ~c[l];
seanhalle@25 835 x[l] &= d[l];
seanhalle@25 836 y[l] &= ~d[l];
seanhalle@25 837 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
seanhalle@25 838 }
seanhalle@25 839 if (finished) break;
seanhalle@25 840 }
seanhalle@25 841 if (k>z) z=k;
seanhalle@25 842 if (k==MAXPAIR)
seanhalle@25 843 {
seanhalle@25 844 printf("Some bit didn't change: ");
seanhalle@25 845 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
seanhalle@25 846 e[0],f[0],g[0],h[0],x[0],y[0]);
seanhalle@25 847 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
seanhalle@25 848 }
seanhalle@25 849 if (z==MAXPAIR) goto done;
seanhalle@25 850 }
seanhalle@25 851 }
seanhalle@25 852 }
seanhalle@25 853 done:
seanhalle@25 854 if (z < MAXPAIR)
seanhalle@25 855 {
seanhalle@25 856 printf("Mix success %2d bytes %2d initvals ",i,m);
seanhalle@25 857 printf("required %d trials\n", z/2);
seanhalle@25 858 }
seanhalle@25 859 }
seanhalle@25 860 printf("\n");
seanhalle@25 861 }
seanhalle@25 862
seanhalle@25 863 /* Check for reading beyond the end of the buffer and alignment problems */
seanhalle@25 864 void driver3()
seanhalle@25 865 {
seanhalle@25 866 uint8_t buf[MAXLEN+20], *b;
seanhalle@25 867 uint32_t len;
seanhalle@25 868 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
seanhalle@25 869 uint32_t h;
seanhalle@25 870 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
seanhalle@25 871 uint32_t i;
seanhalle@25 872 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
seanhalle@25 873 uint32_t j;
seanhalle@25 874 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
seanhalle@25 875 uint32_t ref,x,y;
seanhalle@25 876 uint8_t *p;
seanhalle@25 877
seanhalle@25 878 printf("Endianness. These lines should all be the same (for values filled in):\n");
seanhalle@25 879 printf("%.8x %.8x %.8x\n",
seanhalle@25 880 jenkHash32((const uint32_t *)q, (sizeof(q)-1)/4, 13),
seanhalle@25 881 jenkHash32((const uint32_t *)q, (sizeof(q)-5)/4, 13),
seanhalle@25 882 jenkHash32((const uint32_t *)q, (sizeof(q)-9)/4, 13));
seanhalle@25 883 p = q;
seanhalle@25 884 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
seanhalle@25 885 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
seanhalle@25 886 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
seanhalle@25 887 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
seanhalle@25 888 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
seanhalle@25 889 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
seanhalle@25 890 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
seanhalle@25 891 p = &qq[1];
seanhalle@25 892 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
seanhalle@25 893 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
seanhalle@25 894 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
seanhalle@25 895 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
seanhalle@25 896 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
seanhalle@25 897 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
seanhalle@25 898 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
seanhalle@25 899 p = &qqq[2];
seanhalle@25 900 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
seanhalle@25 901 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
seanhalle@25 902 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
seanhalle@25 903 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
seanhalle@25 904 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
seanhalle@25 905 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
seanhalle@25 906 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
seanhalle@25 907 p = &qqqq[3];
seanhalle@25 908 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
seanhalle@25 909 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
seanhalle@25 910 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
seanhalle@25 911 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
seanhalle@25 912 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
seanhalle@25 913 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
seanhalle@25 914 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
seanhalle@25 915 printf("\n");
seanhalle@25 916
seanhalle@25 917 /* check that hashlittle2 and hashlittle produce the same results */
seanhalle@25 918 i=47; j=0;
seanhalle@25 919 hashlittle2(q, sizeof(q), &i, &j);
seanhalle@25 920 if (hashlittle(q, sizeof(q), 47) != i)
seanhalle@25 921 printf("hashlittle2 and hashlittle mismatch\n");
seanhalle@25 922
seanhalle@25 923 /* check that hashword2 and hashword produce the same results */
seanhalle@25 924 len = 0xdeadbeef;
seanhalle@25 925 i=47, j=0;
seanhalle@25 926 hashword2(&len, 1, &i, &j);
seanhalle@25 927 if (jenkHash32(&len, 1, 47) != i)
seanhalle@25 928 printf("hashword2 and hashword mismatch %x %x\n",
seanhalle@25 929 i, jenkHash32(&len, 1, 47));
seanhalle@25 930
seanhalle@25 931 /* check hashlittle doesn't read before or after the ends of the string */
seanhalle@25 932 for (h=0, b=buf+1; h<8; ++h, ++b)
seanhalle@25 933 {
seanhalle@25 934 for (i=0; i<MAXLEN; ++i)
seanhalle@25 935 {
seanhalle@25 936 len = i;
seanhalle@25 937 for (j=0; j<i; ++j) *(b+j)=0;
seanhalle@25 938
seanhalle@25 939 /* these should all be equal */
seanhalle@25 940 ref = hashlittle(b, len, (uint32_t)1);
seanhalle@25 941 *(b+i)=(uint8_t)~0;
seanhalle@25 942 *(b-1)=(uint8_t)~0;
seanhalle@25 943 x = hashlittle(b, len, (uint32_t)1);
seanhalle@25 944 y = hashlittle(b, len, (uint32_t)1);
seanhalle@25 945 if ((ref != x) || (ref != y))
seanhalle@25 946 {
seanhalle@25 947 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
seanhalle@25 948 h, i);
seanhalle@25 949 }
seanhalle@25 950 }
seanhalle@25 951 }
seanhalle@25 952 }
seanhalle@25 953
seanhalle@25 954 /* check for problems with nulls */
seanhalle@25 955 void driver4()
seanhalle@25 956 {
seanhalle@25 957 uint8_t buf[1];
seanhalle@25 958 uint32_t h,i,state[HASHSTATE];
seanhalle@25 959
seanhalle@25 960
seanhalle@25 961 buf[0] = ~0;
seanhalle@25 962 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
seanhalle@25 963 printf("These should all be different\n");
seanhalle@25 964 for (i=0, h=0; i<8; ++i)
seanhalle@25 965 {
seanhalle@25 966 h = hashlittle(buf, 0, h);
seanhalle@25 967
seanhalle@25 968 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
seanhalle@25 969
seanhalle@25 970 }
seanhalle@25 971 }
seanhalle@25 972
seanhalle@25 973 void driver5()
seanhalle@25 974 {
seanhalle@25 975
seanhalle@25 976 uint32_t b,c;
seanhalle@25 977 b=0, c=0, hashlittle2("", 0, &c, &b);
seanhalle@25 978 printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
seanhalle@25 979 b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
seanhalle@25 980 printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
seanhalle@25 981 b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
seanhalle@25 982 printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
seanhalle@25 983 b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
seanhalle@25 984 printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
seanhalle@25 985 b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
seanhalle@25 986 printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
seanhalle@25 987 b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
seanhalle@25 988 printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
seanhalle@25 989 c = hashlittle("Four score and seven years ago", 30, 0);
seanhalle@25 990 printf("hash is %.8lx\n", c); /* 17770551 */
seanhalle@25 991 c = hashlittle("Four score and seven years ago", 30, 1);
seanhalle@25 992 printf("hash is %.8lx\n", c); /* cd628161 */
seanhalle@25 993
seanhalle@25 994 }
seanhalle@25 995
seanhalle@25 996
seanhalle@25 997 int test_lookup3()
seanhalle@25 998 {
seanhalle@25 999 driver1(); /* test that the key is hashed: used for timings */
seanhalle@25 1000 driver2(); /* test that whole key is hashed thoroughly */
seanhalle@25 1001 driver3(); /* test that nothing but the key is hashed */
seanhalle@25 1002 driver4(); /* test hashing multiple buffers (all buffers are null) */
seanhalle@25 1003 driver5(); /* test the hash against known vectors */
seanhalle@25 1004 return 1;
seanhalle@25 1005 }
seanhalle@25 1006
seanhalle@25 1007 #endif /* SELF_TEST */