Mercurial > cgi-bin > hgwebdir.cgi > VMS > C_Libraries > Hash_impl

changeset 25:b398837ef4aa MC_shared
added jenkins hash files
author: Sean Halle <seanhalle@yahoo.com>
date: Tue, 26 Jun 2012 03:09:05 -0700
parents: b032601956bb
children: f0f5da84c4c4 18ec64d06e35
files: jenkinsHash_lookup3.c jenkinsHash_lookup8.c
diffstat: 2 files changed, 1458 insertions(+), 0 deletions(-) [+]
[-]

jenkinsHash_lookup3.c 1007

jenkinsHash_lookup8.c 451 jenkinsHash_lookup3.c 1007 jenkinsHash_lookup8.c 451
jenkinsHash_lookup3.c 1007
jenkinsHash_lookup8.c 451
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/jenkinsHash_lookup3.c	Tue Jun 26 03:09:05 2012 -0700
     1.3 @@ -0,0 +1,1007 @@
     1.4 +/*
     1.5 +-------------------------------------------------------------------------------
     1.6 +lookup3.c, by Bob Jenkins, May 2006, Public Domain.
     1.7 +
     1.8 +These are functions for producing 32-bit hashes for hash table lookup.
     1.9 +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 
    1.10 +are externally useful functions.  Routines to test the hash are included 
    1.11 +if SELF_TEST is defined.  You can use this free for any purpose.  It's in
    1.12 +the public domain.  It has no warranty.
    1.13 +
    1.14 +You probably want to use hashlittle().  hashlittle() and hashbig()
    1.15 +hash byte arrays.  hashlittle() is is faster than hashbig() on
    1.16 +little-endian machines.  Intel and AMD are little-endian machines.
    1.17 +On second thought, you probably want hashlittle2(), which is identical to
    1.18 +hashlittle() except it returns two 32-bit hashes for the price of one.  
    1.19 +You could implement hashbig2() if you wanted but I haven't bothered here.
    1.20 +
    1.21 +If you want to find a hash of, say, exactly 7 integers, do
    1.22 +  a = i1;  b = i2;  c = i3;
    1.23 +  mix(a,b,c);
    1.24 +  a += i4; b += i5; c += i6;
    1.25 +  mix(a,b,c);
    1.26 +  a += i7;
    1.27 +  final(a,b,c);
    1.28 +then use c as the hash value.  If you have a variable length array of
    1.29 +4-byte integers to hash, use hashword().  If you have a byte array (like
    1.30 +a character string), use hashlittle().  If you have several byte arrays, or
    1.31 +a mix of things, see the comments above hashlittle().  
    1.32 +
    1.33 +Why is this so big?  I read 12 bytes at a time into 3 4-byte integers, 
    1.34 +then mix those integers.  This is fast (you can do a lot more thorough
    1.35 +mixing with 12*3 instructions on 3 integers than you can with 3 instructions
    1.36 +on 1 byte), but shoehorning those bytes into integers efficiently is messy.
    1.37 +-------------------------------------------------------------------------------
    1.38 +*/
    1.39 +//#define SELF_TEST 1
    1.40 +
    1.41 +#include <stdio.h>      /* defines printf for tests */
    1.42 +#include <time.h>       /* defines time_t for timings in the test */
    1.43 +#include <stdint.h>     /* defines uint32_t etc */
    1.44 +#include <sys/param.h>  /* attempt to define endianness */
    1.45 +#ifdef linux
    1.46 +# include <endian.h>    /* attempt to define endianness */
    1.47 +#endif
    1.48 +
    1.49 +/*
    1.50 + * My best guess at if you are big-endian or little-endian.  This may
    1.51 + * need adjustment.
    1.52 + */
    1.53 +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
    1.54 +     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    1.55 +    (defined(i386) || defined(__i386__) || defined(__i486__) || \
    1.56 +     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
    1.57 +# define HASH_LITTLE_ENDIAN 1
    1.58 +# define HASH_BIG_ENDIAN 0
    1.59 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
    1.60 +       __BYTE_ORDER == __BIG_ENDIAN) || \
    1.61 +      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
    1.62 +# define HASH_LITTLE_ENDIAN 0
    1.63 +# define HASH_BIG_ENDIAN 1
    1.64 +#else
    1.65 +# define HASH_LITTLE_ENDIAN 0
    1.66 +# define HASH_BIG_ENDIAN 0
    1.67 +#endif
    1.68 +
    1.69 +#define hashsize(n) ((uint32_t)1<<(n))
    1.70 +#define hashmask(n) (hashsize(n)-1)
    1.71 +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
    1.72 +
    1.73 +/*
    1.74 +-------------------------------------------------------------------------------
    1.75 +mix -- mix 3 32-bit values reversibly.
    1.76 +
    1.77 +This is reversible, so any information in (a,b,c) before mix() is
    1.78 +still in (a,b,c) after mix().
    1.79 +
    1.80 +If four pairs of (a,b,c) inputs are run through mix(), or through
    1.81 +mix() in reverse, there are at least 32 bits of the output that
    1.82 +are sometimes the same for one pair and different for another pair.
    1.83 +This was tested for:
    1.84 +* pairs that differed by one bit, by two bits, in any combination
    1.85 +  of top bits of (a,b,c), or in any combination of bottom bits of
    1.86 +  (a,b,c).
    1.87 +* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
    1.88 +  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
    1.89 +  is commonly produced by subtraction) look like a single 1-bit
    1.90 +  difference.
    1.91 +* the base values were pseudorandom, all zero but one bit set, or 
    1.92 +  all zero plus a counter that starts at zero.
    1.93 +
    1.94 +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
    1.95 +satisfy this are
    1.96 +    4  6  8 16 19  4
    1.97 +    9 15  3 18 27 15
    1.98 +   14  9  3  7 17  3
    1.99 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
   1.100 +for "differ" defined as + with a one-bit base and a two-bit delta.  I
   1.101 +used http://burtleburtle.net/bob/hash/avalanche.html to choose 
   1.102 +the operations, constants, and arrangements of the variables.
   1.103 +
   1.104 +This does not achieve avalanche.  There are input bits of (a,b,c)
   1.105 +that fail to affect some output bits of (a,b,c), especially of a.  The
   1.106 +most thoroughly mixed value is c, but it doesn't really even achieve
   1.107 +avalanche in c.
   1.108 +
   1.109 +This allows some parallelism.  Read-after-writes are good at doubling
   1.110 +the number of bits affected, so the goal of mixing pulls in the opposite
   1.111 +direction as the goal of parallelism.  I did what I could.  Rotates
   1.112 +seem to cost as much as shifts on every machine I could lay my hands
   1.113 +on, and rotates are much kinder to the top and bottom bits, so I used
   1.114 +rotates.
   1.115 +-------------------------------------------------------------------------------
   1.116 +*/
   1.117 +#define mix(a,b,c) \
   1.118 +{ \
   1.119 +  a -= c;  a ^= rot(c, 4);  c += b; \
   1.120 +  b -= a;  b ^= rot(a, 6);  a += c; \
   1.121 +  c -= b;  c ^= rot(b, 8);  b += a; \
   1.122 +  a -= c;  a ^= rot(c,16);  c += b; \
   1.123 +  b -= a;  b ^= rot(a,19);  a += c; \
   1.124 +  c -= b;  c ^= rot(b, 4);  b += a; \
   1.125 +}
   1.126 +
   1.127 +/*
   1.128 +-------------------------------------------------------------------------------
   1.129 +final -- final mixing of 3 32-bit values (a,b,c) into c
   1.130 +
   1.131 +Pairs of (a,b,c) values differing in only a few bits will usually
   1.132 +produce values of c that look totally different.  This was tested for
   1.133 +* pairs that differed by one bit, by two bits, in any combination
   1.134 +  of top bits of (a,b,c), or in any combination of bottom bits of
   1.135 +  (a,b,c).
   1.136 +* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
   1.137 +  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
   1.138 +  is commonly produced by subtraction) look like a single 1-bit
   1.139 +  difference.
   1.140 +* the base values were pseudorandom, all zero but one bit set, or 
   1.141 +  all zero plus a counter that starts at zero.
   1.142 +
   1.143 +These constants passed:
   1.144 + 14 11 25 16 4 14 24
   1.145 + 12 14 25 16 4 14 24
   1.146 +and these came close:
   1.147 +  4  8 15 26 3 22 24
   1.148 + 10  8 15 26 3 22 24
   1.149 + 11  8 15 26 3 22 24
   1.150 +-------------------------------------------------------------------------------
   1.151 +*/
   1.152 +#define final(a,b,c) \
   1.153 +{ \
   1.154 +  c ^= b; c -= rot(b,14); \
   1.155 +  a ^= c; a -= rot(c,11); \
   1.156 +  b ^= a; b -= rot(a,25); \
   1.157 +  c ^= b; c -= rot(b,16); \
   1.158 +  a ^= c; a -= rot(c,4);  \
   1.159 +  b ^= a; b -= rot(a,14); \
   1.160 +  c ^= b; c -= rot(b,24); \
   1.161 +}
   1.162 +
   1.163 +/*
   1.164 +--------------------------------------------------------------------
   1.165 + This works on all machines.  To be useful, it requires
   1.166 + -- that the key be an array of uint32_t's, and
   1.167 + -- that the length be the number of uint32_t's in the key
   1.168 +
   1.169 + The function hashword() is identical to hashlittle() on little-endian
   1.170 + machines, and identical to hashbig() on big-endian machines,
   1.171 + except that the length has to be measured in uint32_ts rather than in
   1.172 + bytes.  hashlittle() is more complicated than hashword() only because
   1.173 + hashlittle() has to dance around fitting the key bytes into registers.
   1.174 +--------------------------------------------------------------------
   1.175 +*/
   1.176 +inline uint32_t 
   1.177 +jenkHash32(
   1.178 +const uint32_t *k,        /* the key, an array of uint32_t values */
   1.179 +size_t          length)   /* the length of the key, in uint32_ts */
   1.180 +{
   1.181 +  uint32_t a,b,c;
   1.182 +
   1.183 +  /* Set up the internal state */
   1.184 +  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + 0x7291f8a3;//arbitrary
   1.185 +
   1.186 +  /*------------------------------------------------- handle most of the key */
   1.187 +  while (length > 3)
   1.188 +  {
   1.189 +    a += k[0];
   1.190 +    b += k[1];
   1.191 +    c += k[2];
   1.192 +    mix(a,b,c);
   1.193 +    length -= 3;
   1.194 +    k += 3;
   1.195 +  }
   1.196 +
   1.197 +  /*------------------------------------------- handle the last 3 uint32_t's */
   1.198 +  switch(length)                     /* all the case statements fall through */
   1.199 +  { 
   1.200 +  case 3 : c+=k[2];
   1.201 +  case 2 : b+=k[1];
   1.202 +  case 1 : a+=k[0];
   1.203 +    final(a,b,c);
   1.204 +  case 0:     /* case 0: nothing left to add */
   1.205 +    break;
   1.206 +  }
   1.207 +  /*------------------------------------------------------ report the result */
   1.208 +  return c;
   1.209 +}
   1.210 +
   1.211 +
   1.212 +/*
   1.213 +--------------------------------------------------------------------
   1.214 +hashword2() -- same as hashword(), but take two seeds and return two
   1.215 +32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
   1.216 +both be initialized with seeds.  If you pass in (*pb)==0, the output 
   1.217 +(*pc) will be the same as the return value from hashword().
   1.218 +--------------------------------------------------------------------
   1.219 +*/
   1.220 +void hashword2 (
   1.221 +const uint32_t *k,                   /* the key, an array of uint32_t values */
   1.222 +size_t          length,               /* the length of the key, in uint32_ts */
   1.223 +uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
   1.224 +uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
   1.225 +{
   1.226 +  uint32_t a,b,c;
   1.227 +
   1.228 +  /* Set up the internal state */
   1.229 +  a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
   1.230 +  c += *pb;
   1.231 +
   1.232 +  /*------------------------------------------------- handle most of the key */
   1.233 +  while (length > 3)
   1.234 +  {
   1.235 +    a += k[0];
   1.236 +    b += k[1];
   1.237 +    c += k[2];
   1.238 +    mix(a,b,c);
   1.239 +    length -= 3;
   1.240 +    k += 3;
   1.241 +  }
   1.242 +
   1.243 +  /*------------------------------------------- handle the last 3 uint32_t's */
   1.244 +  switch(length)                     /* all the case statements fall through */
   1.245 +  { 
   1.246 +  case 3 : c+=k[2];
   1.247 +  case 2 : b+=k[1];
   1.248 +  case 1 : a+=k[0];
   1.249 +    final(a,b,c);
   1.250 +  case 0:     /* case 0: nothing left to add */
   1.251 +    break;
   1.252 +  }
   1.253 +  /*------------------------------------------------------ report the result */
   1.254 +  *pc=c; *pb=b;
   1.255 +}
   1.256 +
   1.257 +
   1.258 +/*
   1.259 +-------------------------------------------------------------------------------
   1.260 +hashlittle() -- hash a variable-length key into a 32-bit value
   1.261 +  k       : the key (the unaligned variable-length array of bytes)
   1.262 +  length  : the length of the key, counting by bytes
   1.263 +  initval : can be any 4-byte value
   1.264 +Returns a 32-bit value.  Every bit of the key affects every bit of
   1.265 +the return value.  Two keys differing by one or two bits will have
   1.266 +totally different hash values.
   1.267 +
   1.268 +The best hash table sizes are powers of 2.  There is no need to do
   1.269 +mod a prime (mod is sooo slow!).  If you need less than 32 bits,
   1.270 +use a bitmask.  For example, if you need only 10 bits, do
   1.271 +  h = (h & hashmask(10));
   1.272 +In which case, the hash table should have hashsize(10) elements.
   1.273 +
   1.274 +If you are hashing n strings (uint8_t **)k, do it like this:
   1.275 +  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
   1.276 +
   1.277 +By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
   1.278 +code any way you wish, private, educational, or commercial.  It's free.
   1.279 +
   1.280 +Use for hash table lookup, or anything where one collision in 2^^32 is
   1.281 +acceptable.  Do NOT use for cryptographic purposes.
   1.282 +-------------------------------------------------------------------------------
   1.283 +*/
   1.284 +
   1.285 +uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
   1.286 +{
   1.287 +  uint32_t a,b,c;                                          /* internal state */
   1.288 +  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
   1.289 +
   1.290 +  /* Set up the internal state */
   1.291 +  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
   1.292 +
   1.293 +  u.ptr = key;
   1.294 +  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
   1.295 +    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
   1.296 +    const uint8_t  *k8;
   1.297 +
   1.298 +    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
   1.299 +    while (length > 12)
   1.300 +    {
   1.301 +      a += k[0];
   1.302 +      b += k[1];
   1.303 +      c += k[2];
   1.304 +      mix(a,b,c);
   1.305 +      length -= 12;
   1.306 +      k += 3;
   1.307 +    }
   1.308 +
   1.309 +    /*----------------------------- handle the last (probably partial) block */
   1.310 +    /* 
   1.311 +     * "k[2]&0xffffff" actually reads beyond the end of the string, but
   1.312 +     * then masks off the part it's not allowed to read.  Because the
   1.313 +     * string is aligned, the masked-off tail is in the same word as the
   1.314 +     * rest of the string.  Every machine with memory protection I've seen
   1.315 +     * does it on word boundaries, so is OK with this.  But VALGRIND will
   1.316 +     * still catch it and complain.  The masking trick does make the hash
   1.317 +     * noticably faster for short strings (like English words).
   1.318 +     */
   1.319 +#ifndef VALGRIND
   1.320 +
   1.321 +    switch(length)
   1.322 +    {
   1.323 +    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
   1.324 +    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
   1.325 +    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
   1.326 +    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
   1.327 +    case 8 : b+=k[1]; a+=k[0]; break;
   1.328 +    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
   1.329 +    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
   1.330 +    case 5 : b+=k[1]&0xff; a+=k[0]; break;
   1.331 +    case 4 : a+=k[0]; break;
   1.332 +    case 3 : a+=k[0]&0xffffff; break;
   1.333 +    case 2 : a+=k[0]&0xffff; break;
   1.334 +    case 1 : a+=k[0]&0xff; break;
   1.335 +    case 0 : return c;              /* zero length strings require no mixing */
   1.336 +    }
   1.337 +
   1.338 +#else /* make valgrind happy */
   1.339 +
   1.340 +    k8 = (const uint8_t *)k;
   1.341 +    switch(length)
   1.342 +    {
   1.343 +    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
   1.344 +    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
   1.345 +    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
   1.346 +    case 9 : c+=k8[8];                   /* fall through */
   1.347 +    case 8 : b+=k[1]; a+=k[0]; break;
   1.348 +    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
   1.349 +    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
   1.350 +    case 5 : b+=k8[4];                   /* fall through */
   1.351 +    case 4 : a+=k[0]; break;
   1.352 +    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
   1.353 +    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
   1.354 +    case 1 : a+=k8[0]; break;
   1.355 +    case 0 : return c;
   1.356 +    }
   1.357 +
   1.358 +#endif /* !valgrind */
   1.359 +
   1.360 +  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
   1.361 +    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
   1.362 +    const uint8_t  *k8;
   1.363 +
   1.364 +    /*--------------- all but last block: aligned reads and different mixing */
   1.365 +    while (length > 12)
   1.366 +    {
   1.367 +      a += k[0] + (((uint32_t)k[1])<<16);
   1.368 +      b += k[2] + (((uint32_t)k[3])<<16);
   1.369 +      c += k[4] + (((uint32_t)k[5])<<16);
   1.370 +      mix(a,b,c);
   1.371 +      length -= 12;
   1.372 +      k += 6;
   1.373 +    }
   1.374 +
   1.375 +    /*----------------------------- handle the last (probably partial) block */
   1.376 +    k8 = (const uint8_t *)k;
   1.377 +    switch(length)
   1.378 +    {
   1.379 +    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
   1.380 +             b+=k[2]+(((uint32_t)k[3])<<16);
   1.381 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.382 +             break;
   1.383 +    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
   1.384 +    case 10: c+=k[4];
   1.385 +             b+=k[2]+(((uint32_t)k[3])<<16);
   1.386 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.387 +             break;
   1.388 +    case 9 : c+=k8[8];                      /* fall through */
   1.389 +    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
   1.390 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.391 +             break;
   1.392 +    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
   1.393 +    case 6 : b+=k[2];
   1.394 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.395 +             break;
   1.396 +    case 5 : b+=k8[4];                      /* fall through */
   1.397 +    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
   1.398 +             break;
   1.399 +    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
   1.400 +    case 2 : a+=k[0];
   1.401 +             break;
   1.402 +    case 1 : a+=k8[0];
   1.403 +             break;
   1.404 +    case 0 : return c;                     /* zero length requires no mixing */
   1.405 +    }
   1.406 +
   1.407 +  } else {                        /* need to read the key one byte at a time */
   1.408 +    const uint8_t *k = (const uint8_t *)key;
   1.409 +
   1.410 +    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
   1.411 +    while (length > 12)
   1.412 +    {
   1.413 +      a += k[0];
   1.414 +      a += ((uint32_t)k[1])<<8;
   1.415 +      a += ((uint32_t)k[2])<<16;
   1.416 +      a += ((uint32_t)k[3])<<24;
   1.417 +      b += k[4];
   1.418 +      b += ((uint32_t)k[5])<<8;
   1.419 +      b += ((uint32_t)k[6])<<16;
   1.420 +      b += ((uint32_t)k[7])<<24;
   1.421 +      c += k[8];
   1.422 +      c += ((uint32_t)k[9])<<8;
   1.423 +      c += ((uint32_t)k[10])<<16;
   1.424 +      c += ((uint32_t)k[11])<<24;
   1.425 +      mix(a,b,c);
   1.426 +      length -= 12;
   1.427 +      k += 12;
   1.428 +    }
   1.429 +
   1.430 +    /*-------------------------------- last block: affect all 32 bits of (c) */
   1.431 +    switch(length)                   /* all the case statements fall through */
   1.432 +    {
   1.433 +    case 12: c+=((uint32_t)k[11])<<24;
   1.434 +    case 11: c+=((uint32_t)k[10])<<16;
   1.435 +    case 10: c+=((uint32_t)k[9])<<8;
   1.436 +    case 9 : c+=k[8];
   1.437 +    case 8 : b+=((uint32_t)k[7])<<24;
   1.438 +    case 7 : b+=((uint32_t)k[6])<<16;
   1.439 +    case 6 : b+=((uint32_t)k[5])<<8;
   1.440 +    case 5 : b+=k[4];
   1.441 +    case 4 : a+=((uint32_t)k[3])<<24;
   1.442 +    case 3 : a+=((uint32_t)k[2])<<16;
   1.443 +    case 2 : a+=((uint32_t)k[1])<<8;
   1.444 +    case 1 : a+=k[0];
   1.445 +             break;
   1.446 +    case 0 : return c;
   1.447 +    }
   1.448 +  }
   1.449 +
   1.450 +  final(a,b,c);
   1.451 +  return c;
   1.452 +}
   1.453 +
   1.454 +
   1.455 +/*
   1.456 + * hashlittle2: return 2 32-bit hash values
   1.457 + *
   1.458 + * This is identical to hashlittle(), except it returns two 32-bit hash
   1.459 + * values instead of just one.  This is good enough for hash table
   1.460 + * lookup with 2^^64 buckets, or if you want a second hash if you're not
   1.461 + * happy with the first, or if you want a probably-unique 64-bit ID for
   1.462 + * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
   1.463 + * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
   1.464 + */
   1.465 +void hashlittle2( 
   1.466 +  const void *key,       /* the key to hash */
   1.467 +  size_t      length,    /* length of the key */
   1.468 +  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
   1.469 +  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
   1.470 +{
   1.471 +  uint32_t a,b,c;                                          /* internal state */
   1.472 +  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
   1.473 +
   1.474 +  /* Set up the internal state */
   1.475 +  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
   1.476 +  c += *pb;
   1.477 +
   1.478 +  u.ptr = key;
   1.479 +  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
   1.480 +    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
   1.481 +    const uint8_t  *k8;
   1.482 +
   1.483 +    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
   1.484 +    while (length > 12)
   1.485 +    {
   1.486 +      a += k[0];
   1.487 +      b += k[1];
   1.488 +      c += k[2];
   1.489 +      mix(a,b,c);
   1.490 +      length -= 12;
   1.491 +      k += 3;
   1.492 +    }
   1.493 +
   1.494 +    /*----------------------------- handle the last (probably partial) block */
   1.495 +    /* 
   1.496 +     * "k[2]&0xffffff" actually reads beyond the end of the string, but
   1.497 +     * then masks off the part it's not allowed to read.  Because the
   1.498 +     * string is aligned, the masked-off tail is in the same word as the
   1.499 +     * rest of the string.  Every machine with memory protection I've seen
   1.500 +     * does it on word boundaries, so is OK with this.  But VALGRIND will
   1.501 +     * still catch it and complain.  The masking trick does make the hash
   1.502 +     * noticably faster for short strings (like English words).
   1.503 +     */
   1.504 +#ifndef VALGRIND
   1.505 +
   1.506 +    switch(length)
   1.507 +    {
   1.508 +    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
   1.509 +    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
   1.510 +    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
   1.511 +    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
   1.512 +    case 8 : b+=k[1]; a+=k[0]; break;
   1.513 +    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
   1.514 +    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
   1.515 +    case 5 : b+=k[1]&0xff; a+=k[0]; break;
   1.516 +    case 4 : a+=k[0]; break;
   1.517 +    case 3 : a+=k[0]&0xffffff; break;
   1.518 +    case 2 : a+=k[0]&0xffff; break;
   1.519 +    case 1 : a+=k[0]&0xff; break;
   1.520 +    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
   1.521 +    }
   1.522 +
   1.523 +#else /* make valgrind happy */
   1.524 +
   1.525 +    k8 = (const uint8_t *)k;
   1.526 +    switch(length)
   1.527 +    {
   1.528 +    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
   1.529 +    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
   1.530 +    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
   1.531 +    case 9 : c+=k8[8];                   /* fall through */
   1.532 +    case 8 : b+=k[1]; a+=k[0]; break;
   1.533 +    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
   1.534 +    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
   1.535 +    case 5 : b+=k8[4];                   /* fall through */
   1.536 +    case 4 : a+=k[0]; break;
   1.537 +    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
   1.538 +    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
   1.539 +    case 1 : a+=k8[0]; break;
   1.540 +    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
   1.541 +    }
   1.542 +
   1.543 +#endif /* !valgrind */
   1.544 +
   1.545 +  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
   1.546 +    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
   1.547 +    const uint8_t  *k8;
   1.548 +
   1.549 +    /*--------------- all but last block: aligned reads and different mixing */
   1.550 +    while (length > 12)
   1.551 +    {
   1.552 +      a += k[0] + (((uint32_t)k[1])<<16);
   1.553 +      b += k[2] + (((uint32_t)k[3])<<16);
   1.554 +      c += k[4] + (((uint32_t)k[5])<<16);
   1.555 +      mix(a,b,c);
   1.556 +      length -= 12;
   1.557 +      k += 6;
   1.558 +    }
   1.559 +
   1.560 +    /*----------------------------- handle the last (probably partial) block */
   1.561 +    k8 = (const uint8_t *)k;
   1.562 +    switch(length)
   1.563 +    {
   1.564 +    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
   1.565 +             b+=k[2]+(((uint32_t)k[3])<<16);
   1.566 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.567 +             break;
   1.568 +    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
   1.569 +    case 10: c+=k[4];
   1.570 +             b+=k[2]+(((uint32_t)k[3])<<16);
   1.571 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.572 +             break;
   1.573 +    case 9 : c+=k8[8];                      /* fall through */
   1.574 +    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
   1.575 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.576 +             break;
   1.577 +    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
   1.578 +    case 6 : b+=k[2];
   1.579 +             a+=k[0]+(((uint32_t)k[1])<<16);
   1.580 +             break;
   1.581 +    case 5 : b+=k8[4];                      /* fall through */
   1.582 +    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
   1.583 +             break;
   1.584 +    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
   1.585 +    case 2 : a+=k[0];
   1.586 +             break;
   1.587 +    case 1 : a+=k8[0];
   1.588 +             break;
   1.589 +    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
   1.590 +    }
   1.591 +
   1.592 +  } else {                        /* need to read the key one byte at a time */
   1.593 +    const uint8_t *k = (const uint8_t *)key;
   1.594 +
   1.595 +    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
   1.596 +    while (length > 12)
   1.597 +    {
   1.598 +      a += k[0];
   1.599 +      a += ((uint32_t)k[1])<<8;
   1.600 +      a += ((uint32_t)k[2])<<16;
   1.601 +      a += ((uint32_t)k[3])<<24;
   1.602 +      b += k[4];
   1.603 +      b += ((uint32_t)k[5])<<8;
   1.604 +      b += ((uint32_t)k[6])<<16;
   1.605 +      b += ((uint32_t)k[7])<<24;
   1.606 +      c += k[8];
   1.607 +      c += ((uint32_t)k[9])<<8;
   1.608 +      c += ((uint32_t)k[10])<<16;
   1.609 +      c += ((uint32_t)k[11])<<24;
   1.610 +      mix(a,b,c);
   1.611 +      length -= 12;
   1.612 +      k += 12;
   1.613 +    }
   1.614 +
   1.615 +    /*-------------------------------- last block: affect all 32 bits of (c) */
   1.616 +    switch(length)                   /* all the case statements fall through */
   1.617 +    {
   1.618 +    case 12: c+=((uint32_t)k[11])<<24;
   1.619 +    case 11: c+=((uint32_t)k[10])<<16;
   1.620 +    case 10: c+=((uint32_t)k[9])<<8;
   1.621 +    case 9 : c+=k[8];
   1.622 +    case 8 : b+=((uint32_t)k[7])<<24;
   1.623 +    case 7 : b+=((uint32_t)k[6])<<16;
   1.624 +    case 6 : b+=((uint32_t)k[5])<<8;
   1.625 +    case 5 : b+=k[4];
   1.626 +    case 4 : a+=((uint32_t)k[3])<<24;
   1.627 +    case 3 : a+=((uint32_t)k[2])<<16;
   1.628 +    case 2 : a+=((uint32_t)k[1])<<8;
   1.629 +    case 1 : a+=k[0];
   1.630 +             break;
   1.631 +    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
   1.632 +    }
   1.633 +  }
   1.634 +
   1.635 +  final(a,b,c);
   1.636 +  *pc=c; *pb=b;
   1.637 +}
   1.638 +
   1.639 +
   1.640 +
   1.641 +/*
   1.642 + * hashbig():
   1.643 + * This is the same as hashword() on big-endian machines.  It is different
   1.644 + * from hashlittle() on all machines.  hashbig() takes advantage of
   1.645 + * big-endian byte ordering. 
   1.646 + */
   1.647 +uint32_t hashbig( const void *key, size_t length, uint32_t initval)
   1.648 +{
   1.649 +  uint32_t a,b,c;
   1.650 +  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
   1.651 +
   1.652 +  /* Set up the internal state */
   1.653 +  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
   1.654 +
   1.655 +  u.ptr = key;
   1.656 +  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
   1.657 +    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
   1.658 +    const uint8_t  *k8;
   1.659 +
   1.660 +    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
   1.661 +    while (length > 12)
   1.662 +    {
   1.663 +      a += k[0];
   1.664 +      b += k[1];
   1.665 +      c += k[2];
   1.666 +      mix(a,b,c);
   1.667 +      length -= 12;
   1.668 +      k += 3;
   1.669 +    }
   1.670 +
   1.671 +    /*----------------------------- handle the last (probably partial) block */
   1.672 +    /* 
   1.673 +     * "k[2]<<8" actually reads beyond the end of the string, but
   1.674 +     * then shifts out the part it's not allowed to read.  Because the
   1.675 +     * string is aligned, the illegal read is in the same word as the
   1.676 +     * rest of the string.  Every machine with memory protection I've seen
   1.677 +     * does it on word boundaries, so is OK with this.  But VALGRIND will
   1.678 +     * still catch it and complain.  The masking trick does make the hash
   1.679 +     * noticably faster for short strings (like English words).
   1.680 +     */
   1.681 +#ifndef VALGRIND
   1.682 +
   1.683 +    switch(length)
   1.684 +    {
   1.685 +    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
   1.686 +    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
   1.687 +    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
   1.688 +    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
   1.689 +    case 8 : b+=k[1]; a+=k[0]; break;
   1.690 +    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
   1.691 +    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
   1.692 +    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
   1.693 +    case 4 : a+=k[0]; break;
   1.694 +    case 3 : a+=k[0]&0xffffff00; break;
   1.695 +    case 2 : a+=k[0]&0xffff0000; break;
   1.696 +    case 1 : a+=k[0]&0xff000000; break;
   1.697 +    case 0 : return c;              /* zero length strings require no mixing */
   1.698 +    }
   1.699 +
   1.700 +#else  /* make valgrind happy */
   1.701 +
   1.702 +    k8 = (const uint8_t *)k;
   1.703 +    switch(length)                   /* all the case statements fall through */
   1.704 +    {
   1.705 +    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
   1.706 +    case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
   1.707 +    case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
   1.708 +    case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
   1.709 +    case 8 : b+=k[1]; a+=k[0]; break;
   1.710 +    case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
   1.711 +    case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
   1.712 +    case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
   1.713 +    case 4 : a+=k[0]; break;
   1.714 +    case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
   1.715 +    case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
   1.716 +    case 1 : a+=((uint32_t)k8[0])<<24; break;
   1.717 +    case 0 : return c;
   1.718 +    }
   1.719 +
   1.720 +#endif /* !VALGRIND */
   1.721 +
   1.722 +  } else {                        /* need to read the key one byte at a time */
   1.723 +    const uint8_t *k = (const uint8_t *)key;
   1.724 +
   1.725 +    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
   1.726 +    while (length > 12)
   1.727 +    {
   1.728 +      a += ((uint32_t)k[0])<<24;
   1.729 +      a += ((uint32_t)k[1])<<16;
   1.730 +      a += ((uint32_t)k[2])<<8;
   1.731 +      a += ((uint32_t)k[3]);
   1.732 +      b += ((uint32_t)k[4])<<24;
   1.733 +      b += ((uint32_t)k[5])<<16;
   1.734 +      b += ((uint32_t)k[6])<<8;
   1.735 +      b += ((uint32_t)k[7]);
   1.736 +      c += ((uint32_t)k[8])<<24;
   1.737 +      c += ((uint32_t)k[9])<<16;
   1.738 +      c += ((uint32_t)k[10])<<8;
   1.739 +      c += ((uint32_t)k[11]);
   1.740 +      mix(a,b,c);
   1.741 +      length -= 12;
   1.742 +      k += 12;
   1.743 +    }
   1.744 +
   1.745 +    /*-------------------------------- last block: affect all 32 bits of (c) */
   1.746 +    switch(length)                   /* all the case statements fall through */
   1.747 +    {
   1.748 +    case 12: c+=k[11];
   1.749 +    case 11: c+=((uint32_t)k[10])<<8;
   1.750 +    case 10: c+=((uint32_t)k[9])<<16;
   1.751 +    case 9 : c+=((uint32_t)k[8])<<24;
   1.752 +    case 8 : b+=k[7];
   1.753 +    case 7 : b+=((uint32_t)k[6])<<8;
   1.754 +    case 6 : b+=((uint32_t)k[5])<<16;
   1.755 +    case 5 : b+=((uint32_t)k[4])<<24;
   1.756 +    case 4 : a+=k[3];
   1.757 +    case 3 : a+=((uint32_t)k[2])<<8;
   1.758 +    case 2 : a+=((uint32_t)k[1])<<16;
   1.759 +    case 1 : a+=((uint32_t)k[0])<<24;
   1.760 +             break;
   1.761 +    case 0 : return c;
   1.762 +    }
   1.763 +  }
   1.764 +
   1.765 +  final(a,b,c);
   1.766 +  return c;
   1.767 +}
   1.768 +
   1.769 +
   1.770 +#ifdef SELF_TEST
   1.771 +
   1.772 +/* used for timings */
   1.773 +void driver1()
   1.774 +{
   1.775 +  uint8_t buf[256];
   1.776 +  uint32_t i;
   1.777 +  uint32_t h=0;
   1.778 +  time_t a,z;
   1.779 +
   1.780 +  time(&a);
   1.781 +  for (i=0; i<256; ++i) buf[i] = 'x';
   1.782 +  for (i=0; i<1; ++i) 
   1.783 +  {
   1.784 +    h = hashlittle(&buf[0],1,h);
   1.785 +  }
   1.786 +  time(&z);
   1.787 +
   1.788 +  if (z-a > 0) printf("time %d %.8x\n", z-a, h);
   1.789 +
   1.790 +}
   1.791 +
   1.792 +/* check that every input bit changes every output bit half the time */
   1.793 +#define HASHSTATE 1
   1.794 +#define HASHLEN   1
   1.795 +#define MAXPAIR 60
   1.796 +#define MAXLEN  70
   1.797 +void driver2()
   1.798 +{
   1.799 +  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
   1.800 +  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
   1.801 +  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
   1.802 +  uint32_t x[HASHSTATE],y[HASHSTATE];
   1.803 +  uint32_t hlen;
   1.804 +
   1.805 +  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
   1.806 +  for (hlen=0; hlen < MAXLEN; ++hlen)
   1.807 +  {
   1.808 +    z=0;
   1.809 +    for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
   1.810 +    {
   1.811 +      for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
   1.812 +      {
   1.813 +	for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
   1.814 +	{
   1.815 +	  for (l=0; l<HASHSTATE; ++l)
   1.816 +	    e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
   1.817 +
   1.818 +      	  /*---- check that every output bit is affected by that input bit */
   1.819 +	  for (k=0; k<MAXPAIR; k+=2)
   1.820 +	  { 
   1.821 +	    uint32_t finished=1;
   1.822 +	    /* keys have one bit different */
   1.823 +	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
   1.824 +	    /* have a and b be two keys differing in only one bit */
   1.825 +	    a[i] ^= (k<<j);
   1.826 +	    a[i] ^= (k>>(8-j));
   1.827 +	     c[0] = hashlittle(a, hlen, m);
   1.828 +	    b[i] ^= ((k+1)<<j);
   1.829 +	    b[i] ^= ((k+1)>>(8-j));
   1.830 +	     d[0] = hashlittle(b, hlen, m);
   1.831 +	    /* check every bit is 1, 0, set, and not set at least once */
   1.832 +	    for (l=0; l<HASHSTATE; ++l)
   1.833 +	    {
   1.834 +	      e[l] &= (c[l]^d[l]);
   1.835 +	      f[l] &= ~(c[l]^d[l]);
   1.836 +	      g[l] &= c[l];
   1.837 +	      h[l] &= ~c[l];
   1.838 +	      x[l] &= d[l];
   1.839 +	      y[l] &= ~d[l];
   1.840 +	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
   1.841 +	    }
   1.842 +	    if (finished) break;
   1.843 +	  }
   1.844 +	  if (k>z) z=k;
   1.845 +	  if (k==MAXPAIR) 
   1.846 +	  {
   1.847 +	     printf("Some bit didn't change: ");
   1.848 +	     printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
   1.849 +	            e[0],f[0],g[0],h[0],x[0],y[0]);
   1.850 +	     printf("i %d j %d m %d len %d\n", i, j, m, hlen);
   1.851 +	  }
   1.852 +	  if (z==MAXPAIR) goto done;
   1.853 +	}
   1.854 +      }
   1.855 +    }
   1.856 +   done:
   1.857 +    if (z < MAXPAIR)
   1.858 +    {
   1.859 +      printf("Mix success  %2d bytes  %2d initvals  ",i,m);
   1.860 +      printf("required  %d  trials\n", z/2);
   1.861 +    }
   1.862 +  }
   1.863 +  printf("\n");
   1.864 +}
   1.865 +
   1.866 +/* Check for reading beyond the end of the buffer and alignment problems */
   1.867 +void driver3()
   1.868 +{
   1.869 +  uint8_t buf[MAXLEN+20], *b;
   1.870 +  uint32_t len;
   1.871 +  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
   1.872 +  uint32_t h;
   1.873 +  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
   1.874 +  uint32_t i;
   1.875 +  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
   1.876 +  uint32_t j;
   1.877 +  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
   1.878 +  uint32_t ref,x,y;
   1.879 +  uint8_t *p;
   1.880 +
   1.881 +  printf("Endianness.  These lines should all be the same (for values filled in):\n");
   1.882 +  printf("%.8x                            %.8x                            %.8x\n",
   1.883 +         jenkHash32((const uint32_t *)q, (sizeof(q)-1)/4, 13),
   1.884 +         jenkHash32((const uint32_t *)q, (sizeof(q)-5)/4, 13),
   1.885 +         jenkHash32((const uint32_t *)q, (sizeof(q)-9)/4, 13));
   1.886 +  p = q;
   1.887 +  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
   1.888 +         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
   1.889 +         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
   1.890 +         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
   1.891 +         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
   1.892 +         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
   1.893 +         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
   1.894 +  p = &qq[1];
   1.895 +  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
   1.896 +         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
   1.897 +         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
   1.898 +         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
   1.899 +         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
   1.900 +         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
   1.901 +         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
   1.902 +  p = &qqq[2];
   1.903 +  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
   1.904 +         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
   1.905 +         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
   1.906 +         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
   1.907 +         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
   1.908 +         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
   1.909 +         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
   1.910 +  p = &qqqq[3];
   1.911 +  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
   1.912 +         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
   1.913 +         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
   1.914 +         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
   1.915 +         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
   1.916 +         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
   1.917 +         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
   1.918 +  printf("\n");
   1.919 +
   1.920 +  /* check that hashlittle2 and hashlittle produce the same results */
   1.921 +  i=47; j=0;
   1.922 +  hashlittle2(q, sizeof(q), &i, &j);
   1.923 +  if (hashlittle(q, sizeof(q), 47) != i)
   1.924 +    printf("hashlittle2 and hashlittle mismatch\n");
   1.925 +
   1.926 +  /* check that hashword2 and hashword produce the same results */
   1.927 +  len = 0xdeadbeef;
   1.928 +  i=47, j=0;
   1.929 +  hashword2(&len, 1, &i, &j);
   1.930 +  if (jenkHash32(&len, 1, 47) != i)
   1.931 +    printf("hashword2 and hashword mismatch %x %x\n", 
   1.932 +	   i, jenkHash32(&len, 1, 47));
   1.933 +
   1.934 +  /* check hashlittle doesn't read before or after the ends of the string */
   1.935 +  for (h=0, b=buf+1; h<8; ++h, ++b)
   1.936 +  {
   1.937 +    for (i=0; i<MAXLEN; ++i)
   1.938 +    {
   1.939 +      len = i;
   1.940 +      for (j=0; j<i; ++j) *(b+j)=0;
   1.941 +
   1.942 +      /* these should all be equal */
   1.943 +      ref = hashlittle(b, len, (uint32_t)1);
   1.944 +      *(b+i)=(uint8_t)~0;
   1.945 +      *(b-1)=(uint8_t)~0;
   1.946 +      x = hashlittle(b, len, (uint32_t)1);
   1.947 +      y = hashlittle(b, len, (uint32_t)1);
   1.948 +      if ((ref != x) || (ref != y)) 
   1.949 +      {
   1.950 +	printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
   1.951 +               h, i);
   1.952 +      }
   1.953 +    }
   1.954 +  }
   1.955 +}
   1.956 +
   1.957 +/* check for problems with nulls */
   1.958 + void driver4()
   1.959 +{
   1.960 +  uint8_t buf[1];
   1.961 +  uint32_t h,i,state[HASHSTATE];
   1.962 +
   1.963 +
   1.964 +  buf[0] = ~0;
   1.965 +  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
   1.966 +  printf("These should all be different\n");
   1.967 +  for (i=0, h=0; i<8; ++i)
   1.968 +  {
   1.969 +    h = hashlittle(buf, 0, h);
   1.970 +
   1.971 +    printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
   1.972 +
   1.973 +  }
   1.974 +}
   1.975 +
   1.976 +void driver5()
   1.977 +{
   1.978 +
   1.979 +  uint32_t b,c;
   1.980 +  b=0, c=0, hashlittle2("", 0, &c, &b);
   1.981 +  printf("hash is %.8lx %.8lx\n", c, b);   /* deadbeef deadbeef */
   1.982 +  b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
   1.983 +  printf("hash is %.8lx %.8lx\n", c, b);   /* bd5b7dde deadbeef */
   1.984 +  b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
   1.985 +  printf("hash is %.8lx %.8lx\n", c, b);   /* 9c093ccd bd5b7dde */
   1.986 +  b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
   1.987 +  printf("hash is %.8lx %.8lx\n", c, b);   /* 17770551 ce7226e6 */
   1.988 +  b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
   1.989 +  printf("hash is %.8lx %.8lx\n", c, b);   /* e3607cae bd371de4 */
   1.990 +  b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
   1.991 +  printf("hash is %.8lx %.8lx\n", c, b);   /* cd628161 6cbea4b3 */
   1.992 +  c = hashlittle("Four score and seven years ago", 30, 0);
   1.993 +  printf("hash is %.8lx\n", c);   /* 17770551 */
   1.994 +  c = hashlittle("Four score and seven years ago", 30, 1);
   1.995 +  printf("hash is %.8lx\n", c);   /* cd628161 */
   1.996 +
   1.997 +}
   1.998 +
   1.999 +
  1.1000 +int test_lookup3()
  1.1001 +{
  1.1002 +  driver1();   /* test that the key is hashed: used for timings */
  1.1003 +  driver2();   /* test that whole key is hashed thoroughly */
  1.1004 +  driver3();   /* test that nothing but the key is hashed */
  1.1005 +  driver4();   /* test hashing multiple buffers (all buffers are null) */
  1.1006 +  driver5();   /* test the hash against known vectors */
  1.1007 +  return 1;
  1.1008 +}
  1.1009 +
  1.1010 +#endif  /* SELF_TEST */

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/jenkinsHash_lookup8.c	Tue Jun 26 03:09:05 2012 -0700
     2.3 @@ -0,0 +1,451 @@
     2.4 +/*
     2.5 +--------------------------------------------------------------------
     2.6 +lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
     2.7 +hash(), hash2(), hash3, and mix() are externally useful functions.
     2.8 +Routines to test the hash are included if SELF_TEST is defined.
     2.9 +You can use this free for any purpose.  It has no warranty.
    2.10 +
    2.11 +2009: This is obsolete.  I recently timed lookup3.c as being faster 
    2.12 +at producing 64-bit results.
    2.13 +--------------------------------------------------------------------
    2.14 +*/
    2.15 +//#define SELF_TEST
    2.16 +
    2.17 +#include <stdio.h>
    2.18 +#include <stddef.h>
    2.19 +#include <stdlib.h>
    2.20 +typedef  unsigned long  long ub8;   /* unsigned 8-byte quantities */
    2.21 +typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
    2.22 +typedef  unsigned       char ub1;
    2.23 +
    2.24 +#define hashsize(n) ((ub8)1<<(n))
    2.25 +#define hashmask(n) (hashsize(n)-1)
    2.26 +
    2.27 +/*
    2.28 +--------------------------------------------------------------------
    2.29 +mix -- mix 3 64-bit values reversibly.
    2.30 +mix() takes 48 machine instructions, but only 24 cycles on a superscalar
    2.31 +  machine (like Intel's new MMX architecture).  It requires 4 64-bit
    2.32 +  registers for 4::2 parallelism.
    2.33 +All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
    2.34 +  (a,b,c), and all deltas of bottom bits were tested.  All deltas were
    2.35 +  tested both on random keys and on keys that were nearly all zero.
    2.36 +  These deltas all cause every bit of c to change between 1/3 and 2/3
    2.37 +  of the time (well, only 113/400 to 287/400 of the time for some
    2.38 +  2-bit delta).  These deltas all cause at least 80 bits to change
    2.39 +  among (a,b,c) when the mix is run either forward or backward (yes it
    2.40 +  is reversible).
    2.41 +This implies that a hash using mix64 has no funnels.  There may be
    2.42 +  characteristics with 3-bit deltas or bigger, I didn't test for
    2.43 +  those.
    2.44 +--------------------------------------------------------------------
    2.45 +*/
    2.46 +#define mix64(a,b,c) \
    2.47 +{ \
    2.48 +  a -= b; a -= c; a ^= (c>>43); \
    2.49 +  b -= c; b -= a; b ^= (a<<9); \
    2.50 +  c -= a; c -= b; c ^= (b>>8); \
    2.51 +  a -= b; a -= c; a ^= (c>>38); \
    2.52 +  b -= c; b -= a; b ^= (a<<23); \
    2.53 +  c -= a; c -= b; c ^= (b>>5); \
    2.54 +  a -= b; a -= c; a ^= (c>>35); \
    2.55 +  b -= c; b -= a; b ^= (a<<49); \
    2.56 +  c -= a; c -= b; c ^= (b>>11); \
    2.57 +  a -= b; a -= c; a ^= (c>>12); \
    2.58 +  b -= c; b -= a; b ^= (a<<18); \
    2.59 +  c -= a; c -= b; c ^= (b>>22); \
    2.60 +}
    2.61 +
    2.62 +/*
    2.63 +--------------------------------------------------------------------
    2.64 +hash() -- hash a variable-length key into a 64-bit value
    2.65 +  k     : the key (the unaligned variable-length array of bytes)
    2.66 +  len   : the length of the key, counting by bytes
    2.67 +  level : can be any 8-byte value
    2.68 +Returns a 64-bit value.  Every bit of the key affects every bit of
    2.69 +the return value.  No funnels.  Every 1-bit and 2-bit delta achieves
    2.70 +avalanche.  About 41+5len instructions.
    2.71 +
    2.72 +The best hash table sizes are powers of 2.  There is no need to do
    2.73 +mod a prime (mod is sooo slow!).  If you need less than 64 bits,
    2.74 +use a bitmask.  For example, if you need only 10 bits, do
    2.75 +  h = (h & hashmask(10));
    2.76 +In which case, the hash table should have hashsize(10) elements.
    2.77 +
    2.78 +If you are hashing n strings (ub1 **)k, do it like this:
    2.79 +  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
    2.80 +
    2.81 +By Bob Jenkins, Jan 4 1997.  bob_jenkins@burtleburtle.net.  You may
    2.82 +use this code any way you wish, private, educational, or commercial,
    2.83 +but I would appreciate if you give me credit.
    2.84 +
    2.85 +See http://burtleburtle.net/bob/hash/evahash.html
    2.86 +Use for hash table lookup, or anything where one collision in 2^^64
    2.87 +is acceptable.  Do NOT use for cryptographic purposes.
    2.88 +--------------------------------------------------------------------
    2.89 +*/
    2.90 +
    2.91 +ub8 hash( k, length, level)
    2.92 +register ub1 *k;        /* the key */
    2.93 +register ub8  length;   /* the length of the key */
    2.94 +register ub8  level;    /* the previous hash, or an arbitrary value */
    2.95 +{
    2.96 +  register ub8 a,b,c,len;
    2.97 +
    2.98 +  /* Set up the internal state */
    2.99 +  len = length;
   2.100 +  a = b = level;                         /* the previous hash value */
   2.101 +  c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
   2.102 +
   2.103 +  /*---------------------------------------- handle most of the key */
   2.104 +  while (len >= 24)
   2.105 +  {
   2.106 +    a += (k[0]        +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24)
   2.107 +     +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56));
   2.108 +    b += (k[8]        +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24)
   2.109 +     +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56));
   2.110 +    c += (k[16]       +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24)
   2.111 +     +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56));
   2.112 +    mix64(a,b,c);
   2.113 +    k += 24; len -= 24;
   2.114 +  }
   2.115 +
   2.116 +  /*------------------------------------- handle the last 23 bytes */
   2.117 +  c += length;
   2.118 +  switch(len)              /* all the case statements fall through */
   2.119 +  {
   2.120 +  case 23: c+=((ub8)k[22]<<56);
   2.121 +  case 22: c+=((ub8)k[21]<<48);
   2.122 +  case 21: c+=((ub8)k[20]<<40);
   2.123 +  case 20: c+=((ub8)k[19]<<32);
   2.124 +  case 19: c+=((ub8)k[18]<<24);
   2.125 +  case 18: c+=((ub8)k[17]<<16);
   2.126 +  case 17: c+=((ub8)k[16]<<8);
   2.127 +    /* the first byte of c is reserved for the length */
   2.128 +  case 16: b+=((ub8)k[15]<<56);
   2.129 +  case 15: b+=((ub8)k[14]<<48);
   2.130 +  case 14: b+=((ub8)k[13]<<40);
   2.131 +  case 13: b+=((ub8)k[12]<<32);
   2.132 +  case 12: b+=((ub8)k[11]<<24);
   2.133 +  case 11: b+=((ub8)k[10]<<16);
   2.134 +  case 10: b+=((ub8)k[ 9]<<8);
   2.135 +  case  9: b+=((ub8)k[ 8]);
   2.136 +  case  8: a+=((ub8)k[ 7]<<56);
   2.137 +  case  7: a+=((ub8)k[ 6]<<48);
   2.138 +  case  6: a+=((ub8)k[ 5]<<40);
   2.139 +  case  5: a+=((ub8)k[ 4]<<32);
   2.140 +  case  4: a+=((ub8)k[ 3]<<24);
   2.141 +  case  3: a+=((ub8)k[ 2]<<16);
   2.142 +  case  2: a+=((ub8)k[ 1]<<8);
   2.143 +  case  1: a+=((ub8)k[ 0]);
   2.144 +    /* case 0: nothing left to add */
   2.145 +  }
   2.146 +  mix64(a,b,c);
   2.147 +  /*-------------------------------------------- report the result */
   2.148 +  return c;
   2.149 +}
   2.150 +
   2.151 +/*
   2.152 +--------------------------------------------------------------------
   2.153 + This works on all machines, is identical to hash() on little-endian 
   2.154 + machines, and it is much faster than hash(), but it requires
   2.155 + -- that the key be an array of ub8's, and
   2.156 + -- that all your machines have the same endianness, and
   2.157 + -- that the length be the number of ub8's in the key
   2.158 +--------------------------------------------------------------------
   2.159 +*/
   2.160 +ub8 hash2( k, length, level)
   2.161 +register ub8 *k;        /* the key */
   2.162 +register ub8  length;   /* the length of the key */
   2.163 +register ub8  level;    /* the previous hash, or an arbitrary value */
   2.164 +{
   2.165 +  register ub8 a,b,c,len;
   2.166 +
   2.167 +  /* Set up the internal state */
   2.168 +  len = length;
   2.169 +  a = b = level;                         /* the previous hash value */
   2.170 +  c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
   2.171 +
   2.172 +  /*---------------------------------------- handle most of the key */
   2.173 +  while (len >= 3)
   2.174 +  {
   2.175 +    a += k[0];
   2.176 +    b += k[1];
   2.177 +    c += k[2];
   2.178 +    mix64(a,b,c);
   2.179 +    k += 3; len -= 3;
   2.180 +  }
   2.181 +
   2.182 +  /*-------------------------------------- handle the last 2 ub8's */
   2.183 +  c += (length<<3);
   2.184 +  switch(len)              /* all the case statements fall through */
   2.185 +  {
   2.186 +    /* c is reserved for the length */
   2.187 +  case  2: b+=k[1];
   2.188 +  case  1: a+=k[0];
   2.189 +    /* case 0: nothing left to add */
   2.190 +  }
   2.191 +  mix64(a,b,c);
   2.192 +  /*-------------------------------------------- report the result */
   2.193 +  return c;
   2.194 +}
   2.195 +
   2.196 +/*
   2.197 +--------------------------------------------------------------------
   2.198 + This is identical to hash() on little-endian machines, and it is much
   2.199 + faster than hash(), but a little slower than hash2(), and it requires
   2.200 + -- that all your machines be little-endian, for example all Intel x86
   2.201 +    chips or all VAXen.  It gives wrong results on big-endian machines.
   2.202 +--------------------------------------------------------------------
   2.203 +*/
   2.204 +
   2.205 +ub8 hash3( k, length, level)
   2.206 +register ub1 *k;        /* the key */
   2.207 +register ub8  length;   /* the length of the key */
   2.208 +register ub8  level;    /* the previous hash, or an arbitrary value */
   2.209 +{
   2.210 +  register ub8 a,b,c,len;
   2.211 +
   2.212 +  /* Set up the internal state */
   2.213 +  len = length;
   2.214 +  a = b = level;                         /* the previous hash value */
   2.215 +  c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
   2.216 +
   2.217 +  /*---------------------------------------- handle most of the key */
   2.218 +  if (((size_t)k)&7)
   2.219 +  {
   2.220 +    while (len >= 24)
   2.221 +    {
   2.222 +      a += (k[0]        +((ub8)k[ 1]<< 8)+((ub8)k[ 2]<<16)+((ub8)k[ 3]<<24)
   2.223 +       +((ub8)k[4 ]<<32)+((ub8)k[ 5]<<40)+((ub8)k[ 6]<<48)+((ub8)k[ 7]<<56));
   2.224 +      b += (k[8]        +((ub8)k[ 9]<< 8)+((ub8)k[10]<<16)+((ub8)k[11]<<24)
   2.225 +       +((ub8)k[12]<<32)+((ub8)k[13]<<40)+((ub8)k[14]<<48)+((ub8)k[15]<<56));
   2.226 +      c += (k[16]       +((ub8)k[17]<< 8)+((ub8)k[18]<<16)+((ub8)k[19]<<24)
   2.227 +       +((ub8)k[20]<<32)+((ub8)k[21]<<40)+((ub8)k[22]<<48)+((ub8)k[23]<<56));
   2.228 +      mix64(a,b,c);
   2.229 +      k += 24; len -= 24;
   2.230 +    }
   2.231 +  }
   2.232 +  else
   2.233 +  {
   2.234 +    while (len >= 24)    /* aligned */
   2.235 +    {
   2.236 +      a += *(ub8 *)(k+0);
   2.237 +      b += *(ub8 *)(k+8);
   2.238 +      c += *(ub8 *)(k+16);
   2.239 +      mix64(a,b,c);
   2.240 +      k += 24; len -= 24;
   2.241 +    }
   2.242 +  }
   2.243 +
   2.244 +  /*------------------------------------- handle the last 23 bytes */
   2.245 +  c += length;
   2.246 +  switch(len)              /* all the case statements fall through */
   2.247 +  {
   2.248 +  case 23: c+=((ub8)k[22]<<56);
   2.249 +  case 22: c+=((ub8)k[21]<<48);
   2.250 +  case 21: c+=((ub8)k[20]<<40);
   2.251 +  case 20: c+=((ub8)k[19]<<32);
   2.252 +  case 19: c+=((ub8)k[18]<<24);
   2.253 +  case 18: c+=((ub8)k[17]<<16);
   2.254 +  case 17: c+=((ub8)k[16]<<8);
   2.255 +    /* the first byte of c is reserved for the length */
   2.256 +  case 16: b+=((ub8)k[15]<<56);
   2.257 +  case 15: b+=((ub8)k[14]<<48);
   2.258 +  case 14: b+=((ub8)k[13]<<40);
   2.259 +  case 13: b+=((ub8)k[12]<<32);
   2.260 +  case 12: b+=((ub8)k[11]<<24);
   2.261 +  case 11: b+=((ub8)k[10]<<16);
   2.262 +  case 10: b+=((ub8)k[ 9]<<8);
   2.263 +  case  9: b+=((ub8)k[ 8]);
   2.264 +  case  8: a+=((ub8)k[ 7]<<56);
   2.265 +  case  7: a+=((ub8)k[ 6]<<48);
   2.266 +  case  6: a+=((ub8)k[ 5]<<40);
   2.267 +  case  5: a+=((ub8)k[ 4]<<32);
   2.268 +  case  4: a+=((ub8)k[ 3]<<24);
   2.269 +  case  3: a+=((ub8)k[ 2]<<16);
   2.270 +  case  2: a+=((ub8)k[ 1]<<8);
   2.271 +  case  1: a+=((ub8)k[ 0]);
   2.272 +    /* case 0: nothing left to add */
   2.273 +  }
   2.274 +  mix64(a,b,c);
   2.275 +  /*-------------------------------------------- report the result */
   2.276 +  return c;
   2.277 +}
   2.278 +
   2.279 +#ifdef SELF_TEST
   2.280 +
   2.281 +/* used for timings */
   2.282 +void driver1()
   2.283 +{
   2.284 +  ub8 buf[256];
   2.285 +  ub8 i;
   2.286 +  ub8 h=0;
   2.287 +
   2.288 +  for (i=0; i<256; ++i) 
   2.289 +  {
   2.290 +    h = hash(buf,i,h);
   2.291 +  }
   2.292 +}
   2.293 +
   2.294 +/* check that every input bit changes every output bit half the time */
   2.295 +#define HASHSTATE 1
   2.296 +#define HASHLEN   1
   2.297 +#define MAXPAIR 80
   2.298 +#define MAXLEN 5
   2.299 +void driver2()
   2.300 +{
   2.301 +  ub1 qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
   2.302 +  ub8 c[HASHSTATE], d[HASHSTATE], i, j=0, k, l, m, z;
   2.303 +  ub8 e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
   2.304 +  ub8 x[HASHSTATE],y[HASHSTATE];
   2.305 +  ub8 hlen;
   2.306 +
   2.307 +  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
   2.308 +  for (hlen=0; hlen < MAXLEN; ++hlen)
   2.309 +  {
   2.310 +    z=0;
   2.311 +    for (i=0; i<hlen; ++i)  /*----------------------- for each byte, */
   2.312 +    {
   2.313 +      for (j=0; j<8; ++j)   /*------------------------ for each bit, */
   2.314 +      {
   2.315 +	for (m=0; m<8; ++m) /*-------- for serveral possible levels, */
   2.316 +	{
   2.317 +	  for (l=0; l<HASHSTATE; ++l) e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((ub8)0);
   2.318 +
   2.319 +      	  /*---- check that every input bit affects every output bit */
   2.320 +	  for (k=0; k<MAXPAIR; k+=2)
   2.321 +	  { 
   2.322 +	    ub8 finished=1;
   2.323 +	    /* keys have one bit different */
   2.324 +	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (ub1)0;}
   2.325 +	    /* have a and b be two keys differing in only one bit */
   2.326 +	    a[i] ^= (k<<j);
   2.327 +	    a[i] ^= (k>>(8-j));
   2.328 +	     c[0] = hash(a, hlen, m);
   2.329 +	    b[i] ^= ((k+1)<<j);
   2.330 +	    b[i] ^= ((k+1)>>(8-j));
   2.331 +	     d[0] = hash(b, hlen, m);
   2.332 +	    /* check every bit is 1, 0, set, and not set at least once */
   2.333 +	    for (l=0; l<HASHSTATE; ++l)
   2.334 +	    {
   2.335 +	      e[l] &= (c[l]^d[l]);
   2.336 +	      f[l] &= ~(c[l]^d[l]);
   2.337 +	      g[l] &= c[l];
   2.338 +	      h[l] &= ~c[l];
   2.339 +	      x[l] &= d[l];
   2.340 +	      y[l] &= ~d[l];
   2.341 +	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
   2.342 +	    }
   2.343 +	    if (finished) break;
   2.344 +	  }
   2.345 +	  if (k>z) z=k;
   2.346 +	  if (k==MAXPAIR) 
   2.347 +	  {
   2.348 +
   2.349 +	     printf("Some bit didn't change: ");
   2.350 +	     printf("%.8lx %.8lx %.8lx %.8lx %.8lx %.8lx  ",
   2.351 +	            e[0],f[0],g[0],h[0],x[0],y[0]);
   2.352 +	     printf("i %ld j %ld m %ld len %ld\n",
   2.353 +	            (ub4)i,(ub4)j,(ub4)m,(ub4)hlen);
   2.354 +
   2.355 +	  }
   2.356 +	  if (z==MAXPAIR) goto done;
   2.357 +	}
   2.358 +      }
   2.359 +    }
   2.360 +   done:
   2.361 +    if (z < MAXPAIR)
   2.362 +    {
   2.363 +      printf("Mix success  %2ld bytes  %2ld levels  ",(ub4)i,(ub4)m);
   2.364 +      printf("required  %ld  trials\n",(ub4)(z/2));
   2.365 +    }
   2.366 +  }
   2.367 +  printf("\n");
   2.368 +}
   2.369 +
   2.370 +/* Check for reading beyond the end of the buffer and alignment problems */
   2.371 +void driver3()
   2.372 +{
   2.373 +  ub1 buf[MAXLEN+20], *b;
   2.374 +  ub8 len;
   2.375 +  ub1 q[] = "This is the time for all good men to come to the aid of their country";
   2.376 +  ub1 qq[] = "xThis is the time for all good men to come to the aid of their country";
   2.377 +  ub1 qqq[] = "xxThis is the time for all good men to come to the aid of their country";
   2.378 +  ub1 qqqq[] = "xxxThis is the time for all good men to come to the aid of their country";
   2.379 +  ub1 o[] = "xxxxThis is the time for all good men to come to the aid of their country";
   2.380 +  ub1 oo[] = "xxxxxThis is the time for all good men to come to the aid of their country";
   2.381 +  ub1 ooo[] = "xxxxxxThis is the time for all good men to come to the aid of their country";
   2.382 +  ub1 oooo[] = "xxxxxxxThis is the time for all good men to come to the aid of their country";
   2.383 +  ub8 h,i,j,ref,x,y;
   2.384 +
   2.385 +  printf("Endianness.  These should all be the same:\n");
   2.386 +  h = hash(q+0, (ub8)(sizeof(q)-1), (ub8)0);
   2.387 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.388 +  h = hash(qq+1, (ub8)(sizeof(q)-1), (ub8)0);
   2.389 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.390 +  h = hash(qqq+2, (ub8)(sizeof(q)-1), (ub8)0);
   2.391 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.392 +  h = hash(qqqq+3, (ub8)(sizeof(q)-1), (ub8)0);
   2.393 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.394 +  h = hash(o+4, (ub8)(sizeof(q)-1), (ub8)0);
   2.395 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.396 +  h = hash(oo+5, (ub8)(sizeof(q)-1), (ub8)0);
   2.397 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.398 +  h = hash(ooo+6, (ub8)(sizeof(q)-1), (ub8)0);
   2.399 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.400 +  h = hash(oooo+7, (ub8)(sizeof(q)-1), (ub8)0);
   2.401 +  printf("%.8lx%.8lx\n", (ub4)h, (ub4)(h>>32));
   2.402 +  printf("\n");
   2.403 +  for (h=0, b=buf+1; h<8; ++h, ++b)
   2.404 +  {
   2.405 +    for (i=0; i<MAXLEN; ++i)
   2.406 +    {
   2.407 +      len = i;
   2.408 +      for (j=0; j<i; ++j) *(b+j)=0;
   2.409 +
   2.410 +      /* these should all be equal */
   2.411 +      ref = hash(b, len, (ub8)1);
   2.412 +      *(b+i)=(ub1)~0;
   2.413 +      *(b-1)=(ub1)~0;
   2.414 +      x = hash(b, len, (ub8)1);
   2.415 +      y = hash(b, len, (ub8)1);
   2.416 +      if ((ref != x) || (ref != y)) 
   2.417 +      {
   2.418 +
   2.419 +	printf("alignment error: %.8lx %.8lx %.8lx %ld %ld\n",ref,x,y,h,i);
   2.420 +
   2.421 +      }
   2.422 +    }
   2.423 +  }
   2.424 +}
   2.425 +
   2.426 +/* check for problems with nulls */
   2.427 + void driver4()
   2.428 +{
   2.429 +  ub1 buf[1];
   2.430 +  ub8 h,i,state[HASHSTATE];
   2.431 +
   2.432 +
   2.433 +  buf[0] = ~0;
   2.434 +  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
   2.435 +  printf("These should all be different\n");
   2.436 +  for (i=0, h=0; i<8; ++i)
   2.437 +  {
   2.438 +    h = hash(buf, (ub8)0, h);
   2.439 +    printf("%2ld  0-byte strings, hash is  %.8lx%.8lx\n", (ub4)i,
   2.440 +      (ub4)h,(ub4)(h>>32));
   2.441 +  }
   2.442 +}
   2.443 +
   2.444 +
   2.445 +int test_lookup8()
   2.446 +{
   2.447 +  driver1();   /* test that the key is hashed: used for timings */
   2.448 +  driver2();   /* test that whole key is hashed thoroughly */
   2.449 +  driver3();   /* test that nothing but the key is hashed */
   2.450 +  driver4();   /* test hashing multiple buffers (all buffers are null) */
   2.451 +  return 1;
   2.452 +}
   2.453 +
   2.454 +#endif  /* SELF_TEST */