rpm  4.5
lookup3.c
Go to the documentation of this file.
1 /* -------------------------------------------------------------------- */
2 /*
3  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4  *
5  * These are functions for producing 32-bit hashes for hash table lookup.
6  * jlu32w(), jlu32l(), jlu32lpair(), jlu32b(), _JLU3_MIX(), and _JLU3_FINAL()
7  * are externally useful functions. Routines to test the hash are included
8  * if SELF_TEST is defined. You can use this free for any purpose. It's in
9  * the public domain. It has no warranty.
10  *
11  * You probably want to use jlu32l(). jlu32l() and jlu32b()
12  * hash byte arrays. jlu32l() is is faster than jlu32b() on
13  * little-endian machines. Intel and AMD are little-endian machines.
14  * On second thought, you probably want jlu32lpair(), which is identical to
15  * jlu32l() except it returns two 32-bit hashes for the price of one.
16  * You could implement jlu32bpair() if you wanted but I haven't bothered here.
17  *
18  * If you want to find a hash of, say, exactly 7 integers, do
19  * a = i1; b = i2; c = i3;
20  * _JLU3_MIX(a,b,c);
21  * a += i4; b += i5; c += i6;
22  * _JLU3_MIX(a,b,c);
23  * a += i7;
24  * _JLU3_FINAL(a,b,c);
25  * then use c as the hash value. If you have a variable size array of
26  * 4-byte integers to hash, use jlu32w(). If you have a byte array (like
27  * a character string), use jlu32l(). If you have several byte arrays, or
28  * a mix of things, see the comments above jlu32l().
29  *
30  * Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31  * then mix those integers. This is fast (you can do a lot more thorough
32  * mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33  * on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34 */
35 /* -------------------------------------------------------------------- */
36 
37 #include "system.h"
38 #include "debug.h"
39 
40 #if defined(_JLU3_SELFTEST)
41 # define _JLU3_jlu32w 1
42 # define _JLU3_jlu32l 1
43 # define _JLU3_jlu32lpair 1
44 # define _JLU3_jlu32b 1
45 #endif
46 
47 /*@-redef@*/
48 /*@unchecked@*/
49 static const union _dbswap {
50  const uint32_t ui;
51  const unsigned char uc[4];
52 } endian = { .ui = 0x11223344 };
53 # define HASH_LITTLE_ENDIAN (endian.uc[0] == 0x44)
54 # define HASH_BIG_ENDIAN (endian.uc[0] == 0x11)
55 /*@=redef@*/
56 
57 #ifndef ROTL32
58 # define ROTL32(x, s) (((x) << (s)) | ((x) >> (32 - (s))))
59 #endif
60 
61 /* NOTE: The _size parameter should be in bytes. */
62 #define _JLU3_INIT(_h, _size) (0xdeadbeef + ((uint32_t)(_size)) + (_h))
63 
64 /* -------------------------------------------------------------------- */
65 /*
66  * _JLU3_MIX -- mix 3 32-bit values reversibly.
67  *
68  * This is reversible, so any information in (a,b,c) before _JLU3_MIX() is
69  * still in (a,b,c) after _JLU3_MIX().
70  *
71  * If four pairs of (a,b,c) inputs are run through _JLU3_MIX(), or through
72  * _JLU3_MIX() in reverse, there are at least 32 bits of the output that
73  * are sometimes the same for one pair and different for another pair.
74  * This was tested for:
75  * * pairs that differed by one bit, by two bits, in any combination
76  * of top bits of (a,b,c), or in any combination of bottom bits of
77  * (a,b,c).
78  * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
79  * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
80  * is commonly produced by subtraction) look like a single 1-bit
81  * difference.
82  * * the base values were pseudorandom, all zero but one bit set, or
83  * all zero plus a counter that starts at zero.
84  *
85  * Some k values for my "a-=c; a^=ROTL32(c,k); c+=b;" arrangement that
86  * satisfy this are
87  * 4 6 8 16 19 4
88  * 9 15 3 18 27 15
89  * 14 9 3 7 17 3
90  * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
91  * for "differ" defined as + with a one-bit base and a two-bit delta. I
92  * used http://burtleburtle.net/bob/hash/avalanche.html to choose
93  * the operations, constants, and arrangements of the variables.
94  *
95  * This does not achieve avalanche. There are input bits of (a,b,c)
96  * that fail to affect some output bits of (a,b,c), especially of a. The
97  * most thoroughly mixed value is c, but it doesn't really even achieve
98  * avalanche in c.
99  *
100  * This allows some parallelism. Read-after-writes are good at doubling
101  * the number of bits affected, so the goal of mixing pulls in the opposite
102  * direction as the goal of parallelism. I did what I could. Rotates
103  * seem to cost as much as shifts on every machine I could lay my hands
104  * on, and rotates are much kinder to the top and bottom bits, so I used
105  * rotates.
106  */
107 /* -------------------------------------------------------------------- */
108 #define _JLU3_MIX(a,b,c) \
109 { \
110  a -= c; a ^= ROTL32(c, 4); c += b; \
111  b -= a; b ^= ROTL32(a, 6); a += c; \
112  c -= b; c ^= ROTL32(b, 8); b += a; \
113  a -= c; a ^= ROTL32(c,16); c += b; \
114  b -= a; b ^= ROTL32(a,19); a += c; \
115  c -= b; c ^= ROTL32(b, 4); b += a; \
116 }
117 
118 /* -------------------------------------------------------------------- */
142 /* -------------------------------------------------------------------- */
143 #define _JLU3_FINAL(a,b,c) \
144 { \
145  c ^= b; c -= ROTL32(b,14); \
146  a ^= c; a -= ROTL32(c,11); \
147  b ^= a; b -= ROTL32(a,25); \
148  c ^= b; c -= ROTL32(b,16); \
149  a ^= c; a -= ROTL32(c,4); \
150  b ^= a; b -= ROTL32(a,14); \
151  c ^= b; c -= ROTL32(b,24); \
152 }
153 
154 #if defined(_JLU3_jlu32w)
155 uint32_t jlu32w(uint32_t h, /*@null@*/ const uint32_t *k, size_t size)
156  /*@*/;
157 /* -------------------------------------------------------------------- */
174 /* -------------------------------------------------------------------- */
175 uint32_t jlu32w(uint32_t h, const uint32_t *k, size_t size)
176 {
177  uint32_t a = _JLU3_INIT(h, (size * sizeof(*k)));
178  uint32_t b = a;
179  uint32_t c = a;
180 
181  if (k == NULL)
182  goto exit;
183 
184  /*----------------------------------------------- handle most of the key */
185  while (size > 3) {
186  a += k[0];
187  b += k[1];
188  c += k[2];
189  _JLU3_MIX(a,b,c);
190  size -= 3;
191  k += 3;
192  }
193 
194  /*----------------------------------------- handle the last 3 uint32_t's */
195  switch (size) {
196  case 3 : c+=k[2];
197  case 2 : b+=k[1];
198  case 1 : a+=k[0];
199  _JLU3_FINAL(a,b,c);
200  /*@fallthrough@*/
201  case 0:
202  break;
203  }
204  /*---------------------------------------------------- report the result */
205 exit:
206  return c;
207 }
208 #endif /* defined(_JLU3_jlu32w) */
209 
210 #if defined(_JLU3_jlu32l)
211 uint32_t jlu32l(uint32_t h, const void *key, size_t size)
212  /*@*/;
213 /* -------------------------------------------------------------------- */
214 /*
215  * jlu32l() -- hash a variable-length key into a 32-bit value
216  * h : can be any 4-byte value
217  * k : the key (the unaligned variable-length array of bytes)
218  * size : the size of the key, counting by bytes
219  * Returns a 32-bit value. Every bit of the key affects every bit of
220  * the return value. Two keys differing by one or two bits will have
221  * totally different hash values.
222  *
223  * The best hash table sizes are powers of 2. There is no need to do
224  * mod a prime (mod is sooo slow!). If you need less than 32 bits,
225  * use a bitmask. For example, if you need only 10 bits, do
226  * h = (h & hashmask(10));
227  * In which case, the hash table should have hashsize(10) elements.
228  *
229  * If you are hashing n strings (uint8_t **)k, do it like this:
230  * for (i=0, h=0; i<n; ++i) h = jlu32l(h, k[i], len[i]);
231  *
232  * By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
233  * code any way you wish, private, educational, or commercial. It's free.
234  *
235  * Use for hash table lookup, or anything where one collision in 2^^32 is
236  * acceptable. Do NOT use for cryptographic purposes.
237  *
238  * @param h the previous hash, or an arbitrary value
239  * @param *k the key, an array of uint8_t values
240  * @param size the size of the key
241  * @return the lookup3 hash
242  */
243 /* -------------------------------------------------------------------- */
244 uint32_t jlu32l(uint32_t h, const void *key, size_t size)
245 {
246  union { const void *ptr; size_t i; } u;
247  uint32_t a = _JLU3_INIT(h, size);
248  uint32_t b = a;
249  uint32_t c = a;
250 
251  if (key == NULL)
252  goto exit;
253 
254  u.ptr = key;
255  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
256  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
257 #ifdef VALGRIND
258  const uint8_t *k8;
259 #endif
260 
261  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
262  while (size > 12) {
263  a += k[0];
264  b += k[1];
265  c += k[2];
266  _JLU3_MIX(a,b,c);
267  size -= 12;
268  k += 3;
269  }
270 
271  /*------------------------- handle the last (probably partial) block */
272  /*
273  * "k[2]&0xffffff" actually reads beyond the end of the string, but
274  * then masks off the part it's not allowed to read. Because the
275  * string is aligned, the masked-off tail is in the same word as the
276  * rest of the string. Every machine with memory protection I've seen
277  * does it on word boundaries, so is OK with this. But VALGRIND will
278  * still catch it and complain. The masking trick does make the hash
279  * noticably faster for short strings (like English words).
280  */
281 #ifndef VALGRIND
282 
283  switch (size) {
284  case 12: c += k[2]; b+=k[1]; a+=k[0]; break;
285  case 11: c += k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
286  case 10: c += k[2]&0xffff; b+=k[1]; a+=k[0]; break;
287  case 9: c += k[2]&0xff; b+=k[1]; a+=k[0]; break;
288  case 8: b += k[1]; a+=k[0]; break;
289  case 7: b += k[1]&0xffffff; a+=k[0]; break;
290  case 6: b += k[1]&0xffff; a+=k[0]; break;
291  case 5: b += k[1]&0xff; a+=k[0]; break;
292  case 4: a += k[0]; break;
293  case 3: a += k[0]&0xffffff; break;
294  case 2: a += k[0]&0xffff; break;
295  case 1: a += k[0]&0xff; break;
296  case 0: goto exit;
297  }
298 
299 #else /* make valgrind happy */
300 
301  k8 = (const uint8_t *)k;
302  switch (size) {
303  case 12: c += k[2]; b+=k[1]; a+=k[0] break;
304  case 11: c += ((uint32_t)k8[10])<<16; /*@fallthrough@*/
305  case 10: c += ((uint32_t)k8[9])<<8; /*@fallthrough@*/
306  case 9: c += k8[8]; /*@fallthrough@*/
307  case 8: b += k[1]; a+=k[0]; break;
308  case 7: b += ((uint32_t)k8[6])<<16; /*@fallthrough@*/
309  case 6: b += ((uint32_t)k8[5])<<8; /*@fallthrough@*/
310  case 5: b += k8[4]; /*@fallthrough@*/
311  case 4: a += k[0]; break;
312  case 3: a += ((uint32_t)k8[2])<<16; /*@fallthrough@*/
313  case 2: a += ((uint32_t)k8[1])<<8; /*@fallthrough@*/
314  case 1: a += k8[0]; break;
315  case 0: goto exit;
316  }
317 
318 #endif /* !valgrind */
319 
320  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
321  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
322  const uint8_t *k8;
323 
324  /*----------- all but last block: aligned reads and different mixing */
325  while (size > 12) {
326  a += k[0] + (((uint32_t)k[1])<<16);
327  b += k[2] + (((uint32_t)k[3])<<16);
328  c += k[4] + (((uint32_t)k[5])<<16);
329  _JLU3_MIX(a,b,c);
330  size -= 12;
331  k += 6;
332  }
333 
334  /*------------------------- handle the last (probably partial) block */
335  k8 = (const uint8_t *)k;
336  switch (size) {
337  case 12:
338  c += k[4]+(((uint32_t)k[5])<<16);
339  b += k[2]+(((uint32_t)k[3])<<16);
340  a += k[0]+(((uint32_t)k[1])<<16);
341  break;
342  case 11:
343  c += ((uint32_t)k8[10])<<16;
344  /*@fallthrough@*/
345  case 10:
346  c += k[4];
347  b += k[2]+(((uint32_t)k[3])<<16);
348  a += k[0]+(((uint32_t)k[1])<<16);
349  break;
350  case 9:
351  c += k8[8];
352  /*@fallthrough@*/
353  case 8:
354  b += k[2]+(((uint32_t)k[3])<<16);
355  a += k[0]+(((uint32_t)k[1])<<16);
356  break;
357  case 7:
358  b += ((uint32_t)k8[6])<<16;
359  /*@fallthrough@*/
360  case 6:
361  b += k[2];
362  a += k[0]+(((uint32_t)k[1])<<16);
363  break;
364  case 5:
365  b += k8[4];
366  /*@fallthrough@*/
367  case 4:
368  a += k[0]+(((uint32_t)k[1])<<16);
369  break;
370  case 3:
371  a += ((uint32_t)k8[2])<<16;
372  /*@fallthrough@*/
373  case 2:
374  a += k[0];
375  break;
376  case 1:
377  a += k8[0];
378  break;
379  case 0:
380  goto exit;
381  }
382 
383  } else { /* need to read the key one byte at a time */
384  const uint8_t *k = (const uint8_t *)key;
385 
386  /*----------- all but the last block: affect some 32 bits of (a,b,c) */
387  while (size > 12) {
388  a += k[0];
389  a += ((uint32_t)k[1])<<8;
390  a += ((uint32_t)k[2])<<16;
391  a += ((uint32_t)k[3])<<24;
392  b += k[4];
393  b += ((uint32_t)k[5])<<8;
394  b += ((uint32_t)k[6])<<16;
395  b += ((uint32_t)k[7])<<24;
396  c += k[8];
397  c += ((uint32_t)k[9])<<8;
398  c += ((uint32_t)k[10])<<16;
399  c += ((uint32_t)k[11])<<24;
400  _JLU3_MIX(a,b,c);
401  size -= 12;
402  k += 12;
403  }
404 
405  /*---------------------------- last block: affect all 32 bits of (c) */
406  switch (size) {
407  case 12: c += ((uint32_t)k[11])<<24; /*@fallthrough@*/
408  case 11: c += ((uint32_t)k[10])<<16; /*@fallthrough@*/
409  case 10: c += ((uint32_t)k[9])<<8; /*@fallthrough@*/
410  case 9: c += k[8]; /*@fallthrough@*/
411  case 8: b += ((uint32_t)k[7])<<24; /*@fallthrough@*/
412  case 7: b += ((uint32_t)k[6])<<16; /*@fallthrough@*/
413  case 6: b += ((uint32_t)k[5])<<8; /*@fallthrough@*/
414  case 5: b += k[4]; /*@fallthrough@*/
415  case 4: a += ((uint32_t)k[3])<<24; /*@fallthrough@*/
416  case 3: a += ((uint32_t)k[2])<<16; /*@fallthrough@*/
417  case 2: a += ((uint32_t)k[1])<<8; /*@fallthrough@*/
418  case 1: a += k[0];
419  break;
420  case 0:
421  goto exit;
422  }
423  }
424 
425  _JLU3_FINAL(a,b,c);
426 
427 exit:
428  return c;
429 }
430 #endif /* defined(_JLU3_jlu32l) */
431 
432 #if defined(_JLU3_jlu32lpair)
433 void jlu32lpair(/*@null@*/ const void *key, size_t size,
434  uint32_t *pc, uint32_t *pb)
435  /*@modifies *pc, *pb@*/;
452 void jlu32lpair(const void *key, size_t size, uint32_t *pc, uint32_t *pb)
453 {
454  union { const void *ptr; size_t i; } u;
455  uint32_t a = _JLU3_INIT(*pc, size);
456  uint32_t b = a;
457  uint32_t c = a;
458 
459  if (key == NULL)
460  goto exit;
461 
462  c += *pb; /* Add the secondary hash. */
463 
464  u.ptr = key;
465  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
466  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
467 #ifdef VALGRIND
468  const uint8_t *k8;
469 #endif
470 
471  /*-- all but last block: aligned reads and affect 32 bits of (a,b,c) */
472  while (size > 12) {
473  a += k[0];
474  b += k[1];
475  c += k[2];
476  _JLU3_MIX(a,b,c);
477  size -= 12;
478  k += 3;
479  }
480  /*------------------------- handle the last (probably partial) block */
481  /*
482  * "k[2]&0xffffff" actually reads beyond the end of the string, but
483  * then masks off the part it's not allowed to read. Because the
484  * string is aligned, the masked-off tail is in the same word as the
485  * rest of the string. Every machine with memory protection I've seen
486  * does it on word boundaries, so is OK with this. But VALGRIND will
487  * still catch it and complain. The masking trick does make the hash
488  * noticably faster for short strings (like English words).
489  */
490 #ifndef VALGRIND
491 
492  switch (size) {
493  case 12: c += k[2]; b+=k[1]; a+=k[0]; break;
494  case 11: c += k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
495  case 10: c += k[2]&0xffff; b+=k[1]; a+=k[0]; break;
496  case 9: c += k[2]&0xff; b+=k[1]; a+=k[0]; break;
497  case 8: b += k[1]; a+=k[0]; break;
498  case 7: b += k[1]&0xffffff; a+=k[0]; break;
499  case 6: b += k[1]&0xffff; a+=k[0]; break;
500  case 5: b += k[1]&0xff; a+=k[0]; break;
501  case 4: a += k[0]; break;
502  case 3: a += k[0]&0xffffff; break;
503  case 2: a += k[0]&0xffff; break;
504  case 1: a += k[0]&0xff; break;
505  case 0: goto exit;
506  }
507 
508 #else /* make valgrind happy */
509 
510  k8 = (const uint8_t *)k;
511  switch (size) {
512  case 12: c += k[2]; b+=k[1]; a+=k[0]; break;
513  case 11: c += ((uint32_t)k8[10])<<16; /*@fallthrough@*/
514  case 10: c += ((uint32_t)k8[9])<<8; /*@fallthrough@*/
515  case 9: c += k8[8]; /*@fallthrough@*/
516  case 8: b += k[1]; a+=k[0]; break;
517  case 7: b += ((uint32_t)k8[6])<<16; /*@fallthrough@*/
518  case 6: b += ((uint32_t)k8[5])<<8; /*@fallthrough@*/
519  case 5: b += k8[4]; /*@fallthrough@*/
520  case 4: a += k[0]; break;
521  case 3: a += ((uint32_t)k8[2])<<16; /*@fallthrough@*/
522  case 2: a += ((uint32_t)k8[1])<<8; /*@fallthrough@*/
523  case 1: a += k8[0]; break;
524  case 0: goto exit;
525  }
526 
527 #endif /* !valgrind */
528 
529  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
530  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
531  const uint8_t *k8;
532 
533  /*----------- all but last block: aligned reads and different mixing */
534  while (size > 12) {
535  a += k[0] + (((uint32_t)k[1])<<16);
536  b += k[2] + (((uint32_t)k[3])<<16);
537  c += k[4] + (((uint32_t)k[5])<<16);
538  _JLU3_MIX(a,b,c);
539  size -= 12;
540  k += 6;
541  }
542 
543  /*------------------------- handle the last (probably partial) block */
544  k8 = (const uint8_t *)k;
545  switch (size) {
546  case 12:
547  c += k[4]+(((uint32_t)k[5])<<16);
548  b += k[2]+(((uint32_t)k[3])<<16);
549  a += k[0]+(((uint32_t)k[1])<<16);
550  break;
551  case 11:
552  c += ((uint32_t)k8[10])<<16;
553  /*@fallthrough@*/
554  case 10:
555  c += k[4];
556  b += k[2]+(((uint32_t)k[3])<<16);
557  a += k[0]+(((uint32_t)k[1])<<16);
558  break;
559  case 9:
560  c += k8[8];
561  /*@fallthrough@*/
562  case 8:
563  b += k[2]+(((uint32_t)k[3])<<16);
564  a += k[0]+(((uint32_t)k[1])<<16);
565  break;
566  case 7:
567  b += ((uint32_t)k8[6])<<16;
568  /*@fallthrough@*/
569  case 6:
570  b += k[2];
571  a += k[0]+(((uint32_t)k[1])<<16);
572  break;
573  case 5:
574  b += k8[4];
575  /*@fallthrough@*/
576  case 4:
577  a += k[0]+(((uint32_t)k[1])<<16);
578  break;
579  case 3:
580  a += ((uint32_t)k8[2])<<16;
581  /*@fallthrough@*/
582  case 2:
583  a += k[0];
584  break;
585  case 1:
586  a += k8[0];
587  break;
588  case 0:
589  goto exit;
590  }
591 
592  } else { /* need to read the key one byte at a time */
593  const uint8_t *k = (const uint8_t *)key;
594 
595  /*----------- all but the last block: affect some 32 bits of (a,b,c) */
596  while (size > 12) {
597  a += k[0];
598  a += ((uint32_t)k[1])<<8;
599  a += ((uint32_t)k[2])<<16;
600  a += ((uint32_t)k[3])<<24;
601  b += k[4];
602  b += ((uint32_t)k[5])<<8;
603  b += ((uint32_t)k[6])<<16;
604  b += ((uint32_t)k[7])<<24;
605  c += k[8];
606  c += ((uint32_t)k[9])<<8;
607  c += ((uint32_t)k[10])<<16;
608  c += ((uint32_t)k[11])<<24;
609  _JLU3_MIX(a,b,c);
610  size -= 12;
611  k += 12;
612  }
613 
614  /*---------------------------- last block: affect all 32 bits of (c) */
615  switch (size) {
616  case 12: c += ((uint32_t)k[11])<<24; /*@fallthrough@*/
617  case 11: c += ((uint32_t)k[10])<<16; /*@fallthrough@*/
618  case 10: c += ((uint32_t)k[9])<<8; /*@fallthrough@*/
619  case 9: c += k[8]; /*@fallthrough@*/
620  case 8: b += ((uint32_t)k[7])<<24; /*@fallthrough@*/
621  case 7: b += ((uint32_t)k[6])<<16; /*@fallthrough@*/
622  case 6: b += ((uint32_t)k[5])<<8; /*@fallthrough@*/
623  case 5: b += k[4]; /*@fallthrough@*/
624  case 4: a += ((uint32_t)k[3])<<24; /*@fallthrough@*/
625  case 3: a += ((uint32_t)k[2])<<16; /*@fallthrough@*/
626  case 2: a += ((uint32_t)k[1])<<8; /*@fallthrough@*/
627  case 1: a += k[0]; /*@fallthrough@*/
628  break;
629  case 0:
630  goto exit;
631  }
632  }
633 
634  _JLU3_FINAL(a,b,c);
635 
636 exit:
637  *pc = c;
638  *pb = b;
639  return;
640 }
641 #endif /* defined(_JLU3_jlu32lpair) */
642 
643 #if defined(_JLU3_jlu32b)
644 uint32_t jlu32b(uint32_t h, /*@null@*/ const void *key, size_t size)
645  /*@*/;
646 /*
647  * jlu32b():
648  * This is the same as jlu32w() on big-endian machines. It is different
649  * from jlu32l() on all machines. jlu32b() takes advantage of
650  * big-endian byte ordering.
651  *
652  * @param h the previous hash, or an arbitrary value
653  * @param *k the key, an array of uint8_t values
654  * @param size the size of the key
655  * @return the lookup3 hash
656  */
657 uint32_t jlu32b(uint32_t h, const void *key, size_t size)
658 {
659  union { const void *ptr; size_t i; } u;
660  uint32_t a = _JLU3_INIT(h, size);
661  uint32_t b = a;
662  uint32_t c = a;
663 
664  if (key == NULL)
665  return h;
666 
667  u.ptr = key;
668  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
669  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
670 #ifdef VALGRIND
671  const uint8_t *k8;
672 #endif
673 
674  /*-- all but last block: aligned reads and affect 32 bits of (a,b,c) */
675  while (size > 12) {
676  a += k[0];
677  b += k[1];
678  c += k[2];
679  _JLU3_MIX(a,b,c);
680  size -= 12;
681  k += 3;
682  }
683 
684  /*------------------------- handle the last (probably partial) block */
685  /*
686  * "k[2]<<8" actually reads beyond the end of the string, but
687  * then shifts out the part it's not allowed to read. Because the
688  * string is aligned, the illegal read is in the same word as the
689  * rest of the string. Every machine with memory protection I've seen
690  * does it on word boundaries, so is OK with this. But VALGRIND will
691  * still catch it and complain. The masking trick does make the hash
692  * noticably faster for short strings (like English words).
693  */
694 #ifndef VALGRIND
695 
696  switch (size) {
697  case 12: c += k[2]; b+=k[1]; a+=k[0]; break;
698  case 11: c += k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
699  case 10: c += k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
700  case 9: c += k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
701  case 8: b += k[1]; a+=k[0]; break;
702  case 7: b += k[1]&0xffffff00; a+=k[0]; break;
703  case 6: b += k[1]&0xffff0000; a+=k[0]; break;
704  case 5: b += k[1]&0xff000000; a+=k[0]; break;
705  case 4: a += k[0]; break;
706  case 3: a += k[0]&0xffffff00; break;
707  case 2: a += k[0]&0xffff0000; break;
708  case 1: a += k[0]&0xff000000; break;
709  case 0: goto exit;
710  }
711 
712 #else /* make valgrind happy */
713 
714  k8 = (const uint8_t *)k;
715  switch (size) { /* all the case statements fall through */
716  case 12: c += k[2]; b+=k[1]; a+=k[0]; break;
717  case 11: c += ((uint32_t)k8[10])<<8; /*@fallthrough@*/
718  case 10: c += ((uint32_t)k8[9])<<16; /*@fallthrough@*/
719  case 9: c += ((uint32_t)k8[8])<<24; /*@fallthrough@*/
720  case 8: b += k[1]; a+=k[0]; break;
721  case 7: b += ((uint32_t)k8[6])<<8; /*@fallthrough@*/
722  case 6: b += ((uint32_t)k8[5])<<16; /*@fallthrough@*/
723  case 5: b += ((uint32_t)k8[4])<<24; /*@fallthrough@*/
724  case 4: a += k[0]; break;
725  case 3: a += ((uint32_t)k8[2])<<8; /*@fallthrough@*/
726  case 2: a += ((uint32_t)k8[1])<<16; /*@fallthrough@*/
727  case 1: a += ((uint32_t)k8[0])<<24; break;
728  case 0: goto exit;
729  }
730 
731 #endif /* !VALGRIND */
732 
733  } else { /* need to read the key one byte at a time */
734  const uint8_t *k = (const uint8_t *)key;
735 
736  /*----------- all but the last block: affect some 32 bits of (a,b,c) */
737  while (size > 12) {
738  a += ((uint32_t)k[0])<<24;
739  a += ((uint32_t)k[1])<<16;
740  a += ((uint32_t)k[2])<<8;
741  a += ((uint32_t)k[3]);
742  b += ((uint32_t)k[4])<<24;
743  b += ((uint32_t)k[5])<<16;
744  b += ((uint32_t)k[6])<<8;
745  b += ((uint32_t)k[7]);
746  c += ((uint32_t)k[8])<<24;
747  c += ((uint32_t)k[9])<<16;
748  c += ((uint32_t)k[10])<<8;
749  c += ((uint32_t)k[11]);
750  _JLU3_MIX(a,b,c);
751  size -= 12;
752  k += 12;
753  }
754 
755  /*---------------------------- last block: affect all 32 bits of (c) */
756  switch (size) { /* all the case statements fall through */
757  case 12: c += k[11]; /*@fallthrough@*/
758  case 11: c += ((uint32_t)k[10])<<8; /*@fallthrough@*/
759  case 10: c += ((uint32_t)k[9])<<16; /*@fallthrough@*/
760  case 9: c += ((uint32_t)k[8])<<24; /*@fallthrough@*/
761  case 8: b += k[7]; /*@fallthrough@*/
762  case 7: b += ((uint32_t)k[6])<<8; /*@fallthrough@*/
763  case 6: b += ((uint32_t)k[5])<<16; /*@fallthrough@*/
764  case 5: b += ((uint32_t)k[4])<<24; /*@fallthrough@*/
765  case 4: a += k[3]; /*@fallthrough@*/
766  case 3: a += ((uint32_t)k[2])<<8; /*@fallthrough@*/
767  case 2: a += ((uint32_t)k[1])<<16; /*@fallthrough@*/
768  case 1: a += ((uint32_t)k[0])<<24; /*@fallthrough@*/
769  break;
770  case 0:
771  goto exit;
772  }
773  }
774 
775  _JLU3_FINAL(a,b,c);
776 
777 exit:
778  return c;
779 }
780 #endif /* defined(_JLU3_jlu32b) */
781 
782 #if defined(_JLU3_SELFTEST)
783 
784 /* used for timings */
785 static void driver1(void)
786  /*@*/
787 {
788  uint8_t buf[256];
789  uint32_t i;
790  uint32_t h=0;
791  time_t a,z;
792 
793  time(&a);
794  for (i=0; i<256; ++i) buf[i] = 'x';
795  for (i=0; i<1; ++i) {
796  h = jlu32l(h, &buf[0], sizeof(buf[0]));
797  }
798  time(&z);
799  if (z-a > 0) printf("time %d %.8x\n", (int)(z-a), h);
800 }
801 
802 /* check that every input bit changes every output bit half the time */
803 #define HASHSTATE 1
804 #define HASHLEN 1
805 #define MAXPAIR 60
806 #define MAXLEN 70
807 static void driver2(void)
808  /*@*/
809 {
810  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
811  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
812  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
813  uint32_t x[HASHSTATE],y[HASHSTATE];
814  uint32_t hlen;
815 
816  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
817  for (hlen=0; hlen < MAXLEN; ++hlen) {
818  z=0;
819  for (i=0; i<hlen; ++i) { /*-------------- for each input byte, */
820  for (j=0; j<8; ++j) { /*--------------- for each input bit, */
821  for (m=1; m<8; ++m) { /*--- for serveral possible initvals, */
822  for (l=0; l<HASHSTATE; ++l)
823  e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
824 
825  /* check that every output bit is affected by that input bit */
826  for (k=0; k<MAXPAIR; k+=2) {
827  uint32_t finished=1;
828  /* keys have one bit different */
829  for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
830  /* have a and b be two keys differing in only one bit */
831  a[i] ^= (k<<j);
832  a[i] ^= (k>>(8-j));
833  c[0] = jlu32l(m, a, hlen);
834  b[i] ^= ((k+1)<<j);
835  b[i] ^= ((k+1)>>(8-j));
836  d[0] = jlu32l(m, b, hlen);
837  /* check every bit is 1, 0, set, and not set at least once */
838  for (l=0; l<HASHSTATE; ++l) {
839  e[l] &= (c[l]^d[l]);
840  f[l] &= ~(c[l]^d[l]);
841  g[l] &= c[l];
842  h[l] &= ~c[l];
843  x[l] &= d[l];
844  y[l] &= ~d[l];
845  if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
846  }
847  if (finished) break;
848  }
849  if (k>z) z=k;
850  if (k == MAXPAIR) {
851  printf("Some bit didn't change: ");
852  printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
853  e[0],f[0],g[0],h[0],x[0],y[0]);
854  printf("i %d j %d m %d len %d\n", i, j, m, hlen);
855  }
856  if (z == MAXPAIR) goto done;
857  }
858  }
859  }
860  done:
861  if (z < MAXPAIR) {
862  printf("Mix success %2d bytes %2d initvals ",i,m);
863  printf("required %d trials\n", z/2);
864  }
865  }
866  printf("\n");
867 }
868 
869 /* Check for reading beyond the end of the buffer and alignment problems */
870 static void driver3(void)
871  /*@*/
872 {
873  uint8_t buf[MAXLEN+20], *b;
874  uint32_t len;
875  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
876  uint32_t h;
877  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
878  uint32_t i;
879  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
880  uint32_t j;
881  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
882  uint32_t ref,x,y;
883  uint8_t *p;
884  uint32_t m = 13;
885 
886  printf("Endianness. These lines should all be the same (for values filled in):\n");
887  printf("%.8x %.8x %.8x\n",
888  jlu32w(m, (const uint32_t *)q, (sizeof(q)-1)/4),
889  jlu32w(m, (const uint32_t *)q, (sizeof(q)-5)/4),
890  jlu32w(m, (const uint32_t *)q, (sizeof(q)-9)/4));
891  p = q;
892  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
893  jlu32l(m, p, sizeof(q)-1), jlu32l(m, p, sizeof(q)-2),
894  jlu32l(m, p, sizeof(q)-3), jlu32l(m, p, sizeof(q)-4),
895  jlu32l(m, p, sizeof(q)-5), jlu32l(m, p, sizeof(q)-6),
896  jlu32l(m, p, sizeof(q)-7), jlu32l(m, p, sizeof(q)-8),
897  jlu32l(m, p, sizeof(q)-9), jlu32l(m, p, sizeof(q)-10),
898  jlu32l(m, p, sizeof(q)-11), jlu32l(m, p, sizeof(q)-12));
899  p = &qq[1];
900  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
901  jlu32l(m, p, sizeof(q)-1), jlu32l(m, p, sizeof(q)-2),
902  jlu32l(m, p, sizeof(q)-3), jlu32l(m, p, sizeof(q)-4),
903  jlu32l(m, p, sizeof(q)-5), jlu32l(m, p, sizeof(q)-6),
904  jlu32l(m, p, sizeof(q)-7), jlu32l(m, p, sizeof(q)-8),
905  jlu32l(m, p, sizeof(q)-9), jlu32l(m, p, sizeof(q)-10),
906  jlu32l(m, p, sizeof(q)-11), jlu32l(m, p, sizeof(q)-12));
907  p = &qqq[2];
908  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
909  jlu32l(m, p, sizeof(q)-1), jlu32l(m, p, sizeof(q)-2),
910  jlu32l(m, p, sizeof(q)-3), jlu32l(m, p, sizeof(q)-4),
911  jlu32l(m, p, sizeof(q)-5), jlu32l(m, p, sizeof(q)-6),
912  jlu32l(m, p, sizeof(q)-7), jlu32l(m, p, sizeof(q)-8),
913  jlu32l(m, p, sizeof(q)-9), jlu32l(m, p, sizeof(q)-10),
914  jlu32l(m, p, sizeof(q)-11), jlu32l(m, p, sizeof(q)-12));
915  p = &qqqq[3];
916  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
917  jlu32l(m, p, sizeof(q)-1), jlu32l(m, p, sizeof(q)-2),
918  jlu32l(m, p, sizeof(q)-3), jlu32l(m, p, sizeof(q)-4),
919  jlu32l(m, p, sizeof(q)-5), jlu32l(m, p, sizeof(q)-6),
920  jlu32l(m, p, sizeof(q)-7), jlu32l(m, p, sizeof(q)-8),
921  jlu32l(m, p, sizeof(q)-9), jlu32l(m, p, sizeof(q)-10),
922  jlu32l(m, p, sizeof(q)-11), jlu32l(m, p, sizeof(q)-12));
923  printf("\n");
924  for (h=0, b=buf+1; h<8; ++h, ++b) {
925  for (i=0; i<MAXLEN; ++i) {
926  len = i;
927  for (j=0; j<i; ++j)
928  *(b+j)=0;
929 
930  /* these should all be equal */
931  m = 1;
932  ref = jlu32l(m, b, len);
933  *(b+i)=(uint8_t)~0;
934  *(b-1)=(uint8_t)~0;
935  x = jlu32l(m, b, len);
936  y = jlu32l(m, b, len);
937  if ((ref != x) || (ref != y))
938  printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y, h, i);
939  }
940  }
941 }
942 
943 /* check for problems with nulls */
944 static void driver4(void)
945  /*@*/
946 {
947  uint8_t buf[1];
948  uint32_t h;
949  uint32_t i;
950  uint32_t state[HASHSTATE];
951 
952  buf[0] = ~0;
953  for (i=0; i<HASHSTATE; ++i)
954  state[i] = 1;
955  printf("These should all be different\n");
956  h = 0;
957  for (i=0; i<8; ++i) {
958  h = jlu32l(h, buf, 0);
959  printf("%2ld 0-byte strings, hash is %.8x\n", (long)i, h);
960  }
961 }
962 
963 
964 int main(int argc, char ** argv)
965 {
966  driver1(); /* test that the key is hashed: used for timings */
967  driver2(); /* test that whole key is hashed thoroughly */
968  driver3(); /* test that nothing but the key is hashed */
969  driver4(); /* test hashing multiple buffers (all buffers are null) */
970  return 1;
971 }
972 
973 #endif /* _JLU3_SELFTEST */