VirtualBox

source: kBuild/trunk/src/kmk/strcache2.c@ 3387

Last change on this file since 3387 was 3140, checked in by bird, 6 years ago

kmk: Merged in changes from GNU make 4.2.1 (2e55f5e4abdc0e38c1d64be703b446695e70b3b6 / https://git.savannah.gnu.org/git/make.git).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.8 KB
Line 
1/* $Id: strcache2.c 3140 2018-03-14 21:28:10Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008-2010 knut st. osmundsen <bird-kBuild-spamx@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild. If not, see <http://www.gnu.org/licenses/>
23 *
24 */
25
26/*******************************************************************************
27* Header Files *
28*******************************************************************************/
29#include "makeint.h"
30#include "strcache2.h"
31
32#include <assert.h>
33
34#include "debug.h"
35
36#ifdef _MSC_VER
37typedef unsigned char uint8_t;
38typedef unsigned short uint16_t;
39typedef unsigned int uint32_t;
40typedef signed char int8_t;
41typedef signed short int16_t;
42typedef signed int int32_t;
43#else
44# include <stdint.h>
45#endif
46
47#ifdef WINDOWS32
48# include <io.h>
49# include <process.h>
50# include <Windows.h>
51# define PARSE_IN_WORKER
52#endif
53
54#ifdef __OS2__
55# include <sys/fmutex.h>
56#endif
57
58#ifdef HAVE_PTHREAD
59# include <pthread.h>
60#endif
61
62
63/*******************************************************************************
64* Defined Constants And Macros *
65*******************************************************************************/
66/* The default size of a memory segment (1MB). */
67#define STRCACHE2_SEG_SIZE (1024U*1024U)
68/* The default hash table shift (hash size give as a power of two). */
69#define STRCACHE2_HASH_SHIFT 16
70/** Does the modding / masking of a hash number into an index. */
71#ifdef STRCACHE2_USE_MASK
72# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
73#else
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
75#endif
76
77# if ( defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
78 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)) \
79 && !defined(GCC_ADDRESS_SANITIZER)
80# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
81# else
82 /* (endian doesn't matter) */
83# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
84 | (((const uint8_t *)(ptr))[1]) )
85# endif
86
87
88/*******************************************************************************
89* Global Variables *
90*******************************************************************************/
91/* List of initialized string caches. */
92static struct strcache2 *strcache_head;
93
94
95#ifndef STRCACHE2_USE_MASK
96/** Finds the closest primary number for power of two value (or something else
97 * useful if not support). */
98MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
99{
100 switch (shift)
101 {
102 case 5: return 31;
103 case 6: return 61;
104 case 7: return 127;
105 case 8: return 251;
106 case 9: return 509;
107 case 10: return 1021;
108 case 11: return 2039;
109 case 12: return 4093;
110 case 13: return 8191;
111 case 14: return 16381;
112 case 15: return 32749;
113 case 16: return 65521;
114 case 17: return 131063;
115 case 18: return 262139;
116 case 19: return 524269;
117 case 20: return 1048573;
118 case 21: return 2097143;
119 case 22: return 4194301;
120 case 23: return 8388593;
121
122 default:
123 assert (0);
124 return (1 << shift) - 1;
125 }
126}
127#endif
128
129/* The following is a bit experiment. It produces longer chains, i.e. worse
130 distribution of the strings in the table, however the actual make
131 performances is better (<time). The explanation is probably that the
132 collisions only really increase for entries that aren't looked up that
133 much and that it actually improoves the situation for those that is. Or
134 that we spend so much less time hashing that it makes up (and more) for
135 the pentalty we suffer from the longer chains and worse distribution.
136
137 XXX: Check how this works out with different path lengths. I suspect it
138 might depend on the length of PATH_ROOT and the depth of the files
139 in the project as well. If it does, this might make matters worse
140 for some and better for others which isn't very cool... */
141
142#if 0
143# define BIG_HASH_SIZE 32 /* kinda fast */
144# define BIG_HASH_HEAD 16
145# define BIG_HASH_TAIL 12
146#elif 0
147# define BIG_HASH_SIZE 68 /* kinda safe */
148# define BIG_HASH_HEAD 24
149# define BIG_HASH_TAIL 24
150#elif 0
151# define BIG_HASH_SIZE 128 /* safe */
152# define BIG_HASH_HEAD 32
153# define BIG_HASH_TAIL 32
154#endif
155
156#ifdef BIG_HASH_SIZE
157/* long string: hash head and tail, drop the middle. */
158MY_INLINE unsigned int
159strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
160{
161 uint32_t hash = len;
162 uint32_t tmp;
163 unsigned int head;
164
165 /* head BIG_HASH_HEAD bytes */
166 head = (BIG_HASH_HEAD >> 2);
167 while (head-- > 0)
168 {
169 hash += strcache2_get_unaligned_16bits (str);
170 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
171 hash = (hash << 16) ^ tmp;
172 str += 2 * sizeof (uint16_t);
173 hash += hash >> 11;
174 }
175
176 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
177 str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
178 head = (BIG_HASH_TAIL >> 2);
179 while (head-- > 0)
180 {
181 hash += strcache2_get_unaligned_16bits (str);
182 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
183 hash = (hash << 16) ^ tmp;
184 str += 2 * sizeof (uint16_t);
185 hash += hash >> 11;
186 }
187
188 /* force "avalanching" of final 127 bits. */
189 hash ^= hash << 3;
190 hash += hash >> 5;
191 hash ^= hash << 4;
192 hash += hash >> 17;
193 hash ^= hash << 25;
194 hash += hash >> 6;
195
196 return hash;
197}
198#endif /* BIG_HASH_SIZE */
199
200MY_INLINE unsigned int
201strcache2_case_sensitive_hash (const char *str, unsigned int len)
202{
203#if 1
204 /* Paul Hsieh hash SuperFast function:
205 http://www.azillionmonkeys.com/qed/hash.html
206
207 This performs very good and as a sligtly better distribution than
208 STRING_N_HASH_1 on a typical kBuild run.
209
210 It is also 37% faster than return_STRING_N_HASH_1 when running the
211 two 100 times over typical kBuild strings that end up here (did a
212 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
213 with -O2.
214
215 FIXME: A path for well aligned data should be added to speed up
216 execution on alignment sensitive systems. */
217 unsigned int rem;
218 uint32_t hash;
219 uint32_t tmp;
220
221 assert (sizeof (uint8_t) == sizeof (char));
222
223# ifdef BIG_HASH_SIZE
224 /* long string? */
225# if 0 /*BIG_HASH_SIZE > 128*/
226 if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
227# else
228 if (len >= BIG_HASH_SIZE)
229# endif
230 return strcache2_case_sensitive_hash_big (str, len);
231# endif
232
233 /* short string: main loop, walking on 2 x uint16_t */
234 hash = len;
235 rem = len & 3;
236 len >>= 2;
237 while (len > 0)
238 {
239 hash += strcache2_get_unaligned_16bits (str);
240 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
241 hash = (hash << 16) ^ tmp;
242 str += 2 * sizeof (uint16_t);
243 hash += hash >> 11;
244 len--;
245 }
246
247 /* the remainder */
248 switch (rem)
249 {
250 case 3:
251 hash += strcache2_get_unaligned_16bits (str);
252 hash ^= hash << 16;
253 hash ^= str[sizeof (uint16_t)] << 18;
254 hash += hash >> 11;
255 break;
256 case 2:
257 hash += strcache2_get_unaligned_16bits (str);
258 hash ^= hash << 11;
259 hash += hash >> 17;
260 break;
261 case 1:
262 hash += *str;
263 hash ^= hash << 10;
264 hash += hash >> 1;
265 break;
266 }
267
268 /* force "avalanching" of final 127 bits. */
269 hash ^= hash << 3;
270 hash += hash >> 5;
271 hash ^= hash << 4;
272 hash += hash >> 17;
273 hash ^= hash << 25;
274 hash += hash >> 6;
275
276 return hash;
277
278#elif 1
279 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
280 when running the two 100 times over typical kBuild strings that
281 end up here (did a fprintf here and built kBuild).
282 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
283
284 unsigned int hash = 0;
285 if (MY_PREDICT_TRUE(len >= 2))
286 {
287 unsigned int ch0 = *str++;
288 hash = 0;
289 len--;
290 while (len >= 2)
291 {
292 unsigned int ch1 = *str++;
293 hash += ch0 << (ch1 & 0xf);
294
295 ch0 = *str++;
296 hash += ch1 << (ch0 & 0xf);
297
298 len -= 2;
299 }
300 if (len == 1)
301 {
302 unsigned ch1 = *str;
303 hash += ch0 << (ch1 & 0xf);
304
305 hash += ch1;
306 }
307 else
308 hash += ch0;
309 }
310 else if (len)
311 {
312 hash = *str;
313 hash += hash << (hash & 0xf);
314 }
315 else
316 hash = 0;
317 return hash;
318
319#elif 1
320# if 0
321 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
322 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
323 first in the assignment made it noticably slower.)
324
325 However, it is noticably slower in practice, most likely because of more
326 collisions. Hrmpf. */
327
328# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
329 unsigned int hash = 0;
330
331# else
332 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
333 fast than return_STRING_N_HASH_1.
334
335 Ditto. */
336
337# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
338 unsigned int hash = 5381;
339# endif
340
341
342 while (len >= 4)
343 {
344 UPDATE_HASH (str[0]);
345 UPDATE_HASH (str[1]);
346 UPDATE_HASH (str[2]);
347 UPDATE_HASH (str[3]);
348 str += 4;
349 len -= 4;
350 }
351 switch (len)
352 {
353 default:
354 case 0:
355 return hash;
356 case 1:
357 UPDATE_HASH (str[0]);
358 return hash;
359 case 2:
360 UPDATE_HASH (str[0]);
361 UPDATE_HASH (str[1]);
362 return hash;
363 case 3:
364 UPDATE_HASH (str[0]);
365 UPDATE_HASH (str[1]);
366 UPDATE_HASH (str[2]);
367 return hash;
368 }
369#endif
370}
371
372MY_INLINE unsigned int
373strcache2_case_insensitive_hash (const char *str, unsigned int len)
374{
375 unsigned int hash = 0;
376 if (MY_PREDICT_TRUE(len >= 2))
377 {
378 unsigned int ch0 = *str++;
379 ch0 = tolower (ch0);
380 hash = 0;
381 len--;
382 while (len >= 2)
383 {
384 unsigned int ch1 = *str++;
385 ch1 = tolower (ch1);
386 hash += ch0 << (ch1 & 0xf);
387
388 ch0 = *str++;
389 ch0 = tolower (ch0);
390 hash += ch1 << (ch0 & 0xf);
391
392 len -= 2;
393 }
394 if (len == 1)
395 {
396 unsigned ch1 = *str;
397 ch1 = tolower (ch1);
398 hash += ch0 << (ch1 & 0xf);
399
400 hash += ch1;
401 }
402 else
403 hash += ch0;
404 }
405 else if (len)
406 {
407 hash = *str;
408 hash += hash << (hash & 0xf);
409 }
410 else
411 hash = 0;
412 return hash;
413}
414
415#if 0
416MY_INLINE int
417strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
418{
419 if (length <= 8)
420 {
421 /* short string compare - ~50% of the kBuild calls. */
422 assert ( !((size_t)ys & 3) );
423 if (!((size_t)xs & 3))
424 {
425 /* aligned */
426 int result;
427 switch (length)
428 {
429 default: /* memcmp for longer strings */
430 return memcmp (xs, ys, length);
431 case 8:
432 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
433 result |= *(int32_t*)xs - *(int32_t*)ys;
434 return result;
435 case 7:
436 result = xs[6] - ys[6];
437 result |= xs[5] - ys[5];
438 result |= xs[4] - ys[4];
439 result |= *(int32_t*)xs - *(int32_t*)ys;
440 return result;
441 case 6:
442 result = xs[5] - ys[5];
443 result |= xs[4] - ys[4];
444 result |= *(int32_t*)xs - *(int32_t*)ys;
445 return result;
446 case 5:
447 result = xs[4] - ys[4];
448 result |= *(int32_t*)xs - *(int32_t*)ys;
449 return result;
450 case 4:
451 return *(int32_t*)xs - *(int32_t*)ys;
452 case 3:
453 result = xs[2] - ys[2];
454 result |= xs[1] - ys[1];
455 result |= xs[0] - ys[0];
456 return result;
457 case 2:
458 result = xs[1] - ys[1];
459 result |= xs[0] - ys[0];
460 return result;
461 case 1:
462 return *xs - *ys;
463 case 0:
464 return 0;
465 }
466 }
467 else
468 {
469 /* unaligned */
470 int result = 0;
471 switch (length)
472 {
473 case 8: result |= xs[7] - ys[7];
474 case 7: result |= xs[6] - ys[6];
475 case 6: result |= xs[5] - ys[5];
476 case 5: result |= xs[4] - ys[4];
477 case 4: result |= xs[3] - ys[3];
478 case 3: result |= xs[2] - ys[2];
479 case 2: result |= xs[1] - ys[1];
480 case 1: result |= xs[0] - ys[0];
481 case 0:
482 return result;
483 }
484 }
485 }
486
487 /* memcmp for longer strings */
488 return memcmp (xs, ys, length);
489}
490#endif
491
492MY_INLINE int
493strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
494{
495#ifndef ELECTRIC_HEAP
496 assert ( !((size_t)ys & 3) );
497#endif
498 if (!((size_t)xs & 3))
499 {
500 /* aligned */
501 int result;
502 unsigned reminder = length & 7;
503 length >>= 3;
504 while (length-- > 0)
505 {
506 result = *(int32_t*)xs - *(int32_t*)ys;
507 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
508 if (MY_PREDICT_FALSE(result))
509 return result;
510 xs += 8;
511 ys += 8;
512 }
513 switch (reminder)
514 {
515 case 7:
516 result = *(int32_t*)xs - *(int32_t*)ys;
517 result |= xs[6] - ys[6];
518 result |= xs[5] - ys[5];
519 result |= xs[4] - ys[4];
520 return result;
521 case 6:
522 result = *(int32_t*)xs - *(int32_t*)ys;
523 result |= xs[5] - ys[5];
524 result |= xs[4] - ys[4];
525 return result;
526 case 5:
527 result = *(int32_t*)xs - *(int32_t*)ys;
528 result |= xs[4] - ys[4];
529 return result;
530 case 4:
531 return *(int32_t*)xs - *(int32_t*)ys;
532 case 3:
533 result = xs[2] - ys[2];
534 result |= xs[1] - ys[1];
535 result |= xs[0] - ys[0];
536 return result;
537 case 2:
538 result = xs[1] - ys[1];
539 result |= xs[0] - ys[0];
540 return result;
541 case 1:
542 return *xs - *ys;
543 default:
544 case 0:
545 return 0;
546 }
547 }
548 else
549 {
550 /* unaligned */
551 int result;
552 unsigned reminder = length & 7;
553 length >>= 3;
554 while (length-- > 0)
555 {
556#if defined(__i386__) || defined(__x86_64__)
557 result = ( ((int32_t)xs[3] << 24)
558 | ((int32_t)xs[2] << 16)
559 | ((int32_t)xs[1] << 8)
560 | xs[0] )
561 - *(int32_t*)ys;
562 result |= ( ((int32_t)xs[7] << 24)
563 | ((int32_t)xs[6] << 16)
564 | ((int32_t)xs[5] << 8)
565 | xs[4] )
566 - *(int32_t*)(ys + 4);
567#else
568 result = xs[3] - ys[3];
569 result |= xs[2] - ys[2];
570 result |= xs[1] - ys[1];
571 result |= xs[0] - ys[0];
572 result |= xs[7] - ys[7];
573 result |= xs[6] - ys[6];
574 result |= xs[5] - ys[5];
575 result |= xs[4] - ys[4];
576#endif
577 if (MY_PREDICT_FALSE(result))
578 return result;
579 xs += 8;
580 ys += 8;
581 }
582
583 result = 0;
584 switch (reminder)
585 {
586 case 7: result |= xs[6] - ys[6]; /* fall thru */
587 case 6: result |= xs[5] - ys[5]; /* fall thru */
588 case 5: result |= xs[4] - ys[4]; /* fall thru */
589 case 4: result |= xs[3] - ys[3]; /* fall thru */
590 case 3: result |= xs[2] - ys[2]; /* fall thru */
591 case 2: result |= xs[1] - ys[1]; /* fall thru */
592 case 1: result |= xs[0] - ys[0]; /* fall thru */
593 return result;
594 default:
595 case 0:
596 return 0;
597 }
598 }
599}
600
601MY_INLINE int
602strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
603 const char *str, unsigned int length, unsigned int hash)
604{
605 assert (!cache->case_insensitive);
606
607 /* the simple stuff first. */
608 if ( entry->hash != hash
609 || entry->length != length)
610 return 0;
611
612#if 0
613 return memcmp (str, entry + 1, length) == 0;
614#elif 1
615 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
616#else
617 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
618#endif
619}
620
621#if defined(HAVE_CASE_INSENSITIVE_FS)
622MY_INLINE int
623strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
624 const char *str, unsigned int length, unsigned int hash)
625{
626 assert (cache->case_insensitive);
627
628 /* the simple stuff first. */
629 if ( entry->hash != hash
630 || entry->length != length)
631 return 0;
632
633# if defined(_MSC_VER) || defined(__OS2__)
634 return _memicmp (entry + 1, str, length) == 0;
635# else
636 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
637# endif
638}
639#endif /* HAVE_CASE_INSENSITIVE_FS */
640
641static void
642strcache2_rehash (struct strcache2 *cache)
643{
644 unsigned int src = cache->hash_size;
645 struct strcache2_entry **src_tab = cache->hash_tab;
646 struct strcache2_entry **dst_tab;
647#ifndef STRCACHE2_USE_MASK
648 unsigned int hash_shift;
649#endif
650
651 /* Allocate a new hash table twice the size of the current. */
652 cache->hash_size <<= 1;
653#ifdef STRCACHE2_USE_MASK
654 cache->hash_mask <<= 1;
655 cache->hash_mask |= 1;
656#else
657 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
658 /* nothing */;
659 cache->hash_div = strcache2_find_prime (hash_shift);
660#endif
661 cache->rehash_count <<= 1;
662 cache->hash_tab = dst_tab = (struct strcache2_entry **)
663 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
664 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
665
666 /* Copy the entries from the old to the new hash table. */
667 cache->collision_count = 0;
668 while (src-- > 0)
669 {
670 struct strcache2_entry *entry = src_tab[src];
671 while (entry)
672 {
673 struct strcache2_entry *next = entry->next;
674 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
675 if ((entry->next = dst_tab[dst]) != 0)
676 cache->collision_count++;
677 dst_tab[dst] = entry;
678
679 entry = next;
680 }
681 }
682
683 /* That's it, just free the old table and we're done. */
684 free (src_tab);
685}
686
687static struct strcache2_seg *
688strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
689{
690 struct strcache2_seg *seg;
691 size_t size;
692 size_t off;
693
694 size = cache->def_seg_size;
695 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
696 {
697 size = (size_t)minlen * 2;
698 size = (size + 0xfff) & ~(size_t)0xfff;
699 }
700
701 seg = xmalloc (size);
702 seg->start = (char *)(seg + 1);
703 seg->size = size - sizeof (struct strcache2_seg);
704 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
705 if (off)
706 {
707 off = STRCACHE2_ENTRY_ALIGNMENT - off;
708 seg->start += off;
709 seg->size -= off;
710 }
711 assert (seg->size > minlen);
712 seg->cursor = seg->start;
713 seg->avail = seg->size;
714
715 seg->next = cache->seg_head;
716 cache->seg_head = seg;
717
718 return seg;
719}
720
721/* Internal worker that enters a new string into the cache. */
722static const char *
723strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
724 const char *str, unsigned int length,
725 unsigned int hash)
726{
727 struct strcache2_entry *entry;
728 struct strcache2_seg *seg;
729 unsigned int size;
730 char *str_copy;
731
732 /* Allocate space for the string. */
733
734 size = length + 1 + sizeof (struct strcache2_entry);
735 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
736
737 seg = cache->seg_head;
738 if (MY_PREDICT_FALSE(seg->avail < size))
739 {
740 do
741 seg = seg->next;
742 while (seg && seg->avail < size);
743 if (!seg)
744 seg = strcache2_new_seg (cache, size);
745 }
746
747 entry = (struct strcache2_entry *) seg->cursor;
748 assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
749 seg->cursor += size;
750 seg->avail -= size;
751
752 /* Setup the entry, copy the string and insert it into the hash table. */
753
754 entry->user = NULL;
755 entry->length = length;
756 entry->hash = hash;
757 str_copy = (char *) memcpy (entry + 1, str, length);
758 str_copy[length] = '\0';
759
760 if ((entry->next = cache->hash_tab[idx]) != 0)
761 cache->collision_count++;
762 cache->hash_tab[idx] = entry;
763 cache->count++;
764 if (cache->count >= cache->rehash_count)
765 strcache2_rehash (cache);
766
767 return str_copy;
768}
769
770/* The public add string interface. */
771const char *
772strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
773{
774 struct strcache2_entry const *entry;
775 unsigned int hash = strcache2_case_sensitive_hash (str, length);
776 unsigned int idx;
777
778 assert (!cache->case_insensitive);
779 assert (!memchr (str, '\0', length));
780
781 MAKE_STATS (cache->lookup_count++);
782
783 /* Lookup the entry in the hash table, hoping for an
784 early match. If not found, enter the string at IDX. */
785 idx = STRCACHE2_MOD_IT (cache, hash);
786 entry = cache->hash_tab[idx];
787 if (!entry)
788 return strcache2_enter_string (cache, idx, str, length, hash);
789 if (strcache2_is_equal (cache, entry, str, length, hash))
790 return (const char *)(entry + 1);
791 MAKE_STATS (cache->collision_1st_count++);
792
793 entry = entry->next;
794 if (!entry)
795 return strcache2_enter_string (cache, idx, str, length, hash);
796 if (strcache2_is_equal (cache, entry, str, length, hash))
797 return (const char *)(entry + 1);
798 MAKE_STATS (cache->collision_2nd_count++);
799
800 /* Loop the rest. */
801 for (;;)
802 {
803 entry = entry->next;
804 if (!entry)
805 return strcache2_enter_string (cache, idx, str, length, hash);
806 if (strcache2_is_equal (cache, entry, str, length, hash))
807 return (const char *)(entry + 1);
808 MAKE_STATS (cache->collision_3rd_count++);
809 }
810 /* not reached */
811}
812
813/* The public add string interface for prehashed strings.
814 Use strcache2_hash_str to calculate the hash of a string. */
815const char *
816strcache2_add_hashed (struct strcache2 *cache, const char *str,
817 unsigned int length, unsigned int hash)
818{
819 struct strcache2_entry const *entry;
820 unsigned int idx;
821#ifndef NDEBUG
822 unsigned correct_hash;
823
824 assert (!cache->case_insensitive);
825 assert (!memchr (str, '\0', length));
826 correct_hash = strcache2_case_sensitive_hash (str, length);
827 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
828#endif /* NDEBUG */
829
830 MAKE_STATS (cache->lookup_count++);
831
832 /* Lookup the entry in the hash table, hoping for an
833 early match. If not found, enter the string at IDX. */
834 idx = STRCACHE2_MOD_IT (cache, hash);
835 entry = cache->hash_tab[idx];
836 if (!entry)
837 return strcache2_enter_string (cache, idx, str, length, hash);
838 if (strcache2_is_equal (cache, entry, str, length, hash))
839 return (const char *)(entry + 1);
840 MAKE_STATS (cache->collision_1st_count++);
841
842 entry = entry->next;
843 if (!entry)
844 return strcache2_enter_string (cache, idx, str, length, hash);
845 if (strcache2_is_equal (cache, entry, str, length, hash))
846 return (const char *)(entry + 1);
847 MAKE_STATS (cache->collision_2nd_count++);
848
849 /* Loop the rest. */
850 for (;;)
851 {
852 entry = entry->next;
853 if (!entry)
854 return strcache2_enter_string (cache, idx, str, length, hash);
855 if (strcache2_is_equal (cache, entry, str, length, hash))
856 return (const char *)(entry + 1);
857 MAKE_STATS (cache->collision_3rd_count++);
858 }
859 /* not reached */
860}
861
862/* The public lookup (case sensitive) string interface. */
863const char *
864strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
865{
866 struct strcache2_entry const *entry;
867 unsigned int hash = strcache2_case_sensitive_hash (str, length);
868 unsigned int idx;
869
870 assert (!cache->case_insensitive);
871 assert (!memchr (str, '\0', length));
872
873 MAKE_STATS (cache->lookup_count++);
874
875 /* Lookup the entry in the hash table, hoping for an
876 early match. */
877 idx = STRCACHE2_MOD_IT (cache, hash);
878 entry = cache->hash_tab[idx];
879 if (!entry)
880 return NULL;
881 if (strcache2_is_equal (cache, entry, str, length, hash))
882 return (const char *)(entry + 1);
883 MAKE_STATS (cache->collision_1st_count++);
884
885 entry = entry->next;
886 if (!entry)
887 return NULL;
888 if (strcache2_is_equal (cache, entry, str, length, hash))
889 return (const char *)(entry + 1);
890 MAKE_STATS (cache->collision_2nd_count++);
891
892 /* Loop the rest. */
893 for (;;)
894 {
895 entry = entry->next;
896 if (!entry)
897 return NULL;
898 if (strcache2_is_equal (cache, entry, str, length, hash))
899 return (const char *)(entry + 1);
900 MAKE_STATS (cache->collision_3rd_count++);
901 }
902 /* not reached */
903}
904
905#if defined(HAVE_CASE_INSENSITIVE_FS)
906
907/* The public add string interface for case insensitive strings. */
908const char *
909strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
910{
911 struct strcache2_entry const *entry;
912 unsigned int hash = strcache2_case_insensitive_hash (str, length);
913 unsigned int idx;
914
915 assert (cache->case_insensitive);
916 assert (!memchr (str, '\0', length));
917
918 MAKE_STATS (cache->lookup_count++);
919
920 /* Lookup the entry in the hash table, hoping for an
921 early match. If not found, enter the string at IDX. */
922 idx = STRCACHE2_MOD_IT (cache, hash);
923 entry = cache->hash_tab[idx];
924 if (!entry)
925 return strcache2_enter_string (cache, idx, str, length, hash);
926 if (strcache2_is_iequal (cache, entry, str, length, hash))
927 return (const char *)(entry + 1);
928 MAKE_STATS (cache->collision_1st_count++);
929
930 entry = entry->next;
931 if (!entry)
932 return strcache2_enter_string (cache, idx, str, length, hash);
933 if (strcache2_is_iequal (cache, entry, str, length, hash))
934 return (const char *)(entry + 1);
935 MAKE_STATS (cache->collision_2nd_count++);
936
937 /* Loop the rest. */
938 for (;;)
939 {
940 entry = entry->next;
941 if (!entry)
942 return strcache2_enter_string (cache, idx, str, length, hash);
943 if (strcache2_is_iequal (cache, entry, str, length, hash))
944 return (const char *)(entry + 1);
945 MAKE_STATS (cache->collision_3rd_count++);
946 }
947 /* not reached */
948}
949
950/* The public add string interface for prehashed case insensitive strings.
951 Use strcache2_hash_istr to calculate the hash of a string. */
952const char *
953strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
954 unsigned int length, unsigned int hash)
955{
956 struct strcache2_entry const *entry;
957 unsigned int idx;
958#ifndef NDEBUG
959 unsigned correct_hash;
960
961 assert (cache->case_insensitive);
962 assert (!memchr (str, '\0', length));
963 correct_hash = strcache2_case_insensitive_hash (str, length);
964 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
965#endif /* NDEBUG */
966
967 MAKE_STATS (cache->lookup_count++);
968
969 /* Lookup the entry in the hash table, hoping for an
970 early match. If not found, enter the string at IDX. */
971 idx = STRCACHE2_MOD_IT (cache, hash);
972 entry = cache->hash_tab[idx];
973 if (!entry)
974 return strcache2_enter_string (cache, idx, str, length, hash);
975 if (strcache2_is_iequal (cache, entry, str, length, hash))
976 return (const char *)(entry + 1);
977 MAKE_STATS (cache->collision_1st_count++);
978
979 entry = entry->next;
980 if (!entry)
981 return strcache2_enter_string (cache, idx, str, length, hash);
982 if (strcache2_is_iequal (cache, entry, str, length, hash))
983 return (const char *)(entry + 1);
984 MAKE_STATS (cache->collision_2nd_count++);
985
986 /* Loop the rest. */
987 for (;;)
988 {
989 entry = entry->next;
990 if (!entry)
991 return strcache2_enter_string (cache, idx, str, length, hash);
992 if (strcache2_is_iequal (cache, entry, str, length, hash))
993 return (const char *)(entry + 1);
994 MAKE_STATS (cache->collision_3rd_count++);
995 }
996 /* not reached */
997}
998
999/* The public lookup (case insensitive) string interface. */
1000const char *
1001strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
1002{
1003 struct strcache2_entry const *entry;
1004 unsigned int hash = strcache2_case_insensitive_hash (str, length);
1005 unsigned int idx;
1006
1007 assert (cache->case_insensitive);
1008 assert (!memchr (str, '\0', length));
1009
1010 MAKE_STATS (cache->lookup_count++);
1011
1012 /* Lookup the entry in the hash table, hoping for an
1013 early match. */
1014 idx = STRCACHE2_MOD_IT (cache, hash);
1015 entry = cache->hash_tab[idx];
1016 if (!entry)
1017 return NULL;
1018 if (strcache2_is_iequal (cache, entry, str, length, hash))
1019 return (const char *)(entry + 1);
1020 MAKE_STATS (cache->collision_1st_count++);
1021
1022 entry = entry->next;
1023 if (!entry)
1024 return NULL;
1025 if (strcache2_is_iequal (cache, entry, str, length, hash))
1026 return (const char *)(entry + 1);
1027 MAKE_STATS (cache->collision_2nd_count++);
1028
1029 /* Loop the rest. */
1030 for (;;)
1031 {
1032 entry = entry->next;
1033 if (!entry)
1034 return NULL;
1035 if (strcache2_is_iequal (cache, entry, str, length, hash))
1036 return (const char *)(entry + 1);
1037 MAKE_STATS (cache->collision_3rd_count++);
1038 }
1039 /* not reached */
1040}
1041
1042#endif /* HAVE_CASE_INSENSITIVE_FS */
1043
1044/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1045int
1046strcache2_is_cached (struct strcache2 *cache, const char *str)
1047{
1048 /* Check mandatory alignment first. */
1049 if (!((size_t)str & (sizeof (void *) - 1)))
1050 {
1051 /* Check the segment list and consider the question answered if the
1052 string is within one of them. (Could check it more thoroughly...) */
1053 struct strcache2_seg const *seg;
1054 for (seg = cache->seg_head; seg; seg = seg->next)
1055 if ((size_t)(str - seg->start) < seg->size)
1056 return 1;
1057 }
1058
1059 return 0;
1060}
1061
1062
1063/* Verify the integrity of the specified string, returning 0 if OK. */
1064int
1065strcache2_verify_entry (struct strcache2 *cache, const char *str)
1066{
1067 struct strcache2_entry const *entry;
1068 unsigned int hash;
1069 unsigned int length;
1070 const char *end;
1071
1072 entry = (struct strcache2_entry const *)str - 1;
1073 if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1074 {
1075 fprintf (stderr,
1076 "strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1077 cache->name, (void *)entry, (void *)str, str);
1078 return -1;
1079 }
1080
1081 end = memchr (str, '\0', entry->length + 1);
1082 length = end - str;
1083 if (length != entry->length)
1084 {
1085 fprintf (stderr,
1086 "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1087 cache->name, (void *)entry, length, entry->length, str);
1088 return -1;
1089 }
1090
1091 hash = cache->case_insensitive
1092 ? strcache2_case_insensitive_hash (str, entry->length)
1093 : strcache2_case_sensitive_hash (str, entry->length);
1094 if (hash != entry->hash)
1095 {
1096 fprintf (stderr,
1097 "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1098 cache->name, (void *)entry, hash, entry->hash, str);
1099 return -1;
1100 }
1101
1102 return 0;
1103}
1104
1105
1106/* Calculates the case sensitive hash values of the string.
1107 The first hash is returned, the other is put at HASH2P. */
1108unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1109{
1110 *hash2p = 1;
1111 return strcache2_case_sensitive_hash (str, length);
1112}
1113
1114/* Calculates the case insensitive hash values of the string.
1115 The first hash is returned, the other is put at HASH2P. */
1116unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1117{
1118 *hash2p = 1;
1119 return strcache2_case_insensitive_hash (str, length);
1120}
1121
1122
1123
1124/* Initalizes a new cache. */
1125void
1126strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1127 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1128{
1129 unsigned hash_shift;
1130 assert (!thread_safe);
1131
1132 /* calc the size as a power of two */
1133 if (!size)
1134 hash_shift = STRCACHE2_HASH_SHIFT;
1135 else
1136 {
1137 assert (size <= (~0U / 2 + 1));
1138 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1139 /* nothing */;
1140 }
1141
1142 /* adjust the default segment size */
1143 if (!def_seg_size)
1144 def_seg_size = STRCACHE2_SEG_SIZE;
1145 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1146 def_seg_size = sizeof (struct strcache2_seg) * 10;
1147 else if ((def_seg_size & 0xfff) < 0xf00)
1148 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1149
1150
1151 /* init the structure. */
1152 cache->case_insensitive = case_insensitive;
1153#ifdef STRCACHE2_USE_MASK
1154 cache->hash_mask = (1U << hash_shift) - 1U;
1155#else
1156 cache->hash_div = strcache2_find_prime(hash_shift);
1157#endif
1158 cache->count = 0;
1159 cache->collision_count = 0;
1160 cache->lookup_count = 0;
1161 cache->collision_1st_count = 0;
1162 cache->collision_2nd_count = 0;
1163 cache->collision_3rd_count = 0;
1164 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1165 cache->init_size = 1U << hash_shift;
1166 cache->hash_size = 1U << hash_shift;
1167 cache->def_seg_size = def_seg_size;
1168 cache->lock = NULL;
1169 cache->name = name;
1170
1171 /* allocate the hash table and first segment. */
1172 cache->hash_tab = (struct strcache2_entry **)
1173 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1174 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1175 strcache2_new_seg (cache, 0);
1176
1177 /* link it */
1178 cache->next = strcache_head;
1179 strcache_head = cache;
1180}
1181
1182
1183/* Terminates a string cache, freeing all memory and other
1184 associated resources. */
1185void
1186strcache2_term (struct strcache2 *cache)
1187{
1188 /* unlink it */
1189 if (strcache_head == cache)
1190 strcache_head = cache->next;
1191 else
1192 {
1193 struct strcache2 *prev = strcache_head;
1194 while (prev->next != cache)
1195 prev = prev->next;
1196 assert (prev);
1197 prev->next = cache->next;
1198 }
1199
1200 /* free the memory segments */
1201 do
1202 {
1203 void *free_it = cache->seg_head;
1204 cache->seg_head = cache->seg_head->next;
1205 free (free_it);
1206 }
1207 while (cache->seg_head);
1208
1209 /* free the hash and clear the structure. */
1210 free (cache->hash_tab);
1211 memset (cache, '\0', sizeof (struct strcache2));
1212}
1213
1214/* Print statistics a string cache. */
1215void
1216strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1217{
1218 unsigned int seg_count = 0;
1219 unsigned long seg_total_bytes = 0;
1220 unsigned long seg_avg_bytes;
1221 unsigned long seg_avail_bytes = 0;
1222 unsigned long seg_max_bytes = 0;
1223 struct strcache2_seg *seg;
1224 unsigned int str_count = 0;
1225 unsigned long str_total_len = 0;
1226 unsigned int str_avg_len;
1227 unsigned int str_min_len = ~0U;
1228 unsigned int str_max_len = 0;
1229 unsigned int idx;
1230 unsigned int rehashes;
1231 unsigned int chain_depths[32];
1232
1233 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1234
1235 /* Segment statistics. */
1236 for (seg = cache->seg_head; seg; seg = seg->next)
1237 {
1238 seg_count++;
1239 seg_total_bytes += seg->size;
1240 seg_avail_bytes += seg->avail;
1241 if (seg->size > seg_max_bytes)
1242 seg_max_bytes = seg->size;
1243 }
1244 seg_avg_bytes = seg_total_bytes / seg_count;
1245 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1246 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1247 cache->def_seg_size, seg_avail_bytes);
1248
1249 /* String statistics. */
1250 memset (chain_depths, '\0', sizeof (chain_depths));
1251 idx = cache->hash_size;
1252 while (idx-- > 0)
1253 {
1254 struct strcache2_entry const *entry = cache->hash_tab[idx];
1255 unsigned int depth = 0;
1256 for (; entry != 0; entry = entry->next, depth++)
1257 {
1258 unsigned int length = entry->length;
1259 str_total_len += length;
1260 if (length > str_max_len)
1261 str_max_len = length;
1262 if (length < str_min_len)
1263 str_min_len = length;
1264 str_count++;
1265 }
1266 chain_depths[depth >= 32 ? 31 : depth]++;
1267 }
1268 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1269 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1270 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1271 if (str_count != cache->count)
1272 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1273 cache->count, str_count);
1274
1275 /* Hash statistics. */
1276 idx = cache->init_size;
1277 rehashes = 0;
1278 while (idx < cache->hash_size)
1279 {
1280 idx *= 2;
1281 rehashes++;
1282 }
1283
1284#ifdef STRCACHE2_USE_MASK
1285 printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1286 prefix, cache->hash_size, cache->hash_mask, rehashes);
1287#else
1288 printf (_("%s hash size = %u div = %#x rehashed %u times"),
1289 prefix, cache->hash_size, cache->hash_div, rehashes);
1290#endif
1291 if (cache->lookup_count)
1292 printf (_("%s lookups = %lu\n"
1293 "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1294 prefix, cache->lookup_count,
1295 prefix,
1296 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1297 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1298 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1299 printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1300 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1301 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1302 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1303 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1304 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1305 for (idx = 2; idx < 32; idx++)
1306 {
1307 unsigned strs_at_this_depth = chain_depths[idx];
1308 unsigned i;
1309 for (i = idx + 1; i < 32; i++)
1310 strs_at_this_depth += chain_depths[i];
1311 if (strs_at_this_depth)
1312 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1313 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1314 idx - 1, idx == 2 ? " " : "s",
1315 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1316 }
1317}
1318
1319/* Print statistics for all string caches. */
1320void
1321strcache2_print_stats_all (const char *prefix)
1322{
1323 struct strcache2 *cur;
1324 for (cur = strcache_head; cur; cur = cur->next)
1325 strcache2_print_stats (cur, prefix);
1326}
1327
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use