LTP GCOV extension - code coverage report
Current view: directory - csrc/cl - hashtable.c
Test: app.info
Date: 2007-12-10 Instrumented lines: 197
Code covered: 93.4 % Executed lines: 184

       1                 : /*
       2                 :         Copyright (C) 2007, Bruce Ediger
       3                 : 
       4                 :     This file is part of cl.
       5                 : 
       6                 :     cl is free software; you can redistribute it and/or modify
       7                 :     it under the terms of the GNU General Public License as published by
       8                 :     the Free Software Foundation; either version 2 of the License, or
       9                 :     (at your option) any later version.
      10                 : 
      11                 :     cl is distributed in the hope that it will be useful,
      12                 :     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :     GNU General Public License for more details.
      15                 : 
      16                 :     You should have received a copy of the GNU General Public License
      17                 :     along with cl; if not, write to the Free Software
      18                 :     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      19                 : 
      20                 : */
      21                 : #include <stdio.h>
      22                 : #include <stdlib.h>
      23                 : #include <string.h>
      24                 : 
      25                 : #include <hashtable.h>
      26                 : 
      27                 : #include <node.h>
      28                 : #include <abbreviations.h>  /* free_graph() */
      29                 : 
      30                 : unsigned int hash_djb2(const char *s);
      31                 : void rehash_hashtable(struct hashtable *h);
      32                 : void reallocate_buckets(struct hashtable *h);
      33                 : int  sparsebit(long i);
      34                 : int  msbbit(long i);
      35                 : 
      36                 : /* chain is the dummy node at head of hash chain */
      37                 : void
      38                 : insert_node(struct hashnode *chain, struct hashnode *n)
      39            6185 : {
      40            6185 :         n->next = chain->next;
      41            6185 :         chain->next = n;
      42            6185 :         n->next->prev = n;
      43            6185 :         n->prev = chain;
      44            6185 : }
      45                 : 
      46                 : struct hashtable *
      47                 : init_hashtable(int buckets, int maxload)
      48              71 : {
      49                 :         int i;
      50                 :         int f, z;
      51                 : 
      52                 :         struct hashtable *h;
      53                 : 
      54              71 :         h = malloc(sizeof(*h));
      55                 : 
      56              71 :         h->flags = 0;
      57                 : 
      58                 :         /* Insure that bucket array size is always a multiple of 2.
      59                 :          * This amounted to about 10% run-time savings for small files
      60                 :          * by avoiding the '%' operator. */
      61                 : 
      62              71 :         f = msbbit(buckets);
      63              71 :         z = 1 << f;
      64                 : 
      65              71 :         if (z < buckets)
      66               0 :                 z <<= 1;
      67                 : 
      68              71 :         buckets = z;
      69                 : 
      70                 :         /* buckets and sentinels */
      71              71 :         h->buckets =
      72                 :                 (struct hashnode **)malloc(buckets*sizeof(*h->buckets));
      73              71 :         memset((char *)h->buckets, '\0', buckets*sizeof(*h->buckets));
      74                 : 
      75              71 :         h->sentinels =
      76                 :                 (struct hashnode **)malloc(buckets*sizeof(*h->sentinels));
      77              71 :         memset((char *)h->sentinels, '\0', buckets*sizeof(*h->sentinels));
      78                 : 
      79            4615 :         for (i = 0; i < buckets; ++i)
      80                 :         {
      81                 :                 /* first node is a dummy - for convenience and to hold chain stats */
      82                 : 
      83            4544 :                 h->buckets[i] = (struct hashnode *)malloc(sizeof(*h->buckets[i]));
      84            4544 :                 h->buckets[i]->data = NULL;
      85            4544 :                 h->buckets[i]->string = NULL;
      86            4544 :                 h->buckets[i]->string_length = 0;
      87                 : 
      88            4544 :                 h->sentinels[i] = (struct hashnode *)malloc(sizeof(*h->sentinels[i]));
      89            4544 :                 h->sentinels[i]->data = NULL;
      90                 : 
      91            4544 :                 h->buckets[i]->next = h->sentinels[i];
      92            4544 :                 h->buckets[i]->prev = h->buckets[i];
      93            4544 :                 h->sentinels[i]->next = h->sentinels[i];
      94            4544 :                 h->sentinels[i]->prev = h->buckets[i];
      95                 : 
      96            4544 :                 h->buckets[i]->string = NULL;
      97            4544 :                 h->buckets[i]->value = 0;    /* split count of chain */
      98            4544 :                 h->buckets[i]->nodes_in_chain = 0;    /* split count of chain */
      99                 :         }
     100                 : 
     101                 :         /* bucket allocation */
     102              71 :         h->currentsize = buckets;
     103              71 :         h->allocated = buckets;
     104                 : 
     105              71 :         h->maxload = maxload;
     106                 : 
     107                 :         /* bucket splitting */
     108              71 :         h->p = 0;
     109              71 :         h->maxp = buckets;
     110                 : 
     111              71 :         h->node_cnt = 0;
     112              71 :         h->rehash_cnt = 0;
     113                 : 
     114              71 :         return h;
     115                 : }
     116                 : 
     117                 : void
     118                 : rehash_hashtable(struct hashtable *h)
     119             391 : {
     120                 :         struct hashnode *l;
     121                 :         struct hashnode *oldbucket, *oldtail;
     122                 :         int newindex;
     123                 : 
     124             391 :         ++h->rehash_cnt;
     125                 : 
     126             391 :         if (h->currentsize >= h->allocated)
     127               3 :                 reallocate_buckets(h);
     128                 : 
     129             391 :         oldbucket = h->buckets[h->p];
     130             391 :         oldtail = h->sentinels[h->p];
     131             391 :         l = oldbucket->next;
     132                 : 
     133             391 :         oldbucket->next = oldtail;
     134             391 :         oldtail->prev = oldbucket;
     135                 : 
     136             391 :         newindex = h->p + h->maxp;
     137                 : 
     138             391 :         if (h->flags > 1)
     139                 :         {
     140               0 :                 printf("# %d rehashes, %d total nodes, old bucket %d, new bucket %d\n",
     141                 :                         h->rehash_cnt, h->node_cnt, h->p, newindex);
     142               0 :                 printf("# %d nodes in old bucket\n", oldbucket->nodes_in_chain);
     143                 :         }
     144                 : 
     145             391 :         oldbucket->nodes_in_chain = 0;  /* may not be any lines in chain after rehash */
     146             391 :         ++oldbucket->value;      /* number of splits */
     147                 : 
     148             391 :         ++h->p;
     149                 : 
     150             391 :         if (h->p == h->maxp)
     151                 :         {
     152               2 :                 h->maxp *= 2;
     153               2 :                 h->p = 0;
     154                 :         }
     155                 : 
     156             391 :         ++h->currentsize;
     157                 : 
     158            6967 :         while (oldtail != l)
     159                 :         {
     160            6185 :                 struct hashnode *t = l->next;
     161            6185 :                 int idx = MOD(l->value, h->maxp);
     162                 : 
     163            6185 :                 if (idx < h->p)
     164            6143 :                         idx = MOD(l->value, (2*h->maxp));
     165                 : 
     166            6185 :                 if (idx == newindex)
     167                 :                 {
     168            3116 :                         insert_node(h->buckets[newindex], l);
     169            3116 :                         ++h->buckets[newindex]->nodes_in_chain;
     170                 :                 } else {
     171            3069 :                         insert_node(oldbucket, l);
     172            3069 :                         ++oldbucket->nodes_in_chain;
     173                 :                 }
     174                 : 
     175            6185 :                 l = t;
     176                 :         }
     177                 : 
     178             391 :         if (h->flags > 1)
     179                 :         {
     180               0 :                 printf("# split: %d in old chain, %d in new chain\n",
     181                 :                         oldbucket->nodes_in_chain, h->buckets[newindex]->nodes_in_chain);
     182                 :         }
     183             391 : }
     184                 : 
     185                 : void
     186                 : reallocate_buckets(struct hashtable *h)
     187               3 : {
     188                 :         int i;
     189                 :         int newallocated;
     190                 : 
     191               3 :         newallocated = h->allocated * 2;
     192                 : 
     193               3 :         h->buckets = (struct hashnode **)realloc(
     194                 :                 h->buckets,
     195                 :                 sizeof(struct hashnode *)*newallocated
     196                 :         );
     197                 : 
     198               3 :         h->sentinels = (struct hashnode **)realloc(
     199                 :                 h->sentinels,
     200                 :                 sizeof(struct hashnode *)*newallocated
     201                 :         );
     202                 : 
     203             451 :         for (i = h->allocated; i < newallocated; ++i)
     204                 :         {
     205             448 :                 h->buckets[i] = (struct hashnode *)malloc(sizeof(*h->buckets[i]));
     206             448 :                 h->sentinels[i] = (struct hashnode *)malloc(sizeof(*h->sentinels[i]));
     207                 : 
     208             448 :                 h->buckets[i]->next = h->sentinels[i];
     209             448 :                 h->sentinels[i]->prev = h->buckets[i];
     210             448 :                 h->sentinels[i]->next = h->sentinels[i];
     211             448 :                 h->buckets[i]->prev = h->buckets[i];
     212                 : 
     213             448 :                 h->buckets[i]->string = NULL;
     214             448 :                 h->buckets[i]->data = NULL;
     215             448 :                 h->sentinels[i]->string = NULL;
     216             448 :                 h->sentinels[i]->data = NULL;
     217                 : 
     218             448 :                 h->buckets[i]->nodes_in_chain = 0;
     219             448 :                 h->buckets[i]->value = 0;    /* number of times chain splits, too */
     220                 :         }
     221                 : 
     222               3 :         h->allocated = newallocated;
     223               3 : }
     224                 : 
     225                 : const char *
     226                 : add_string(struct hashtable *h, const char *string)
     227            6568 : {
     228            6568 :         return (const char *)add_data(h, string, NULL);
     229                 : }
     230                 : 
     231                 : void *
     232                 : add_data(struct hashtable *h, const char *string, void *data)
     233            6568 : {
     234                 :         struct hashnode *n, *head;
     235                 :         int bucket_index;
     236                 :         unsigned int hv;
     237                 :         int hash_mod;
     238                 : 
     239            6568 :         if((n = node_lookup(h, string, &hv)))
     240                 :         {
     241            1141 :                 void *r = NULL;
     242            1141 :                 if (data)
     243                 :                 {
     244               0 :                         r = (void *)n->data;
     245               0 :                         n->data = data;
     246                 :                 } else {
     247            1141 :                         r = (void *)n->string;
     248                 :                 }
     249            1141 :                 return r;
     250                 :         }
     251                 : 
     252            5427 :         bucket_index = MOD(hv, h->maxp);
     253            5427 :         hash_mod = h->maxp;
     254            5427 :         if (bucket_index < h->p)
     255                 :         {
     256            1835 :                 bucket_index = MOD(hv, (2*h->maxp));
     257            1835 :                 hash_mod = 2*h->maxp;
     258                 :         }
     259                 : 
     260            5427 :         if (h->flags > 1)
     261               0 :                 printf("# \"%s\": hash value %u, base %d, bucket %d\n", string, hv, hash_mod, bucket_index);
     262                 : 
     263                 :         /* allocate new node */
     264            5427 :         n = (struct hashnode *)malloc(sizeof(*n));
     265                 : 
     266                 :         /* fill in newly allocated node */
     267            5427 :         n->value = hv;
     268            5427 :         n->string_length = strlen(string);
     269            5427 :         n->string = malloc(n->string_length+1);
     270            5427 :         memcpy(n->string, string, n->string_length+1);
     271            5427 :         n->data = data;
     272                 : 
     273                 :         /* add newly allocated node to appropriate chain */
     274            5427 :         head = h->buckets[bucket_index];
     275                 : 
     276                 :         /* just push it on the chain */
     277            5427 :         n->next = head->next;
     278            5427 :         head->next = n;
     279            5427 :         n->next->prev = n;
     280            5427 :         n->prev = head;
     281                 : 
     282            5427 :         ++h->buckets[bucket_index]->nodes_in_chain;
     283                 : 
     284            5427 :         ++h->node_cnt;
     285                 : 
     286                 :         /* "load" on hashtable too high? */
     287            5427 :         if (h->node_cnt/h->currentsize > h->maxload)
     288             391 :                 rehash_hashtable(h);
     289                 : 
     290            5427 :         return (NULL == data)? (void *)n->string: NULL;
     291                 : }
     292                 : 
     293                 : void free_hashtable(struct hashtable *h)
     294              69 : {
     295                 :         unsigned int i;
     296                 : 
     297            4876 :         for (i = 0; i < h->currentsize; ++i)
     298                 :         {
     299            4807 :                 struct hashnode *chain = h->buckets[i];
     300           19848 :                 while (chain != h->sentinels[i])
     301                 :                 {
     302           10234 :                         struct hashnode *tmp = chain->next;
     303           10234 :                         if (chain->string)
     304                 :                         {
     305            5427 :                                 free(chain->string);
     306            5427 :                                 chain->string = NULL;
     307                 :                         }
     308           10234 :                         free_graph(chain->data);
     309           10234 :                         chain->data = NULL;
     310           10234 :                         free(chain);
     311           10234 :                         chain = tmp;
     312                 :                 }
     313            4807 :                 free(h->sentinels[i]);
     314            4807 :                 h->sentinels[i] = NULL;
     315                 :         }
     316                 : 
     317                 :         /* free the dummy heads and sentinels that the table
     318                 :          * doesn't currently use - it doubles bucket count
     319                 :          * every time the "load" gets too high */
     320             126 :         for (i = h->currentsize; i < h->allocated; ++i)
     321                 :         {
     322              57 :                 free(h->buckets[i]);
     323              57 :                 h->buckets[i] = NULL;
     324              57 :                 free(h->sentinels[i]);
     325              57 :                 h->sentinels[i] = NULL;
     326                 :         }
     327                 : 
     328              69 :         free(h->buckets);
     329              69 :         free(h->sentinels);
     330              69 :         h->buckets = NULL;
     331              69 :         h->sentinels = NULL;
     332                 : 
     333              69 :         free(h);
     334              69 :         h = NULL;
     335              69 : }
     336                 : 
     337                 : const char *
     338                 : string_lookup(struct hashtable *h, const char *string_to_lookup)
     339               0 : {
     340               0 :         struct hashnode *hn = NULL;
     341               0 :         char *r = NULL;
     342                 :         unsigned int hv;  /* dummy - not used in this function */
     343                 : 
     344               0 :         if (NULL != (hn = node_lookup(h, string_to_lookup, &hv)))
     345               0 :                 r = hn->string;
     346                 : 
     347               0 :         return r;
     348                 : }
     349                 : 
     350                 : void *
     351                 : data_lookup(struct hashtable *h, const char *string_to_lookup)
     352            1153 : {
     353            1153 :         struct hashnode *hn = NULL;
     354            1153 :         void *r = NULL;
     355                 :         unsigned int hv;  /* dummy - not used in this function */
     356                 : 
     357            1153 :         if (NULL != (hn = node_lookup(h, string_to_lookup, &hv)))
     358            1153 :                 r = hn->data;
     359                 : 
     360            1153 :         return r;
     361                 : }
     362                 : 
     363                 : /* note that 3rd argument must evaluate to non-NULL - returns hash value
     364                 :  * of string_to_lookup to add_data() to allow it to skip doing a hash
     365                 :  */
     366                 : struct hashnode *
     367                 : node_lookup(struct hashtable *h, const char *string_to_lookup, unsigned int *rhv)
     368           12866 : {
     369           12866 :         struct hashnode *r = NULL;
     370           12866 :         unsigned int hashval = hash_djb2(string_to_lookup);
     371           12866 :         int idx = MOD(hashval, h->maxp);
     372                 :         struct hashnode *chain;
     373                 : 
     374           12866 :         *rhv = hashval;
     375                 : 
     376           12866 :         if (idx < h->p)
     377            3669 :                 idx = MOD(hashval, (2*h->maxp));
     378                 : 
     379           12866 :         chain = h->buckets[idx]->next;
     380                 : 
     381           93656 :         while (chain != h->sentinels[idx])
     382                 :         {
     383                 :                 /* checking the equivalence of hash value first prevents
     384                 :                  * expensive byte-by-byte strcmp().  Seems to improve performance. */
     385           75363 :                 if (chain->value == hashval &&
     386                 :                         !strcmp(chain->string, string_to_lookup))
     387                 :                 {
     388            7439 :                         r = chain;
     389            7439 :                         break;
     390                 :                 }
     391                 : 
     392           67924 :                 chain = chain->next;
     393                 :         }
     394                 : 
     395           12866 :         return r;
     396                 : }
     397                 : 
     398                 : unsigned int
     399                 : hash_djb2(const char *str)
     400           12866 : {
     401           12866 :         unsigned long hash = 5381;
     402                 :         int c;
     403                 : 
     404           81696 :         while ((c = *str++))
     405           55964 :                 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
     406                 : 
     407           12866 :         return hash;
     408                 : }
     409                 : 
     410                 : int
     411                 : sparsebit(long i)
     412              71 : {
     413              71 :         int p = 0;
     414                 : 
     415              71 :         if (i == 0) return(-1);     /* no bits set */
     416                 : 
     417              71 :         if ((i & (-i)) != i) return(-1);    /* two or more bits set */
     418                 : 
     419              71 :         if (i & 0xAAAAAAAA) p++;
     420              71 :         if (i & 0xCCCCCCCC) p += 2;
     421              71 :         if (i & 0xF0F0F0F0) p += 4;
     422              71 :         if (i & 0xFF00FF00) p += 8;
     423              71 :         if (i & 0xFFFF0000) p += 16;
     424                 : 
     425              71 :         return p;
     426                 : }
     427                 : 
     428                 : int
     429                 : msbbit(long i)
     430              71 : {
     431                 :         long i2;
     432                 : 
     433              71 :         if (i<0) return(31); /* speed-up assumes a 'long's msb is at pos 31 */
     434                 : 
     435              71 :         while ((i2 = (i & (i-1)))) i = i2;
     436                 : 
     437              71 :         return(sparsebit(i));
     438                 : }

Generated by: LTP GCOV extension version 1.6