From d8251d001b3507ffb80b26f4d17f1daa99a5dc4a Mon Sep 17 00:00:00 2001 From: Hristian Kirtchev Date: Tue, 21 Aug 2018 14:44:41 +0000 Subject: [Ada] Dynamically resizable, load factor-based hash table This patch introduces a dynamically resizable, load factor-based hash table in unit GNAT.Dynamic_HTables. 2018-08-21 Hristian Kirtchev gcc/ada/ * libgnat/g-dynhta.adb, libgnat/g-dynhta.ads: New package Dynamic_HTable. gcc/testsuite/ * gnat.dg/dynhash.adb: New testcase. From-SVN: r263709 --- gcc/ada/libgnat/g-dynhta.adb | 834 ++++++++++++++++++++++++++++++++++++++++++- gcc/ada/libgnat/g-dynhta.ads | 310 ++++++++++++++-- 2 files changed, 1111 insertions(+), 33 deletions(-) (limited to 'gcc/ada/libgnat') diff --git a/gcc/ada/libgnat/g-dynhta.adb b/gcc/ada/libgnat/g-dynhta.adb index a6e2734..b093e79 100644 --- a/gcc/ada/libgnat/g-dynhta.adb +++ b/gcc/ada/libgnat/g-dynhta.adb @@ -38,11 +38,10 @@ package body GNAT.Dynamic_HTables is ------------------- package body Static_HTable is - function Get_Non_Null (T : Instance) return Elmt_Ptr; -- Returns Null_Ptr if Iterator_Started is False or if the Table is - -- empty. Returns Iterator_Ptr if non null, or the next non null - -- element in table if any. + -- empty. Returns Iterator_Ptr if non null, or the next non null element + -- in table if any. --------- -- Get -- @@ -363,7 +362,834 @@ package body GNAT.Dynamic_HTables is begin E.Next := Next; end Set_Next; - end Simple_HTable; + -------------------- + -- Dynamic_HTable -- + -------------------- + + package body Dynamic_HTable is + Minimum_Size : constant Bucket_Range_Type := 32; + -- Minimum size of the buckets + + Safe_Compression_Size : constant Bucket_Range_Type := + Minimum_Size * Compression_Factor; + -- Maximum safe size for hash table compression. Beyond this size, a + -- compression will violate the minimum size constraint on the buckets. + + Safe_Expansion_Size : constant Bucket_Range_Type := + Bucket_Range_Type'Last / Expansion_Factor; + -- Maximum safe size for hash table expansion. Beyond this size, an + -- expansion will overflow the buckets. + + procedure Destroy_Buckets (Bkts : Bucket_Table_Ptr); + pragma Inline (Destroy_Buckets); + -- Destroy all nodes within buckets Bkts + + procedure Detach (Nod : Node_Ptr); + pragma Inline (Detach); + -- Detach node Nod from the bucket it resides in + + procedure Ensure_Circular (Head : Node_Ptr); + pragma Inline (Ensure_Circular); + -- Ensure that dummy head Head is circular with respect to itself + + procedure Ensure_Created (T : Instance); + pragma Inline (Ensure_Created); + -- Verify that hash table T is created. Raise Not_Created if this is not + -- the case. + + procedure Ensure_Unlocked (T : Instance); + pragma Inline (Ensure_Unlocked); + -- Verify that hash table T is unlocked. Raise Table_Locked if this is + -- not the case. + + function Find_Bucket + (Bkts : Bucket_Table_Ptr; + Key : Key_Type) return Node_Ptr; + pragma Inline (Find_Bucket); + -- Find the bucket among buckets Bkts which corresponds to key Key, and + -- return its dummy head. + + function Find_Node (Head : Node_Ptr; Key : Key_Type) return Node_Ptr; + pragma Inline (Find_Node); + -- Traverse a bucket indicated by dummy head Head to determine whether + -- there exists a node with key Key. If such a node exists, return it, + -- otherwise return null. 
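+
+      --  For example, with a bucket table of size 32, a key whose Hash
+      --  value is 77 is housed in bucket 77 mod 32 = 13. Within a bucket,
+      --  the dummy head refers to itself only after Ensure_Circular has
+      --  run, so an empty bucket is recognized because Head.Next is
+      --  either null or Head itself (see Is_Valid below).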
+ + procedure First_Valid_Node + (T : Instance; + Low_Bkt : Bucket_Range_Type; + High_Bkt : Bucket_Range_Type; + Idx : out Bucket_Range_Type; + Nod : out Node_Ptr); + pragma Inline (First_Valid_Node); + -- Find the first valid node in the buckets of hash table T constrained + -- by the range Low_Bkt .. High_Bkt. If such a node exists, return its + -- bucket index in Idx and reference in Nod. If no such node exists, + -- Idx is set to 0 and Nod to null. + + procedure Free is + new Ada.Unchecked_Deallocation (Bucket_Table, Bucket_Table_Ptr); + + procedure Free is + new Ada.Unchecked_Deallocation (Hash_Table, Instance); + + procedure Free is + new Ada.Unchecked_Deallocation (Node, Node_Ptr); + + function Is_Valid (Iter : Iterator) return Boolean; + pragma Inline (Is_Valid); + -- Determine whether iterator Iter refers to a valid key-value pair + + function Is_Valid (Nod : Node_Ptr; Head : Node_Ptr) return Boolean; + pragma Inline (Is_Valid); + -- Determine whether node Nod is non-null and does not refer to dummy + -- head Head, thus making it valid. + + function Load_Factor (T : Instance) return Threshold_Type; + pragma Inline (Load_Factor); + -- Calculate the load factor of hash table T + + procedure Lock (T : Instance); + pragma Inline (Lock); + -- Lock all mutation functionality of hash table T + + procedure Mutate_And_Rehash (T : Instance; Size : Bucket_Range_Type); + pragma Inline (Mutate_And_Rehash); + -- Replace the buckets of hash table T with a new set of buckets of size + -- Size. Rehash all key-value pairs from the old to the new buckets. + + procedure Prepend (Nod : Node_Ptr; Head : Node_Ptr); + pragma Inline (Prepend); + -- Insert node Nod immediately after dummy head Head + + procedure Unlock (T : Instance); + pragma Inline (Unlock); + -- Unlock all mutation functionality of hash table T + + ------------ + -- Create -- + ------------ + + function Create (Initial_Size : Bucket_Range_Type) return Instance is + Size : constant Bucket_Range_Type := + Bucket_Range_Type'Max (Initial_Size, Minimum_Size); + -- Ensure that the buckets meet a minimum size + + T : constant Instance := new Hash_Table; + + begin + T.Buckets := new Bucket_Table (0 .. Size - 1); + T.Initial_Size := Size; + + return T; + end Create; + + ------------ + -- Delete -- + ------------ + + procedure Delete (T : Instance; Key : Key_Type) is + procedure Compress; + pragma Inline (Compress); + -- Determine whether hash table T requires compression, and if so, + -- half its size. + + -------------- + -- Compress -- + -------------- + + procedure Compress is + pragma Assert (T /= null); + pragma Assert (T.Buckets /= null); + + Old_Size : constant Bucket_Range_Type := T.Buckets'Length; + + begin + -- The ratio of pairs to buckets is under the desited threshold. + -- Compress the hash table only when there is still room to do so. + + if Load_Factor (T) < Compression_Threshold + and then Old_Size >= Safe_Compression_Size + then + Mutate_And_Rehash (T, Old_Size / Compression_Factor); + end if; + end Compress; + + -- Local variables + + Head : Node_Ptr; + Nod : Node_Ptr; + + -- Start of processing for Delete + + begin + Ensure_Created (T); + Ensure_Unlocked (T); + + -- Obtain the dummy head of the bucket which should house the + -- key-value pair. 
+ + Head := Find_Bucket (T.Buckets, Key); + + -- Try to find a node in the bucket which matches the key + + Nod := Find_Node (Head, Key); + + -- If such a node exists, remove it from the bucket and deallocate it + + if Is_Valid (Nod, Head) then + Detach (Nod); + Free (Nod); + + T.Pairs := T.Pairs - 1; + + -- Compress the hash table if the load factor drops below + -- Compression_Threshold. + + Compress; + end if; + end Delete; + + ------------- + -- Destroy -- + ------------- + + procedure Destroy (T : in out Instance) is + begin + Ensure_Created (T); + Ensure_Unlocked (T); + + -- Destroy all nodes in all buckets + + Destroy_Buckets (T.Buckets); + Free (T.Buckets); + Free (T); + end Destroy; + + --------------------- + -- Destroy_Buckets -- + --------------------- + + procedure Destroy_Buckets (Bkts : Bucket_Table_Ptr) is + procedure Destroy_Bucket (Head : Node_Ptr); + pragma Inline (Destroy_Bucket); + -- Destroy all nodes in a bucket with dummy head Head + + -------------------- + -- Destroy_Bucket -- + -------------------- + + procedure Destroy_Bucket (Head : Node_Ptr) is + Nod : Node_Ptr; + + begin + -- Destroy all valid nodes which follow the dummy head + + while Is_Valid (Head.Next, Head) loop + Nod := Head.Next; + + Detach (Nod); + Free (Nod); + end loop; + end Destroy_Bucket; + + -- Start of processing for Destroy_Buckets + + begin + pragma Assert (Bkts /= null); + + for Scan_Idx in Bkts'Range loop + Destroy_Bucket (Bkts (Scan_Idx)'Access); + end loop; + end Destroy_Buckets; + + ------------ + -- Detach -- + ------------ + + procedure Detach (Nod : Node_Ptr) is + pragma Assert (Nod /= null); + + Next : constant Node_Ptr := Nod.Next; + Prev : constant Node_Ptr := Nod.Prev; + + begin + pragma Assert (Next /= null); + pragma Assert (Prev /= null); + + Prev.Next := Next; + Next.Prev := Prev; + + Nod.Next := null; + Nod.Prev := null; + end Detach; + + --------------------- + -- Ensure_Circular -- + --------------------- + + procedure Ensure_Circular (Head : Node_Ptr) is + pragma Assert (Head /= null); + + begin + if Head.Next = null and then Head.Prev = null then + Head.Next := Head; + Head.Prev := Head; + end if; + end Ensure_Circular; + + -------------------- + -- Ensure_Created -- + -------------------- + + procedure Ensure_Created (T : Instance) is + begin + if T = null then + raise Not_Created; + end if; + end Ensure_Created; + + --------------------- + -- Ensure_Unlocked -- + --------------------- + + procedure Ensure_Unlocked (T : Instance) is + begin + pragma Assert (T /= null); + + -- The hash table has at least one outstanding iterator + + if T.Locked > 0 then + raise Table_Locked; + end if; + end Ensure_Unlocked; + + ----------------- + -- Find_Bucket -- + ----------------- + + function Find_Bucket + (Bkts : Bucket_Table_Ptr; + Key : Key_Type) return Node_Ptr + is + pragma Assert (Bkts /= null); + + Idx : constant Bucket_Range_Type := Hash (Key) mod Bkts'Length; + + begin + return Bkts (Idx)'Access; + end Find_Bucket; + + --------------- + -- Find_Node -- + --------------- + + function Find_Node (Head : Node_Ptr; Key : Key_Type) return Node_Ptr is + pragma Assert (Head /= null); + + Nod : Node_Ptr; + + begin + -- Traverse the nodes of the bucket, looking for a key-value pair + -- with the same key. 
+ + Nod := Head.Next; + while Is_Valid (Nod, Head) loop + if Equivalent_Keys (Nod.Key, Key) then + return Nod; + end if; + + Nod := Nod.Next; + end loop; + + return null; + end Find_Node; + + ---------------------- + -- First_Valid_Node -- + ---------------------- + + procedure First_Valid_Node + (T : Instance; + Low_Bkt : Bucket_Range_Type; + High_Bkt : Bucket_Range_Type; + Idx : out Bucket_Range_Type; + Nod : out Node_Ptr) + is + Head : Node_Ptr; + + begin + pragma Assert (T /= null); + pragma Assert (T.Buckets /= null); + + -- Assume that no valid node exists + + Idx := 0; + Nod := null; + + -- Examine the buckets of the hash table within the requested range, + -- looking for the first valid node. + + for Scan_Idx in Low_Bkt .. High_Bkt loop + Head := T.Buckets (Scan_Idx)'Access; + + -- The bucket contains at least one valid node, return the first + -- such node. + + if Is_Valid (Head.Next, Head) then + Idx := Scan_Idx; + Nod := Head.Next; + return; + end if; + end loop; + end First_Valid_Node; + + --------- + -- Get -- + --------- + + function Get (T : Instance; Key : Key_Type) return Value_Type is + Head : Node_Ptr; + Nod : Node_Ptr; + + begin + Ensure_Created (T); + + -- Obtain the dummy head of the bucket which should house the + -- key-value pair. + + Head := Find_Bucket (T.Buckets, Key); + + -- Try to find a node in the bucket which matches the key + + Nod := Find_Node (Head, Key); + + -- If such a node exists, return the value of the key-value pair + + if Is_Valid (Nod, Head) then + return Nod.Value; + end if; + + return No_Value; + end Get; + + -------------- + -- Has_Next -- + -------------- + + function Has_Next (Iter : Iterator) return Boolean is + Is_OK : constant Boolean := Is_Valid (Iter); + T : constant Instance := Iter.Table; + + begin + pragma Assert (T /= null); + + -- The iterator is no longer valid which indicates that it has been + -- exhausted. Unlock all mutation functionality of the hash table + -- because the iterator cannot be advanced any further. + + if not Is_OK then + Unlock (T); + end if; + + return Is_OK; + end Has_Next; + + -------------- + -- Is_Valid -- + -------------- + + function Is_Valid (Iter : Iterator) return Boolean is + begin + -- The invariant of Iterate and Next ensures that the iterator always + -- refers to a valid node if there exists one. + + return Iter.Nod /= null; + end Is_Valid; + + -------------- + -- Is_Valid -- + -------------- + + function Is_Valid (Nod : Node_Ptr; Head : Node_Ptr) return Boolean is + begin + -- A node is valid if it is non-null, and does not refer to the dummy + -- head of some bucket. + + return Nod /= null and then Nod /= Head; + end Is_Valid; + + ------------- + -- Iterate -- + ------------- + + function Iterate (T : Instance) return Iterator is + Iter : Iterator; + + begin + Ensure_Created (T); + pragma Assert (T.Buckets /= null); + + -- Initialize the iterator to reference the first valid node in + -- the full range of hash table buckets. If no such node exists, + -- the iterator is left in a state which does not allow it to + -- advance. + + First_Valid_Node + (T => T, + Low_Bkt => T.Buckets'First, + High_Bkt => T.Buckets'Last, + Idx => Iter.Idx, + Nod => Iter.Nod); + + -- Associate the iterator with the hash table to allow for future + -- mutation functionality unlocking. + + Iter.Table := T; + + -- Lock all mutation functionality of the hash table while it is + -- being iterated on. 
+ + Lock (T); + + return Iter; + end Iterate; + + ----------------- + -- Load_Factor -- + ----------------- + + function Load_Factor (T : Instance) return Threshold_Type is + pragma Assert (T /= null); + pragma Assert (T.Buckets /= null); + + begin + -- The load factor is the ratio of key-value pairs to buckets + + return Threshold_Type (T.Pairs) / Threshold_Type (T.Buckets'Length); + end Load_Factor; + + ---------- + -- Lock -- + ---------- + + procedure Lock (T : Instance) is + begin + -- The hash table may be locked multiple times if multiple iterators + -- are operating over it. + + T.Locked := T.Locked + 1; + end Lock; + + ----------------------- + -- Mutate_And_Rehash -- + ----------------------- + + procedure Mutate_And_Rehash (T : Instance; Size : Bucket_Range_Type) is + procedure Rehash (From : Bucket_Table_Ptr; To : Bucket_Table_Ptr); + pragma Inline (Rehash); + -- Remove all nodes from buckets From and rehash them into buckets To + + procedure Rehash_Bucket (Head : Node_Ptr; To : Bucket_Table_Ptr); + pragma Inline (Rehash_Bucket); + -- Detach all nodes starting from dummy head Head and rehash them + -- into To. + + procedure Rehash_Node (Nod : Node_Ptr; To : Bucket_Table_Ptr); + pragma Inline (Rehash_Node); + -- Rehash node Nod into To + + ------------ + -- Rehash -- + ------------ + + procedure Rehash (From : Bucket_Table_Ptr; To : Bucket_Table_Ptr) is + begin + pragma Assert (From /= null); + pragma Assert (To /= null); + + for Scan_Idx in From'Range loop + Rehash_Bucket (From (Scan_Idx)'Access, To); + end loop; + end Rehash; + + ------------------- + -- Rehash_Bucket -- + ------------------- + + procedure Rehash_Bucket (Head : Node_Ptr; To : Bucket_Table_Ptr) is + pragma Assert (Head /= null); + + Nod : Node_Ptr; + + begin + -- Detach all nodes which follow the dummy head + + while Is_Valid (Head.Next, Head) loop + Nod := Head.Next; + + Detach (Nod); + Rehash_Node (Nod, To); + end loop; + end Rehash_Bucket; + + ----------------- + -- Rehash_Node -- + ----------------- + + procedure Rehash_Node (Nod : Node_Ptr; To : Bucket_Table_Ptr) is + pragma Assert (Nod /= null); + + Head : Node_Ptr; + + begin + -- Obtain the dummy head of the bucket which should house the + -- key-value pair. + + Head := Find_Bucket (To, Nod.Key); + + -- Ensure that the dummy head of an empty bucket is circular with + -- respect to itself. + + Ensure_Circular (Head); + + -- Prepend the node to the bucket + + Prepend (Nod, Head); + end Rehash_Node; + + -- Local declarations + + Old_Bkts : Bucket_Table_Ptr; + + -- Start of processing for Mutate_And_Rehash + + begin + pragma Assert (T /= null); + + Old_Bkts := T.Buckets; + T.Buckets := new Bucket_Table (0 .. Size - 1); + + -- Transfer and rehash all key-value pairs from the old buckets to + -- the new buckets. + + Rehash (From => Old_Bkts, To => T.Buckets); + Free (Old_Bkts); + end Mutate_And_Rehash; + + ---------- + -- Next -- + ---------- + + procedure Next (Iter : in out Iterator; Key : out Key_Type) is + Is_OK : constant Boolean := Is_Valid (Iter); + Saved : constant Node_Ptr := Iter.Nod; + T : constant Instance := Iter.Table; + Head : Node_Ptr; + + begin + pragma Assert (T /= null); + pragma Assert (T.Buckets /= null); + + -- The iterator is no longer valid which indicates that it has been + -- exhausted. Unlock all mutation functionality of the hash table as + -- the iterator cannot be advanced any further. 
+ + if not Is_OK then + Unlock (T); + raise Iterator_Exhausted; + end if; + + -- Advance to the next node along the same bucket + + Iter.Nod := Iter.Nod.Next; + Head := T.Buckets (Iter.Idx)'Access; + + -- If the new node is no longer valid, then this indicates that the + -- current bucket has been exhausted. Advance to the next valid node + -- within the remaining range of buckets. If no such node exists, the + -- iterator is left in a state which does not allow it to advance. + + if not Is_Valid (Iter.Nod, Head) then + First_Valid_Node + (T => T, + Low_Bkt => Iter.Idx + 1, + High_Bkt => T.Buckets'Last, + Idx => Iter.Idx, + Nod => Iter.Nod); + end if; + + Key := Saved.Key; + end Next; + + ------------- + -- Prepend -- + ------------- + + procedure Prepend (Nod : Node_Ptr; Head : Node_Ptr) is + pragma Assert (Nod /= null); + pragma Assert (Head /= null); + + Next : constant Node_Ptr := Head.Next; + + begin + Head.Next := Nod; + Next.Prev := Nod; + + Nod.Next := Next; + Nod.Prev := Head; + end Prepend; + + --------- + -- Put -- + --------- + + procedure Put + (T : Instance; + Key : Key_Type; + Value : Value_Type) + is + procedure Expand; + pragma Inline (Expand); + -- Determine whether hash table T requires expansion, and if so, + -- double its size. + + procedure Prepend_Or_Replace (Head : Node_Ptr); + pragma Inline (Prepend_Or_Replace); + -- Update the value of a node within a bucket with dummy head Head + -- whose key is Key to Value. If there is no such node, prepend a new + -- key-value pair to the bucket. + + ------------ + -- Expand -- + ------------ + + procedure Expand is + pragma Assert (T /= null); + pragma Assert (T.Buckets /= null); + + Old_Size : constant Bucket_Range_Type := T.Buckets'Length; + + begin + -- The ratio of pairs to buckets is over the desited threshold. + -- Expand the hash table only when there is still room to do so. + + if Load_Factor (T) > Expansion_Threshold + and then Old_Size <= Safe_Expansion_Size + then + Mutate_And_Rehash (T, Old_Size * Expansion_Factor); + end if; + end Expand; + + ------------------------ + -- Prepend_Or_Replace -- + ------------------------ + + procedure Prepend_Or_Replace (Head : Node_Ptr) is + pragma Assert (Head /= null); + + Nod : Node_Ptr; + + begin + -- If the bucket containst at least one valid node, then there is + -- a chance that a node with the same key as Key exists. If this + -- is the case, the value of that node must be updated. + + Nod := Head.Next; + while Is_Valid (Nod, Head) loop + if Equivalent_Keys (Nod.Key, Key) then + Nod.Value := Value; + return; + end if; + + Nod := Nod.Next; + end loop; + + -- At this point the bucket is either empty, or none of the nodes + -- match key Key. Prepend a new key-value pair. + + Nod := new Node'(Key, Value, null, null); + + Prepend (Nod, Head); + end Prepend_Or_Replace; + + -- Local variables + + Head : Node_Ptr; + + -- Start of processing for Put + + begin + Ensure_Created (T); + Ensure_Unlocked (T); + + -- Obtain the dummy head of the bucket which should house the + -- key-value pair. + + Head := Find_Bucket (T.Buckets, Key); + + -- Ensure that the dummy head of an empty bucket is circular with + -- respect to itself. + + Ensure_Circular (Head); + + -- In case the bucket already contains a node with the same key, + -- replace its value, otherwise prepend a new key-value pair. + + Prepend_Or_Replace (Head); + + T.Pairs := T.Pairs + 1; + + -- Expand the hash table if the ratio of pairs to buckets goes over + -- Expansion_Threshold. 
+ + Expand; + end Put; + + ----------- + -- Reset -- + ----------- + + procedure Reset (T : Instance) is + begin + Ensure_Created (T); + Ensure_Unlocked (T); + + -- Destroy all nodes in all buckets + + Destroy_Buckets (T.Buckets); + Free (T.Buckets); + + -- Recreate the buckets using the original size from creation time + + T.Buckets := new Bucket_Table (0 .. T.Initial_Size - 1); + T.Pairs := 0; + end Reset; + + ---------- + -- Size -- + ---------- + + function Size (T : Instance) return Pair_Count_Type is + begin + Ensure_Created (T); + + return T.Pairs; + end Size; + + ------------ + -- Unlock -- + ------------ + + procedure Unlock (T : Instance) is + begin + -- The hash table may be locked multiple times if multiple iterators + -- are operating over it. + + T.Locked := T.Locked - 1; + end Unlock; + end Dynamic_HTable; + end GNAT.Dynamic_HTables; diff --git a/gcc/ada/libgnat/g-dynhta.ads b/gcc/ada/libgnat/g-dynhta.ads index ea331c0..41574fd 100644 --- a/gcc/ada/libgnat/g-dynhta.ads +++ b/gcc/ada/libgnat/g-dynhta.ads @@ -31,13 +31,11 @@ -- Hash table searching routines --- This package contains three separate packages. The Simple_HTable package +-- This package contains two separate packages. The Simple_HTable package -- provides a very simple abstraction that associates one element to one key -- value and takes care of all allocations automatically using the heap. The -- Static_HTable package provides a more complex interface that allows full --- control over allocation. The Load_Factor_HTable package provides a more --- complex abstraction where collisions are resolved by chaining, and the --- table grows by a percentage after the load factor has been exceeded. +-- control over allocation. -- This package provides a facility similar to that of GNAT.HTable, except -- that this package declares types that can be used to define dynamic @@ -48,6 +46,8 @@ -- GNAT.HTable to keep as much coherency as possible between these two -- related units. +pragma Compiler_Unit_Warning; + package GNAT.Dynamic_HTables is ------------------- @@ -85,40 +85,38 @@ package GNAT.Dynamic_HTables is Null_Ptr : Elmt_Ptr; -- The null value of the Elmt_Ptr type + with function Next (E : Elmt_Ptr) return Elmt_Ptr; with procedure Set_Next (E : Elmt_Ptr; Next : Elmt_Ptr); - with function Next (E : Elmt_Ptr) return Elmt_Ptr; -- The type must provide an internal link for the sake of the -- staticness of the HTable. type Key is limited private; with function Get_Key (E : Elmt_Ptr) return Key; - with function Hash (F : Key) return Header_Num; - with function Equal (F1, F2 : Key) return Boolean; + with function Hash (F : Key) return Header_Num; + with function Equal (F1 : Key; F2 : Key) return Boolean; package Static_HTable is - type Instance is private; Nil : constant Instance; procedure Reset (T : in out Instance); - -- Resets the hash table by releasing all memory associated with - -- it. The hash table can safely be reused after this call. For the - -- most common case where Elmt_Ptr is an access type, and Null_Ptr is - -- null, this is only needed if the same table is reused in a new - -- context. If Elmt_Ptr is other than an access type, or Null_Ptr is - -- other than null, then Reset must be called before the first use of - -- the hash table. + -- Resets the hash table by releasing all memory associated with it. The + -- hash table can safely be reused after this call. 
For the most common + -- case where Elmt_Ptr is an access type, and Null_Ptr is null, this is + -- only needed if the same table is reused in a new context. If Elmt_Ptr + -- is other than an access type, or Null_Ptr is other than null, then + -- Reset must be called before the first use of the hash table. procedure Set (T : in out Instance; E : Elmt_Ptr); -- Insert the element pointer in the HTable function Get (T : Instance; K : Key) return Elmt_Ptr; - -- Returns the latest inserted element pointer with the given Key - -- or null if none. + -- Returns the latest inserted element pointer with the given Key or + -- null if none. procedure Remove (T : Instance; K : Key); - -- Removes the latest inserted element pointer associated with the - -- given key if any, does nothing if none. + -- Removes the latest inserted element pointer associated with the given + -- key if any, does nothing if none. function Get_First (T : Instance) return Elmt_Ptr; -- Returns Null_Ptr if the Htable is empty, otherwise returns one @@ -126,11 +124,11 @@ package GNAT.Dynamic_HTables is -- function will return the same element. function Get_Next (T : Instance) return Elmt_Ptr; - -- Returns an unspecified element that has not been returned by the - -- same function since the last call to Get_First or Null_Ptr if - -- there is no such element or Get_First has never been called. If - -- there is no call to 'Set' in between Get_Next calls, all the - -- elements of the Htable will be traversed. + -- Returns an unspecified element that has not been returned by the same + -- function since the last call to Get_First or Null_Ptr if there is no + -- such element or Get_First has never been called. If there is no call + -- to 'Set' in between Get_Next calls, all the elements of the Htable + -- will be traversed. private type Table_Type is array (Header_Num) of Elmt_Ptr; @@ -169,11 +167,10 @@ package GNAT.Dynamic_HTables is -- a given key type Key is private; - with function Hash (F : Key) return Header_Num; - with function Equal (F1, F2 : Key) return Boolean; + with function Hash (F : Key) return Header_Num; + with function Equal (F1 : Key; F2 : Key) return Boolean; package Simple_HTable is - type Instance is private; Nil : constant Instance; @@ -233,7 +230,6 @@ package GNAT.Dynamic_HTables is -- same restrictions apply as Get_Next. private - type Element_Wrapper; type Elmt_Ptr is access all Element_Wrapper; type Element_Wrapper is record @@ -260,7 +256,263 @@ package GNAT.Dynamic_HTables is type Instance is new Tab.Instance; Nil : constant Instance := Instance (Tab.Nil); - end Simple_HTable; + -------------------- + -- Dynamic_HTable -- + -------------------- + + -- The following package offers a hash table abstraction with the following + -- characteristics: + -- + -- * Dynamic resizing based on load factor. + -- * Creation of multiple instances, of different sizes. + -- * Iterable keys. + -- + -- This type of hash table is best used in scenarios where the size of the + -- key set is not known. The dynamic resizing aspect allows for performance + -- to remain within reasonable bounds as the size of the key set grows. + -- + -- The following use pattern must be employed when operating this table: + -- + -- Table : Instance := Create (); + -- + -- + -- + -- Destroy (Table); + -- + -- The destruction of the table reclaims all storage occupied by it. + + -- The following type denotes the underlying range of the hash table + -- buckets. 
+ + type Bucket_Range_Type is mod 2 ** 32; + + -- The following type denotes the multiplicative factor used in expansion + -- and compression of the hash table. + + subtype Factor_Type is Bucket_Range_Type range 2 .. 100; + + -- The following type denotes the number of key-value pairs stored in the + -- hash table. + + type Pair_Count_Type is range 0 .. 2 ** 31 - 1; + + -- The following type denotes the threshold range used in expansion and + -- compression of the hash table. + + subtype Threshold_Type is Long_Float range 0.0 .. Long_Float'Last; + + generic + type Key_Type is private; + type Value_Type is private; + -- The types of the key-value pairs stored in the hash table + + No_Value : Value_Type; + -- An indicator for a non-existent value + + Expansion_Threshold : Threshold_Type; + Expansion_Factor : Factor_Type; + -- Once the load factor goes over Expansion_Threshold, the size of the + -- buckets is increased using the formula + -- + -- New_Size = Old_Size * Expansion_Factor + -- + -- An Expansion_Threshold of 1.5 and Expansion_Factor of 2 indicate that + -- the size of the buckets will be doubled once the load factor exceeds + -- 1.5. + + Compression_Threshold : Threshold_Type; + Compression_Factor : Factor_Type; + -- Once the load factor drops below Compression_Threshold, the size of + -- the buckets is decreased using the formula + -- + -- New_Size = Old_Size / Compression_Factor + -- + -- A Compression_Threshold of 0.5 and Compression_Factor of 2 indicate + -- that the size of the buckets will be halved once the load factor + -- drops below 0.5. + + with function Equivalent_Keys + (Left : Key_Type; + Right : Key_Type) return Boolean; + -- Determine whether two keys are equivalent + + with function Hash (Key : Key_Type) return Bucket_Range_Type; + -- Map an arbitrary key into the range of buckets + + package Dynamic_HTable is + + ---------------------- + -- Table operations -- + ---------------------- + + -- The following type denotes a hash table handle. Each instance must be + -- created using routine Create. + + type Instance is private; + Nil : constant Instance; + + Not_Created : exception; + -- This exception is raised when the hash table has not been created by + -- routine Create, and an attempt is made to read or mutate its state. + + Table_Locked : exception; + -- This exception is raised when the hash table is being iterated on, + -- and an attempt is made to mutate its state. + + function Create (Initial_Size : Bucket_Range_Type) return Instance; + -- Create a new table with bucket capacity Initial_Size. This routine + -- must be called at the start of a hash table's lifetime. + + procedure Delete (T : Instance; Key : Key_Type); + -- Delete the value which corresponds to key Key from hash table T. The + -- routine has no effect if the value is not present in the hash table. + -- This action will raise Table_Locked if the hash table has outstanding + -- iterators. If the load factor drops below Compression_Threshold, the + -- size of the buckets is decreased by Copression_Factor. + + procedure Destroy (T : in out Instance); + -- Destroy the contents of hash table T, rendering it unusable. This + -- routine must be called at the end of a hash table's lifetime. This + -- action will raise Table_Locked if the hash table has outstanding + -- iterators. + + function Get (T : Instance; Key : Key_Type) return Value_Type; + -- Obtain the value which corresponds to key Key from hash table T. If + -- the value does not exist, return No_Value. 
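+
+      --  As a worked example of the resizing behavior: with an
+      --  Expansion_Threshold of 1.5 and an Expansion_Factor of 2, a table
+      --  with 32 buckets is expanded to 64 buckets by Put once it holds
+      --  more than 48 key-value pairs (48 / 32 = 1.5). With a
+      --  Compression_Threshold of 0.5 and a Compression_Factor of 2, a
+      --  table with 64 buckets is compressed back to 32 buckets by Delete
+      --  once the pair count drops below 32 (32 / 64 = 0.5). The buckets
+      --  are never compressed below their minimum size of 32.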
+ + procedure Put + (T : Instance; + Key : Key_Type; + Value : Value_Type); + -- Associate value Value with key Key in hash table T. If the table + -- already contains a mapping of the same key to a previous value, the + -- previous value is overwritten. This action will raise Table_Locked + -- if the hash table has outstanding iterators. If the load factor goes + -- over Expansion_Threshold, the size of the buckets is increased by + -- Expansion_Factor. + + procedure Reset (T : Instance); + -- Destroy the contents of hash table T, and reset it to its initial + -- created state. This action will raise Table_Locked if the hash table + -- has outstanding iterators. + + function Size (T : Instance) return Pair_Count_Type; + -- Obtain the number of key-value pairs in hash table T + + ------------------------- + -- Iterator operations -- + ------------------------- + + -- The following type represents a key iterator. An iterator locks + -- all mutation operations, and unlocks them once it is exhausted. + -- The iterator must be used with the following pattern: + -- + -- Iter := Iterate (My_Table); + -- while Has_Next (Iter) loop + -- Key := Next (Iter); + -- . . . + -- end loop; + -- + -- It is possible to advance the iterator by using Next only, however + -- this risks raising Iterator_Exhausted. + + type Iterator is private; + + Iterator_Exhausted : exception; + -- This exception is raised when an iterator is exhausted and further + -- attempts to advance it are made by calling routine Next. + + function Iterate (T : Instance) return Iterator; + -- Obtain an iterator over the keys of hash table T. This action locks + -- all mutation functionality of the associated hash table. + + function Has_Next (Iter : Iterator) return Boolean; + -- Determine whether iterator Iter has more keys to examine. If the + -- iterator has been exhausted, restore all mutation functionality of + -- the associated hash table. + + procedure Next + (Iter : in out Iterator; + Key : out Key_Type); + -- Return the current key referenced by iterator Iter and advance to + -- the next available key. If the iterator has been exhausted and + -- further attempts are made to advance it, this routine restores + -- mutation functionality of the associated hash table, and then + -- raises Iterator_Exhausted. + + private + -- The following type represents a doubly linked list node used to + -- store a key-value pair. There are several reasons to use a doubly + -- linked list: + -- + -- * Most read and write operations utilize the same primitve + -- routines to locate, create, and delete a node, allowing for + -- greater degree of code sharing. + -- + -- * Special cases are eliminated by maintaining a circular node + -- list with a dummy head (see type Bucket_Table). + -- + -- A node is said to be "valid" if it is non-null, and does not refer to + -- the dummy head of some bucket. + + type Node; + type Node_Ptr is access all Node; + type Node is record + Key : Key_Type; + Value : Value_Type := No_Value; + -- Key-value pair stored in a bucket + + Prev : Node_Ptr := null; + Next : Node_Ptr := null; + end record; + + -- The following type represents a bucket table. Each bucket contains a + -- circular doubly linked list of nodes with a dummy head. Initially, + -- the head does not refer to itself. This is intentional because it + -- improves the performance of creation, compression, and expansion by + -- avoiding a separate pass to link a head to itself. Several routines + -- ensure that the head is properly formed. 
+ + type Bucket_Table is array (Bucket_Range_Type range <>) of aliased Node; + type Bucket_Table_Ptr is access Bucket_Table; + + -- The following type represents a hash table + + type Hash_Table is record + Buckets : Bucket_Table_Ptr := null; + -- Reference to the compressing / expanding buckets + + Initial_Size : Bucket_Range_Type := 0; + -- The initial size of the buckets as specified at creation time + + Locked : Natural := 0; + -- Number of outstanding iterators + + Pairs : Pair_Count_Type := 0; + -- Number of key-value pairs in the buckets + end record; + + type Instance is access Hash_Table; + Nil : constant Instance := null; + + -- The following type represents a key iterator + + type Iterator is record + Idx : Bucket_Range_Type := 0; + -- Index of the current bucket being examined. This index is always + -- kept within the range of the buckets. + + Nod : Node_Ptr := null; + -- Reference to the current node being examined within the current + -- bucket. The invariant of the iterator requires that this field + -- always point to a valid node. A value of null indicates that the + -- iterator is exhausted. + + Table : Instance := null; + -- Reference to the associated hash table + end record; + end Dynamic_HTable; + end GNAT.Dynamic_HTables; -- cgit v1.1
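
As a usage illustration, here is a minimal instantiation sketch of the new
Dynamic_HTable package. It is only a sketch: the unit name Demo, the
instantiation Integer_Maps, the key and value types, and the chosen threshold
values are illustrative assumptions, not taken from the patch or from the
gnat.dg/dynhash.adb testcase. It follows the creation, mutation and iteration
patterns documented in the spec above.

   with GNAT.Dynamic_HTables; use GNAT.Dynamic_HTables;

   procedure Demo is
      function Hash (Key : Natural) return Bucket_Range_Type is
      begin
         --  Natural always fits within the mod 2 ** 32 bucket range
         return Bucket_Range_Type (Key);
      end Hash;

      package Integer_Maps is new Dynamic_HTable
        (Key_Type              => Natural,
         Value_Type            => Integer,
         No_Value              => Integer'First,
         Expansion_Threshold   => 1.5,
         Expansion_Factor      => 2,
         Compression_Threshold => 0.5,
         Compression_Factor    => 2,
         Equivalent_Keys       => "=",
         Hash                  => Hash);
      use Integer_Maps;

      T    : Instance := Create (Initial_Size => 8);
      --  Create enforces a minimum bucket count of 32, so this yields a
      --  table with 32 buckets.

      Iter : Iterator;
      Key  : Natural;

   begin
      Put (T, 1, 100);
      Put (T, 2, 200);
      Put (T, 1, 111);
      --  The second Put for key 1 overwrites the previous value

      pragma Assert (Get (T, 1) = 111);
      pragma Assert (Get (T, 3) = Integer'First);
      --  A key with no mapping yields No_Value

      Delete (T, 2);

      --  Iterate locks the table against mutation; the lock is released
      --  once Has_Next reports that the iterator is exhausted.

      Iter := Iterate (T);
      while Has_Next (Iter) loop
         Next (Iter, Key);
         pragma Assert (Key = 1);
         --  Only key 1 remains after the deletion above
      end loop;

      Destroy (T);
   end Demo;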