diff options
Diffstat (limited to 'gcc/ada/s-regexp.adb')
-rw-r--r-- | gcc/ada/s-regexp.adb | 1729 |
1 files changed, 0 insertions, 1729 deletions
diff --git a/gcc/ada/s-regexp.adb b/gcc/ada/s-regexp.adb deleted file mode 100644 index 58a63a2..0000000 --- a/gcc/ada/s-regexp.adb +++ /dev/null @@ -1,1729 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT COMPILER COMPONENTS -- --- -- --- S Y S T E M . R E G E X P -- --- -- --- B o d y -- --- -- --- Copyright (C) 1999-2017, AdaCore -- --- -- --- GNAT is free software; you can redistribute it and/or modify it under -- --- terms of the GNU General Public License as published by the Free Soft- -- --- ware Foundation; either version 3, or (at your option) any later ver- -- --- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- --- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- --- or FITNESS FOR A PARTICULAR PURPOSE. -- --- -- --- As a special exception under Section 7 of GPL version 3, you are granted -- --- additional permissions described in the GCC Runtime Library Exception, -- --- version 3.1, as published by the Free Software Foundation. -- --- -- --- You should have received a copy of the GNU General Public License and -- --- a copy of the GCC Runtime Library Exception along with this program; -- --- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- --- <http://www.gnu.org/licenses/>. -- --- -- --- GNAT was originally developed by the GNAT team at New York University. -- --- Extensive contributions were provided by Ada Core Technologies Inc. -- --- -- ------------------------------------------------------------------------------- - -with Ada.Unchecked_Deallocation; -with System.Case_Util; - -package body System.Regexp is - - Initial_Max_States_In_Primary_Table : constant := 100; - -- Initial size for the number of states in the indefinite state - -- machine. The number of states will be increased as needed. - -- - -- This is also used as the maximal number of meta states (groups of - -- states) in the secondary table. - - Open_Paren : constant Character := '('; - Close_Paren : constant Character := ')'; - Open_Bracket : constant Character := '['; - Close_Bracket : constant Character := ']'; - - type State_Index is new Natural; - type Column_Index is new Natural; - - type Regexp_Array is array - (State_Index range <>, Column_Index range <>) of State_Index; - -- First index is for the state number. Second index is for the character - -- type. Contents is the new State. - - type Regexp_Array_Access is access Regexp_Array; - -- Use this type through the functions Set below, so that it can grow - -- dynamically depending on the needs. - - type Mapping is array (Character'Range) of Column_Index; - -- Mapping between characters and column in the Regexp_Array - - type Boolean_Array is array (State_Index range <>) of Boolean; - - type Regexp_Value - (Alphabet_Size : Column_Index; - Num_States : State_Index) is - record - Map : Mapping; - Case_Sensitive : Boolean; - States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size); - Is_Final : Boolean_Array (1 .. Num_States); - end record; - -- Deterministic finite-state machine - - ----------------------- - -- Local Subprograms -- - ----------------------- - - procedure Set - (Table : in out Regexp_Array_Access; - State : State_Index; - Column : Column_Index; - Value : State_Index); - -- Sets a value in the table. If the table is too small, reallocate it - -- dynamically so that (State, Column) is a valid index in it. - - function Get - (Table : Regexp_Array_Access; - State : State_Index; - Column : Column_Index) return State_Index; - -- Returns the value in the table at (State, Column). If this index does - -- not exist in the table, returns zero. - - procedure Free is new Ada.Unchecked_Deallocation - (Regexp_Array, Regexp_Array_Access); - - ------------ - -- Adjust -- - ------------ - - procedure Adjust (R : in out Regexp) is - Tmp : Regexp_Access; - begin - if R.R /= null then - Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size, - Num_States => R.R.Num_States); - Tmp.all := R.R.all; - R.R := Tmp; - end if; - end Adjust; - - ------------- - -- Compile -- - ------------- - - function Compile - (Pattern : String; - Glob : Boolean := False; - Case_Sensitive : Boolean := True) return Regexp - is - S : String := Pattern; - -- The pattern which is really compiled (when the pattern is case - -- insensitive, we convert this string to lower-cases - - Map : Mapping := (others => 0); - -- Mapping between characters and columns in the tables - - Alphabet_Size : Column_Index := 0; - -- Number of significant characters in the regular expression. - -- This total does not include special operators, such as *, (, ... - - procedure Check_Well_Formed_Pattern; - -- Check that the pattern to compile is well-formed, so that subsequent - -- code can rely on this without performing each time the checks to - -- avoid accessing the pattern outside its bounds. However, not all - -- well-formedness rules are checked. In particular, rules about special - -- characters not being treated as regular characters are not checked. - - procedure Create_Mapping; - -- Creates a mapping between characters in the regexp and columns - -- in the tables representing the regexp. Test that the regexp is - -- well-formed Modifies Alphabet_Size and Map - - procedure Create_Primary_Table - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index); - -- Creates the first version of the regexp (this is a non deterministic - -- finite state machine, which is unadapted for a fast pattern - -- matching algorithm). We use a recursive algorithm to process the - -- parenthesis sub-expressions. - -- - -- Table : at the end of the procedure : Column 0 is for any character - -- ('.') and the last columns are for no character (closure). Num_States - -- is set to the number of states in the table Start_State is the number - -- of the starting state in the regexp End_State is the number of the - -- final state when the regexp matches. - - procedure Create_Primary_Table_Glob - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index); - -- Same function as above, but it deals with the second possible - -- grammar for 'globbing pattern', which is a kind of subset of the - -- whole regular expression grammar. - - function Create_Secondary_Table - (First_Table : Regexp_Array_Access; - Start_State : State_Index; - End_State : State_Index) return Regexp; - -- Creates the definitive table representing the regular expression - -- This is actually a transformation of the primary table First_Table, - -- where every state is grouped with the states in its 'no-character' - -- columns. The transitions between the new states are then recalculated - -- and if necessary some new states are created. - -- - -- Note that the resulting finite-state machine is not optimized in - -- terms of the number of states : it would be more time-consuming to - -- add a third pass to reduce the number of states in the machine, with - -- no speed improvement... - - procedure Raise_Exception (M : String; Index : Integer); - pragma No_Return (Raise_Exception); - -- Raise an exception, indicating an error at character Index in S - - ------------------------------- - -- Check_Well_Formed_Pattern -- - ------------------------------- - - procedure Check_Well_Formed_Pattern is - J : Integer; - - Past_Elmt : Boolean := False; - -- Set to True everywhere an elmt has been parsed, if Glob=False, - -- meaning there can be now an occurrence of '*', '+' and '?'. - - Past_Term : Boolean := False; - -- Set to True everywhere a term has been parsed, if Glob=False, - -- meaning there can be now an occurrence of '|'. - - Parenthesis_Level : Integer := 0; - Curly_Level : Integer := 0; - - Last_Open : Integer := S'First - 1; - -- The last occurrence of an opening parenthesis, if Glob=False, - -- or the last occurrence of an opening curly brace, if Glob=True. - - procedure Raise_Exception_If_No_More_Chars (K : Integer := 0); - -- If no more characters are raised, call Raise_Exception - - -------------------------------------- - -- Raise_Exception_If_No_More_Chars -- - -------------------------------------- - - procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is - begin - if J + K > S'Last then - Raise_Exception ("Ill-formed pattern while parsing", J); - end if; - end Raise_Exception_If_No_More_Chars; - - -- Start of processing for Check_Well_Formed_Pattern - - begin - J := S'First; - while J <= S'Last loop - case S (J) is - when Open_Bracket => - J := J + 1; - Raise_Exception_If_No_More_Chars; - - if not Glob then - if S (J) = '^' then - J := J + 1; - Raise_Exception_If_No_More_Chars; - end if; - end if; - - -- The first character never has a special meaning - - if S (J) = ']' or else S (J) = '-' then - J := J + 1; - Raise_Exception_If_No_More_Chars; - end if; - - -- The set of characters cannot be empty - - if S (J) = ']' then - Raise_Exception - ("Set of characters cannot be empty in regular " - & "expression", J); - end if; - - declare - Possible_Range_Start : Boolean := True; - -- Set True everywhere a range character '-' can occur - - begin - loop - exit when S (J) = Close_Bracket; - - -- The current character should be followed by a - -- closing bracket. - - Raise_Exception_If_No_More_Chars (1); - - if S (J) = '-' - and then S (J + 1) /= Close_Bracket - then - if not Possible_Range_Start then - Raise_Exception - ("No mix of ranges is allowed in " - & "regular expression", J); - end if; - - J := J + 1; - Raise_Exception_If_No_More_Chars; - - -- Range cannot be followed by '-' character, - -- except as last character in the set. - - Possible_Range_Start := False; - - else - Possible_Range_Start := True; - end if; - - if S (J) = '\' then - J := J + 1; - Raise_Exception_If_No_More_Chars; - end if; - - J := J + 1; - end loop; - end; - - -- A closing bracket can end an elmt or term - - Past_Elmt := True; - Past_Term := True; - - when Close_Bracket => - - -- A close bracket must follow a open_bracket, and cannot be - -- found alone on the line. - - Raise_Exception - ("Incorrect character ']' in regular expression", J); - - when '\' => - if J < S'Last then - J := J + 1; - - -- Any character can be an elmt or a term - - Past_Elmt := True; - Past_Term := True; - - else - -- \ not allowed at the end of the regexp - - Raise_Exception - ("Incorrect character '\' in regular expression", J); - end if; - - when Open_Paren => - if not Glob then - Parenthesis_Level := Parenthesis_Level + 1; - Last_Open := J; - - -- An open parenthesis does not end an elmt or term - - Past_Elmt := False; - Past_Term := False; - end if; - - when Close_Paren => - if not Glob then - Parenthesis_Level := Parenthesis_Level - 1; - - if Parenthesis_Level < 0 then - Raise_Exception - ("')' is not associated with '(' in regular " - & "expression", J); - end if; - - if J = Last_Open + 1 then - Raise_Exception - ("Empty parentheses not allowed in regular " - & "expression", J); - end if; - - if not Past_Term then - Raise_Exception - ("Closing parenthesis not allowed here in regular " - & "expression", J); - end if; - - -- A closing parenthesis can end an elmt or term - - Past_Elmt := True; - Past_Term := True; - end if; - - when '{' => - if Glob then - Curly_Level := Curly_Level + 1; - Last_Open := J; - - else - -- Any character can be an elmt or a term - - Past_Elmt := True; - Past_Term := True; - end if; - - -- No need to check for ',' as the code always accepts them - - when '}' => - if Glob then - Curly_Level := Curly_Level - 1; - - if Curly_Level < 0 then - Raise_Exception - ("'}' is not associated with '{' in regular " - & "expression", J); - end if; - - if J = Last_Open + 1 then - Raise_Exception - ("Empty curly braces not allowed in regular " - & "expression", J); - end if; - - else - -- Any character can be an elmt or a term - - Past_Elmt := True; - Past_Term := True; - end if; - - when '*' | '?' | '+' => - if not Glob then - - -- These operators must apply to an elmt sub-expression, - -- and cannot be found if one has not just been parsed. - - if not Past_Elmt then - Raise_Exception - ("'*', '+' and '?' operators must be " - & "applied to an element in regular expression", J); - end if; - - Past_Elmt := False; - Past_Term := True; - end if; - - when '|' => - if not Glob then - - -- This operator must apply to a term sub-expression, - -- and cannot be found if one has not just been parsed. - - if not Past_Term then - Raise_Exception - ("'|' operator must be " - & "applied to a term in regular expression", J); - end if; - - Past_Elmt := False; - Past_Term := False; - end if; - - when others => - if not Glob then - - -- Any character can be an elmt or a term - - Past_Elmt := True; - Past_Term := True; - end if; - end case; - - J := J + 1; - end loop; - - -- A closing parenthesis must follow an open parenthesis - - if Parenthesis_Level /= 0 then - Raise_Exception - ("'(' must always be associated with a ')'", J); - end if; - - -- A closing curly brace must follow an open curly brace - - if Curly_Level /= 0 then - Raise_Exception - ("'{' must always be associated with a '}'", J); - end if; - end Check_Well_Formed_Pattern; - - -------------------- - -- Create_Mapping -- - -------------------- - - procedure Create_Mapping is - - procedure Add_In_Map (C : Character); - -- Add a character in the mapping, if it is not already defined - - ---------------- - -- Add_In_Map -- - ---------------- - - procedure Add_In_Map (C : Character) is - begin - if Map (C) = 0 then - Alphabet_Size := Alphabet_Size + 1; - Map (C) := Alphabet_Size; - end if; - end Add_In_Map; - - J : Integer := S'First; - Parenthesis_Level : Integer := 0; - Curly_Level : Integer := 0; - Last_Open : Integer := S'First - 1; - - -- Start of processing for Create_Mapping - - begin - while J <= S'Last loop - case S (J) is - when Open_Bracket => - J := J + 1; - - if S (J) = '^' then - J := J + 1; - end if; - - if S (J) = ']' or else S (J) = '-' then - J := J + 1; - end if; - - -- The first character never has a special meaning - - loop - if J > S'Last then - Raise_Exception - ("Ran out of characters while parsing ", J); - end if; - - exit when S (J) = Close_Bracket; - - if S (J) = '-' - and then S (J + 1) /= Close_Bracket - then - declare - Start : constant Integer := J - 1; - - begin - J := J + 1; - - if S (J) = '\' then - J := J + 1; - end if; - - for Char in S (Start) .. S (J) loop - Add_In_Map (Char); - end loop; - end; - else - if S (J) = '\' then - J := J + 1; - end if; - - Add_In_Map (S (J)); - end if; - - J := J + 1; - end loop; - - -- A close bracket must follow a open_bracket and cannot be - -- found alone on the line - - when Close_Bracket => - Raise_Exception - ("Incorrect character ']' in regular expression", J); - - when '\' => - if J < S'Last then - J := J + 1; - Add_In_Map (S (J)); - - else - -- Back slash \ not allowed at the end of the regexp - - Raise_Exception - ("Incorrect character '\' in regular expression", J); - end if; - - when Open_Paren => - if not Glob then - Parenthesis_Level := Parenthesis_Level + 1; - Last_Open := J; - else - Add_In_Map (Open_Paren); - end if; - - when Close_Paren => - if not Glob then - Parenthesis_Level := Parenthesis_Level - 1; - - if Parenthesis_Level < 0 then - Raise_Exception - ("')' is not associated with '(' in regular " - & "expression", J); - end if; - - if J = Last_Open + 1 then - Raise_Exception - ("Empty parenthesis not allowed in regular " - & "expression", J); - end if; - - else - Add_In_Map (Close_Paren); - end if; - - when '.' => - if Glob then - Add_In_Map ('.'); - end if; - - when '{' => - if not Glob then - Add_In_Map (S (J)); - else - Curly_Level := Curly_Level + 1; - end if; - - when '}' => - if not Glob then - Add_In_Map (S (J)); - else - Curly_Level := Curly_Level - 1; - end if; - - when '*' | '?' => - if not Glob then - if J = S'First then - Raise_Exception - ("'*', '+', '?' and '|' operators cannot be in " - & "first position in regular expression", J); - end if; - end if; - - when '|' | '+' => - if not Glob then - if J = S'First then - - -- These operators must apply to a sub-expression, - -- and cannot be found at the beginning of the line - - Raise_Exception - ("'*', '+', '?' and '|' operators cannot be in " - & "first position in regular expression", J); - end if; - - else - Add_In_Map (S (J)); - end if; - - when others => - Add_In_Map (S (J)); - end case; - - J := J + 1; - end loop; - - -- A closing parenthesis must follow an open parenthesis - - if Parenthesis_Level /= 0 then - Raise_Exception - ("'(' must always be associated with a ')'", J); - end if; - - if Curly_Level /= 0 then - Raise_Exception - ("'{' must always be associated with a '}'", J); - end if; - end Create_Mapping; - - -------------------------- - -- Create_Primary_Table -- - -------------------------- - - procedure Create_Primary_Table - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index) - is - Empty_Char : constant Column_Index := Alphabet_Size + 1; - - Current_State : State_Index := 0; - -- Index of the last created state - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index); - -- Add a empty-character transition from State to To_State - - procedure Create_Repetition - (Repetition : Character; - Start_Prev : State_Index; - End_Prev : State_Index; - New_Start : out State_Index; - New_End : in out State_Index); - -- Create the table in case we have a '*', '+' or '?'. - -- Start_Prev .. End_Prev should indicate respectively the start and - -- end index of the previous expression, to which '*', '+' or '?' is - -- applied. - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index); - -- Fill the table for the regexp Simple. This is the recursive - -- procedure called to handle () expressions If End_State = 0, then - -- the call to Create_Simple creates an independent regexp, not a - -- concatenation Start_Index .. End_Index is the starting index in - -- the string S. - -- - -- Warning: it may look like we are creating too many empty-string - -- transitions, but they are needed to get the correct regexp. - -- The table is filled as follow ( s means start-state, e means - -- end-state) : - -- - -- regexp state_num | a b * empty_string - -- ------- ------------------------------ - -- a 1 (s) | 2 - - - - -- 2 (e) | - - - - - -- - -- ab 1 (s) | 2 - - - - -- 2 | - - - 3 - -- 3 | - 4 - - - -- 4 (e) | - - - - - -- - -- a|b 1 | 2 - - - - -- 2 | - - - 6 - -- 3 | - 4 - - - -- 4 | - - - 6 - -- 5 (s) | - - - 1,3 - -- 6 (e) | - - - - - -- - -- a* 1 | 2 - - - - -- 2 | - - - 4 - -- 3 (s) | - - - 1,4 - -- 4 (e) | - - - 3 - -- - -- (a) 1 (s) | 2 - - - - -- 2 (e) | - - - - - -- - -- a+ 1 | 2 - - - - -- 2 | - - - 4 - -- 3 (s) | - - - 1 - -- 4 (e) | - - - 3 - -- - -- a? 1 | 2 - - - - -- 2 | - - - 4 - -- 3 (s) | - - - 1,4 - -- 4 (e) | - - - - - -- - -- . 1 (s) | 2 2 2 - - -- 2 (e) | - - - - - - function Next_Sub_Expression - (Start_Index : Integer; - End_Index : Integer) return Integer; - -- Returns the index of the last character of the next sub-expression - -- in Simple. Index cannot be greater than End_Index. - - -------------------- - -- Add_Empty_Char -- - -------------------- - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index) - is - J : Column_Index := Empty_Char; - - begin - while Get (Table, State, J) /= 0 loop - J := J + 1; - end loop; - - Set (Table, State, J, To_State); - end Add_Empty_Char; - - ----------------------- - -- Create_Repetition -- - ----------------------- - - procedure Create_Repetition - (Repetition : Character; - Start_Prev : State_Index; - End_Prev : State_Index; - New_Start : out State_Index; - New_End : in out State_Index) - is - begin - New_Start := Current_State + 1; - - if New_End /= 0 then - Add_Empty_Char (New_End, New_Start); - end if; - - Current_State := Current_State + 2; - New_End := Current_State; - - Add_Empty_Char (End_Prev, New_End); - Add_Empty_Char (New_Start, Start_Prev); - - if Repetition /= '+' then - Add_Empty_Char (New_Start, New_End); - end if; - - if Repetition /= '?' then - Add_Empty_Char (New_End, New_Start); - end if; - end Create_Repetition; - - ------------------- - -- Create_Simple -- - ------------------- - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index) - is - J : Integer := Start_Index; - Last_Start : State_Index := 0; - - begin - Start_State := 0; - End_State := 0; - while J <= End_Index loop - case S (J) is - when Open_Paren => - declare - J_Start : constant Integer := J + 1; - Next_Start : State_Index; - Next_End : State_Index; - - begin - J := Next_Sub_Expression (J, End_Index); - Create_Simple (J_Start, J - 1, Next_Start, Next_End); - - if J < End_Index - and then (S (J + 1) = '*' or else - S (J + 1) = '+' or else - S (J + 1) = '?') - then - J := J + 1; - Create_Repetition - (S (J), - Next_Start, - Next_End, - Last_Start, - End_State); - - else - Last_Start := Next_Start; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Next_End; - end if; - end; - - when '|' => - declare - Start_Prev : constant State_Index := Start_State; - End_Prev : constant State_Index := End_State; - Start_J : constant Integer := J + 1; - Start_Next : State_Index := 0; - End_Next : State_Index := 0; - - begin - J := Next_Sub_Expression (J, End_Index); - - -- Create a new state for the start of the alternative - - Current_State := Current_State + 1; - Last_Start := Current_State; - Start_State := Last_Start; - - -- Create the tree for the second part of alternative - - Create_Simple (Start_J, J, Start_Next, End_Next); - - -- Create the end state - - Add_Empty_Char (Last_Start, Start_Next); - Add_Empty_Char (Last_Start, Start_Prev); - Current_State := Current_State + 1; - End_State := Current_State; - Add_Empty_Char (End_Prev, End_State); - Add_Empty_Char (End_Next, End_State); - end; - - when Open_Bracket => - Current_State := Current_State + 1; - - declare - Next_State : State_Index := Current_State + 1; - - begin - J := J + 1; - - if S (J) = '^' then - J := J + 1; - - Next_State := 0; - - for Column in 0 .. Alphabet_Size loop - Set (Table, Current_State, Column, - Value => Current_State + 1); - end loop; - end if; - - -- Automatically add the first character - - if S (J) = '-' or else S (J) = ']' then - Set (Table, Current_State, Map (S (J)), - Value => Next_State); - J := J + 1; - end if; - - -- Loop till closing bracket found - - loop - exit when S (J) = Close_Bracket; - - if S (J) = '-' - and then S (J + 1) /= ']' - then - declare - Start : constant Integer := J - 1; - - begin - J := J + 1; - - if S (J) = '\' then - J := J + 1; - end if; - - for Char in S (Start) .. S (J) loop - Set (Table, Current_State, Map (Char), - Value => Next_State); - end loop; - end; - - else - if S (J) = '\' then - J := J + 1; - end if; - - Set (Table, Current_State, Map (S (J)), - Value => Next_State); - end if; - J := J + 1; - end loop; - end; - - Current_State := Current_State + 1; - - -- If the next symbol is a special symbol - - if J < End_Index - and then (S (J + 1) = '*' or else - S (J + 1) = '+' or else - S (J + 1) = '?') - then - J := J + 1; - Create_Repetition - (S (J), - Current_State - 1, - Current_State, - Last_Start, - End_State); - - else - Last_Start := Current_State - 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - end if; - - when Close_Bracket - | Close_Paren - | '*' | '+' | '?' - => - Raise_Exception - ("Incorrect character in regular expression :", J); - - when others => - Current_State := Current_State + 1; - - -- Create the state for the symbol S (J) - - if S (J) = '.' then - for K in 0 .. Alphabet_Size loop - Set (Table, Current_State, K, - Value => Current_State + 1); - end loop; - - else - if S (J) = '\' then - J := J + 1; - end if; - - Set (Table, Current_State, Map (S (J)), - Value => Current_State + 1); - end if; - - Current_State := Current_State + 1; - - -- If the next symbol is a special symbol - - if J < End_Index - and then (S (J + 1) = '*' or else - S (J + 1) = '+' or else - S (J + 1) = '?') - then - J := J + 1; - Create_Repetition - (S (J), - Current_State - 1, - Current_State, - Last_Start, - End_State); - - else - Last_Start := Current_State - 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - end if; - end case; - - if Start_State = 0 then - Start_State := Last_Start; - end if; - - J := J + 1; - end loop; - end Create_Simple; - - ------------------------- - -- Next_Sub_Expression -- - ------------------------- - - function Next_Sub_Expression - (Start_Index : Integer; - End_Index : Integer) return Integer - is - J : Integer := Start_Index; - Start_On_Alter : Boolean := False; - - begin - if S (J) = '|' then - Start_On_Alter := True; - end if; - - loop - exit when J = End_Index; - J := J + 1; - - case S (J) is - when '\' => - J := J + 1; - - when Open_Bracket => - loop - J := J + 1; - exit when S (J) = Close_Bracket; - - if S (J) = '\' then - J := J + 1; - end if; - end loop; - - when Open_Paren => - J := Next_Sub_Expression (J, End_Index); - - when Close_Paren => - return J; - - when '|' => - if Start_On_Alter then - return J - 1; - end if; - - when others => - null; - end case; - end loop; - - return J; - end Next_Sub_Expression; - - -- Start of processing for Create_Primary_Table - - begin - Table.all := (others => (others => 0)); - Create_Simple (S'First, S'Last, Start_State, End_State); - Num_States := Current_State; - end Create_Primary_Table; - - ------------------------------- - -- Create_Primary_Table_Glob -- - ------------------------------- - - procedure Create_Primary_Table_Glob - (Table : out Regexp_Array_Access; - Num_States : out State_Index; - Start_State : out State_Index; - End_State : out State_Index) - is - Empty_Char : constant Column_Index := Alphabet_Size + 1; - - Current_State : State_Index := 0; - -- Index of the last created state - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index); - -- Add a empty-character transition from State to To_State - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index); - -- Fill the table for the S (Start_Index .. End_Index). - -- This is the recursive procedure called to handle () expressions - - -------------------- - -- Add_Empty_Char -- - -------------------- - - procedure Add_Empty_Char - (State : State_Index; - To_State : State_Index) - is - J : Column_Index; - - begin - J := Empty_Char; - while Get (Table, State, J) /= 0 loop - J := J + 1; - end loop; - - Set (Table, State, J, Value => To_State); - end Add_Empty_Char; - - ------------------- - -- Create_Simple -- - ------------------- - - procedure Create_Simple - (Start_Index : Integer; - End_Index : Integer; - Start_State : out State_Index; - End_State : out State_Index) - is - J : Integer; - Last_Start : State_Index := 0; - - begin - Start_State := 0; - End_State := 0; - - J := Start_Index; - while J <= End_Index loop - case S (J) is - when Open_Bracket => - Current_State := Current_State + 1; - - declare - Next_State : State_Index := Current_State + 1; - - begin - J := J + 1; - - if S (J) = '^' then - J := J + 1; - Next_State := 0; - - for Column in 0 .. Alphabet_Size loop - Set (Table, Current_State, Column, - Value => Current_State + 1); - end loop; - end if; - - -- Automatically add the first character - - if S (J) = '-' or else S (J) = ']' then - Set (Table, Current_State, Map (S (J)), - Value => Current_State); - J := J + 1; - end if; - - -- Loop till closing bracket found - - loop - exit when S (J) = Close_Bracket; - - if S (J) = '-' - and then S (J + 1) /= ']' - then - declare - Start : constant Integer := J - 1; - - begin - J := J + 1; - - if S (J) = '\' then - J := J + 1; - end if; - - for Char in S (Start) .. S (J) loop - Set (Table, Current_State, Map (Char), - Value => Next_State); - end loop; - end; - - else - if S (J) = '\' then - J := J + 1; - end if; - - Set (Table, Current_State, Map (S (J)), - Value => Next_State); - end if; - J := J + 1; - end loop; - end; - - Last_Start := Current_State; - Current_State := Current_State + 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - - when '{' => - declare - End_Sub : Integer; - Start_Regexp_Sub : State_Index; - End_Regexp_Sub : State_Index; - Create_Start : State_Index := 0; - - Create_End : State_Index := 0; - -- Initialized to avoid junk warning - - begin - while S (J) /= '}' loop - - -- First step : find sub pattern - - End_Sub := J + 1; - while S (End_Sub) /= ',' - and then S (End_Sub) /= '}' - loop - End_Sub := End_Sub + 1; - end loop; - - -- Second step : create a sub pattern - - Create_Simple - (J + 1, - End_Sub - 1, - Start_Regexp_Sub, - End_Regexp_Sub); - - J := End_Sub; - - -- Third step : create an alternative - - if Create_Start = 0 then - Current_State := Current_State + 1; - Create_Start := Current_State; - Add_Empty_Char (Create_Start, Start_Regexp_Sub); - Current_State := Current_State + 1; - Create_End := Current_State; - Add_Empty_Char (End_Regexp_Sub, Create_End); - - else - Current_State := Current_State + 1; - Add_Empty_Char (Current_State, Create_Start); - Create_Start := Current_State; - Add_Empty_Char (Create_Start, Start_Regexp_Sub); - Add_Empty_Char (End_Regexp_Sub, Create_End); - end if; - end loop; - - if End_State /= 0 then - Add_Empty_Char (End_State, Create_Start); - end if; - - End_State := Create_End; - Last_Start := Create_Start; - end; - - when '*' => - Current_State := Current_State + 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Current_State); - end if; - - Add_Empty_Char (Current_State, Current_State + 1); - Add_Empty_Char (Current_State, Current_State + 3); - Last_Start := Current_State; - - Current_State := Current_State + 1; - - for K in 0 .. Alphabet_Size loop - Set (Table, Current_State, K, - Value => Current_State + 1); - end loop; - - Current_State := Current_State + 1; - Add_Empty_Char (Current_State, Current_State + 1); - - Current_State := Current_State + 1; - Add_Empty_Char (Current_State, Last_Start); - End_State := Current_State; - - when others => - Current_State := Current_State + 1; - - if S (J) = '?' then - for K in 0 .. Alphabet_Size loop - Set (Table, Current_State, K, - Value => Current_State + 1); - end loop; - - else - if S (J) = '\' then - J := J + 1; - end if; - - -- Create the state for the symbol S (J) - - Set (Table, Current_State, Map (S (J)), - Value => Current_State + 1); - end if; - - Last_Start := Current_State; - Current_State := Current_State + 1; - - if End_State /= 0 then - Add_Empty_Char (End_State, Last_Start); - end if; - - End_State := Current_State; - end case; - - if Start_State = 0 then - Start_State := Last_Start; - end if; - - J := J + 1; - end loop; - end Create_Simple; - - -- Start of processing for Create_Primary_Table_Glob - - begin - Table.all := (others => (others => 0)); - Create_Simple (S'First, S'Last, Start_State, End_State); - Num_States := Current_State; - end Create_Primary_Table_Glob; - - ---------------------------- - -- Create_Secondary_Table -- - ---------------------------- - - function Create_Secondary_Table - (First_Table : Regexp_Array_Access; - Start_State : State_Index; - End_State : State_Index) return Regexp - is - Last_Index : constant State_Index := First_Table'Last (1); - - type Meta_State is array (0 .. Last_Index) of Boolean; - pragma Pack (Meta_State); - -- Whether a state from first_table belongs to a metastate. - - No_States : constant Meta_State := (others => False); - - type Meta_States_Array is array (State_Index range <>) of Meta_State; - type Meta_States_List is access all Meta_States_Array; - procedure Unchecked_Free is new Ada.Unchecked_Deallocation - (Meta_States_Array, Meta_States_List); - Meta_States : Meta_States_List; - -- Components of meta-states. A given state might belong to - -- several meta-states. - -- This array grows dynamically. - - type Char_To_State is array (0 .. Alphabet_Size) of State_Index; - type Meta_States_Transition_Arr is - array (State_Index range <>) of Char_To_State; - type Meta_States_Transition is access all Meta_States_Transition_Arr; - procedure Unchecked_Free is new Ada.Unchecked_Deallocation - (Meta_States_Transition_Arr, Meta_States_Transition); - Table : Meta_States_Transition; - -- Documents the transitions between each meta-state. The - -- first index is the meta-state, the second column is the - -- character seen in the input, the value is the new meta-state. - - Temp_State_Not_Null : Boolean; - - Current_State : State_Index := 1; - -- The current meta-state we are creating - - Nb_State : State_Index := 1; - -- The total number of meta-states created so far. - - procedure Closure - (Meta_State : State_Index; - State : State_Index); - -- Compute the closure of the state (that is every other state which - -- has a empty-character transition) and add it to the state - - procedure Ensure_Meta_State (Meta : State_Index); - -- grows the Meta_States array as needed to make sure that there - -- is enough space to store the new meta state. - - ----------------------- - -- Ensure_Meta_State -- - ----------------------- - - procedure Ensure_Meta_State (Meta : State_Index) is - Tmp : Meta_States_List := Meta_States; - Tmp2 : Meta_States_Transition := Table; - - begin - if Meta_States = null then - Meta_States := new Meta_States_Array - (1 .. State_Index'Max (Last_Index, Meta) + 1); - Meta_States (Meta_States'Range) := (others => No_States); - - Table := new Meta_States_Transition_Arr - (1 .. State_Index'Max (Last_Index, Meta) + 1); - Table.all := (others => (others => 0)); - - elsif Meta > Meta_States'Last then - Meta_States := new Meta_States_Array - (1 .. State_Index'Max (2 * Tmp'Last, Meta)); - Meta_States (Tmp'Range) := Tmp.all; - Meta_States (Tmp'Last + 1 .. Meta_States'Last) := - (others => No_States); - Unchecked_Free (Tmp); - - Table := new Meta_States_Transition_Arr - (1 .. State_Index'Max (2 * Tmp2'Last, Meta) + 1); - Table (Tmp2'Range) := Tmp2.all; - Table (Tmp2'Last + 1 .. Table'Last) := - (others => (others => 0)); - Unchecked_Free (Tmp2); - end if; - end Ensure_Meta_State; - - ------------- - -- Closure -- - ------------- - - procedure Closure - (Meta_State : State_Index; - State : State_Index) - is - begin - if not Meta_States (Meta_State)(State) then - Meta_States (Meta_State)(State) := True; - - -- For each transition on empty-character - - for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop - exit when First_Table (State, Column) = 0; - Closure (Meta_State, First_Table (State, Column)); - end loop; - end if; - end Closure; - - -- Start of processing for Create_Secondary_Table - - begin - -- Create a new state - - Ensure_Meta_State (Current_State); - Closure (Current_State, Start_State); - - while Current_State <= Nb_State loop - - -- We will be trying, below, to create the next meta-state - - Ensure_Meta_State (Nb_State + 1); - - -- For every character in the regexp, calculate the possible - -- transitions from Current_State. - - for Column in 0 .. Alphabet_Size loop - Temp_State_Not_Null := False; - - for K in Meta_States (Current_State)'Range loop - if Meta_States (Current_State)(K) - and then First_Table (K, Column) /= 0 - then - Closure (Nb_State + 1, First_Table (K, Column)); - Temp_State_Not_Null := True; - end if; - end loop; - - -- If at least one transition existed - - if Temp_State_Not_Null then - - -- Check if this new state corresponds to an old one - - for K in 1 .. Nb_State loop - if Meta_States (K) = Meta_States (Nb_State + 1) then - Table (Current_State)(Column) := K; - - -- Reset data, for the next time we try that state - - Meta_States (Nb_State + 1) := No_States; - exit; - end if; - end loop; - - -- If not, create a new state - - if Table (Current_State)(Column) = 0 then - Nb_State := Nb_State + 1; - Ensure_Meta_State (Nb_State + 1); - Table (Current_State)(Column) := Nb_State; - end if; - end if; - end loop; - - Current_State := Current_State + 1; - end loop; - - -- Returns the regexp - - declare - R : Regexp_Access; - - begin - R := new Regexp_Value (Alphabet_Size => Alphabet_Size, - Num_States => Nb_State); - R.Map := Map; - R.Case_Sensitive := Case_Sensitive; - - for S in 1 .. Nb_State loop - R.Is_Final (S) := Meta_States (S)(End_State); - end loop; - - for State in 1 .. Nb_State loop - for K in 0 .. Alphabet_Size loop - R.States (State, K) := Table (State)(K); - end loop; - end loop; - - Unchecked_Free (Meta_States); - Unchecked_Free (Table); - - return (Ada.Finalization.Controlled with R => R); - end; - end Create_Secondary_Table; - - --------------------- - -- Raise_Exception -- - --------------------- - - procedure Raise_Exception (M : String; Index : Integer) is - begin - raise Error_In_Regexp with M & " at offset" & Index'Img; - end Raise_Exception; - - -- Start of processing for Compile - - begin - -- Special case for the empty string: it always matches, and the - -- following processing would fail on it. - - if S = "" then - return (Ada.Finalization.Controlled with - R => new Regexp_Value' - (Alphabet_Size => 0, - Num_States => 1, - Map => (others => 0), - States => (others => (others => 1)), - Is_Final => (others => True), - Case_Sensitive => True)); - end if; - - if not Case_Sensitive then - System.Case_Util.To_Lower (S); - end if; - - -- Check the pattern is well-formed before any treatment - - Check_Well_Formed_Pattern; - - Create_Mapping; - - -- Creates the primary table - - declare - Table : Regexp_Array_Access; - Num_States : State_Index; - Start_State : State_Index; - End_State : State_Index; - R : Regexp; - - begin - Table := new Regexp_Array (1 .. Initial_Max_States_In_Primary_Table, - 0 .. Alphabet_Size + 10); - if not Glob then - Create_Primary_Table (Table, Num_States, Start_State, End_State); - else - Create_Primary_Table_Glob - (Table, Num_States, Start_State, End_State); - end if; - - -- Creates the secondary table - - R := Create_Secondary_Table (Table, Start_State, End_State); - Free (Table); - return R; - end; - end Compile; - - -------------- - -- Finalize -- - -------------- - - procedure Finalize (R : in out Regexp) is - procedure Free is new - Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access); - begin - Free (R.R); - end Finalize; - - --------- - -- Get -- - --------- - - function Get - (Table : Regexp_Array_Access; - State : State_Index; - Column : Column_Index) return State_Index - is - begin - if State <= Table'Last (1) - and then Column <= Table'Last (2) - then - return Table (State, Column); - else - return 0; - end if; - end Get; - - ----------- - -- Match -- - ----------- - - function Match (S : String; R : Regexp) return Boolean is - Current_State : State_Index := 1; - - begin - if R.R = null then - raise Constraint_Error; - end if; - - for Char in S'Range loop - - if R.R.Case_Sensitive then - Current_State := R.R.States (Current_State, R.R.Map (S (Char))); - else - Current_State := - R.R.States (Current_State, - R.R.Map (System.Case_Util.To_Lower (S (Char)))); - end if; - - if Current_State = 0 then - return False; - end if; - - end loop; - - return R.R.Is_Final (Current_State); - end Match; - - --------- - -- Set -- - --------- - - procedure Set - (Table : in out Regexp_Array_Access; - State : State_Index; - Column : Column_Index; - Value : State_Index) - is - New_Lines : State_Index; - New_Columns : Column_Index; - New_Table : Regexp_Array_Access; - - begin - if State <= Table'Last (1) - and then Column <= Table'Last (2) - then - Table (State, Column) := Value; - else - -- Doubles the size of the table until it is big enough that - -- (State, Column) is a valid index. - - New_Lines := Table'Last (1) * (State / Table'Last (1) + 1); - New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1); - New_Table := new Regexp_Array (Table'First (1) .. New_Lines, - Table'First (2) .. New_Columns); - New_Table.all := (others => (others => 0)); - - for J in Table'Range (1) loop - for K in Table'Range (2) loop - New_Table (J, K) := Table (J, K); - end loop; - end loop; - - Free (Table); - Table := New_Table; - Table (State, Column) := Value; - end if; - end Set; - -end System.Regexp; |