diff options
author | Arnaud Charlet <charlet@gcc.gnu.org> | 2009-07-23 14:27:37 +0200 |
---|---|---|
committer | Arnaud Charlet <charlet@gcc.gnu.org> | 2009-07-23 14:27:37 +0200 |
commit | 19f21e1133746fa91c6a68e7514fb51968f45050 (patch) | |
tree | d72eed2b844820d6ec143ac42455485d1db1d8a8 /gcc/ada | |
parent | 84d60eea81f358bb5e81cda00631786d05653ff0 (diff) | |
download | gcc-19f21e1133746fa91c6a68e7514fb51968f45050.zip gcc-19f21e1133746fa91c6a68e7514fb51968f45050.tar.gz gcc-19f21e1133746fa91c6a68e7514fb51968f45050.tar.bz2 |
[multiple changes]
2009-07-23 Yannick Moy <moy@adacore.com>
* s-regexp.adb (Check_Well_Formed_Pattern): Called before compiling the
pattern.
(Raise_Exception_If_No_More_Chars): Remove extra blank in exception
string.
(Raise_Exception): Ditto.
2009-07-23 Olivier Hainque <hainque@adacore.com>
* g-sse.ads: Simplify comment.
From-SVN: r150000
Diffstat (limited to 'gcc/ada')
-rw-r--r-- | gcc/ada/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/ada/g-sse.ads | 6 | ||||
-rwxr-xr-x | gcc/ada/s-regexp.adb | 280 |
3 files changed, 291 insertions, 7 deletions
diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index dc4d8a5..4cd15a9 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,15 @@ +2009-07-23 Yannick Moy <moy@adacore.com> + + * s-regexp.adb (Check_Well_Formed_Pattern): Called before compiling the + pattern. + (Raise_Exception_If_No_More_Chars): Remove extra blank in exception + string. + (Raise_Exception): Ditto. + +2009-07-23 Olivier Hainque <hainque@adacore.com> + + * g-sse.ads: Simplify comment. + 2009-07-23 Olivier Hainque <hainque@adacore.com> * g-ssinty.ads: New unit. GNAT.SSE.Internal_Types. Factorize diff --git a/gcc/ada/g-sse.ads b/gcc/ada/g-sse.ads index 4b0937a..f669f2e 100644 --- a/gcc/ada/g-sse.ads +++ b/gcc/ada/g-sse.ads @@ -82,11 +82,7 @@ -- end if; -- end; --- Use of Unchecked_Union is very tempting, however hits difficulties with --- e.g. implicit front-end expanded equality operators, which typically --- feature a subcase comparing the m128 components, not supported by the --- middle-end. This needs more explanation, should it be fixed ??? It --- reads like a bug in this paragraph. +-- Use of Unchecked_Union to perform the overlays is not supported. package GNAT.SSE is type Float32 is new Float; diff --git a/gcc/ada/s-regexp.adb b/gcc/ada/s-regexp.adb index 48ebd44..02d0a99 100755 --- a/gcc/ada/s-regexp.adb +++ b/gcc/ada/s-regexp.adb @@ -129,6 +129,14 @@ package body System.Regexp is -- Number of significant characters in the regular expression. -- This total does not include special operators, such as *, (, ... + procedure Check_Well_Formed_Pattern; + -- Check that the pattern to compile is well-formed, so that + -- subsequent code can rely on this without performing each time + -- the checks to avoid accessing the pattern outside its bounds. + -- Except that, not all well-formedness rules are checked. + -- In particular, the rules about special characters not being + -- treated as regular characters are not checked. + procedure Create_Mapping; -- Creates a mapping between characters in the regexp and columns -- in the tables representing the regexp. Test that the regexp is @@ -180,6 +188,270 @@ package body System.Regexp is pragma No_Return (Raise_Exception); -- Raise an exception, indicating an error at character Index in S + ------------------------------- + -- Check_Well_Formed_Pattern -- + ------------------------------- + + procedure Check_Well_Formed_Pattern is + + J : Integer := S'First; + Past_Elmt : Boolean := False; + -- Set to True everywhere an elmt has been parsed, if Glob=False, + -- meaning there can be now an occurence of '*', '+' and '?'. + Past_Term : Boolean := False; + -- Set to True everywhere a term has been parsed, if Glob=False, + -- meaning there can be now an occurence of '|'. + Parenthesis_Level : Integer := 0; + Curly_Level : Integer := 0; + Last_Open : Integer := S'First - 1; + -- The last occurence of an opening parenthesis, if Glob=False, + -- or the last occurence of an opening curly brace, if Glob=True. + + procedure Raise_Exception_If_No_More_Chars (K : Integer := 0); + + -------------------------------------- + -- Raise_Exception_If_No_More_Chars -- + -------------------------------------- + + procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is + begin + if J + K > S'Last then + Raise_Exception + ("Ill-formed pattern while parsing", J); + end if; + end Raise_Exception_If_No_More_Chars; + + -- Start of processing for Check_Well_Formed_Pattern + + begin + while J <= S'Last loop + case S (J) is + when Open_Bracket => + J := J + 1; + Raise_Exception_If_No_More_Chars; + + if not Glob then + if S (J) = '^' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + end if; + + -- The first character never has a special meaning + + if S (J) = ']' or else S (J) = '-' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + + -- The set of characters cannot be empty + + if S (J) = ']' then + Raise_Exception + ("Set of characters cannot be empty in regular " + & "expression", J); + end if; + + declare + Possible_Range_Start : Boolean := True; + -- Set to True everywhere a range character '-' + -- can occur. + begin + loop + exit when S (J) = Close_Bracket; + + -- The current character should be followed by + -- a closing bracket. + + Raise_Exception_If_No_More_Chars (1); + + if S (J) = '-' + and then S (J + 1) /= Close_Bracket + then + if not Possible_Range_Start then + Raise_Exception + ("No mix of ranges is allowed in " + & "regular expression", J); + end if; + + J := J + 1; + Raise_Exception_If_No_More_Chars; + + -- Range cannot be followed by '-' character, + -- except as last character in the set. + + Possible_Range_Start := False; + else + Possible_Range_Start := True; + end if; + + if S (J) = '\' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + + J := J + 1; + end loop; + end; + + -- A closing bracket can end an elmt or term + + Past_Elmt := True; + Past_Term := True; + + when Close_Bracket => + -- A close bracket must follow a open_bracket, + -- and cannot be found alone on the line. + + Raise_Exception + ("Incorrect character ']' in regular expression", J); + + when '\' => + if J < S'Last then + J := J + 1; + + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + else + -- \ not allowed at the end of the regexp + + Raise_Exception + ("Incorrect character '\' in regular expression", J); + end if; + + when Open_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level + 1; + Last_Open := J; + + -- An open parenthesis does not end an elmt or term + + Past_Elmt := False; + Past_Term := False; + end if; + + when Close_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level - 1; + + if Parenthesis_Level < 0 then + Raise_Exception + ("')' is not associated with '(' in regular " + & "expression", J); + end if; + + if J = Last_Open + 1 then + Raise_Exception + ("Empty parentheses not allowed in regular " + & "expression", J); + end if; + + if not Past_Term then + Raise_Exception + ("Closing parenthesis not allowed here in regular " + & "expression", J); + end if; + + -- A closing parenthesis can end an elmt or term + + Past_Elmt := True; + Past_Term := True; + end if; + + when '{' => + if Glob then + Curly_Level := Curly_Level + 1; + Last_Open := J; + else + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + + -- No need to check for ',' as the code always accepts them + + when '}' => + if Glob then + Curly_Level := Curly_Level - 1; + + if Curly_Level < 0 then + Raise_Exception + ("'}' is not associated with '{' in regular " + & "expression", J); + end if; + + if J = Last_Open + 1 then + Raise_Exception + ("Empty curly braces not allowed in regular " + & "expression", J); + end if; + else + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + + when '*' | '?' | '+' => + if not Glob then + -- These operators must apply to an elmt sub-expression, + -- and cannot be found if one has not just been parsed. + + if not Past_Elmt then + Raise_Exception + ("'*', '+' and '?' operators must be " + & "applied to an element in regular expression", J); + end if; + + Past_Elmt := False; + Past_Term := True; + end if; + + when '|' => + if not Glob then + -- This operator must apply to a term sub-expression, + -- and cannot be found if one has not just been parsed. + + if not Past_Term then + Raise_Exception + ("'|' operator must be " + & "applied to a term in regular expression", J); + end if; + + Past_Elmt := False; + Past_Term := False; + end if; + + when others => + if not Glob then + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + end case; + + J := J + 1; + end loop; + + -- A closing parenthesis must follow an open parenthesis + + if Parenthesis_Level /= 0 then + Raise_Exception + ("'(' must always be associated with a ')'", J); + end if; + + -- A closing curly brace must follow an open curly brace + + if Curly_Level /= 0 then + Raise_Exception + ("'{' must always be associated with a '}'", J); + end if; + end Check_Well_Formed_Pattern; + -------------------- -- Create_Mapping -- -------------------- @@ -1224,7 +1496,7 @@ package body System.Regexp is procedure Raise_Exception (M : String; Index : Integer) is begin - raise Error_In_Regexp with M & " at offset " & Index'Img; + raise Error_In_Regexp with M & " at offset" & Index'Img; end Raise_Exception; -- Start of processing for Compile @@ -1247,12 +1519,16 @@ package body System.Regexp is System.Case_Util.To_Lower (S); end if; + -- Check the pattern is well-formed before any treatment + + Check_Well_Formed_Pattern; + Create_Mapping; -- Creates the primary table declare - Table : Regexp_Array_Access; + Table : Regexp_Array_Access; Num_States : State_Index; Start_State : State_Index; End_State : State_Index; |