diff options
Diffstat (limited to 'gcc/ada/g-decstr.adb')
-rw-r--r-- | gcc/ada/g-decstr.adb | 796 |
1 files changed, 0 insertions, 796 deletions
diff --git a/gcc/ada/g-decstr.adb b/gcc/ada/g-decstr.adb deleted file mode 100644 index ab8d06c..0000000 --- a/gcc/ada/g-decstr.adb +++ /dev/null @@ -1,796 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT RUN-TIME COMPONENTS -- --- -- --- G N A T . D E C O D E _ S T R I N G -- --- -- --- S p e c -- --- -- --- Copyright (C) 2007-2014, AdaCore -- --- -- --- GNAT is free software; you can redistribute it and/or modify it under -- --- terms of the GNU General Public License as published by the Free Soft- -- --- ware Foundation; either version 3, or (at your option) any later ver- -- --- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- --- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- --- or FITNESS FOR A PARTICULAR PURPOSE. -- --- -- --- As a special exception under Section 7 of GPL version 3, you are granted -- --- additional permissions described in the GCC Runtime Library Exception, -- --- version 3.1, as published by the Free Software Foundation. -- --- -- --- You should have received a copy of the GNU General Public License and -- --- a copy of the GCC Runtime Library Exception along with this program; -- --- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- --- <http://www.gnu.org/licenses/>. -- --- -- --- GNAT was originally developed by the GNAT team at New York University. -- --- Extensive contributions were provided by Ada Core Technologies Inc. -- --- -- ------------------------------------------------------------------------------- - --- This package provides a utility routine for converting from an encoded --- string to a corresponding Wide_String or Wide_Wide_String value. - -with Interfaces; use Interfaces; - -with System.WCh_Cnv; use System.WCh_Cnv; -with System.WCh_Con; use System.WCh_Con; - -package body GNAT.Decode_String is - - ----------------------- - -- Local Subprograms -- - ----------------------- - - procedure Bad; - pragma No_Return (Bad); - -- Raise error for bad encoding - - procedure Past_End; - pragma No_Return (Past_End); - -- Raise error for off end of string - - --------- - -- Bad -- - --------- - - procedure Bad is - begin - raise Constraint_Error with - "bad encoding or character out of range"; - end Bad; - - --------------------------- - -- Decode_Wide_Character -- - --------------------------- - - procedure Decode_Wide_Character - (Input : String; - Ptr : in out Natural; - Result : out Wide_Character) - is - Char : Wide_Wide_Character; - begin - Decode_Wide_Wide_Character (Input, Ptr, Char); - - if Wide_Wide_Character'Pos (Char) > 16#FFFF# then - Bad; - else - Result := Wide_Character'Val (Wide_Wide_Character'Pos (Char)); - end if; - end Decode_Wide_Character; - - ------------------------ - -- Decode_Wide_String -- - ------------------------ - - function Decode_Wide_String (S : String) return Wide_String is - Result : Wide_String (1 .. S'Length); - Length : Natural; - begin - Decode_Wide_String (S, Result, Length); - return Result (1 .. Length); - end Decode_Wide_String; - - procedure Decode_Wide_String - (S : String; - Result : out Wide_String; - Length : out Natural) - is - Ptr : Natural; - - begin - Ptr := S'First; - Length := 0; - while Ptr <= S'Last loop - if Length >= Result'Last then - Past_End; - end if; - - Length := Length + 1; - Decode_Wide_Character (S, Ptr, Result (Length)); - end loop; - end Decode_Wide_String; - - -------------------------------- - -- Decode_Wide_Wide_Character -- - -------------------------------- - - procedure Decode_Wide_Wide_Character - (Input : String; - Ptr : in out Natural; - Result : out Wide_Wide_Character) - is - C : Character; - - function In_Char return Character; - pragma Inline (In_Char); - -- Function to get one input character - - ------------- - -- In_Char -- - ------------- - - function In_Char return Character is - begin - if Ptr <= Input'Last then - Ptr := Ptr + 1; - return Input (Ptr - 1); - else - Past_End; - end if; - end In_Char; - - -- Start of processing for Decode_Wide_Wide_Character - - begin - C := In_Char; - - -- Special fast processing for UTF-8 case - - if Encoding_Method = WCEM_UTF8 then - UTF8 : declare - U : Unsigned_32; - W : Unsigned_32; - - procedure Get_UTF_Byte; - pragma Inline (Get_UTF_Byte); - -- Used to interpret 2#10xxxxxx# continuation byte in UTF-8 mode. - -- Reads a byte, and raises CE if the first two bits are not 10. - -- Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits. - - ------------------ - -- Get_UTF_Byte -- - ------------------ - - procedure Get_UTF_Byte is - begin - U := Unsigned_32 (Character'Pos (In_Char)); - - if (U and 2#11000000#) /= 2#10_000000# then - Bad; - end if; - - W := Shift_Left (W, 6) or (U and 2#00111111#); - end Get_UTF_Byte; - - -- Start of processing for UTF8 case - - begin - -- Note: for details of UTF8 encoding see RFC 3629 - - U := Unsigned_32 (Character'Pos (C)); - - -- 16#00_0000#-16#00_007F#: 0xxxxxxx - - if (U and 2#10000000#) = 2#00000000# then - Result := Wide_Wide_Character'Val (Character'Pos (C)); - - -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx - - elsif (U and 2#11100000#) = 2#110_00000# then - W := U and 2#00011111#; - Get_UTF_Byte; - - if W not in 16#00_0080# .. 16#00_07FF# then - Bad; - end if; - - Result := Wide_Wide_Character'Val (W); - - -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx - - elsif (U and 2#11110000#) = 2#1110_0000# then - W := U and 2#00001111#; - Get_UTF_Byte; - Get_UTF_Byte; - - if W not in 16#00_0800# .. 16#00_FFFF# then - Bad; - end if; - - Result := Wide_Wide_Character'Val (W); - - -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - - elsif (U and 2#11111000#) = 2#11110_000# then - W := U and 2#00000111#; - - for K in 1 .. 3 loop - Get_UTF_Byte; - end loop; - - if W not in 16#01_0000# .. 16#10_FFFF# then - Bad; - end if; - - Result := Wide_Wide_Character'Val (W); - - -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx - -- 10xxxxxx 10xxxxxx - - elsif (U and 2#11111100#) = 2#111110_00# then - W := U and 2#00000011#; - - for K in 1 .. 4 loop - Get_UTF_Byte; - end loop; - - if W not in 16#0020_0000# .. 16#03FF_FFFF# then - Bad; - end if; - - Result := Wide_Wide_Character'Val (W); - - -- All other cases are invalid, note that this includes: - - -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx - -- 10xxxxxx 10xxxxxx 10xxxxxx - - -- since Wide_Wide_Character does not include code values - -- greater than 16#03FF_FFFF#. - - else - Bad; - end if; - end UTF8; - - -- All encoding functions other than UTF-8 - - else - Non_UTF8 : declare - function Char_Sequence_To_UTF is - new Char_Sequence_To_UTF_32 (In_Char); - - begin - -- For brackets, must test for specific case of [ not followed by - -- quotation, where we must not call Char_Sequence_To_UTF, but - -- instead just return the bracket unchanged. - - if Encoding_Method = WCEM_Brackets - and then C = '[' - and then (Ptr > Input'Last or else Input (Ptr) /= '"') - then - Result := '['; - - -- All other cases including [" with Brackets - - else - Result := - Wide_Wide_Character'Val - (Char_Sequence_To_UTF (C, Encoding_Method)); - end if; - end Non_UTF8; - end if; - end Decode_Wide_Wide_Character; - - ----------------------------- - -- Decode_Wide_Wide_String -- - ----------------------------- - - function Decode_Wide_Wide_String (S : String) return Wide_Wide_String is - Result : Wide_Wide_String (1 .. S'Length); - Length : Natural; - begin - Decode_Wide_Wide_String (S, Result, Length); - return Result (1 .. Length); - end Decode_Wide_Wide_String; - - procedure Decode_Wide_Wide_String - (S : String; - Result : out Wide_Wide_String; - Length : out Natural) - is - Ptr : Natural; - - begin - Ptr := S'First; - Length := 0; - while Ptr <= S'Last loop - if Length >= Result'Last then - Past_End; - end if; - - Length := Length + 1; - Decode_Wide_Wide_Character (S, Ptr, Result (Length)); - end loop; - end Decode_Wide_Wide_String; - - ------------------------- - -- Next_Wide_Character -- - ------------------------- - - procedure Next_Wide_Character (Input : String; Ptr : in out Natural) is - Discard : Wide_Character; - begin - Decode_Wide_Character (Input, Ptr, Discard); - end Next_Wide_Character; - - ------------------------------ - -- Next_Wide_Wide_Character -- - ------------------------------ - - procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural) is - Discard : Wide_Wide_Character; - begin - Decode_Wide_Wide_Character (Input, Ptr, Discard); - end Next_Wide_Wide_Character; - - -------------- - -- Past_End -- - -------------- - - procedure Past_End is - begin - raise Constraint_Error with "past end of string"; - end Past_End; - - ------------------------- - -- Prev_Wide_Character -- - ------------------------- - - procedure Prev_Wide_Character (Input : String; Ptr : in out Natural) is - begin - if Ptr > Input'Last + 1 then - Past_End; - end if; - - -- Special efficient encoding for UTF-8 case - - if Encoding_Method = WCEM_UTF8 then - UTF8 : declare - U : Unsigned_32; - - procedure Getc; - pragma Inline (Getc); - -- Gets the character at Input (Ptr - 1) and returns code in U as - -- Unsigned_32 value. On return Ptr is decremented by one. - - procedure Skip_UTF_Byte; - pragma Inline (Skip_UTF_Byte); - -- Checks that U is 2#10xxxxxx# and then calls Get - - ---------- - -- Getc -- - ---------- - - procedure Getc is - begin - if Ptr <= Input'First then - Past_End; - else - Ptr := Ptr - 1; - U := Unsigned_32 (Character'Pos (Input (Ptr))); - end if; - end Getc; - - ------------------- - -- Skip_UTF_Byte -- - ------------------- - - procedure Skip_UTF_Byte is - begin - if (U and 2#11000000#) = 2#10_000000# then - Getc; - else - Bad; - end if; - end Skip_UTF_Byte; - - -- Start of processing for UTF-8 case - - begin - -- 16#00_0000#-16#00_007F#: 0xxxxxxx - - Getc; - - if (U and 2#10000000#) = 2#00000000# then - return; - - -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx - - else - Skip_UTF_Byte; - - if (U and 2#11100000#) = 2#110_00000# then - return; - - -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx - - else - Skip_UTF_Byte; - - if (U and 2#11110000#) = 2#1110_0000# then - return; - - -- Any other code is invalid, note that this includes: - - -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx - -- 10xxxxxx - - -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx - -- 10xxxxxx 10xxxxxx - -- 10xxxxxx - - -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx - -- 10xxxxxx 10xxxxxx - -- 10xxxxxx 10xxxxxx - - -- since Wide_Character does not allow codes > 16#FFFF# - - else - Bad; - end if; - end if; - end if; - end UTF8; - - -- Special efficient encoding for brackets case - - elsif Encoding_Method = WCEM_Brackets then - Brackets : declare - P : Natural; - S : Natural; - - begin - -- See if we have "] at end positions - - if Ptr > Input'First + 1 - and then Input (Ptr - 1) = ']' - and then Input (Ptr - 2) = '"' - then - P := Ptr - 2; - - -- Loop back looking for [" at start - - while P >= Ptr - 10 loop - if P <= Input'First + 1 then - Bad; - - elsif Input (P - 1) = '"' - and then Input (P - 2) = '[' - then - -- Found ["..."], scan forward to check it - - S := P - 2; - P := S; - Next_Wide_Character (Input, P); - - -- OK if at original pointer, else error - - if P = Ptr then - Ptr := S; - return; - else - Bad; - end if; - end if; - - P := P - 1; - end loop; - - -- Falling through loop means more than 8 chars between the - -- enclosing brackets (or simply a missing left bracket) - - Bad; - - -- Here if no bracket sequence present - - else - if Ptr = Input'First then - Past_End; - else - Ptr := Ptr - 1; - end if; - end if; - end Brackets; - - -- Non-UTF-8/Brackets. These are the inefficient cases where we have to - -- go to the start of the string and skip forwards till Ptr matches. - - else - Non_UTF_Brackets : declare - Discard : Wide_Character; - PtrS : Natural; - PtrP : Natural; - - begin - PtrS := Input'First; - - if Ptr <= PtrS then - Past_End; - end if; - - loop - PtrP := PtrS; - Decode_Wide_Character (Input, PtrS, Discard); - - if PtrS = Ptr then - Ptr := PtrP; - return; - - elsif PtrS > Ptr then - Bad; - end if; - end loop; - - exception - when Constraint_Error => - Bad; - end Non_UTF_Brackets; - end if; - end Prev_Wide_Character; - - ------------------------------ - -- Prev_Wide_Wide_Character -- - ------------------------------ - - procedure Prev_Wide_Wide_Character (Input : String; Ptr : in out Natural) is - begin - if Ptr > Input'Last + 1 then - Past_End; - end if; - - -- Special efficient encoding for UTF-8 case - - if Encoding_Method = WCEM_UTF8 then - UTF8 : declare - U : Unsigned_32; - - procedure Getc; - pragma Inline (Getc); - -- Gets the character at Input (Ptr - 1) and returns code in U as - -- Unsigned_32 value. On return Ptr is decremented by one. - - procedure Skip_UTF_Byte; - pragma Inline (Skip_UTF_Byte); - -- Checks that U is 2#10xxxxxx# and then calls Get - - ---------- - -- Getc -- - ---------- - - procedure Getc is - begin - if Ptr <= Input'First then - Past_End; - else - Ptr := Ptr - 1; - U := Unsigned_32 (Character'Pos (Input (Ptr))); - end if; - end Getc; - - ------------------- - -- Skip_UTF_Byte -- - ------------------- - - procedure Skip_UTF_Byte is - begin - if (U and 2#11000000#) = 2#10_000000# then - Getc; - else - Bad; - end if; - end Skip_UTF_Byte; - - -- Start of processing for UTF-8 case - - begin - -- 16#00_0000#-16#00_007F#: 0xxxxxxx - - Getc; - - if (U and 2#10000000#) = 2#00000000# then - return; - - -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx - - else - Skip_UTF_Byte; - - if (U and 2#11100000#) = 2#110_00000# then - return; - - -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx - - else - Skip_UTF_Byte; - - if (U and 2#11110000#) = 2#1110_0000# then - return; - - -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx - -- 10xxxxxx - - else - Skip_UTF_Byte; - - if (U and 2#11111000#) = 2#11110_000# then - return; - - -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx - -- 10xxxxxx 10xxxxxx - -- 10xxxxxx - - else - Skip_UTF_Byte; - - if (U and 2#11111100#) = 2#111110_00# then - return; - - -- Any other code is invalid, note that this includes: - - -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx - -- 10xxxxxx 10xxxxxx - -- 10xxxxxx 10xxxxxx - - -- since Wide_Wide_Character does not allow codes - -- greater than 16#03FF_FFFF# - - else - Bad; - end if; - end if; - end if; - end if; - end if; - end UTF8; - - -- Special efficient encoding for brackets case - - elsif Encoding_Method = WCEM_Brackets then - Brackets : declare - P : Natural; - S : Natural; - - begin - -- See if we have "] at end positions - - if Ptr > Input'First + 1 - and then Input (Ptr - 1) = ']' - and then Input (Ptr - 2) = '"' - then - P := Ptr - 2; - - -- Loop back looking for [" at start - - while P >= Ptr - 10 loop - if P <= Input'First + 1 then - Bad; - - elsif Input (P - 1) = '"' - and then Input (P - 2) = '[' - then - -- Found ["..."], scan forward to check it - - S := P - 2; - P := S; - Next_Wide_Wide_Character (Input, P); - - -- OK if at original pointer, else error - - if P = Ptr then - Ptr := S; - return; - else - Bad; - end if; - end if; - - P := P - 1; - end loop; - - -- Falling through loop means more than 8 chars between the - -- enclosing brackets (or simply a missing left bracket) - - Bad; - - -- Here if no bracket sequence present - - else - if Ptr = Input'First then - Past_End; - else - Ptr := Ptr - 1; - end if; - end if; - end Brackets; - - -- Non-UTF-8/Brackets. These are the inefficient cases where we have to - -- go to the start of the string and skip forwards till Ptr matches. - - else - Non_UTF8_Brackets : declare - Discard : Wide_Wide_Character; - PtrS : Natural; - PtrP : Natural; - - begin - PtrS := Input'First; - - if Ptr <= PtrS then - Past_End; - end if; - - loop - PtrP := PtrS; - Decode_Wide_Wide_Character (Input, PtrS, Discard); - - if PtrS = Ptr then - Ptr := PtrP; - return; - - elsif PtrS > Ptr then - Bad; - end if; - end loop; - - exception - when Constraint_Error => - Bad; - end Non_UTF8_Brackets; - end if; - end Prev_Wide_Wide_Character; - - -------------------------- - -- Validate_Wide_String -- - -------------------------- - - function Validate_Wide_String (S : String) return Boolean is - Ptr : Natural; - - begin - Ptr := S'First; - while Ptr <= S'Last loop - Next_Wide_Character (S, Ptr); - end loop; - - return True; - - exception - when Constraint_Error => - return False; - end Validate_Wide_String; - - ------------------------------- - -- Validate_Wide_Wide_String -- - ------------------------------- - - function Validate_Wide_Wide_String (S : String) return Boolean is - Ptr : Natural; - - begin - Ptr := S'First; - while Ptr <= S'Last loop - Next_Wide_Wide_Character (S, Ptr); - end loop; - - return True; - - exception - when Constraint_Error => - return False; - end Validate_Wide_Wide_String; - -end GNAT.Decode_String; |