From 9694c03951602dd1216838de82dc1c2de54d2754 Mon Sep 17 00:00:00 2001 From: Arnaud Charlet Date: Mon, 11 Oct 2010 10:23:31 +0200 Subject: [multiple changes] 2010-10-11 Robert Dewar * a-textio.adb: Minor reformatting 2010-10-11 Robert Dewar * a-suesen.ads, a-suenst.ads, a-suesen.adb, a-suenst.adb, a-suewse.adb, a-suewst.adb, a-suewse.ads, a-suewst.ads, a-suezse.ads, a-suezst.ads, a-suezse.adb, a-suezst.adb: New name for string encoding packages. * impunit.adb: New names for string encoding units * Makefile.rtl: New names for string encoding units * rtsfind.ads: Minor code reorganization. 2010-10-11 Ed Schonberg * exp_ch5.adb: Code clean up. 2010-10-11 Ed Schonberg * sem_ch6.adb (Check_Limited_Return): Specialize warning on limited returns when in a generic context. (Analyze_Function_Return): ditto. From-SVN: r165276 --- gcc/ada/ChangeLog | 26 ++++ gcc/ada/Makefile.rtl | 6 +- gcc/ada/a-suenst.adb | 341 ++++++++++++++++++++++++++++++++++++++++ gcc/ada/a-suenst.ads | 65 ++++++++ gcc/ada/a-suesen.adb | 341 ---------------------------------------- gcc/ada/a-suesen.ads | 65 -------- gcc/ada/a-suewse.adb | 370 -------------------------------------------- gcc/ada/a-suewse.ads | 67 -------- gcc/ada/a-suewst.adb | 370 ++++++++++++++++++++++++++++++++++++++++++++ gcc/ada/a-suewst.ads | 67 ++++++++ gcc/ada/a-suezse.adb | 429 --------------------------------------------------- gcc/ada/a-suezse.ads | 64 -------- gcc/ada/a-suezst.adb | 429 +++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/ada/a-suezst.ads | 64 ++++++++ gcc/ada/a-textio.adb | 8 + gcc/ada/exp_ch5.adb | 39 +++-- gcc/ada/impunit.adb | 6 +- gcc/ada/rtsfind.ads | 1 - gcc/ada/sem_ch6.adb | 47 ++++-- 19 files changed, 1434 insertions(+), 1371 deletions(-) create mode 100755 gcc/ada/a-suenst.adb create mode 100755 gcc/ada/a-suenst.ads delete mode 100755 gcc/ada/a-suesen.adb delete mode 100755 gcc/ada/a-suesen.ads delete mode 100755 gcc/ada/a-suewse.adb delete mode 100755 gcc/ada/a-suewse.ads create mode 100755 
gcc/ada/a-suewst.adb create mode 100755 gcc/ada/a-suewst.ads delete mode 100755 gcc/ada/a-suezse.adb delete mode 100755 gcc/ada/a-suezse.ads create mode 100755 gcc/ada/a-suezst.adb create mode 100755 gcc/ada/a-suezst.ads (limited to 'gcc') diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index b9614a6..d216fcb 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,5 +1,31 @@ 2010-10-11 Robert Dewar + * a-textio.adb: Minor reformatting + +2010-10-11 Robert Dewar + + * a-suesen.ads, a-suenst.ads, + a-suesen.adb, a-suenst.adb, + a-suewse.adb, a-suewst.adb, + a-suewse.ads, a-suewst.ads, + a-suezse.ads, a-suezst.ads, + a-suezse.adb, a-suezst.adb: New name for string encoding packages. + * impunit.adb: New names for string encoding units + * Makefile.rtl: New names for string encoding units + * rtsfind.ads: Minor code reorganization. + +2010-10-11 Ed Schonberg + + * exp_ch5.adb: Code clean up. + +2010-10-11 Ed Schonberg + + * sem_ch6.adb (Check_Limited_Return): Specialize warning on limited + returns when in a generic context. + (Analyze_Function_Return): ditto. + +2010-10-11 Robert Dewar + * s-multip.ads: Fix header. * sem_ch3.adb, s-multip.adb, a-tigeli.adb: Minor reformatting. 
diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl index 856dd49..4989e79 100644 --- a/gcc/ada/Makefile.rtl +++ b/gcc/ada/Makefile.rtl @@ -227,9 +227,9 @@ GNATRTL_NONTASKING_OBJS= \ a-stzsup$(objext) \ a-stzunb$(objext) \ a-suenco$(objext) \ - a-suesen$(objext) \ - a-suewse$(objext) \ - a-suezse$(objext) \ + a-suenst$(objext) \ + a-suewst$(objext) \ + a-suezst$(objext) \ a-suteio$(objext) \ a-swbwha$(objext) \ a-swfwha$(objext) \ diff --git a/gcc/ada/a-suenst.adb b/gcc/ada/a-suenst.adb new file mode 100755 index 0000000..d4207ed --- /dev/null +++ b/gcc/ada/a-suenst.adb @@ -0,0 +1,341 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.STRINGS -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 2010, Free Software Foundation, Inc. -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. -- +-- -- +-- As a special exception under Section 7 of GPL version 3, you are granted -- +-- additional permissions described in the GCC Runtime Library Exception, -- +-- version 3.1, as published by the Free Software Foundation. -- +-- -- +-- You should have received a copy of the GNU General Public License and -- +-- a copy of the GCC Runtime Library Exception along with this program; -- +-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- +-- <http://www.gnu.org/licenses/>. -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. 
-- +-- -- +------------------------------------------------------------------------------ + +package body Ada.Strings.UTF_Encoding.Strings is + use Interfaces; + + ------------ + -- Decode -- + ------------ + + -- Decode UTF-8/UTF-16BE/UTF-16LE input to String + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return String + is + begin + if Input_Scheme = UTF_8 then + return Decode (Item); + else + return Decode (To_UTF_16 (Item, Input_Scheme)); + end if; + end Decode; + + -- Decode UTF-8 input to String + + function Decode (Item : UTF_8_String) return String is + Result : String (1 .. Item'Length); + -- Result string (worst case is same length as input) + + Len : Natural := 0; + -- Length of result stored so far + + Iptr : Natural; + -- Input Item pointer + + C : Unsigned_8; + R : Unsigned_16; + + procedure Get_Continuation; + -- Reads a continuation byte of the form 10xxxxxx, shifts R left + -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On + -- return Iptr is incremented. Raises exception if continuation + -- byte does not exist or is invalid. + + ---------------------- + -- Get_Continuation -- + ---------------------- + + procedure Get_Continuation is + begin + if Iptr > Item'Last then + Raise_Encoding_Error (Iptr - 1); + + else + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + if C not in 2#10_000000# .. 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + else + R := Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#); + end if; + end if; + end Get_Continuation; + + -- Start of processing for Decode + + begin + Iptr := Item'First; + + -- Skip BOM at start + + if Item'Length >= 3 + and then Item (Iptr .. Iptr + 2) = BOM_8 + then + Iptr := Iptr + 3; + + -- Error if bad BOM + + elsif Item'Length >= 2 + and then (Item (Iptr .. Iptr + 1) = BOM_16BE + or else + Item (Iptr .. 
Iptr + 1) = BOM_16LE) + then + Raise_Encoding_Error (Iptr); + end if; + + while Iptr <= Item'Last loop + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + R := Unsigned_16 (C); + + -- No initial code can be of the form 10xxxxxx. Such codes are used + -- only for continuations. + + elsif C <= 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 2#110_11111# then + R := Unsigned_16 (C and 2#000_11111#); + Get_Continuation; + + -- Codes in the range 16#800# - 16#FFFF# are represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + -- Such codes are out of range for type Character + + -- Codes in the range 16#10000# - 16#10FFFF# are represented as + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + + -- Such codes are out of range for Wide_String output + + -- Thus all remaining cases raise Encoding_Error + + else + Raise_Encoding_Error (Iptr - 1); + end if; + + Len := Len + 1; + Result (Len) := Character'Val (R); + end loop; + + return Result (1 .. Len); + end Decode; + + -- Decode UTF-16 input to String + + function Decode (Item : UTF_16_Wide_String) return String is + Result : String (1 .. 
Item'Length); + -- Result is same length as input (possibly minus 1 if BOM present) + + Len : Natural := 0; + -- Length of result + + Iptr : Natural; + -- Index of next Item element + + C : Unsigned_16; + + begin + -- Skip UTF-16 BOM at start + + Iptr := Item'First; + + if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then + Iptr := Iptr + 1; + end if; + + -- Loop through input characters + + while Iptr <= Item'Last loop + C := To_Unsigned_16 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#0000#..16#00FF# represent their own value + + if C <= 16#00FF# then + Len := Len + 1; + Result (Len) := Character'Val (C); + + -- All other codes are invalid, either they are invalid UTF-16 + -- encoding sequences, or they represent values that are out of + -- range for type Character. + + else + Raise_Encoding_Error (Iptr - 1); + end if; + end loop; + + return Result (1 .. Len); + end Decode; + + ------------ + -- Encode -- + ------------ + + -- Encode String in UTF-8, UTF-16BE or UTF-16LE + + function Encode + (Item : String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String + is + begin + -- Case of UTF_8 + + if Output_Scheme = UTF_8 then + return Encode (Item, Output_BOM); + + -- Case of UTF_16LE or UTF_16BE, use UTF-16 intermediary + + else + return From_UTF_16 (UTF_16_Wide_String'(Encode (Item)), + Output_Scheme, Output_BOM); + end if; + end Encode; + + -- Encode String in UTF-8 + + function Encode + (Item : String; + Output_BOM : Boolean := False) return UTF_8_String + is + Result : UTF_8_String (1 .. 3 * Item'Length + 3); + -- Worst case is three bytes per input byte + space for BOM + + Len : Natural; + -- Number of output codes stored in Result + + C : Unsigned_8; + -- Single input character + + procedure Store (C : Unsigned_8); + pragma Inline (Store); + -- Store one output code, C is in the range 0 .. 
255 + + ----------- + -- Store -- + ----------- + + procedure Store (C : Unsigned_8) is + begin + Len := Len + 1; + Result (Len) := Character'Val (C); + end Store; + + -- Start of processing for UTF8_Encode + + begin + -- Output BOM if required + + if Output_BOM then + Result (1 .. 3) := BOM_8; + Len := 3; + else + Len := 0; + end if; + + -- Loop through characters of input + + for J in Item'Range loop + C := To_Unsigned_8 (Item (J)); + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + Store (C); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + -- For type character of course, the limit is 16#FF# in any case + + else + Store (2#110_00000# or Shift_Right (C, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + end if; + end loop; + + return Result (1 .. Len); + end Encode; + + -- Encode String in UTF-16 + + function Encode + (Item : String; + Output_BOM : Boolean := False) return UTF_16_Wide_String + is + Result : UTF_16_Wide_String + (1 .. Item'Length + Boolean'Pos (Output_BOM)); + -- Output is same length as input + possible BOM + + Len : Integer; + -- Length of output string + + C : Unsigned_8; + + begin + -- Output BOM if required + + if Output_BOM then + Result (1) := BOM_16 (1); + Len := 1; + else + Len := 0; + end if; + + -- Loop through input characters encoding them + + for Iptr in Item'Range loop + C := To_Unsigned_8 (Item (Iptr)); + + -- Codes in the range 16#0000#..16#00FF# are output unchanged. This + -- includes all possible cases of Character values. 
+ + Len := Len + 1; + Result (Len) := Wide_Character'Val (C); + end loop; + + return Result; + end Encode; + +end Ada.Strings.UTF_Encoding.Strings; diff --git a/gcc/ada/a-suenst.ads b/gcc/ada/a-suenst.ads new file mode 100755 index 0000000..1706cd6 --- /dev/null +++ b/gcc/ada/a-suenst.ads @@ -0,0 +1,65 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.STRINGS -- +-- -- +-- S p e c -- +-- -- +-- This specification is derived from the Ada Reference Manual for use with -- +-- GNAT. In accordance with the copyright of that document, you can freely -- +-- copy and modify this specification, provided that if you redistribute a -- +-- modified version, any changes that you have made are clearly indicated. -- +-- -- +------------------------------------------------------------------------------ + +-- This is an Ada 2012 package defined in AI05-0137-1. It is used for encoding +-- and decoding String values using UTF encodings. Note: this package is +-- consistent with Ada 95, and may be included in Ada 95 implementations. + +package Ada.Strings.UTF_Encoding.Strings is + pragma Pure (Strings); + + -- The encoding routines take a String as input and encode the result + -- using the specified UTF encoding method. The result includes a BOM if + -- the Output_BOM argument is set to True. All 256 values of type Character + -- are valid, so Encoding_Error cannot be raised for string input data. + + function Encode + (Item : String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String; + -- Encode String using UTF-8, UTF-16LE or UTF-16BE encoding as specified by + -- the Output_Scheme parameter. 
+ + function Encode + (Item : String; + Output_BOM : Boolean := False) return UTF_8_String; + -- Encode String using UTF-8 encoding + + function Encode + (Item : String; + Output_BOM : Boolean := False) return UTF_16_Wide_String; + -- Encode String using UTF_16 encoding + + -- The decoding routines take a UTF String as input, and return a decoded + -- String. If the UTF String starts with a BOM that matches the + -- encoding method, it is ignored. An incorrect BOM raises Encoding_Error, + -- as does a code out of range of type Character. + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return String; + -- The input is encoded in UTF_8, UTF_16LE or UTF_16BE as specified by the + -- Input_Scheme parameter. It is decoded and returned as a String value. + -- Note: a convenient form for scheme may be Encoding (UTF_String). + + function Decode + (Item : UTF_8_String) return String; + -- The input is encoded in UTF-8 and returned as a String value + + function Decode + (Item : UTF_16_Wide_String) return String; + -- The input is encoded in UTF-16 and returned as a String value + +end Ada.Strings.UTF_Encoding.Strings; diff --git a/gcc/ada/a-suesen.adb b/gcc/ada/a-suesen.adb deleted file mode 100755 index 263e6ec..0000000 --- a/gcc/ada/a-suesen.adb +++ /dev/null @@ -1,341 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT RUN-TIME COMPONENTS -- --- -- --- ADA.STRINGS.UTF_ENCODING.STRING_ENCODING -- --- -- --- B o d y -- --- -- --- Copyright (C) 2010, Free Software Foundation, Inc. -- --- -- --- GNAT is free software; you can redistribute it and/or modify it under -- --- terms of the GNU General Public License as published by the Free Soft- -- --- ware Foundation; either version 3, or (at your option) any later ver- -- --- sion. 
GNAT is distributed in the hope that it will be useful, but WITH- -- --- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- --- or FITNESS FOR A PARTICULAR PURPOSE. -- --- -- --- As a special exception under Section 7 of GPL version 3, you are granted -- --- additional permissions described in the GCC Runtime Library Exception, -- --- version 3.1, as published by the Free Software Foundation. -- --- -- --- You should have received a copy of the GNU General Public License and -- --- a copy of the GCC Runtime Library Exception along with this program; -- --- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- --- . -- --- -- --- GNAT was originally developed by the GNAT team at New York University. -- --- Extensive contributions were provided by Ada Core Technologies Inc. -- --- -- ------------------------------------------------------------------------------- - -package body Ada.Strings.UTF_Encoding.String_Encoding is - use Interfaces; - - ------------ - -- Decode -- - ------------ - - -- Decode UTF-8/UTF-16BE/UTF-16LE input to String - - function Decode - (Item : UTF_String; - Input_Scheme : Encoding_Scheme) return String - is - begin - if Input_Scheme = UTF_8 then - return Decode (Item); - else - return Decode (To_UTF_16 (Item, Input_Scheme)); - end if; - end Decode; - - -- Decode UTF-8 input to String - - function Decode (Item : UTF_8_String) return String is - Result : String (1 .. Item'Length); - -- Result string (worst case is same length as input) - - Len : Natural := 0; - -- Length of result stored so far - - Iptr : Natural; - -- Input Item pointer - - C : Unsigned_8; - R : Unsigned_16; - - procedure Get_Continuation; - -- Reads a continuation byte of the form 10xxxxxx, shifts R left - -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On - -- return Ptr is incremented. Raises exceptioon if continuation - -- byte does not exist or is invalid. 
- - ---------------------- - -- Get_Continuation -- - ---------------------- - - procedure Get_Continuation is - begin - if Iptr > Item'Last then - Raise_Encoding_Error (Iptr - 1); - - else - C := To_Unsigned_8 (Item (Iptr)); - Iptr := Iptr + 1; - - if C not in 2#10_000000# .. 2#10_111111# then - Raise_Encoding_Error (Iptr - 1); - else - R := Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#); - end if; - end if; - end Get_Continuation; - - -- Start of processing for Decode - - begin - Iptr := Item'First; - - -- Skip BOM at start - - if Item'Length >= 3 - and then Item (Iptr .. Iptr + 2) = BOM_8 - then - Iptr := Iptr + 3; - - -- Error if bad BOM - - elsif Item'Length >= 2 - and then (Item (Iptr .. Iptr + 1) = BOM_16BE - or else - Item (Iptr .. Iptr + 1) = BOM_16LE) - then - Raise_Encoding_Error (Iptr); - end if; - - while Iptr <= Item'Last loop - C := To_Unsigned_8 (Item (Iptr)); - Iptr := Iptr + 1; - - -- Codes in the range 16#00# - 16#7F# are represented as - -- 0xxxxxxx - - if C <= 16#7F# then - R := Unsigned_16 (C); - - -- No initial code can be of the form 10xxxxxx. Such codes are used - -- only for continuations. - - elsif C <= 2#10_111111# then - Raise_Encoding_Error (Iptr - 1); - - -- Codes in the range 16#80# - 16#7FF# are represented as - -- 110yyyxx 10xxxxxx - - elsif C <= 2#110_11111# then - R := Unsigned_16 (C and 2#000_11111#); - Get_Continuation; - - -- Codes in the range 16#800# - 16#FFFF# are represented as - -- 1110yyyy 10yyyyxx 10xxxxxx - - -- Such codes are out of range for type Character - - -- Codes in the range 16#10000# - 16#10FFFF# are represented as - -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx - - -- Such codes are out of range for Wide_String output - - -- Thus all remaining cases raise Encoding_Error - - else - Raise_Encoding_Error (Iptr - 1); - end if; - - Len := Len + 1; - Result (Len) := Character'Val (R); - end loop; - - return Result (1 .. 
Len); - end Decode; - - -- Decode UTF-16 input to String - - function Decode (Item : UTF_16_Wide_String) return String is - Result : String (1 .. Item'Length); - -- Result is same length as input (possibly minus 1 if BOM present) - - Len : Natural := 0; - -- Length of result - - Iptr : Natural; - -- Index of next Item element - - C : Unsigned_16; - - begin - -- Skip UTF-16 BOM at start - - Iptr := Item'First; - - if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then - Iptr := Iptr + 1; - end if; - - -- Loop through input characters - - while Iptr <= Item'Last loop - C := To_Unsigned_16 (Item (Iptr)); - Iptr := Iptr + 1; - - -- Codes in the range 16#0000#..16#00FF# represent their own value - - if C <= 16#00FF# then - Len := Len + 1; - Result (Len) := Character'Val (C); - - -- All other codes are invalid, either they are invalid UTF-16 - -- encoding sequences, or they represent values that are out of - -- range for type Character. - - else - Raise_Encoding_Error (Iptr - 1); - end if; - end loop; - - return Result (1 .. Len); - end Decode; - - ------------ - -- Encode -- - ------------ - - -- Encode String in UTF-8, UTF-16BE or UTF-16LE - - function Encode - (Item : String; - Output_Scheme : Encoding_Scheme; - Output_BOM : Boolean := False) return UTF_String - is - begin - -- Case of UTF_8 - - if Output_Scheme = UTF_8 then - return Encode (Item, Output_BOM); - - -- Case of UTF_16LE or UTF_16BE, use UTF-16 intermediary - - else - return From_UTF_16 (UTF_16_Wide_String'(Encode (Item)), - Output_Scheme, Output_BOM); - end if; - end Encode; - - -- Encode String in UTF-8 - - function Encode - (Item : String; - Output_BOM : Boolean := False) return UTF_8_String - is - Result : UTF_8_String (1 .. 
3 * Item'Length + 3); - -- Worst case is three bytes per input byte + space for BOM - - Len : Natural; - -- Number of output codes stored in Result - - C : Unsigned_8; - -- Single input character - - procedure Store (C : Unsigned_8); - pragma Inline (Store); - -- Store one output code, C is in the range 0 .. 255 - - ----------- - -- Store -- - ----------- - - procedure Store (C : Unsigned_8) is - begin - Len := Len + 1; - Result (Len) := Character'Val (C); - end Store; - - -- Start of processing for UTF8_Encode - - begin - -- Output BOM if required - - if Output_BOM then - Result (1 .. 3) := BOM_8; - Len := 3; - else - Len := 0; - end if; - - -- Loop through characters of input - - for J in Item'Range loop - C := To_Unsigned_8 (Item (J)); - - -- Codes in the range 16#00# - 16#7F# are represented as - -- 0xxxxxxx - - if C <= 16#7F# then - Store (C); - - -- Codes in the range 16#80# - 16#7FF# are represented as - -- 110yyyxx 10xxxxxx - - -- For type character of course, the limit is 16#FF# in any case - - else - Store (2#110_00000# or Shift_Right (C, 6)); - Store (2#10_000000# or (C and 2#00_111111#)); - end if; - end loop; - - return Result (1 .. Len); - end Encode; - - -- Encode String in UTF-16 - - function Encode - (Item : String; - Output_BOM : Boolean := False) return UTF_16_Wide_String - is - Result : UTF_16_Wide_String - (1 .. Item'Length + Boolean'Pos (Output_BOM)); - -- Output is same length as input + possible BOM - - Len : Integer; - -- Length of output string - - C : Unsigned_8; - - begin - -- Output BOM if required - - if Output_BOM then - Result (1) := BOM_16 (1); - Len := 1; - else - Len := 0; - end if; - - -- Loop through input characters encoding them - - for Iptr in Item'Range loop - C := To_Unsigned_8 (Item (Iptr)); - - -- Codes in the range 16#0000#..16#00FF# are output unchanged. This - -- includes all possible cases of Character values. 
- - Len := Len + 1; - Result (Len) := Wide_Character'Val (C); - end loop; - - return Result; - end Encode; - -end Ada.Strings.UTF_Encoding.String_Encoding; diff --git a/gcc/ada/a-suesen.ads b/gcc/ada/a-suesen.ads deleted file mode 100755 index a8f913e..0000000 --- a/gcc/ada/a-suesen.ads +++ /dev/null @@ -1,65 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT RUN-TIME COMPONENTS -- --- -- --- ADA.STRINGS.UTF_ENCODING.STRING_ENCODING -- --- -- --- S p e c -- --- -- --- This specification is derived from the Ada Reference Manual for use with -- --- GNAT. In accordance with the copyright of that document, you can freely -- --- copy and modify this specification, provided that if you redistribute a -- --- modified version, any changes that you have made are clearly indicated. -- --- -- ------------------------------------------------------------------------------- - --- This is an Ada 2012 package defined in AI05-0137-1. It is used for encoding --- and decoding String values using UTF encodings. Note: this package is --- consistent with Ada 95, and may be included in Ada 95 implementations. - -package Ada.Strings.UTF_Encoding.String_Encoding is - pragma Pure (String_Encoding); - - -- The encoding routines take a String as input and encode the result - -- using the specified UTF encoding method. The result includes a BOM if - -- the Output_BOM argument is set to True. All 256 values of type Character - -- are valid, so Encoding_Error cannot be raised for string input data. - - function Encode - (Item : String; - Output_Scheme : Encoding_Scheme; - Output_BOM : Boolean := False) return UTF_String; - -- Encode String using UTF-8, UTF-16LE or UTF-16BE encoding as specified by - -- the Output_Scheme parameter. 
- - function Encode - (Item : String; - Output_BOM : Boolean := False) return UTF_8_String; - -- Encode String using UTF-8 encoding - - function Encode - (Item : String; - Output_BOM : Boolean := False) return UTF_16_Wide_String; - -- Encode String using UTF_16 encoding - - -- The decoding routines take a UTF String as input, and return a decoded - -- Wide_String. If the UTF String starts with a BOM that matches the - -- encoding method, it is ignored. An incorrect BOM raises Encoding_Error, - -- as does a code out of range of type Character. - - function Decode - (Item : UTF_String; - Input_Scheme : Encoding_Scheme) return String; - -- The input is encoded in UTF_8, UTF_16LE or UTF_16BE as specified by the - -- Input_Scheme parameter. It is decoded and returned as a String value. - -- Note: a convenient form for scheme may be Encoding (UTF_String). - - function Decode - (Item : UTF_8_String) return String; - -- The input is encoded in UTF-8 and returned as a String value - - function Decode - (Item : UTF_16_Wide_String) return String; - -- The input is encoded in UTF-16 and returned as a String value - -end Ada.Strings.UTF_Encoding.String_Encoding; diff --git a/gcc/ada/a-suewse.adb b/gcc/ada/a-suewse.adb deleted file mode 100755 index 1b697b9..0000000 --- a/gcc/ada/a-suewse.adb +++ /dev/null @@ -1,370 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT RUN-TIME COMPONENTS -- --- -- --- ADA.STRINGS.UTF_ENCODING.WIDE_STRING_ENCODING -- --- -- --- B o d y -- --- -- --- Copyright (C) 2010, Free Software Foundation, Inc. -- --- -- --- GNAT is free software; you can redistribute it and/or modify it under -- --- terms of the GNU General Public License as published by the Free Soft- -- --- ware Foundation; either version 3, or (at your option) any later ver- -- --- sion. 
GNAT is distributed in the hope that it will be useful, but WITH- -- --- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- --- or FITNESS FOR A PARTICULAR PURPOSE. -- --- -- --- As a special exception under Section 7 of GPL version 3, you are granted -- --- additional permissions described in the GCC Runtime Library Exception, -- --- version 3.1, as published by the Free Software Foundation. -- --- -- --- You should have received a copy of the GNU General Public License and -- --- a copy of the GCC Runtime Library Exception along with this program; -- --- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- --- . -- --- -- --- GNAT was originally developed by the GNAT team at New York University. -- --- Extensive contributions were provided by Ada Core Technologies Inc. -- --- -- ------------------------------------------------------------------------------- - -package body Ada.Strings.UTF_Encoding.Wide_String_Encoding is - use Interfaces; - - ------------ - -- Decode -- - ------------ - - -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_String - - function Decode - (Item : UTF_String; - Input_Scheme : Encoding_Scheme) return Wide_String - is - begin - if Input_Scheme = UTF_8 then - return Decode (Item); - else - return Decode (To_UTF_16 (Item, Input_Scheme)); - end if; - end Decode; - - -- Decode UTF-8 input to Wide_String - - function Decode (Item : UTF_8_String) return Wide_String is - Result : Wide_String (1 .. Item'Length); - -- Result string (worst case is same length as input) - - Len : Natural := 0; - -- Length of result stored so far - - Iptr : Natural; - -- Input Item pointer - - C : Unsigned_8; - R : Unsigned_16; - - procedure Get_Continuation; - -- Reads a continuation byte of the form 10xxxxxx, shifts R left - -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On - -- return Ptr is incremented. Raises exceptioon if continuation - -- byte does not exist or is invalid. 
- - ---------------------- - -- Get_Continuation -- - ---------------------- - - procedure Get_Continuation is - begin - if Iptr > Item'Last then - Raise_Encoding_Error (Iptr - 1); - - else - C := To_Unsigned_8 (Item (Iptr)); - Iptr := Iptr + 1; - - if C not in 2#10_000000# .. 2#10_111111# then - Raise_Encoding_Error (Iptr - 1); - else - R := Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#); - end if; - end if; - end Get_Continuation; - - -- Start of processing for Decode - - begin - Iptr := Item'First; - - -- Skip BOM at start - - if Item'Length >= 3 - and then Item (Iptr .. Iptr + 2) = BOM_8 - then - Iptr := Iptr + 3; - - -- Error if bad BOM - - elsif Item'Length >= 2 - and then (Item (Iptr .. Iptr + 1) = BOM_16BE - or else - Item (Iptr .. Iptr + 1) = BOM_16LE) - then - Raise_Encoding_Error (Iptr); - end if; - - while Iptr <= Item'Last loop - C := To_Unsigned_8 (Item (Iptr)); - Iptr := Iptr + 1; - - -- Codes in the range 16#00# - 16#7F# are represented as - -- 0xxxxxxx - - if C <= 16#7F# then - R := Unsigned_16 (C); - - -- No initial code can be of the form 10xxxxxx. Such codes are used - -- only for continuations. - - elsif C <= 2#10_111111# then - Raise_Encoding_Error (Iptr - 1); - - -- Codes in the range 16#80# - 16#7FF# are represented as - -- 110yyyxx 10xxxxxx - - elsif C <= 2#110_11111# then - R := Unsigned_16 (C and 2#000_11111#); - Get_Continuation; - - -- Codes in the range 16#800# - 16#FFFF# are represented as - -- 1110yyyy 10yyyyxx 10xxxxxx - - elsif C <= 2#1110_1111# then - R := Unsigned_16 (C and 2#0000_1111#); - Get_Continuation; - Get_Continuation; - - -- Codes in the range 16#10000# - 16#10FFFF# are represented as - -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx - - -- Such codes are out of range for Wide_String output - - else - Raise_Encoding_Error (Iptr - 1); - end if; - - Len := Len + 1; - Result (Len) := Wide_Character'Val (R); - end loop; - - return Result (1 .. 
Len); - end Decode; - - -- Decode UTF-16 input to Wide_String - - function Decode (Item : UTF_16_Wide_String) return Wide_String is - Result : Wide_String (1 .. Item'Length); - -- Result is same length as input (possibly minus 1 if BOM present) - - Len : Natural := 0; - -- Length of result - - Iptr : Natural; - -- Index of next Item element - - C : Unsigned_16; - - begin - -- Skip UTF-16 BOM at start - - Iptr := Item'First; - - if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then - Iptr := Iptr + 1; - end if; - - -- Loop through input characters - - while Iptr <= Item'Last loop - C := To_Unsigned_16 (Item (Iptr)); - Iptr := Iptr + 1; - - -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# - -- represent their own value. - - if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then - Len := Len + 1; - Result (Len) := Wide_Character'Val (C); - - -- Codes in the range 16#D800#..16#DBFF# represent the first of the - -- two surrogates used to encode the range 16#01_000#..16#10_FFFF". - -- Such codes are out of range for 16-bit output. - - -- The case of input in the range 16#DC00#..16#DFFF# must never - -- occur, since it means we have a second surrogate character with - -- no corresponding first surrogate. - - -- Codes in the range 16#FFFE# .. 16#FFFF# are also invalid since - -- they conflict with codes used for BOM values. - - -- Thus all remaining codes are invalid - - else - Raise_Encoding_Error (Iptr - 1); - end if; - end loop; - - return Result (1 .. 
Len); - end Decode; - - ------------ - -- Encode -- - ------------ - - -- Encode Wide_String in UTF-8, UTF-16BE or UTF-16LE - - function Encode - (Item : Wide_String; - Output_Scheme : Encoding_Scheme; - Output_BOM : Boolean := False) return UTF_String - is - begin - -- Case of UTF_8 - - if Output_Scheme = UTF_8 then - return Encode (Item, Output_BOM); - - -- Case of UTF_16LE or UTF_16BE, use UTF-16 intermediary - - else - return From_UTF_16 (UTF_16_Wide_String'(Encode (Item)), - Output_Scheme, Output_BOM); - end if; - end Encode; - - -- Encode Wide_String in UTF-8 - - function Encode - (Item : Wide_String; - Output_BOM : Boolean := False) return UTF_8_String - is - Result : UTF_8_String (1 .. 3 * Item'Length + 3); - -- Worst case is three bytes per input byte + space for BOM - - Len : Natural; - -- Number of output codes stored in Result - - C : Unsigned_16; - -- Single input character - - procedure Store (C : Unsigned_16); - pragma Inline (Store); - -- Store one output code, C is in the range 0 .. 255 - - ----------- - -- Store -- - ----------- - - procedure Store (C : Unsigned_16) is - begin - Len := Len + 1; - Result (Len) := Character'Val (C); - end Store; - - -- Start of processing for UTF8_Encode - - begin - -- Output BOM if required - - if Output_BOM then - Result (1 .. 
3) := BOM_8; - Len := 3; - else - Len := 0; - end if; - - -- Loop through characters of input - - for J in Item'Range loop - C := To_Unsigned_16 (Item (J)); - - -- Codes in the range 16#00# - 16#7F# are represented as - -- 0xxxxxxx - - if C <= 16#7F# then - Store (C); - - -- Codes in the range 16#80# - 16#7FF# are represented as - -- 110yyyxx 10xxxxxx - - elsif C <= 16#7FF# then - Store (2#110_00000# or Shift_Right (C, 6)); - Store (2#10_000000# or (C and 2#00_111111#)); - - -- Codes in the range 16#800# - 16#FFFF# are represented as - -- 1110yyyy 10yyyyxx 10xxxxxx - - else - Store (2#1110_0000# or Shift_Right (C, 12)); - Store (2#10_000000# or - Shift_Right (C and 2#111111_000000#, 6)); - Store (2#10_000000# or (C and 2#00_111111#)); - end if; - end loop; - - return Result (1 .. Len); - end Encode; - - -- Encode Wide_String in UTF-16 - - function Encode - (Item : Wide_String; - Output_BOM : Boolean := False) return UTF_16_Wide_String - is - Result : UTF_16_Wide_String - (1 .. Item'Length + Boolean'Pos (Output_BOM)); - -- Output is same length as input + possible BOM - - Len : Integer; - -- Length of output string - - C : Unsigned_16; - - begin - -- Output BOM if required - - if Output_BOM then - Result (1) := BOM_16 (1); - Len := 1; - else - Len := 0; - end if; - - -- Loop through input characters encoding them - - for Iptr in Item'Range loop - C := To_Unsigned_16 (Item (Iptr)); - - -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# are - -- output unchanged. - - if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then - Len := Len + 1; - Result (Len) := Wide_Character'Val (C); - - -- Codes in tne range 16#D800#..16#DFFF# should never appear in the - -- input, since no valid Unicode characters are in this range (which - -- would conflict with the UTF-16 surrogate encodings). Similarly - -- codes in the range 16#FFFE#..16#FFFF conflict with BOM codes. - -- Thus all remaining codes are illegal. 
- - else - Raise_Encoding_Error (Iptr); - end if; - end loop; - - return Result; - end Encode; - -end Ada.Strings.UTF_Encoding.Wide_String_Encoding; diff --git a/gcc/ada/a-suewse.ads b/gcc/ada/a-suewse.ads deleted file mode 100755 index c013bad..0000000 --- a/gcc/ada/a-suewse.ads +++ /dev/null @@ -1,67 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT RUN-TIME COMPONENTS -- --- -- --- ADA.STRINGS.UTF_ENCODING.WIDE_STRING_ENCODING -- --- -- --- S p e c -- --- -- --- This specification is derived from the Ada Reference Manual for use with -- --- GNAT. In accordance with the copyright of that document, you can freely -- --- copy and modify this specification, provided that if you redistribute a -- --- modified version, any changes that you have made are clearly indicated. -- --- -- ------------------------------------------------------------------------------- - --- This is an Ada 2012 package defined in AI05-0137-1. It is used for encoding --- and decoding Wide_String values using UTF encodings. Note: this package is --- consistent with Ada 95, and may be included in Ada 95 implementations. - -package Ada.Strings.UTF_Encoding.Wide_String_Encoding is - pragma Pure (Wide_String_Encoding); - - -- The encoding routines take a Wide_String as input and encode the result - -- using the specified UTF encoding method. The result includes a BOM if - -- the Output_BOM argument is set to True. Encoding_Error is raised if an - -- invalid character appears in the input. In particular the characters - -- in the range 16#D800# .. 16#DFFF# are invalid because they conflict - -- with UTF-16 surrogate encodings, and the characters 16#FFFE# and - -- 16#FFFF# are also invalid because they conflict with BOM codes. 
- - function Encode - (Item : Wide_String; - Output_Scheme : Encoding_Scheme; - Output_BOM : Boolean := False) return UTF_String; - -- Encode Wide_String using UTF-8, UTF-16LE or UTF-16BE encoding as - -- specified by the Output_Scheme parameter. - - function Encode - (Item : Wide_String; - Output_BOM : Boolean := False) return UTF_8_String; - -- Encode Wide_String using UTF-8 encoding - - function Encode - (Item : Wide_String; - Output_BOM : Boolean := False) return UTF_16_Wide_String; - -- Encode Wide_String using UTF_16 encoding - - -- The decoding routines take a UTF String as input, and return a decoded - -- Wide_String. If the UTF String starts with a BOM that matches the - -- encoding method, it is ignored. An incorrect BOM raises Encoding_Error. - - function Decode - (Item : UTF_String; - Input_Scheme : Encoding_Scheme) return Wide_String; - -- The input is encoded in UTF_8, UTF_16LE or UTF_16BE as specified by the - -- Input_Scheme parameter. It is decoded and returned as a Wide_String - -- value. Note: a convenient form for scheme may be Encoding (UTF_String). - - function Decode - (Item : UTF_8_String) return Wide_String; - -- The input is encoded in UTF-8 and returned as a Wide_String value - - function Decode - (Item : UTF_16_Wide_String) return Wide_String; - -- The input is encoded in UTF-16 and returned as a Wide_String value - -end Ada.Strings.UTF_Encoding.Wide_String_Encoding; diff --git a/gcc/ada/a-suewst.adb b/gcc/ada/a-suewst.adb new file mode 100755 index 0000000..be8677a --- /dev/null +++ b/gcc/ada/a-suewst.adb @@ -0,0 +1,370 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.WIDE_STRINGS -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 2010, Free Software Foundation, Inc. 
-- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. -- +-- -- +-- As a special exception under Section 7 of GPL version 3, you are granted -- +-- additional permissions described in the GCC Runtime Library Exception, -- +-- version 3.1, as published by the Free Software Foundation. -- +-- -- +-- You should have received a copy of the GNU General Public License and -- +-- a copy of the GCC Runtime Library Exception along with this program; -- +-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- +-- <http://www.gnu.org/licenses/>. -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. -- +-- -- +------------------------------------------------------------------------------ + +package body Ada.Strings.UTF_Encoding.Wide_Strings is + use Interfaces; + + ------------ + -- Decode -- + ------------ + + -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_String + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return Wide_String + is + begin + if Input_Scheme = UTF_8 then + return Decode (Item); + else + return Decode (To_UTF_16 (Item, Input_Scheme)); + end if; + end Decode; + + -- Decode UTF-8 input to Wide_String + + function Decode (Item : UTF_8_String) return Wide_String is + Result : Wide_String (1 .. 
Item'Length); + -- Result string (worst case is same length as input) + + Len : Natural := 0; + -- Length of result stored so far + + Iptr : Natural; + -- Input Item pointer + + C : Unsigned_8; + R : Unsigned_16; + + procedure Get_Continuation; + -- Reads a continuation byte of the form 10xxxxxx, shifts R left + -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On + -- return Iptr is incremented. Raises exception if continuation + -- byte does not exist or is invalid. + + ---------------------- + -- Get_Continuation -- + ---------------------- + + procedure Get_Continuation is + begin + if Iptr > Item'Last then + Raise_Encoding_Error (Iptr - 1); + + else + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + if C not in 2#10_000000# .. 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + else + R := Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#); + end if; + end if; + end Get_Continuation; + + -- Start of processing for Decode + + begin + Iptr := Item'First; + + -- Skip BOM at start + + if Item'Length >= 3 + and then Item (Iptr .. Iptr + 2) = BOM_8 + then + Iptr := Iptr + 3; + + -- Error if bad BOM + + elsif Item'Length >= 2 + and then (Item (Iptr .. Iptr + 1) = BOM_16BE + or else + Item (Iptr .. Iptr + 1) = BOM_16LE) + then + Raise_Encoding_Error (Iptr); + end if; + + while Iptr <= Item'Last loop + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + R := Unsigned_16 (C); + + -- No initial code can be of the form 10xxxxxx. Such codes are used + -- only for continuations. 
+ + elsif C <= 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 2#110_11111# then + R := Unsigned_16 (C and 2#000_11111#); + Get_Continuation; + + -- Codes in the range 16#800# - 16#FFFF# are represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + elsif C <= 2#1110_1111# then + R := Unsigned_16 (C and 2#0000_1111#); + Get_Continuation; + Get_Continuation; + + -- Codes in the range 16#10000# - 16#10FFFF# are represented as + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + + -- Such codes are out of range for Wide_String output + + else + Raise_Encoding_Error (Iptr - 1); + end if; + + Len := Len + 1; + Result (Len) := Wide_Character'Val (R); + end loop; + + return Result (1 .. Len); + end Decode; + + -- Decode UTF-16 input to Wide_String + + function Decode (Item : UTF_16_Wide_String) return Wide_String is + Result : Wide_String (1 .. Item'Length); + -- Result is same length as input (possibly minus 1 if BOM present) + + Len : Natural := 0; + -- Length of result + + Iptr : Natural; + -- Index of next Item element + + C : Unsigned_16; + + begin + -- Skip UTF-16 BOM at start + + Iptr := Item'First; + + if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then + Iptr := Iptr + 1; + end if; + + -- Loop through input characters + + while Iptr <= Item'Last loop + C := To_Unsigned_16 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# + -- represent their own value. + + if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Character'Val (C); + + -- Codes in the range 16#D800#..16#DBFF# represent the first of the + -- two surrogates used to encode the range 16#01_0000#..16#10_FFFF#. + -- Such codes are out of range for 16-bit output. 
+ + -- The case of input in the range 16#DC00#..16#DFFF# must never + -- occur, since it means we have a second surrogate character with + -- no corresponding first surrogate. + + -- Codes in the range 16#FFFE# .. 16#FFFF# are also invalid since + -- they conflict with codes used for BOM values. + + -- Thus all remaining codes are invalid + + else + Raise_Encoding_Error (Iptr - 1); + end if; + end loop; + + return Result (1 .. Len); + end Decode; + + ------------ + -- Encode -- + ------------ + + -- Encode Wide_String in UTF-8, UTF-16BE or UTF-16LE + + function Encode + (Item : Wide_String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String + is + begin + -- Case of UTF_8 + + if Output_Scheme = UTF_8 then + return Encode (Item, Output_BOM); + + -- Case of UTF_16LE or UTF_16BE, use UTF-16 intermediary + + else + return From_UTF_16 (UTF_16_Wide_String'(Encode (Item)), + Output_Scheme, Output_BOM); + end if; + end Encode; + + -- Encode Wide_String in UTF-8 + + function Encode + (Item : Wide_String; + Output_BOM : Boolean := False) return UTF_8_String + is + Result : UTF_8_String (1 .. 3 * Item'Length + 3); + -- Worst case is three bytes per input character + space for BOM + + Len : Natural; + -- Number of output codes stored in Result + + C : Unsigned_16; + -- Single input character + + procedure Store (C : Unsigned_16); + pragma Inline (Store); + -- Store one output code, C is in the range 0 .. 255 + + ----------- + -- Store -- + ----------- + + procedure Store (C : Unsigned_16) is + begin + Len := Len + 1; + Result (Len) := Character'Val (C); + end Store; + + -- Start of processing for Encode + + begin + -- Output BOM if required + + if Output_BOM then + Result (1 .. 
3) := BOM_8; + Len := 3; + else + Len := 0; + end if; + + -- Loop through characters of input + + for J in Item'Range loop + C := To_Unsigned_16 (Item (J)); + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + Store (C); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 16#7FF# then + Store (2#110_00000# or Shift_Right (C, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + + -- Codes in the range 16#800# - 16#FFFF# are represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + else + Store (2#1110_0000# or Shift_Right (C, 12)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000#, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + end if; + end loop; + + return Result (1 .. Len); + end Encode; + + -- Encode Wide_String in UTF-16 + + function Encode + (Item : Wide_String; + Output_BOM : Boolean := False) return UTF_16_Wide_String + is + Result : UTF_16_Wide_String + (1 .. Item'Length + Boolean'Pos (Output_BOM)); + -- Output is same length as input + possible BOM + + Len : Integer; + -- Length of output string + + C : Unsigned_16; + + begin + -- Output BOM if required + + if Output_BOM then + Result (1) := BOM_16 (1); + Len := 1; + else + Len := 0; + end if; + + -- Loop through input characters encoding them + + for Iptr in Item'Range loop + C := To_Unsigned_16 (Item (Iptr)); + + -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# are + -- output unchanged. + + if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Character'Val (C); + + -- Codes in the range 16#D800#..16#DFFF# should never appear in the + -- input, since no valid Unicode characters are in this range (which + -- would conflict with the UTF-16 surrogate encodings). Similarly + -- codes in the range 16#FFFE#..16#FFFF# conflict with BOM codes. + -- Thus all remaining codes are illegal. 
+ + else + Raise_Encoding_Error (Iptr); + end if; + end loop; + + return Result; + end Encode; + +end Ada.Strings.UTF_Encoding.Wide_Strings; diff --git a/gcc/ada/a-suewst.ads b/gcc/ada/a-suewst.ads new file mode 100755 index 0000000..e0f8d4c --- /dev/null +++ b/gcc/ada/a-suewst.ads @@ -0,0 +1,67 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.WIDE_STRINGS -- +-- -- +-- S p e c -- +-- -- +-- This specification is derived from the Ada Reference Manual for use with -- +-- GNAT. In accordance with the copyright of that document, you can freely -- +-- copy and modify this specification, provided that if you redistribute a -- +-- modified version, any changes that you have made are clearly indicated. -- +-- -- +------------------------------------------------------------------------------ + +-- This is an Ada 2012 package defined in AI05-0137-1. It is used for encoding +-- and decoding Wide_String values using UTF encodings. Note: this package is +-- consistent with Ada 95, and may be included in Ada 95 implementations. + +package Ada.Strings.UTF_Encoding.Wide_Strings is + pragma Pure (Wide_Strings); + + -- The encoding routines take a Wide_String as input and encode the result + -- using the specified UTF encoding method. The result includes a BOM if + -- the Output_BOM argument is set to True. Encoding_Error is raised if an + -- invalid character appears in the input. In particular the characters + -- in the range 16#D800# .. 16#DFFF# are invalid because they conflict + -- with UTF-16 surrogate encodings, and the characters 16#FFFE# and + -- 16#FFFF# are also invalid because they conflict with BOM codes. + + function Encode + (Item : Wide_String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String; + -- Encode Wide_String using UTF-8, UTF-16LE or UTF-16BE encoding as + -- specified by the Output_Scheme parameter. 
+ + function Encode + (Item : Wide_String; + Output_BOM : Boolean := False) return UTF_8_String; + -- Encode Wide_String using UTF-8 encoding + + function Encode + (Item : Wide_String; + Output_BOM : Boolean := False) return UTF_16_Wide_String; + -- Encode Wide_String using UTF_16 encoding + + -- The decoding routines take a UTF String as input, and return a decoded + -- Wide_String. If the UTF String starts with a BOM that matches the + -- encoding method, it is ignored. An incorrect BOM raises Encoding_Error. + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return Wide_String; + -- The input is encoded in UTF_8, UTF_16LE or UTF_16BE as specified by the + -- Input_Scheme parameter. It is decoded and returned as a Wide_String + -- value. Note: a convenient form for scheme may be Encoding (UTF_String). + + function Decode + (Item : UTF_8_String) return Wide_String; + -- The input is encoded in UTF-8 and returned as a Wide_String value + + function Decode + (Item : UTF_16_Wide_String) return Wide_String; + -- The input is encoded in UTF-16 and returned as a Wide_String value + +end Ada.Strings.UTF_Encoding.Wide_Strings; diff --git a/gcc/ada/a-suezse.adb b/gcc/ada/a-suezse.adb deleted file mode 100755 index 2aaf6b8..0000000 --- a/gcc/ada/a-suezse.adb +++ /dev/null @@ -1,429 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT RUN-TIME COMPONENTS -- --- -- --- ADA.STRINGS.UTF_ENCODING.WIDE_WIDE_STRING_ENCODING -- --- -- --- B o d y -- --- -- --- Copyright (C) 2010, Free Software Foundation, Inc. -- --- -- --- GNAT is free software; you can redistribute it and/or modify it under -- --- terms of the GNU General Public License as published by the Free Soft- -- --- ware Foundation; either version 3, or (at your option) any later ver- -- --- sion. 
GNAT is distributed in the hope that it will be useful, but WITH- -- --- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- --- or FITNESS FOR A PARTICULAR PURPOSE. -- --- -- --- As a special exception under Section 7 of GPL version 3, you are granted -- --- additional permissions described in the GCC Runtime Library Exception, -- --- version 3.1, as published by the Free Software Foundation. -- --- -- --- You should have received a copy of the GNU General Public License and -- --- a copy of the GCC Runtime Library Exception along with this program; -- --- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- --- . -- --- -- --- GNAT was originally developed by the GNAT team at New York University. -- --- Extensive contributions were provided by Ada Core Technologies Inc. -- --- -- ------------------------------------------------------------------------------- - -package body Ada.Strings.UTF_Encoding.Wide_Wide_String_Encoding is - use Interfaces; - - ------------ - -- Decode -- - ------------ - - -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_Wide_String - - function Decode - (Item : UTF_String; - Input_Scheme : Encoding_Scheme) return Wide_Wide_String - is - begin - if Input_Scheme = UTF_8 then - return Decode (Item); - else - return Decode (To_UTF_16 (Item, Input_Scheme)); - end if; - end Decode; - - -- Decode UTF-8 input to Wide_Wide_String - - function Decode (Item : UTF_8_String) return Wide_Wide_String is - Result : Wide_Wide_String (1 .. Item'Length); - -- Result string (worst case is same length as input) - - Len : Natural := 0; - -- Length of result stored so far - - Iptr : Natural; - -- Input string pointer - - C : Unsigned_8; - R : Unsigned_32; - - procedure Get_Continuation; - -- Reads a continuation byte of the form 10xxxxxx, shifts R left - -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On - -- return Ptr is incremented. Raises exceptioon if continuation - -- byte does not exist or is invalid. 
- - ---------------------- - -- Get_Continuation -- - ---------------------- - - procedure Get_Continuation is - begin - if Iptr > Item'Last then - Raise_Encoding_Error (Iptr - 1); - - else - C := To_Unsigned_8 (Item (Iptr)); - Iptr := Iptr + 1; - - if C not in 2#10_000000# .. 2#10_111111# then - Raise_Encoding_Error (Iptr - 1); - else - R := Shift_Left (R, 6) or Unsigned_32 (C and 2#00_111111#); - end if; - end if; - end Get_Continuation; - - -- Start of processing for Decode - - begin - Iptr := Item'First; - - -- Skip BOM at start - - if Item'Length >= 3 - and then Item (Iptr .. Iptr + 2) = BOM_8 - then - Iptr := Iptr + 3; - - -- Error if bad BOM - - elsif Item'Length >= 2 - and then (Item (Iptr .. Iptr + 1) = BOM_16BE - or else - Item (Iptr .. Iptr + 1) = BOM_16LE) - then - Raise_Encoding_Error (Iptr); - end if; - - -- Loop through input characters - - while Iptr <= Item'Last loop - C := To_Unsigned_8 (Item (Iptr)); - Iptr := Iptr + 1; - - -- Codes in the range 16#00# - 16#7F# are represented as - -- 0xxxxxxx - - if C <= 16#7F# then - R := Unsigned_32 (C); - - -- No initial code can be of the form 10xxxxxx. Such codes are used - -- only for continuations. 
- - elsif C <= 2#10_111111# then - Raise_Encoding_Error (Iptr - 1); - - -- Codes in the range 16#80# - 16#7FF# are represented as - -- 110yyyxx 10xxxxxx - - elsif C <= 2#110_11111# then - R := Unsigned_32 (C and 2#000_11111#); - Get_Continuation; - - -- Codes in the range 16#800# - 16#FFFF# are represented as - -- 1110yyyy 10yyyyxx 10xxxxxx - - elsif C <= 2#1110_1111# then - R := Unsigned_32 (C and 2#0000_1111#); - Get_Continuation; - Get_Continuation; - - -- Codes in the range 16#10000# - 16#10FFFF# are represented as - -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx - - elsif C <= 2#11110_111# then - R := Unsigned_32 (C and 2#00000_111#); - Get_Continuation; - Get_Continuation; - Get_Continuation; - - -- Any other code is an error - - else - Raise_Encoding_Error (Iptr - 1); - end if; - - Len := Len + 1; - Result (Len) := Wide_Wide_Character'Val (R); - end loop; - - return Result (1 .. Len); - end Decode; - - -- Decode UTF-16 input to Wide_Wide_String - - function Decode (Item : UTF_16_Wide_String) return Wide_Wide_String is - Result : Wide_Wide_String (1 .. Item'Length); - -- Result cannot be longer than the input string - - Len : Natural := 0; - -- Length of result - - Iptr : Natural; - -- Pointer to next element in Item - - C : Unsigned_16; - R : Unsigned_32; - - begin - -- Skip UTF-16 BOM at start - - Iptr := Item'First; - - if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then - Iptr := Iptr + 1; - end if; - - -- Loop through input characters - - while Iptr <= Item'Last loop - C := To_Unsigned_16 (Item (Iptr)); - Iptr := Iptr + 1; - - -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# - -- represent their own value. - - if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then - Len := Len + 1; - Result (Len) := Wide_Wide_Character'Val (C); - - -- Codes in the range 16#D800#..16#DBFF# represent the first of the - -- two surrogates used to encode the range 16#01_000#..16#10_FFFF". - -- The first surrogate provides 10 high order bits of the result. 
- - elsif C <= 16#DBFF# then - R := Shift_Left ((Unsigned_32 (C) - 16#D800#), 10); - - -- Error if at end of string - - if Iptr > Item'Last then - Raise_Encoding_Error (Iptr - 1); - - -- Otherwise next character must be valid low order surrogate - -- which provides the low 10 order bits of the result. - - else - C := To_Unsigned_16 (Item (Iptr)); - Iptr := Iptr + 1; - - if C not in 16#DC00# .. 16#DFFF# then - Raise_Encoding_Error (Iptr - 1); - - else - R := R or (Unsigned_32 (C) mod 2 ** 10); - - -- The final adjustment is to add 16#01_0000 to get the - -- result back in the required 21 bit range. - - R := R + 16#01_0000#; - Len := Len + 1; - Result (Len) := Wide_Wide_Character'Val (R); - end if; - end if; - - -- Remaining codes are invalid - - else - Raise_Encoding_Error (Iptr - 1); - end if; - end loop; - - return Result (1 .. Len); - end Decode; - - ------------ - -- Encode -- - ------------ - - -- Encode Wide_Wide_String in UTF-8, UTF-16BE or UTF-16LE - - function Encode - (Item : Wide_Wide_String; - Output_Scheme : Encoding_Scheme; - Output_BOM : Boolean := False) return UTF_String - is - begin - if Output_Scheme = UTF_8 then - return Encode (Item, Output_BOM); - else - return From_UTF_16 (Encode (Item), Output_Scheme, Output_BOM); - end if; - end Encode; - - -- Encode Wide_Wide_String in UTF-8 - - function Encode - (Item : Wide_Wide_String; - Output_BOM : Boolean := False) return UTF_8_String - is - Result : String (1 .. 4 * Item'Length + 3); - -- Worst case is four bytes per input byte + space for BOM - - Len : Natural; - -- Number of output codes stored in Result - - C : Unsigned_32; - -- Single input character - - procedure Store (C : Unsigned_32); - pragma Inline (Store); - -- Store one output code (input is in range 0 .. 
255) - - ----------- - -- Store -- - ----------- - - procedure Store (C : Unsigned_32) is - begin - Len := Len + 1; - Result (Len) := Character'Val (C); - end Store; - - -- Start of processing for Encode - - begin - -- Output BOM if required - - if Output_BOM then - Result (1 .. 3) := BOM_8; - Len := 3; - else - Len := 0; - end if; - - -- Loop through characters of input - - for Iptr in Item'Range loop - C := To_Unsigned_32 (Item (Iptr)); - - -- Codes in the range 16#00#..16#7F# are represented as - -- 0xxxxxxx - - if C <= 16#7F# then - Store (C); - - -- Codes in the range 16#80#..16#7FF# are represented as - -- 110yyyxx 10xxxxxx - - elsif C <= 16#7FF# then - Store (2#110_00000# or Shift_Right (C, 6)); - Store (2#10_000000# or (C and 2#00_111111#)); - - -- Codes in the range 16#800#..16#D7FF# or 16#E000#..16#FFFD# are - -- represented as - -- 1110yyyy 10yyyyxx 10xxxxxx - - elsif C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then - Store (2#1110_0000# or Shift_Right (C, 12)); - Store (2#10_000000# or - Shift_Right (C and 2#111111_000000#, 6)); - Store (2#10_000000# or (C and 2#00_111111#)); - - -- Codes in the range 16#10000# - 16#10FFFF# are represented as - -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx - - elsif C in 16#1_0000# .. 16#10_FFFF# then - Store (2#11110_000# or - Shift_Right (C, 18)); - Store (2#10_000000# or - Shift_Right (C and 2#111111_000000_000000#, 12)); - Store (2#10_000000# or - Shift_Right (C and 2#111111_000000#, 6)); - Store (2#10_000000# or - (C and 2#00_111111#)); - - -- All other codes are invalid - - else - Raise_Encoding_Error (Iptr); - end if; - end loop; - - return Result (1 .. Len); - end Encode; - - -- Encode Wide_Wide_String in UTF-16 - - function Encode - (Item : Wide_Wide_String; - Output_BOM : Boolean := False) return UTF_16_Wide_String - is - Result : UTF_16_Wide_String (1 .. 2 * Item'Length + 1); - -- Worst case is each input character generates two output characters - -- plus one for possible BOM. 
- - Len : Integer; - -- Length of output string - - C : Unsigned_32; - - begin - -- Output BOM if needed - - if Output_BOM then - Result (1) := BOM_16 (1); - Len := 1; - else - Len := 0; - end if; - - -- Loop through input characters encoding them - - for Iptr in Item'Range loop - C := To_Unsigned_32 (Item (Iptr)); - - -- Codes in the range 16#00_0000#..16#00_D7FF# or 16#E000#..16#FFFD# - -- are output unchanged - - if C <= 16#00_D7FF# or else C in 16#E000# .. 16#FFFD# then - Len := Len + 1; - Result (Len) := Wide_Character'Val (C); - - -- Codes in the range 16#01_0000#..16#10_FFFF# are output using two - -- surrogate characters. First 16#1_0000# is subtracted from the code - -- point to give a 20-bit value. This is then split into two separate - -- 10-bit values each of which is represented as a surrogate with the - -- most significant half placed in the first surrogate. The ranges of - -- values used for the two surrogates are 16#D800#-16#DBFF# for the - -- first, most significant surrogate and 16#DC00#-16#DFFF# for the - -- second, least significant surrogate. - - elsif C in 16#1_0000# .. 16#10_FFFF# then - C := C - 16#1_0000#; - - Len := Len + 1; - Result (Len) := Wide_Character'Val (16#D800# + C / 2 ** 10); - - Len := Len + 1; - Result (Len) := Wide_Character'Val (16#DC00# + C mod 2 ** 10); - - -- All other codes are invalid - - else - Raise_Encoding_Error (Iptr); - end if; - end loop; - - return Result (1 .. Len); - end Encode; - -end Ada.Strings.UTF_Encoding.Wide_Wide_String_Encoding; diff --git a/gcc/ada/a-suezse.ads b/gcc/ada/a-suezse.ads deleted file mode 100755 index 1882f42..0000000 --- a/gcc/ada/a-suezse.ads +++ /dev/null @@ -1,64 +0,0 @@ ------------------------------------------------------------------------------- --- -- --- GNAT RUN-TIME COMPONENTS -- --- -- --- ADA.STRINGS.UTF_ENCODING.WIDE_WIDE_STRING_ENCODING -- --- -- --- S p e c -- --- -- --- This specification is derived from the Ada Reference Manual for use with -- --- GNAT. 
In accordance with the copyright of that document, you can freely -- --- copy and modify this specification, provided that if you redistribute a -- --- modified version, any changes that you have made are clearly indicated. -- --- -- ------------------------------------------------------------------------------- - --- This is an Ada 2012 package defined in AI05-0137-1. It is used for encoding --- and decoding Wide_String values using UTF encodings. Note: this package is --- consistent with Ada 2005, and may be used in Ada 2005 mode, but cannot be --- used in Ada 95 mode, since Wide_Wide_Character is an Ada 2005 feature. - -package Ada.Strings.UTF_Encoding.Wide_Wide_String_Encoding is - pragma Pure (Wide_Wide_String_Encoding); - - -- The encoding routines take a Wide_Wide_String as input and encode the - -- result using the specified UTF encoding method. The result includes a - -- BOM if the Output_BOM parameter is set to True. - - function Encode - (Item : Wide_Wide_String; - Output_Scheme : Encoding_Scheme; - Output_BOM : Boolean := False) return UTF_String; - -- Encode Wide_Wide_String using UTF-8, UTF-16LE or UTF-16BE encoding as - -- specified by the Output_Scheme parameter. - - function Encode - (Item : Wide_Wide_String; - Output_BOM : Boolean := False) return UTF_8_String; - -- Encode Wide_Wide_String using UTF-8 encoding - - function Encode - (Item : Wide_Wide_String; - Output_BOM : Boolean := False) return UTF_16_Wide_String; - -- Encode Wide_Wide_String using UTF_16 encoding - - -- The decoding routines take a UTF String as input, and return a decoded - -- Wide_String. If the UTF String starts with a BOM that matches the - -- encoding method, it is ignored. An incorrect BOM raises Encoding_Error. - - function Decode - (Item : UTF_String; - Input_Scheme : Encoding_Scheme) return Wide_Wide_String; - -- The input is encoded in UTF_8, UTF_16LE or UTF_16BE as specified by the - -- Input_Scheme parameter. 
It is decoded and returned as a Wide_Wide_String - -- value. Note: a convenient form for Scheme may be Encoding (UTF_String). - - function Decode - (Item : UTF_8_String) return Wide_Wide_String; - -- The input is encoded in UTF-8 and returned as a Wide_Wide_String value - - function Decode - (Item : UTF_16_Wide_String) return Wide_Wide_String; - -- The input is encoded in UTF-16 and returned as a Wide_String value - -end Ada.Strings.UTF_Encoding.Wide_Wide_String_Encoding; diff --git a/gcc/ada/a-suezst.adb b/gcc/ada/a-suezst.adb new file mode 100755 index 0000000..0e5bba7 --- /dev/null +++ b/gcc/ada/a-suezst.adb @@ -0,0 +1,429 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.WIDE_WIDE_STRINGS -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 2010, Free Software Foundation, Inc. -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. -- +-- -- +-- As a special exception under Section 7 of GPL version 3, you are granted -- +-- additional permissions described in the GCC Runtime Library Exception, -- +-- version 3.1, as published by the Free Software Foundation. -- +-- -- +-- You should have received a copy of the GNU General Public License and -- +-- a copy of the GCC Runtime Library Exception along with this program; -- +-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- +-- <http://www.gnu.org/licenses/>. -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. 
-- +-- -- +------------------------------------------------------------------------------ + +package body Ada.Strings.UTF_Encoding.Wide_Wide_Strings is + use Interfaces; + + ------------ + -- Decode -- + ------------ + + -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_Wide_String + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return Wide_Wide_String + is + begin + if Input_Scheme = UTF_8 then + return Decode (Item); + else + return Decode (To_UTF_16 (Item, Input_Scheme)); + end if; + end Decode; + + -- Decode UTF-8 input to Wide_Wide_String + + function Decode (Item : UTF_8_String) return Wide_Wide_String is + Result : Wide_Wide_String (1 .. Item'Length); + -- Result string (worst case is same length as input) + + Len : Natural := 0; + -- Length of result stored so far + + Iptr : Natural; + -- Input string pointer + + C : Unsigned_8; + R : Unsigned_32; + + procedure Get_Continuation; + -- Reads a continuation byte of the form 10xxxxxx, shifts R left + -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On + -- return Iptr is incremented. Raises exception if continuation + -- byte does not exist or is invalid. + + ---------------------- + -- Get_Continuation -- + ---------------------- + + procedure Get_Continuation is + begin + if Iptr > Item'Last then + Raise_Encoding_Error (Iptr - 1); + + else + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + if C not in 2#10_000000# .. 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + else + R := Shift_Left (R, 6) or Unsigned_32 (C and 2#00_111111#); + end if; + end if; + end Get_Continuation; + + -- Start of processing for Decode + + begin + Iptr := Item'First; + + -- Skip BOM at start + + if Item'Length >= 3 + and then Item (Iptr .. Iptr + 2) = BOM_8 + then + Iptr := Iptr + 3; + + -- Error if bad BOM + + elsif Item'Length >= 2 + and then (Item (Iptr .. Iptr + 1) = BOM_16BE + or else + Item (Iptr .. 
Iptr + 1) = BOM_16LE) + then + Raise_Encoding_Error (Iptr); + end if; + + -- Loop through input characters + + while Iptr <= Item'Last loop + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + R := Unsigned_32 (C); + + -- No initial code can be of the form 10xxxxxx. Such codes are used + -- only for continuations. + + elsif C <= 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 2#110_11111# then + R := Unsigned_32 (C and 2#000_11111#); + Get_Continuation; + + -- Codes in the range 16#800# - 16#FFFF# are represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + elsif C <= 2#1110_1111# then + R := Unsigned_32 (C and 2#0000_1111#); + Get_Continuation; + Get_Continuation; + + -- Codes in the range 16#10000# - 16#10FFFF# are represented as + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + + elsif C <= 2#11110_111# then + R := Unsigned_32 (C and 2#00000_111#); + Get_Continuation; + Get_Continuation; + Get_Continuation; + + -- Any other code is an error + + else + Raise_Encoding_Error (Iptr - 1); + end if; + + Len := Len + 1; + Result (Len) := Wide_Wide_Character'Val (R); + end loop; + + return Result (1 .. Len); + end Decode; + + -- Decode UTF-16 input to Wide_Wide_String + + function Decode (Item : UTF_16_Wide_String) return Wide_Wide_String is + Result : Wide_Wide_String (1 .. 
Item'Length); + -- Result cannot be longer than the input string + + Len : Natural := 0; + -- Length of result + + Iptr : Natural; + -- Pointer to next element in Item + + C : Unsigned_16; + R : Unsigned_32; + + begin + -- Skip UTF-16 BOM at start + + Iptr := Item'First; + + if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then + Iptr := Iptr + 1; + end if; + + -- Loop through input characters + + while Iptr <= Item'Last loop + C := To_Unsigned_16 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# + -- represent their own value. + + if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Wide_Character'Val (C); + + -- Codes in the range 16#D800#..16#DBFF# represent the first of the + -- two surrogates used to encode the range 16#01_0000#..16#10_FFFF#. + -- The first surrogate provides 10 high order bits of the result. + + elsif C <= 16#DBFF# then + R := Shift_Left ((Unsigned_32 (C) - 16#D800#), 10); + + -- Error if at end of string + + if Iptr > Item'Last then + Raise_Encoding_Error (Iptr - 1); + + -- Otherwise next character must be valid low order surrogate + -- which provides the 10 low order bits of the result. + + else + C := To_Unsigned_16 (Item (Iptr)); + Iptr := Iptr + 1; + + if C not in 16#DC00# .. 16#DFFF# then + Raise_Encoding_Error (Iptr - 1); + + else + R := R or (Unsigned_32 (C) mod 2 ** 10); + + -- The final adjustment is to add 16#01_0000# to get the + -- result back in the required 21 bit range. + + R := R + 16#01_0000#; + Len := Len + 1; + Result (Len) := Wide_Wide_Character'Val (R); + end if; + end if; + + -- Remaining codes are invalid + + else + Raise_Encoding_Error (Iptr - 1); + end if; + end loop; + + return Result (1 .. 
Len); + end Decode; + + ------------ + -- Encode -- + ------------ + + -- Encode Wide_Wide_String in UTF-8, UTF-16BE or UTF-16LE + + function Encode + (Item : Wide_Wide_String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String + is + begin + if Output_Scheme = UTF_8 then + return Encode (Item, Output_BOM); + else + return From_UTF_16 (Encode (Item), Output_Scheme, Output_BOM); + end if; + end Encode; + + -- Encode Wide_Wide_String in UTF-8 + + function Encode + (Item : Wide_Wide_String; + Output_BOM : Boolean := False) return UTF_8_String + is + Result : String (1 .. 4 * Item'Length + 3); + -- Worst case is four bytes per input byte + space for BOM + + Len : Natural; + -- Number of output codes stored in Result + + C : Unsigned_32; + -- Single input character + + procedure Store (C : Unsigned_32); + pragma Inline (Store); + -- Store one output code (input is in range 0 .. 255) + + ----------- + -- Store -- + ----------- + + procedure Store (C : Unsigned_32) is + begin + Len := Len + 1; + Result (Len) := Character'Val (C); + end Store; + + -- Start of processing for Encode + + begin + -- Output BOM if required + + if Output_BOM then + Result (1 .. 3) := BOM_8; + Len := 3; + else + Len := 0; + end if; + + -- Loop through characters of input + + for Iptr in Item'Range loop + C := To_Unsigned_32 (Item (Iptr)); + + -- Codes in the range 16#00#..16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + Store (C); + + -- Codes in the range 16#80#..16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 16#7FF# then + Store (2#110_00000# or Shift_Right (C, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + + -- Codes in the range 16#800#..16#D7FF# or 16#E000#..16#FFFD# are + -- represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + elsif C <= 16#D7FF# or else C in 16#E000# .. 
16#FFFD# then + Store (2#1110_0000# or Shift_Right (C, 12)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000#, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + + -- Codes in the range 16#10000# - 16#10FFFF# are represented as + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + + elsif C in 16#1_0000# .. 16#10_FFFF# then + Store (2#11110_000# or + Shift_Right (C, 18)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000_000000#, 12)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000#, 6)); + Store (2#10_000000# or + (C and 2#00_111111#)); + + -- All other codes are invalid + + else + Raise_Encoding_Error (Iptr); + end if; + end loop; + + return Result (1 .. Len); + end Encode; + + -- Encode Wide_Wide_String in UTF-16 + + function Encode + (Item : Wide_Wide_String; + Output_BOM : Boolean := False) return UTF_16_Wide_String + is + Result : UTF_16_Wide_String (1 .. 2 * Item'Length + 1); + -- Worst case is each input character generates two output characters + -- plus one for possible BOM. + + Len : Integer; + -- Length of output string + + C : Unsigned_32; + + begin + -- Output BOM if needed + + if Output_BOM then + Result (1) := BOM_16 (1); + Len := 1; + else + Len := 0; + end if; + + -- Loop through input characters encoding them + + for Iptr in Item'Range loop + C := To_Unsigned_32 (Item (Iptr)); + + -- Codes in the range 16#00_0000#..16#00_D7FF# or 16#E000#..16#FFFD# + -- are output unchanged + + if C <= 16#00_D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Character'Val (C); + + -- Codes in the range 16#01_0000#..16#10_FFFF# are output using two + -- surrogate characters. First 16#1_0000# is subtracted from the code + -- point to give a 20-bit value. This is then split into two separate + -- 10-bit values each of which is represented as a surrogate with the + -- most significant half placed in the first surrogate. 
The ranges of + -- values used for the two surrogates are 16#D800#-16#DBFF# for the + -- first, most significant surrogate and 16#DC00#-16#DFFF# for the + -- second, least significant surrogate. + + elsif C in 16#1_0000# .. 16#10_FFFF# then + C := C - 16#1_0000#; + + Len := Len + 1; + Result (Len) := Wide_Character'Val (16#D800# + C / 2 ** 10); + + Len := Len + 1; + Result (Len) := Wide_Character'Val (16#DC00# + C mod 2 ** 10); + + -- All other codes are invalid + + else + Raise_Encoding_Error (Iptr); + end if; + end loop; + + return Result (1 .. Len); + end Encode; + +end Ada.Strings.UTF_Encoding.Wide_Wide_Strings; diff --git a/gcc/ada/a-suezst.ads b/gcc/ada/a-suezst.ads new file mode 100755 index 0000000..86d344d --- /dev/null +++ b/gcc/ada/a-suezst.ads @@ -0,0 +1,64 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.WIDE_WIDE_STRINGS -- +-- -- +-- S p e c -- +-- -- +-- This specification is derived from the Ada Reference Manual for use with -- +-- GNAT. In accordance with the copyright of that document, you can freely -- +-- copy and modify this specification, provided that if you redistribute a -- +-- modified version, any changes that you have made are clearly indicated. -- +-- -- +------------------------------------------------------------------------------ + +-- This is an Ada 2012 package defined in AI05-0137-1. It is used for encoding +-- and decoding Wide_String values using UTF encodings. Note: this package is +-- consistent with Ada 2005, and may be used in Ada 2005 mode, but cannot be +-- used in Ada 95 mode, since Wide_Wide_Character is an Ada 2005 feature. + +package Ada.Strings.UTF_Encoding.Wide_Wide_Strings is + pragma Pure (Wide_Wide_Strings); + + -- The encoding routines take a Wide_Wide_String as input and encode the + -- result using the specified UTF encoding method. 
The result includes a + -- BOM if the Output_BOM parameter is set to True. + + function Encode + (Item : Wide_Wide_String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String; + -- Encode Wide_Wide_String using UTF-8, UTF-16LE or UTF-16BE encoding as + -- specified by the Output_Scheme parameter. + + function Encode + (Item : Wide_Wide_String; + Output_BOM : Boolean := False) return UTF_8_String; + -- Encode Wide_Wide_String using UTF-8 encoding + + function Encode + (Item : Wide_Wide_String; + Output_BOM : Boolean := False) return UTF_16_Wide_String; + -- Encode Wide_Wide_String using UTF-16 encoding + + -- The decoding routines take a UTF String as input, and return a decoded + -- Wide_Wide_String. If the UTF String starts with a BOM that matches the + -- encoding method, it is ignored. An incorrect BOM raises Encoding_Error. + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return Wide_Wide_String; + -- The input is encoded in UTF_8, UTF_16LE or UTF_16BE as specified by the + -- Input_Scheme parameter. It is decoded and returned as a Wide_Wide_String + -- value. Note: a convenient form for Scheme may be Encoding (UTF_String). 
+ + function Decode + (Item : UTF_8_String) return Wide_Wide_String; + -- The input is encoded in UTF-8 and returned as a Wide_Wide_String value + + function Decode + (Item : UTF_16_Wide_String) return Wide_Wide_String; + -- The input is encoded in UTF-16 and returned as a Wide_Wide_String value + +end Ada.Strings.UTF_Encoding.Wide_Wide_Strings; diff --git a/gcc/ada/a-textio.adb b/gcc/ada/a-textio.adb index f8538ab..721deca 100644 --- a/gcc/ada/a-textio.adb +++ b/gcc/ada/a-textio.adb @@ -684,10 +684,18 @@ package body Ada.Text_IO is Get_Immediate (Current_In, Item, Available); end Get_Immediate; + -------------- + -- Get_Line -- + -------------- + procedure Get_Line (File : File_Type; Item : out String; Last : out Natural) is separate; + -- The implementation of Ada.Text_IO.Get_Line is split into a subunit so + -- that different implementations can be used on different systems. In + -- particular the standard implementation uses low level stuff that is + -- not appropriate for the JVM and .NET implementations. procedure Get_Line (Item : out String; diff --git a/gcc/ada/exp_ch5.adb b/gcc/ada/exp_ch5.adb index f53ac1f..a28c5ab 100644 --- a/gcc/ada/exp_ch5.adb +++ b/gcc/ada/exp_ch5.adb @@ -3151,8 +3151,11 @@ package body Exp_Ch5 is else -- We're about to drop Return_Object_Declarations on the floor, so -- we need to insert it, in case it got expanded into useful code. + -- Remove side effects from expression, which may be duplicated in + -- subsequent checks (see Expand_Simple_Function_Return). Insert_List_Before (N, Return_Object_Declarations (N)); + Remove_Side_Effects (Exp); -- Build simple_return_statement that returns the expression directly @@ -4248,29 +4251,35 @@ package body Exp_Ch5 is end; -- AI05-0073: If function has a controlling access result, check that - -- the tag of the return value matches the designated type. + -- the tag of the return value, if it is not null, matches designated + -- type of return type. -- The "or else True" needs commenting here ??? 
elsif Ekind (R_Type) = E_Anonymous_Access_Type and then Has_Controlling_Result (Scope_Id) - and then (Ada_Version >= Ada_12 or else True) then - Insert_Action (Exp, + Insert_Action (N, Make_Raise_Constraint_Error (Loc, Condition => - Make_Op_Ne (Loc, - Left_Opnd => - Make_Selected_Component (Loc, - Prefix => Duplicate_Subexpr (Exp), - Selector_Name => - Make_Identifier (Loc, Chars => Name_uTag)), - Right_Opnd => - Make_Attribute_Reference (Loc, - Prefix => - New_Occurrence_Of (Designated_Type (R_Type), Loc), - Attribute_Name => Name_Tag)), - Reason => CE_Tag_Check_Failed)); + Make_And_Then (Loc, + Left_Opnd => + Make_Op_Ne (Loc, + Left_Opnd => Exp, + Right_Opnd => Make_Null (Loc)), + Right_Opnd => Make_Op_Ne (Loc, + Left_Opnd => + Make_Selected_Component (Loc, + Prefix => Duplicate_Subexpr (Exp), + Selector_Name => + Make_Identifier (Loc, Chars => Name_uTag)), + Right_Opnd => + Make_Attribute_Reference (Loc, + Prefix => + New_Occurrence_Of (Designated_Type (R_Type), Loc), + Attribute_Name => Name_Tag))), + Reason => CE_Tag_Check_Failed), + Suppress => All_Checks); end if; -- If we are returning an object that may not be bit-aligned, then copy diff --git a/gcc/ada/impunit.adb b/gcc/ada/impunit.adb index 6b427f0..098622e 100644 --- a/gcc/ada/impunit.adb +++ b/gcc/ada/impunit.adb @@ -180,8 +180,8 @@ package body Impunit is "a-wichha", -- Ada.Wide_Characters.Handling "a-stuten", -- Ada.Strings.UTF_Encoding "a-suenco", -- Ada.Strings.UTF_Encoding.Conversions - "a-suesen", -- Ada.Strings.UTF_Encoding.String_Encoding - "a-suewse", -- Ada.Strings.UTF_Encoding.Wide_String_Encoding + "a-suenst", -- Ada.Strings.UTF_Encoding.Strings + "a-suewst", -- Ada.Strings.UTF_Encoding.Wide_Strings --------------------------- -- GNAT Special IO Units -- @@ -474,7 +474,7 @@ package body Impunit is -- Note: strictly the following should be Ada 2012 units, but it seems -- harmless (and useful) to make then available in Ada 2005 mode. 
- "a-suezse", -- Ada.Strings.UTF_Encoding.Wide_Wide_String_Encoding + "a-suezst", -- Ada.Strings.UTF_Encoding.Wide_Wide_Strings --------------------------- -- GNAT Special IO Units -- diff --git a/gcc/ada/rtsfind.ads b/gcc/ada/rtsfind.ads index 33621f5..ca61bd1 100644 --- a/gcc/ada/rtsfind.ads +++ b/gcc/ada/rtsfind.ads @@ -265,7 +265,6 @@ package Rtsfind is System_Machine_Code, System_Mantissa, System_Memcop, - System_Multiprocessors, System_Pack_03, System_Pack_05, System_Pack_06, diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb index 6957ed2..f5dcd5c 100644 --- a/gcc/ada/sem_ch6.adb +++ b/gcc/ada/sem_ch6.adb @@ -495,8 +495,16 @@ package body Sem_Ch6 is -- In GNAT mode, this is just a warning, to allow it to be -- evilly turned off. Otherwise it is a real error. + -- In a generic context, simplify the warning because it makes + -- no sense to discuss pass-by-reference or copy. + elsif Warn_On_Ada_2005_Compatibility or GNAT_Mode then - if Is_Immutably_Limited_Type (R_Type) then + if Inside_A_Generic then + Error_Msg_N + ("return of limited object not permitted in Ada2005 " & + "(RM-2005 6.5(5.5/2))?", Expr); + + elsif Is_Immutably_Limited_Type (R_Type) then Error_Msg_N ("return by reference not permitted in Ada 2005 " & "(RM-2005 6.5(5.5/2))?", Expr); @@ -512,9 +520,11 @@ package body Sem_Ch6 is return; -- skip continuation messages below end if; - Error_Msg_N - ("\consider switching to return of access type", Expr); - Explain_Limited_Type (R_Type, Expr); + if not Inside_A_Generic then + Error_Msg_N + ("\consider switching to return of access type", Expr); + Explain_Limited_Type (R_Type, Expr); + end if; end if; end Check_Limited_Return; @@ -764,16 +774,25 @@ package body Sem_Ch6 is and then Object_Access_Level (Expr) > Subprogram_Access_Level (Scope_Id) then - Rewrite (N, - Make_Raise_Program_Error (Loc, - Reason => PE_Accessibility_Check_Failed)); - Analyze (N); - Error_Msg_N - ("cannot return a local value by reference?", N); - Error_Msg_NE - ("\& will be 
raised at run time?", - N, Standard_Program_Error); + -- Suppress the message in a generic, where the rewriting + -- is irrelevant. + + if Inside_A_Generic then + null; + + else + Rewrite (N, + Make_Raise_Program_Error (Loc, + Reason => PE_Accessibility_Check_Failed)); + Analyze (N); + + Error_Msg_N + ("cannot return a local value by reference?", N); + Error_Msg_NE + ("\& will be raised at run time?", + N, Standard_Program_Error); + end if; end if; if Known_Null (Expr) @@ -4255,9 +4274,11 @@ package body Sem_Ch6 is declare Typ : constant Entity_Id := Etype (Designator); Utyp : constant Entity_Id := Underlying_Type (Typ); + begin if Is_Immutably_Limited_Type (Typ) then Set_Returns_By_Ref (Designator); + elsif Present (Utyp) and then CW_Or_Has_Controlled_Part (Utyp) then Set_Returns_By_Ref (Designator); end if; -- cgit v1.1