------------------------------------------------------------------------------ -- -- -- GNAT RUN-TIME COMPONENTS -- -- -- -- A D A . C H A R A C T E R S . H A N D L I N G -- -- -- -- B o d y -- -- -- -- Copyright (C) 1992-2024, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- -- ware Foundation; either version 3, or (at your option) any later ver- -- -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- -- or FITNESS FOR A PARTICULAR PURPOSE. -- -- -- -- As a special exception under Section 7 of GPL version 3, you are granted -- -- additional permissions described in the GCC Runtime Library Exception, -- -- version 3.1, as published by the Free Software Foundation. -- -- -- -- You should have received a copy of the GNU General Public License and -- -- a copy of the GCC Runtime Library Exception along with this program; -- -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- -- . -- -- -- -- GNAT was originally developed by the GNAT team at New York University. -- -- Extensive contributions were provided by Ada Core Technologies Inc. -- -- -- ------------------------------------------------------------------------------ -- Loop invariants in this unit are meant for analysis only, not for run-time -- checking, as it would be too costly otherwise. This is enforced by setting -- the assertion policy to Ignore. pragma Assertion_Policy (Loop_Invariant => Ignore); with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; with Ada.Strings.Maps; use Ada.Strings.Maps; with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants; package body Ada.Characters.Handling with SPARK_Mode is ------------------------------------ -- Character Classification Table -- ------------------------------------ type Character_Flags is mod 256; for Character_Flags'Size use 8; Control : constant Character_Flags := 1; Lower : constant Character_Flags := 2; Upper : constant Character_Flags := 4; Basic : constant Character_Flags := 8; Hex_Digit : constant Character_Flags := 16; Digit : constant Character_Flags := 32; Special : constant Character_Flags := 64; Line_Term : constant Character_Flags := 128; Letter : constant Character_Flags := Lower or Upper; Alphanum : constant Character_Flags := Letter or Digit; Graphic : constant Character_Flags := Alphanum or Special; Char_Map : constant array (Character) of Character_Flags := [ NUL => Control, SOH => Control, STX => Control, ETX => Control, EOT => Control, ENQ => Control, ACK => Control, BEL => Control, BS => Control, HT => Control, LF => Control + Line_Term, VT => Control + Line_Term, FF => Control + Line_Term, CR => Control + Line_Term, SO => Control, SI => Control, DLE => Control, DC1 => Control, DC2 => Control, DC3 => Control, DC4 => Control, NAK => Control, SYN => Control, ETB => Control, CAN => Control, EM => Control, SUB => Control, ESC => Control, FS => Control, GS => Control, RS => Control, US => Control, Space => Special, Exclamation => Special, Quotation => Special, Number_Sign => Special, Dollar_Sign => Special, Percent_Sign => Special, Ampersand => Special, Apostrophe => Special, Left_Parenthesis => Special, Right_Parenthesis => Special, Asterisk => Special, Plus_Sign => Special, Comma => Special, Hyphen => Special, Full_Stop => Special, Solidus => Special, '0' .. '9' => Digit + Hex_Digit, Colon => Special, Semicolon => Special, Less_Than_Sign => Special, Equals_Sign => Special, Greater_Than_Sign => Special, Question => Special, Commercial_At => Special, 'A' .. 'F' => Upper + Basic + Hex_Digit, 'G' .. 'Z' => Upper + Basic, Left_Square_Bracket => Special, Reverse_Solidus => Special, Right_Square_Bracket => Special, Circumflex => Special, Low_Line => Special, Grave => Special, 'a' .. 'f' => Lower + Basic + Hex_Digit, 'g' .. 'z' => Lower + Basic, Left_Curly_Bracket => Special, Vertical_Line => Special, Right_Curly_Bracket => Special, Tilde => Special, DEL => Control, Reserved_128 => Control, Reserved_129 => Control, BPH => Control, NBH => Control, Reserved_132 => Control, NEL => Control + Line_Term, SSA => Control, ESA => Control, HTS => Control, HTJ => Control, VTS => Control, PLD => Control, PLU => Control, RI => Control, SS2 => Control, SS3 => Control, DCS => Control, PU1 => Control, PU2 => Control, STS => Control, CCH => Control, MW => Control, SPA => Control, EPA => Control, SOS => Control, Reserved_153 => Control, SCI => Control, CSI => Control, ST => Control, OSC => Control, PM => Control, APC => Control, No_Break_Space => Special, Inverted_Exclamation => Special, Cent_Sign => Special, Pound_Sign => Special, Currency_Sign => Special, Yen_Sign => Special, Broken_Bar => Special, Section_Sign => Special, Diaeresis => Special, Copyright_Sign => Special, Feminine_Ordinal_Indicator => Special, Left_Angle_Quotation => Special, Not_Sign => Special, Soft_Hyphen => Special, Registered_Trade_Mark_Sign => Special, Macron => Special, Degree_Sign => Special, Plus_Minus_Sign => Special, Superscript_Two => Special, Superscript_Three => Special, Acute => Special, Micro_Sign => Special, Pilcrow_Sign => Special, Middle_Dot => Special, Cedilla => Special, Superscript_One => Special, Masculine_Ordinal_Indicator => Special, Right_Angle_Quotation => Special, Fraction_One_Quarter => Special, Fraction_One_Half => Special, Fraction_Three_Quarters => Special, Inverted_Question => Special, UC_A_Grave => Upper, UC_A_Acute => Upper, UC_A_Circumflex => Upper, UC_A_Tilde => Upper, UC_A_Diaeresis => Upper, UC_A_Ring => Upper, UC_AE_Diphthong => Upper + Basic, UC_C_Cedilla => Upper, UC_E_Grave => Upper, UC_E_Acute => Upper, UC_E_Circumflex => Upper, UC_E_Diaeresis => Upper, UC_I_Grave => Upper, UC_I_Acute => Upper, UC_I_Circumflex => Upper, UC_I_Diaeresis => Upper, UC_Icelandic_Eth => Upper + Basic, UC_N_Tilde => Upper, UC_O_Grave => Upper, UC_O_Acute => Upper, UC_O_Circumflex => Upper, UC_O_Tilde => Upper, UC_O_Diaeresis => Upper, Multiplication_Sign => Special, UC_O_Oblique_Stroke => Upper, UC_U_Grave => Upper, UC_U_Acute => Upper, UC_U_Circumflex => Upper, UC_U_Diaeresis => Upper, UC_Y_Acute => Upper, UC_Icelandic_Thorn => Upper + Basic, LC_German_Sharp_S => Lower + Basic, LC_A_Grave => Lower, LC_A_Acute => Lower, LC_A_Circumflex => Lower, LC_A_Tilde => Lower, LC_A_Diaeresis => Lower, LC_A_Ring => Lower, LC_AE_Diphthong => Lower + Basic, LC_C_Cedilla => Lower, LC_E_Grave => Lower, LC_E_Acute => Lower, LC_E_Circumflex => Lower, LC_E_Diaeresis => Lower, LC_I_Grave => Lower, LC_I_Acute => Lower, LC_I_Circumflex => Lower, LC_I_Diaeresis => Lower, LC_Icelandic_Eth => Lower + Basic, LC_N_Tilde => Lower, LC_O_Grave => Lower, LC_O_Acute => Lower, LC_O_Circumflex => Lower, LC_O_Tilde => Lower, LC_O_Diaeresis => Lower, Division_Sign => Special, LC_O_Oblique_Stroke => Lower, LC_U_Grave => Lower, LC_U_Acute => Lower, LC_U_Circumflex => Lower, LC_U_Diaeresis => Lower, LC_Y_Acute => Lower, LC_Icelandic_Thorn => Lower + Basic, LC_Y_Diaeresis => Lower ]; --------------------- -- Is_Alphanumeric -- --------------------- function Is_Alphanumeric (Item : Character) return Boolean is begin return (Char_Map (Item) and Alphanum) /= 0; end Is_Alphanumeric; -------------- -- Is_Basic -- -------------- function Is_Basic (Item : Character) return Boolean is begin return (Char_Map (Item) and Basic) /= 0; end Is_Basic; ------------------ -- Is_Character -- ------------------ function Is_Character (Item : Wide_Character) return Boolean is (Wide_Character'Pos (Item) < 256); ---------------- -- Is_Control -- ---------------- function Is_Control (Item : Character) return Boolean is begin return (Char_Map (Item) and Control) /= 0; end Is_Control; -------------- -- Is_Digit -- -------------- function Is_Digit (Item : Character) return Boolean is begin return Item in '0' .. '9'; end Is_Digit; ---------------- -- Is_Graphic -- ---------------- function Is_Graphic (Item : Character) return Boolean is begin return (Char_Map (Item) and Graphic) /= 0; end Is_Graphic; -------------------------- -- Is_Hexadecimal_Digit -- -------------------------- function Is_Hexadecimal_Digit (Item : Character) return Boolean is begin return (Char_Map (Item) and Hex_Digit) /= 0; end Is_Hexadecimal_Digit; ---------------- -- Is_ISO_646 -- ---------------- function Is_ISO_646 (Item : Character) return Boolean is (Item in ISO_646); -- Note: much more efficient coding of the following function is possible -- by testing several 16#80# bits in a complete word in a single operation function Is_ISO_646 (Item : String) return Boolean is begin for J in Item'Range loop if Item (J) not in ISO_646 then return False; end if; pragma Loop_Invariant (for all K in Item'First .. J => Is_ISO_646 (Item (K))); end loop; return True; end Is_ISO_646; --------------- -- Is_Letter -- --------------- function Is_Letter (Item : Character) return Boolean is begin return (Char_Map (Item) and Letter) /= 0; end Is_Letter; ------------------------ -- Is_Line_Terminator -- ------------------------ function Is_Line_Terminator (Item : Character) return Boolean is begin return (Char_Map (Item) and Line_Term) /= 0; end Is_Line_Terminator; -------------- -- Is_Lower -- -------------- function Is_Lower (Item : Character) return Boolean is begin return (Char_Map (Item) and Lower) /= 0; end Is_Lower; ------------- -- Is_Mark -- ------------- function Is_Mark (Item : Character) return Boolean is pragma Unreferenced (Item); begin return False; end Is_Mark; ------------- -- Is_NFKC -- ------------- function Is_NFKC (Item : Character) return Boolean is begin return Character'Pos (Item) not in 160 | 168 | 170 | 175 | 178 | 179 | 180 | 181 | 184 | 185 | 186 | 188 | 189 | 190; end Is_NFKC; --------------------- -- Is_Other_Format -- --------------------- function Is_Other_Format (Item : Character) return Boolean is begin return Item = Soft_Hyphen; end Is_Other_Format; ------------------------------ -- Is_Punctuation_Connector -- ------------------------------ function Is_Punctuation_Connector (Item : Character) return Boolean is begin return Item = '_'; end Is_Punctuation_Connector; -------------- -- Is_Space -- -------------- function Is_Space (Item : Character) return Boolean is begin return Item = ' ' or else Item = No_Break_Space; end Is_Space; ---------------- -- Is_Special -- ---------------- function Is_Special (Item : Character) return Boolean is begin return (Char_Map (Item) and Special) /= 0; end Is_Special; --------------- -- Is_String -- --------------- function Is_String (Item : Wide_String) return Boolean is begin for J in Item'Range loop if Wide_Character'Pos (Item (J)) >= 256 then return False; end if; pragma Loop_Invariant (for all K in Item'First .. J => Is_Character (Item (K))); end loop; return True; end Is_String; -------------- -- Is_Upper -- -------------- function Is_Upper (Item : Character) return Boolean is begin return (Char_Map (Item) and Upper) /= 0; end Is_Upper; -------------- -- To_Basic -- -------------- function To_Basic (Item : Character) return Character is (Value (Basic_Map, Item)); function To_Basic (Item : String) return String is begin return Result : String (1 .. Item'Length) with Relaxed_Initialization do for J in Item'Range loop Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J)); pragma Loop_Invariant (Result (1 .. J - Item'First + 1)'Initialized); pragma Loop_Invariant (for all K in Item'First .. J => Result (K - (Item'First - 1)) = To_Basic (Item (K))); end loop; end return; end To_Basic; ------------------ -- To_Character -- ------------------ function To_Character (Item : Wide_Character; Substitute : Character := ' ') return Character is begin if Is_Character (Item) then return Character'Val (Wide_Character'Pos (Item)); else return Substitute; end if; end To_Character; ---------------- -- To_ISO_646 -- ---------------- function To_ISO_646 (Item : Character; Substitute : ISO_646 := ' ') return ISO_646 is (if Item in ISO_646 then Item else Substitute); function To_ISO_646 (Item : String; Substitute : ISO_646 := ' ') return String is begin return Result : String (1 .. Item'Length) with Relaxed_Initialization do for J in Item'Range loop Result (J - (Item'First - 1)) := (if Item (J) in ISO_646 then Item (J) else Substitute); pragma Loop_Invariant (Result (1 .. J - Item'First + 1)'Initialized); pragma Loop_Invariant (for all K in Item'First .. J => Result (K - (Item'First - 1)) = To_ISO_646 (Item (K), Substitute)); end loop; end return; end To_ISO_646; -------------- -- To_Lower -- -------------- function To_Lower (Item : Character) return Character is (Value (Lower_Case_Map, Item)); function To_Lower (Item : String) return String is begin return Result : String (1 .. Item'Length) with Relaxed_Initialization do for J in Item'Range loop Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J)); pragma Loop_Invariant (Result (1 .. J - Item'First + 1)'Initialized); pragma Loop_Invariant (for all K in Item'First .. J => Result (K - (Item'First - 1)) = To_Lower (Item (K))); end loop; end return; end To_Lower; --------------- -- To_String -- --------------- function To_String (Item : Wide_String; Substitute : Character := ' ') return String is begin return Result : String (1 .. Item'Length) with Relaxed_Initialization do for J in Item'Range loop Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute); pragma Loop_Invariant (Result (1 .. J - (Item'First - 1))'Initialized); pragma Loop_Invariant (for all K in Item'First .. J => Result (K - (Item'First - 1)) = To_Character (Item (K), Substitute)); end loop; end return; end To_String; -------------- -- To_Upper -- -------------- function To_Upper (Item : Character) return Character is (Value (Upper_Case_Map, Item)); function To_Upper (Item : String) return String is begin return Result : String (1 .. Item'Length) with Relaxed_Initialization do for J in Item'Range loop Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J)); pragma Loop_Invariant (Result (1 .. J - Item'First + 1)'Initialized); pragma Loop_Invariant (for all K in Item'First .. J => Result (K - (Item'First - 1)) = To_Upper (Item (K))); end loop; end return; end To_Upper; ----------------------- -- To_Wide_Character -- ----------------------- function To_Wide_Character (Item : Character) return Wide_Character is begin return Wide_Character'Val (Character'Pos (Item)); end To_Wide_Character; -------------------- -- To_Wide_String -- -------------------- function To_Wide_String (Item : String) return Wide_String is begin return Result : Wide_String (1 .. Item'Length) with Relaxed_Initialization do for J in Item'Range loop Result (J - (Item'First - 1)) := To_Wide_Character (Item (J)); pragma Loop_Invariant (Result (1 .. J - (Item'First - 1))'Initialized); pragma Loop_Invariant (for all K in Item'First .. J => Result (K - (Item'First - 1)) = To_Wide_Character (Item (K))); end loop; end return; end To_Wide_String; end Ada.Characters.Handling;