diff options
author | Robert Dewar <dewar@adacore.com> | 2008-03-26 08:43:18 +0100 |
---|---|---|
committer | Arnaud Charlet <charlet@gcc.gnu.org> | 2008-03-26 08:43:18 +0100 |
commit | 14063a127cde2742fc889284db0185b144a7c873 (patch) | |
tree | 75dbd24d57fb4aa9b026b8565de51a5e073092d2 /gcc | |
parent | 2a6b365a3049bc42f9e94060c32d4b639244cf3f (diff) | |
download | gcc-14063a127cde2742fc889284db0185b144a7c873.zip gcc-14063a127cde2742fc889284db0185b144a7c873.tar.gz gcc-14063a127cde2742fc889284db0185b144a7c873.tar.bz2 |
g-byorma.adb (Read_BOM): Reorder tests so that UTF_32 is recognized
2008-03-26 Robert Dewar <dewar@adacore.com>
* g-byorma.adb (Read_BOM): Reorder tests so that UTF_32 is recognized
From-SVN: r133584
Diffstat (limited to 'gcc')
-rwxr-xr-x | gcc/ada/g-byorma.adb | 39 |
1 file changed, 21 insertions, 18 deletions
```diff
diff --git a/gcc/ada/g-byorma.adb b/gcc/ada/g-byorma.adb
index 9cc6f08..6bbaedf 100755
--- a/gcc/ada/g-byorma.adb
+++ b/gcc/ada/g-byorma.adb
@@ -44,27 +44,13 @@ package body GNAT.Byte_Order_Mark is
       XML_Support : Boolean := False)
    is
    begin
-      -- UTF-16 (big-endian)
-
-      if Str'Length >= 2
-        and then Str (Str'First) = Character'Val (16#FE#)
-        and then Str (Str'First + 1) = Character'Val (16#FF#)
-      then
-         Len := 2;
-         BOM := UTF16_BE;
-
-      -- UTF-16 (little-endian)
-
-      elsif Str'Length >= 2
-        and then Str (Str'First) = Character'Val (16#FF#)
-        and then Str (Str'First + 1) = Character'Val (16#FE#)
-      then
-         Len := 2;
-         BOM := UTF16_LE;
+      -- Note: the order of these tests is important, because in some cases
+      -- one sequence is a prefix of a longer sequence, and we must test for
+      -- the longer sequence first
 
       -- UTF-32 (big-endian)
 
-      elsif Str'Length >= 4
+      if Str'Length >= 4
         and then Str (Str'First) = Character'Val (16#00#)
         and then Str (Str'First + 1) = Character'Val (16#00#)
         and then Str (Str'First + 2) = Character'Val (16#FE#)
@@ -84,6 +70,23 @@ package body GNAT.Byte_Order_Mark is
          Len := 4;
          BOM := UTF32_LE;
 
+      -- UTF-16 (big-endian)
+
+      elsif Str'Length >= 2
+        and then Str (Str'First) = Character'Val (16#FE#)
+        and then Str (Str'First + 1) = Character'Val (16#FF#)
+      then
+         Len := 2;
+         BOM := UTF16_BE;
+
+      -- UTF-16 (little-endian)
+
+      elsif Str'Length >= 2
+        and then Str (Str'First) = Character'Val (16#FF#)
+        and then Str (Str'First + 1) = Character'Val (16#FE#)
+      then
+         Len := 2;
+         BOM := UTF16_LE;
       -- UTF-8 (endian-independent)
 
       elsif Str'Length >= 3
```