aboutsummaryrefslogtreecommitdiff
path: root/winsup/cygwin
diff options
context:
space:
mode:
authorCorinna Vinschen <corinna@vinschen.de>2009-11-02 11:42:04 +0000
committerCorinna Vinschen <corinna@vinschen.de>2009-11-02 11:42:04 +0000
commita657970571be50681055aa60289e35f312dea761 (patch)
tree0a419098d960bb4a80124595a990028e4c544f25 /winsup/cygwin
parent9725900d860b5d86a316faf66d357a3c9a5d7f83 (diff)
downloadnewlib-a657970571be50681055aa60289e35f312dea761.zip
newlib-a657970571be50681055aa60289e35f312dea761.tar.gz
newlib-a657970571be50681055aa60289e35f312dea761.tar.bz2
* miscfuncs.h (transform_chars): Declare. Define inline variation here.
* mount.cc (mount_info::from_fstab): Remove extern declaration of transform_chars. * path.cc (tfx_chars): Move to strfuncs.cc. (transform_chars): Ditto. * strfuncs.cc (tfx_chars): Moved here from path.cc. (transform_chars): Ditto. (sys_cp_wcstombs): Make UNICODE private use area conversion roundtrip safe for all characters. (sys_cp_mbstowcs): Ditto, by removing special case for UTF-8 sequences representing U+f0XX UNICODE chars. Fix typo in comment.
Diffstat (limited to 'winsup/cygwin')
-rw-r--r--winsup/cygwin/ChangeLog16
-rw-r--r--winsup/cygwin/miscfuncs.h8
-rw-r--r--winsup/cygwin/mount.cc1
-rw-r--r--winsup/cygwin/path.cc57
-rw-r--r--winsup/cygwin/strfuncs.cc69
5 files changed, 83 insertions, 68 deletions
diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog
index b5b93ac..99fcb9d 100644
--- a/winsup/cygwin/ChangeLog
+++ b/winsup/cygwin/ChangeLog
@@ -1,5 +1,19 @@
2009-11-02 Corinna Vinschen <corinna@vinschen.de>
+ * miscfuncs.h (transform_chars): Declare. Define inline variation here.
+ * mount.cc (mount_info::from_fstab): Remove extern declaration of
+ transform_chars.
+ * path.cc (tfx_chars): Move to strfuncs.cc.
+ (transform_chars): Ditto.
+ * strfuncs.cc (tfx_chars): Moved here from path.cc.
+ (transform_chars): Ditto.
+ (sys_cp_wcstombs): Make UNICODE private use area conversion roundtrip
+ safe for all characters.
+ (sys_cp_mbstowcs): Ditto, by removing special case for UTF-8 sequences
+ representing U+f0XX UNICODE chars. Fix typo in comment.
+
+2009-11-02 Corinna Vinschen <corinna@vinschen.de>
+
* path.cc (tfx_chars): Constify.
2009-10-31 Corinna Vinschen <corinna@vinschen.de>
@@ -362,7 +376,7 @@
(fhandler_console::write_normal): Always use codepage 437 for alternate
charset. Otherwise always default to the current internal locale.
Replace ASCII SO with ASCII CAN.
- * strfuncs.cc: Tweka comments according to below changes.
+ * strfuncs.cc: Tweak comments according to below changes.
(sys_cp_wcstombs): Constify charset parameter. Convert all wchar_t
values in the Unicode private use area U+F0xx to the singlebyte
counterpart. Drop special handling creating ASCII SO sequence from
diff --git a/winsup/cygwin/miscfuncs.h b/winsup/cygwin/miscfuncs.h
index 4755d72..489f8b7 100644
--- a/winsup/cygwin/miscfuncs.h
+++ b/winsup/cygwin/miscfuncs.h
@@ -25,6 +25,14 @@ void backslashify (const char *, char *, bool);
void slashify (const char *, char *, bool);
#define isslash(c) ((c) == '/')
+extern void transform_chars (PWCHAR, PWCHAR);
+inline void
+transform_chars (PUNICODE_STRING upath, USHORT start_idx)
+{
+ transform_chars (upath->Buffer + start_idx,
+ upath->Buffer + upath->Length / sizeof (WCHAR) - 1);
+}
+
/* Memory checking */
int __stdcall check_invalid_virtual_addr (const void *s, unsigned sz) __attribute__ ((regparm(2)));
diff --git a/winsup/cygwin/mount.cc b/winsup/cygwin/mount.cc
index 8c9b72a..b99a9b8 100644
--- a/winsup/cygwin/mount.cc
+++ b/winsup/cygwin/mount.cc
@@ -997,7 +997,6 @@ mount_info::from_fstab (bool user, WCHAR fstab[], PWCHAR fstab_end)
if (user)
{
- extern void transform_chars (PWCHAR, PWCHAR);
PWCHAR username;
sys_mbstowcs (username = wcpcpy (fstab_end, L".d\\"),
NT_MAX_PATH - (fstab_end - fstab),
diff --git a/winsup/cygwin/path.cc b/winsup/cygwin/path.cc
index fdc42d3..1f95073 100644
--- a/winsup/cygwin/path.cc
+++ b/winsup/cygwin/path.cc
@@ -395,63 +395,6 @@ path_conv::set_normalized_path (const char *path_copy)
}
}
-/* Transform characters invalid for Windows filenames to the Unicode private
- use area in the U+f0XX range. The affected characters are all control
- chars 1 <= c <= 31, as well as the characters " * : < > ? |. The backslash
- is affected as well, but we can't transform it as long as we accept Win32
- paths as input.
- The reverse functionality is in strfuncs.cc, function sys_cp_wcstombs. */
-static const WCHAR tfx_chars[] = {
- 0, 0xf000 | 1, 0xf000 | 2, 0xf000 | 3,
- 0xf000 | 4, 0xf000 | 5, 0xf000 | 6, 0xf000 | 7,
- 0xf000 | 8, 0xf000 | 9, 0xf000 | 10, 0xf000 | 11,
- 0xf000 | 12, 0xf000 | 13, 0xf000 | 14, 0xf000 | 15,
- 0xf000 | 16, 0xf000 | 17, 0xf000 | 18, 0xf000 | 19,
- 0xf000 | 20, 0xf000 | 21, 0xf000 | 22, 0xf000 | 23,
- 0xf000 | 24, 0xf000 | 25, 0xf000 | 26, 0xf000 | 27,
- 0xf000 | 28, 0xf000 | 29, 0xf000 | 30, 0xf000 | 31,
- ' ', '!', 0xf000 | '"', '#',
- '$', '%', '&', 39,
- '(', ')', 0xf000 | '*', '+',
- ',', '-', '.', '\\',
- '0', '1', '2', '3',
- '4', '5', '6', '7',
- '8', '9', 0xf000 | ':', ';',
- 0xf000 | '<', '=', 0xf000 | '>', 0xf000 | '?',
- '@', 'A', 'B', 'C',
- 'D', 'E', 'F', 'G',
- 'H', 'I', 'J', 'K',
- 'L', 'M', 'N', 'O',
- 'P', 'Q', 'R', 'S',
- 'T', 'U', 'V', 'W',
- 'X', 'Y', 'Z', '[',
- '\\', ']', '^', '_',
- '`', 'a', 'b', 'c',
- 'd', 'e', 'f', 'g',
- 'h', 'i', 'j', 'k',
- 'l', 'm', 'n', 'o',
- 'p', 'q', 'r', 's',
- 't', 'u', 'v', 'w',
- 'x', 'y', 'z', '{',
- 0xf000 | '|', '}', '~', 127
-};
-
-void
-transform_chars (PWCHAR path, PWCHAR path_end)
-{
- for (; path <= path_end; ++path)
- if (*path < 128)
- *path = tfx_chars[*path];
-}
-
-static inline
-void
-transform_chars (PUNICODE_STRING upath, USHORT start_idx)
-{
- transform_chars (upath->Buffer + start_idx,
- upath->Buffer + upath->Length / sizeof (WCHAR) - 1);
-}
-
static inline void
str2uni_cat (UNICODE_STRING &tgt, const char *srcstr)
{
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 009af17..61df650 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -22,6 +22,55 @@ details. */
#include "cygheap.h"
#include "tls_pbuf.h"
+/* Transform characters invalid for Windows filenames to the Unicode private
+ use area in the U+f0XX range. The affected characters are all control
+ chars 1 <= c <= 31, as well as the characters " * : < > ? |. The backslash
+ is affected as well, but we can't transform it as long as we accept Win32
+ paths as input.
+ The reverse functionality is in function sys_cp_wcstombs. */
+static const WCHAR tfx_chars[] = {
+ 0, 0xf000 | 1, 0xf000 | 2, 0xf000 | 3,
+ 0xf000 | 4, 0xf000 | 5, 0xf000 | 6, 0xf000 | 7,
+ 0xf000 | 8, 0xf000 | 9, 0xf000 | 10, 0xf000 | 11,
+ 0xf000 | 12, 0xf000 | 13, 0xf000 | 14, 0xf000 | 15,
+ 0xf000 | 16, 0xf000 | 17, 0xf000 | 18, 0xf000 | 19,
+ 0xf000 | 20, 0xf000 | 21, 0xf000 | 22, 0xf000 | 23,
+ 0xf000 | 24, 0xf000 | 25, 0xf000 | 26, 0xf000 | 27,
+ 0xf000 | 28, 0xf000 | 29, 0xf000 | 30, 0xf000 | 31,
+ ' ', '!', 0xf000 | '"', '#',
+ '$', '%', '&', 39,
+ '(', ')', 0xf000 | '*', '+',
+ ',', '-', '.', '\\',
+ '0', '1', '2', '3',
+ '4', '5', '6', '7',
+ '8', '9', 0xf000 | ':', ';',
+ 0xf000 | '<', '=', 0xf000 | '>', 0xf000 | '?',
+ '@', 'A', 'B', 'C',
+ 'D', 'E', 'F', 'G',
+ 'H', 'I', 'J', 'K',
+ 'L', 'M', 'N', 'O',
+ 'P', 'Q', 'R', 'S',
+ 'T', 'U', 'V', 'W',
+ 'X', 'Y', 'Z', '[',
+ '\\', ']', '^', '_',
+ '`', 'a', 'b', 'c',
+ 'd', 'e', 'f', 'g',
+ 'h', 'i', 'j', 'k',
+ 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's',
+ 't', 'u', 'v', 'w',
+ 'x', 'y', 'z', '{',
+ 0xf000 | '|', '}', '~', 127
+};
+
+void
+transform_chars (PWCHAR path, PWCHAR path_end)
+{
+ for (; path <= path_end; ++path)
+ if (*path < 128)
+ *path = tfx_chars[*path];
+}
+
/* The SJIS, JIS and eucJP conversion in newlib does not use UTF as
wchar_t character representation. That's unfortunate for us since
we require UTF for the OS. What we do here is to have our own
@@ -426,16 +475,19 @@ sys_cp_wcstombs (wctomb_p f_wctomb, const char *charset, char *dst, size_t len,
{
wchar_t pw = *pwcs;
int bytes;
+ unsigned char cwc;
/* Convert UNICODE private use area. Reverse functionality for the
- ASCII area <= 0x7f (only for path names) is transform_chars in
- path.cc. Reverse functionality for invalid bytes in a multibyte
- sequence is in sys_cp_mbstowcs. */
- if ((pw & 0xff00) == 0xf000 && ((pw & 0xff) <= 0x7f || MB_CUR_MAX > 1))
+ ASCII area <= 0x7f (only for path names) is transform_chars above.
+ Reverse functionality for invalid bytes in a multibyte sequence is
+ in sys_cp_mbstowcs below. */
+ if ((pw & 0xff00) == 0xf000
+ && (((cwc = (pw & 0xff)) <= 0x7f && tfx_chars[cwc] >= 0xf000)
+ || (cwc >= 0x80 && MB_CUR_MAX > 1)))
{
- buf[0] = pw & 0xff;
+ buf[0] = (char) cwc;
bytes = 1;
- }
+ }
else
{
bytes = f_wctomb (_REENT, buf, pw, charset, &ps);
@@ -603,15 +655,14 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst,
}
}
else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms,
- charset, &ps)) < 0
- || (bytes == 3 && pmbs[0] == 0xef && (pmbs[1] & 0xf4) == 0x80))
+ charset, &ps)) < 0)
{
/* The technique is based on a discussion here:
http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html
Invalid bytes in a multibyte secuence are converted to
the private use area which is already used to store ASCII
- chars invalid in Windows filenames. This techinque allows
+ chars invalid in Windows filenames. This technique allows
to store them in a symmetric way. */
bytes = 1;
if (dst)