diff options
Diffstat (limited to 'src/lib/krb5/unicode/ure/README')
-rw-r--r-- | src/lib/krb5/unicode/ure/README | 212 |
1 files changed, 0 insertions, 212 deletions
diff --git a/src/lib/krb5/unicode/ure/README b/src/lib/krb5/unicode/ure/README deleted file mode 100644 index c9918f5..0000000 --- a/src/lib/krb5/unicode/ure/README +++ /dev/null @@ -1,212 +0,0 @@ -# -# $Id: README,v 1.3 1999/09/21 15:47:43 mleisher Exp $ -# -# Copyright 1997, 1998, 1999 Computing Research Labs, -# New Mexico State University -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT -# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - - - Unicode and Regular Expressions - Version 0.5 - -This is a simple regular expression package for matching against Unicode text -in UCS2 form. The implementation of this URE package is a variation on the -RE->DFA algorithm done by Mark Hopkins (markh@csd4.csd.uwm.edu). Mark -Hopkins' algorithm had the virtue of being very simple, so it was used as a -model. - ---------------------------------------------------------------------------- - -Assumptions: - - o Regular expression and text already normalized. - - o Conversion to lower case assumes a 1-1 mapping. - -Definitions: - - Separator - any one of U+2028, U+2029, '\n', '\r'. - -Operators: - . - match any character. - * - match zero or more of the last subexpression. - + - match one or more of the last subexpression. - ? - match zero or one of the last subexpression. - () - subexpression grouping. - - Notes: - - o The "." operator normally does not match separators, but a flag is - available for the ure_exec() function that will allow this operator to - match a separator. - -Literals and Constants: - - c - literal UCS2 character. - \x.... - hexadecimal number of up to 4 digits. - \X.... - hexadecimal number of up to 4 digits. - \u.... - hexadecimal number of up to 4 digits. - \U.... - hexadecimal number of up to 4 digits. - -Character classes: - - [...] - Character class. - [^...] - Negated character class. - \pN1,N2,...,Nn - Character properties class. - \PN1,N2,...,Nn - Negated character properties class. - - POSIX character classes recognized: - - :alnum: - :alpha: - :cntrl: - :digit: - :graph: - :lower: - :print: - :punct: - :space: - :upper: - :xdigit: - - Notes: - - o Character property classes are \p or \P followed by a comma separated - list of integers between 1 and 32. These integers are references to - the following character properties: - - N Character Property - -------------------------- - 1 _URE_NONSPACING - 2 _URE_COMBINING - 3 _URE_NUMDIGIT - 4 _URE_NUMOTHER - 5 _URE_SPACESEP - 6 _URE_LINESEP - 7 _URE_PARASEP - 8 _URE_CNTRL - 9 _URE_PUA - 10 _URE_UPPER - 11 _URE_LOWER - 12 _URE_TITLE - 13 _URE_MODIFIER - 14 _URE_OTHERLETTER - 15 _URE_DASHPUNCT - 16 _URE_OPENPUNCT - 17 _URE_CLOSEPUNCT - 18 _URE_OTHERPUNCT - 19 _URE_MATHSYM - 20 _URE_CURRENCYSYM - 21 _URE_OTHERSYM - 22 _URE_LTR - 23 _URE_RTL - 24 _URE_EURONUM - 25 _URE_EURONUMSEP - 26 _URE_EURONUMTERM - 27 _URE_ARABNUM - 28 _URE_COMMONSEP - 29 _URE_BLOCKSEP - 30 _URE_SEGMENTSEP - 31 _URE_WHITESPACE - 32 _URE_OTHERNEUT - - o Character classes can contain literals, constants, and character - property classes. Example: - - [abc\U10A\p1,3,4] - ---------------------------------------------------------------------------- - -Before using URE ----------------- -Before URE is used, two functions need to be created. One to check if a -character matches a set of URE character properties, and one to convert a -character to lower case. - -Stubs for these function are located in the urestubs.c file. - -Using URE ---------- - -Sample pseudo-code fragment. - - ure_buffer_t rebuf; - ure_dfa_t dfa; - ucs2_t *re, *text; - unsigned long relen, textlen; - unsigned long match_start, match_end; - - /* - * Allocate the dynamic storage needed to compile regular expressions. - */ - rebuf = ure_buffer_create(); - - for each regular expression in a list { - re = next regular expression; - relen = length(re); - - /* - * Compile the regular expression with the case insensitive flag - * turned on. - */ - dfa = ure_compile(re, relen, 1, rebuf); - - /* - * Look for the first match in some text. The matching will be done - * in a case insensitive manner because the expression was compiled - * with the case insensitive flag on. - */ - if (ure_exec(dfa, 0, text, textlen, &match_start, &match_end)) - printf("MATCH: %ld %ld\n", match_start, match_end); - - /* - * Look for the first match in some text, ignoring non-spacing - * characters. - */ - if (ure_exec(dfa, URE_IGNORE_NONSPACING, text, textlen, - &match_start, &match_end)) - printf("MATCH: %ld %ld\n", match_start, match_end); - - /* - * Free the DFA. - */ - ure_free_dfa(dfa); - } - - /* - * Free the dynamic storage used for compiling the expressions. - */ - ure_free_buffer(rebuf); - ---------------------------------------------------------------------------- - -Mark Leisher <mleisher@crl.nmsu.edu> -29 March 1997 - -=========================================================================== - -CHANGES -------- - -Version: 0.5 -Date : 21 September 1999 -========================== - 1. Added copyright stuff and put in CVS. |