aboutsummaryrefslogtreecommitdiff
path: root/src/regex/tre.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/tre.h')
-rw-r--r--src/regex/tre.h269
1 files changed, 269 insertions, 0 deletions
diff --git a/src/regex/tre.h b/src/regex/tre.h
new file mode 100644
index 0000000..bfd171f
--- /dev/null
+++ b/src/regex/tre.h
@@ -0,0 +1,269 @@
+/*
+ tre-internal.h - TRE internal definitions
+
+ Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#include <regex.h>
+#include <wchar.h>
+#include <wctype.h>
+
+/* Build-time feature switches, fixed unconditionally by this header. */
+/* TRE_MULTIBYTE is tested further down to choose TRE_MB_CUR_MAX. */
+#define TRE_MULTIBYTE 1
+/* Make sure TRE_MBSTATE is not defined, whatever was set before. */
+#undef TRE_MBSTATE
+#define TRE_WCHAR 1
+#define TRE_USE_SYSTEM_WCTYPE 1
+#define HAVE_WCSTOMBS 1
+/* MB_CUR_MAX comes from <stdlib.h>, which this header does not include;
+   code that expands TRE_MB_CUR_MAX has to include it itself. */
+#define TRE_MB_CUR_MAX MB_CUR_MAX
+
+/* Defined unconditionally; per the C standard this turns assert() into a
+   no-op for any <assert.h> included after this point.
+   NOTE(review): there is no #ifndef guard, so a build that also passes
+   -DNDEBUG (which defines it as 1) will get a macro-redefinition
+   diagnostic here -- consider guarding. */
+#define NDEBUG
+
+/* Expands to the identifier `__opaque'.
+   NOTE(review): presumably the name of the regex_t member that holds the
+   compiled pattern -- confirm against <regex.h> and the callers. */
+#define TRE_REGEX_T_FIELD __opaque
+/* Error codes are carried as plain int. */
+typedef int reg_errcode_t;
+
+/* Character type used for patterns: always wchar_t in this configuration. */
+typedef wchar_t tre_char_t;
+
+
+/* Debug tracing.  `msg' is pasted directly after `printf', so it must be
+   a complete parenthesized argument list, i.e. the macro is invoked with
+   double parentheses: DPRINT(("value %d\n", v)).  With TRE_DEBUG defined
+   the message is printed and stdout is flushed; otherwise the macro
+   expands to an empty statement and its argument is not evaluated. */
+#ifdef TRE_DEBUG
+#include <stdio.h>
+#define DPRINT(msg) do {printf msg; fflush(stdout);} while(0)
+#else /* !TRE_DEBUG */
+#define DPRINT(msg) do { } while(0)
+#endif /* !TRE_DEBUG */
+
+/* Number of elements in the array `x'.  `x' must be an actual array
+   object, not a pointer: for a pointer the division yields
+   sizeof(pointer) / sizeof(element) rather than an element count.
+   The argument is parenthesized before it is subscripted so that any
+   array-valued expression can be passed safely.  sizeof does not
+   evaluate its operand here, so `x' has no side effects at run time. */
+#define elementsof(x) (sizeof(x) / sizeof((x)[0]))
+
+/* Multibyte -> wide character conversion used through the tre_mbrtowc()
+   name.  The active (#if 1) branch forwards only `pwc' and `s' to
+   __mbtowc(); the `n' and `ps' arguments are accepted so call sites keep
+   an mbrtowc()-like shape, but they are not expanded at all.  The
+   disabled branch would call the standard mbtowc() and pass `n' through.
+   NOTE(review): __mbtowc is only declared here; since no length is passed,
+   how it bounds its read of `s' should be confirmed against its
+   definition. */
+#if 1
+int __mbtowc(wchar_t *, const char *);
+#define tre_mbrtowc(pwc, s, n, ps) (__mbtowc((pwc), (s)))
+#else
+#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
+#endif
+
+/* Wide characters. */
+/* Integer type for character codes; wint_t, the type the isw*() / tow*()
+   functions aliased below take as their argument. */
+typedef wint_t tre_cint_t;
+/* Largest character value, taken from the system's WCHAR_MAX. */
+#define TRE_CHAR_MAX WCHAR_MAX
+
+/* Maximum number of bytes in one multibyte character.  TRE_MULTIBYTE is
+   defined unconditionally near the top of this header, so the first
+   branch is always taken; it repeats the earlier definition of
+   TRE_MB_CUR_MAX with an identical expansion, which C permits.  The
+   #else branch is never compiled in this configuration. */
+#ifdef TRE_MULTIBYTE
+#define TRE_MB_CUR_MAX MB_CUR_MAX
+#else /* !TRE_MULTIBYTE */
+#define TRE_MB_CUR_MAX 1
+#endif /* !TRE_MULTIBYTE */
+
+/* Character classification: each tre_is*() name is a plain alias for the
+   corresponding wide-character function from <wctype.h>. */
+#define tre_isalnum iswalnum
+#define tre_isalpha iswalpha
+#define tre_isblank iswblank
+#define tre_iscntrl iswcntrl
+#define tre_isdigit iswdigit
+#define tre_isgraph iswgraph
+#define tre_islower iswlower
+#define tre_isprint iswprint
+#define tre_ispunct iswpunct
+#define tre_isspace iswspace
+#define tre_isupper iswupper
+#define tre_isxdigit iswxdigit
+
+/* Case conversion (<wctype.h>) and string length (<wchar.h>) aliases. */
+#define tre_tolower towlower
+#define tre_toupper towupper
+#define tre_strlen wcslen
+
+/* Use system provided iswctype() and wctype(). */
+/* tre_ctype_t is the handle type returned by tre_ctype() (wctype()) and
+   consumed by tre_isctype() (iswctype()). */
+typedef wctype_t tre_ctype_t;
+#define tre_isctype iswctype
+#define tre_ctype wctype
+
+/* Returns number of bytes to add to (char *)ptr to make it
+   properly aligned for the type.  The result is 0 when `ptr' is already
+   a multiple of sizeof(type), otherwise a value in 1 .. sizeof(type) - 1.
+
+   `ptr' is parenthesized before the cast so that an expression argument
+   such as `p + 1' is evaluated as a pointer first and then converted as
+   a whole.  Without the parentheses the cast binds to `p' alone and the
+   addition is done on the integer, which ignores the pointer's element
+   size and gives a wrong padding for anything but char pointers.
+
+   `ptr' is expanded twice, so it must not have side effects.
+   NOTE(review): the address is converted through `long', as before; on
+   targets where long is narrower than a pointer only the low bits
+   survive -- confirm this is acceptable before porting. */
+#define ALIGN(ptr, type) \
+ ((((long)(ptr)) % sizeof(type)) \
+ ? (sizeof(type) - (((long)(ptr)) % sizeof(type))) \
+ : 0)
+
+/* Maximum / minimum of two values.  Any earlier definitions of MAX and
+   MIN are dropped first so these versions are the ones in effect.
+   When the operands compare equal, `a' is the one selected.  Each macro
+   expands one of its arguments twice, so arguments with side effects
+   (e.g. i++) must not be passed. */
+#undef MAX
+#undef MIN
+#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
+#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
+
+/* Define STRF to the correct printf formatter for strings. */
+/* "ls" is the wide-string conversion without the leading '%', matching
+   tre_char_t being wchar_t; use it as "%" STRF in a format string. */
+#define STRF "ls"
+
+/* TNFA transition type. A TNFA state is an array of transitions,
+ the terminator is a transition with NULL `state'. */
+typedef struct tnfa_transition tre_tnfa_transition_t;
+
+/* One outgoing edge of a TNFA state.  Member names and order are part of
+   the layout other translation units rely on.  Note that the union
+   member named `class' is a C++ keyword, so this header is C-only. */
+struct tnfa_transition {
+ /* Range of accepted characters. */
+ tre_cint_t code_min;
+ tre_cint_t code_max;
+ /* Pointer to the destination state. */
+ tre_tnfa_transition_t *state;
+ /* ID number of the destination state. */
+ int state_id;
+ /* -1 terminated array of tags (or NULL). */
+ int *tags;
+ /* Assertion bitmap. */
+ /* Holds the ASSERT_* flag values defined after this struct. */
+ int assertions;
+ /* Assertion parameters. */
+ union {
+ /* Character class assertion. */
+ tre_ctype_t class;
+ /* Back reference assertion. */
+ int backref;
+ } u;
+ /* Negative character class assertions. */
+ tre_ctype_t *neg_classes;
+};
+
+
+/* Assertions. */
+/* Each value is a distinct power of two so several can be combined in one
+   int bitmap (the `assertions' member of struct tnfa_transition).  The
+   `class' and `backref' parameters mentioned below live in that struct's
+   union `u'. */
+#define ASSERT_AT_BOL 1 /* Beginning of line. */
+#define ASSERT_AT_EOL 2 /* End of line. */
+#define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */
+#define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */
+#define ASSERT_AT_BOW 16 /* Beginning of word. */
+#define ASSERT_AT_EOW 32 /* End of word. */
+#define ASSERT_AT_WB 64 /* Word boundary. */
+#define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */
+#define ASSERT_BACKREF 256 /* A back reference in `backref'. */
+/* Same value as the highest flag above (ASSERT_BACKREF); must be kept in
+   step if another flag is added. */
+#define ASSERT_LAST 256
+
+/* Tag directions. */
+/* Two-valued enum with explicit values 0 and 1; struct tnfa keeps an
+   array of these in `tag_directions'.
+   NOTE(review): presumably one entry per tag, saying whether the matcher
+   prefers the smallest or the largest value for it -- confirm in the
+   matcher sources. */
+typedef enum {
+ TRE_TAG_MINIMIZE = 0,
+ TRE_TAG_MAXIMIZE = 1
+} tre_tag_direction_t;
+
+/* Instructions to compute submatch register values from tag values
+ after a successful match. */
+struct tre_submatch_data {
+ /* Tag that gives the value for rm_so (submatch start offset). */
+ int so_tag;
+ /* Tag that gives the value for rm_eo (submatch end offset). */
+ int eo_tag;
+ /* List of submatches this submatch is contained in. */
+ /* NOTE(review): how the list is terminated, and whether it may be NULL,
+    is not visible in this header -- check the code that builds it. */
+ int *parents;
+};
+
+/* Short name for struct tre_submatch_data; struct tnfa stores an array of
+   these in `submatch_data'. */
+typedef struct tre_submatch_data tre_submatch_data_t;
+
+
+/* TNFA definition. */
+typedef struct tnfa tre_tnfa_t;
+
+/* A compiled automaton.  The notes on individual members below are
+   derived from their names and types only; the code that fills this
+   structure in is not part of this header. */
+struct tnfa {
+ /* Transition array together with its element count. */
+ tre_tnfa_transition_t *transitions;
+ unsigned int num_transitions;
+ /* NOTE(review): presumably the initial transition list and the final
+    state, both pointing into `transitions' -- confirm. */
+ tre_tnfa_transition_t *initial;
+ tre_tnfa_transition_t *final;
+ /* Per-submatch tag instructions together with their count. */
+ tre_submatch_data_t *submatch_data;
+ unsigned int num_submatches;
+ /* NOTE(review): presumably one direction per tag, i.e. `num_tags'
+    entries -- confirm. */
+ tre_tag_direction_t *tag_directions;
+ int num_tags;
+ int end_tag;
+ int num_states;
+ /* NOTE(review): presumably the flags the pattern was compiled with. */
+ int cflags;
+ /* NOTE(review): presumably non-zero when the pattern uses back
+    references (see ASSERT_BACKREF) -- confirm. */
+ int have_backrefs;
+};
+
+/* Everything up to the matching #endif is excluded from compilation by
+   `#if 0'; the prototypes are kept only as a record of the internal
+   entry points.
+   NOTE(review): the tre_tnfa_run_parallel prototype appears twice with
+   identical text, and tre_str_type_t is not declared anywhere in this
+   header, so the block would need cleaning up before it could be
+   re-enabled. */
+#if 0
+static int
+tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
+
+static void
+tre_free(regex_t *preg);
+
+static void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+ const tre_tnfa_t *tnfa, int *tags, int match_eo);
+
+static reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags, int eflags,
+ int *match_end_ofs);
+
+static reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+ tre_str_type_t type, int *match_tags, int eflags,
+ int *match_end_ofs);
+
+static reg_errcode_t
+tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
+ int len, tre_str_type_t type, int *match_tags,
+ int eflags, int *match_end_ofs);
+#endif
+
+/* from tre-mem.h: */
+
+/* Size in bytes of the block handed to the allocator by the
+   tre_mem_alloca() macro later in this header. */
+#define TRE_MEM_BLOCK_SIZE 1024
+
+/* Singly linked list node carrying one untyped data pointer. */
+typedef struct tre_list {
+ void *data;
+ struct tre_list *next;
+} tre_list_t;
+
+/* Allocator state.  Note that tre_mem_t is a POINTER to this struct, so
+   a tre_mem_t value can be NULL and is passed around by value.
+   The member notes are inferred from names and from the macros later in
+   this header; the implementation is in a separate file. */
+typedef struct tre_mem_struct {
+ /* NOTE(review): presumably the head of the block list and the block
+    currently being carved up -- confirm in the implementation. */
+ tre_list_t *blocks;
+ tre_list_t *current;
+ char *ptr;
+ /* Compared against the requested size by tre_mem_alloca() to decide
+    whether a fresh block has to be supplied. */
+ size_t n;
+ /* NOTE(review): presumably set once an underlying allocation failed. */
+ int failed;
+ void **provided;
+} *tre_mem_t;
+
+/* Map the allocator entry points onto names with a `__' prefix.  The
+   renaming applies to the prototypes in this header as well, so the
+   symbols actually declared are the __tre_mem_* ones. */
+#define tre_mem_new_impl __tre_mem_new_impl
+#define tre_mem_alloc_impl __tre_mem_alloc_impl
+#define tre_mem_destroy __tre_mem_destroy
+
+/* Implementation entry points; normally reached through the
+   tre_mem_new() / tre_mem_alloc() / tre_mem_calloc() wrappers.
+   The wrappers pass provided = 0 and provided_block = NULL for ordinary
+   allocation and provided = 1 with a caller-supplied block in the
+   alloca() variants; `zero' is 1 only in the calloc wrapper, whose
+   comment states the memory is set to zero. */
+tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
+void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
+ int zero, size_t size);
+
+/* Returns a new memory allocator or NULL if out of memory. */
+#define tre_mem_new() tre_mem_new_impl(0, NULL)
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. */
+/* Calls the implementation with provided = 0 and zero = 0. */
+#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. The memory
+ is set to zero. */
+/* Same call as tre_mem_alloc() except that zero = 1. */
+#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
+
+/* Only compiled when TRE_USE_ALLOCA is defined; this header does not
+   define it and does not include a header declaring alloca(). */
+#ifdef TRE_USE_ALLOCA
+/* alloca() versions. Like above, but memory is allocated with alloca()
+ instead of malloc(). */
+
+/* The allocator structure itself is taken from the caller's stack frame
+   and handed to the implementation with provided = 1. */
+#define tre_mem_newa() \
+ tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))
+
+/* If `mem' reports at least `size' bytes available ((mem)->n >= size) no
+   new block is passed; otherwise a fresh TRE_MEM_BLOCK_SIZE block is
+   alloca()ed in the caller's frame and passed along.  `mem' and `size'
+   are each expanded more than once, so they must be free of side
+   effects. */
+#define tre_mem_alloca(mem, size) \
+ ((mem)->n >= (size) \
+ ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \
+ : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
+#endif /* TRE_USE_ALLOCA */
+
+
+/* Frees the memory allocator and all memory allocated with it. */
+/* Because of the #define earlier in this header, the symbol declared
+   here is really __tre_mem_destroy. */
+void tre_mem_destroy(tre_mem_t mem);
+
+/* The x*() allocation names are plain aliases for the C library
+   functions: they add no failure handling, so callers must still check
+   for NULL.  <stdlib.h> is not included by this header; a file using
+   these names has to include it itself. */
+#define xmalloc malloc
+#define xcalloc calloc
+#define xfree free
+#define xrealloc realloc
+
+/* EOF */