diff options
| author | Stefan Esser | 2010-02-21 11:44:54 +0100 |
|---|---|---|
| committer | Stefan Esser | 2010-02-21 11:44:54 +0100 |
| commit | 36dbfacbe64697d959f524e537b15b73c090d898 (patch) | |
| tree | f1c7ce1409b0e7765fc72d550546967fcf0f9717 /mbregex/mbregex.h | |
Inital commit
Diffstat (limited to 'mbregex/mbregex.h')
| -rw-r--r-- | mbregex/mbregex.h | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/mbregex/mbregex.h b/mbregex/mbregex.h new file mode 100644 index 0000000..03292bc --- /dev/null +++ b/mbregex/mbregex.h | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | /* Definitions for data structures and routines for the regular | ||
| 2 | expression library, version 0.12. | ||
| 3 | Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | This file is part of the GNU C Library. Its master source is NOT part of | ||
| 6 | the C library, however. The master source lives in /gd/gnu/lib. | ||
| 7 | |||
| 8 | The GNU C Library is free software; you can redistribute it and/or | ||
| 9 | modify it under the terms of the GNU Library General Public License as | ||
| 10 | published by the Free Software Foundation; either version 2 of the | ||
| 11 | License, or (at your option) any later version. | ||
| 12 | |||
| 13 | The GNU C Library is distributed in the hope that it will be useful, | ||
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 16 | Library General Public License for more details. | ||
| 17 | |||
| 18 | You should have received a copy of the GNU Library General Public | ||
| 19 | License along with the GNU C Library; see the file COPYING.LIB. If not, | ||
| 20 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 21 | Boston, MA 02111-1307, USA. */ | ||
| 22 | /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto) | ||
| 23 | Last change: May 21, 1993 by t^2 */ | ||
| 24 | /* modified for Ruby by matz@netlab.co.jp */ | ||
| 25 | |||
| 26 | #ifndef __MB_REGEXP_LIBRARY | ||
| 27 | #define __MB_REGEXP_LIBRARY | ||
| 28 | |||
| 29 | #include <stddef.h> | ||
| 30 | |||
| 31 | /* Define number of parens for which we record the beginnings and ends. | ||
| 32 | This affects how much space the `struct re_registers' type takes up. */ | ||
| 33 | #ifndef MBRE_NREGS | ||
| 34 | #define MBRE_NREGS 10 | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #define MBRE_BYTEWIDTH 8 | ||
| 38 | |||
| 39 | #define MBRE_REG_MAX ((1<<MBRE_BYTEWIDTH)-1) | ||
| 40 | |||
| 41 | /* Maximum number of duplicates an interval can allow. */ | ||
| 42 | #ifndef MBRE_DUP_MAX | ||
| 43 | #define MBRE_DUP_MAX ((1 << 15) - 1) | ||
| 44 | #endif | ||
| 45 | |||
| 46 | |||
| 47 | /* If this bit is set, then character classes are supported; they are: | ||
| 48 | [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | ||
| 49 | [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | ||
| 50 | If not set, then character classes are not supported. */ | ||
| 51 | #define MBRE_CHAR_CLASSES (1L << 9) | ||
| 52 | |||
| 53 | /* match will be done case-insensitively */ | ||
| 54 | #define MBRE_OPTION_IGNORECASE (1L) | ||
| 55 | /* perl-style extended pattern available */ | ||
| 56 | #define MBRE_OPTION_EXTENDED (MBRE_OPTION_IGNORECASE<<1) | ||
| 57 | /* newline will be included for . */ | ||
| 58 | #define MBRE_OPTION_MULTILINE (MBRE_OPTION_EXTENDED<<1) | ||
| 59 | /* ^ and $ ignore newline */ | ||
| 60 | #define MBRE_OPTION_SINGLELINE (MBRE_OPTION_MULTILINE<<1) | ||
| 61 | /* works like Perl's /s; it's called POSIX for the wrong reason */ | ||
| 62 | #define MBRE_OPTION_POSIXLINE (MBRE_OPTION_MULTILINE|MBRE_OPTION_SINGLELINE) | ||
| 63 | /* search for longest match, in accord with POSIX regexp */ | ||
| 64 | #define MBRE_OPTION_LONGEST (MBRE_OPTION_SINGLELINE<<1) | ||
| 65 | |||
| 66 | #define MBRE_MAY_IGNORECASE (MBRE_OPTION_LONGEST<<1) /* this and the flags below continue the one-bit-per-flag shift chain */ | ||
| 67 | #define MBRE_OPTIMIZE_ANCHOR (MBRE_MAY_IGNORECASE<<1) | ||
| 68 | #define MBRE_OPTIMIZE_EXACTN (MBRE_OPTIMIZE_ANCHOR<<1) | ||
| 69 | #define MBRE_OPTIMIZE_NO_BM (MBRE_OPTIMIZE_EXACTN<<1) | ||
| 70 | #define MBRE_OPTIMIZE_BMATCH (MBRE_OPTIMIZE_NO_BM<<1) /* NOTE(review): evaluates to 1L<<9, the same value as MBRE_CHAR_CLASSES -- confirm the two never share a flag word */ | ||
| 71 | |||
| 72 | /* For multi-byte char support */ | ||
| 73 | #define MBCTYPE_ASCII 0 | ||
| 74 | #define MBCTYPE_EUC 1 | ||
| 75 | #define MBCTYPE_SJIS 2 | ||
| 76 | #define MBCTYPE_UTF8 3 | ||
| 77 | |||
| 78 | #if 0 | ||
| 79 | #if defined IMPORT || defined USEIMPORTLIB | ||
| 80 | extern __declspec(dllimport) | ||
| 81 | #elif defined EXPORT | ||
| 82 | extern __declspec(dllexport) | ||
| 83 | #else | ||
| 84 | extern | ||
| 85 | #endif | ||
| 86 | const unsigned char *re_mbctab; | ||
| 87 | #if defined(__STDC__) | ||
| 88 | void re_mbcinit (int); | ||
| 89 | #else | ||
| 90 | void re_mbcinit (); | ||
| 91 | #endif | ||
| 92 | #endif | ||
| 93 | |||
| 94 | #undef ismbchar /* NOTE(review): the only declaration of re_mbctab in this header is inside the `#if 0' block above, so users of these macros need it declared elsewhere -- confirm */ | ||
| 95 | #define ismbchar(c) re_mbctab[(unsigned char)(c)] /* table lookup; c is cast to unsigned char before indexing */ | ||
| 96 | #define mbclen(c) (re_mbctab[(unsigned char)(c)]+1) /* same table entry plus one */ | ||
| 97 | |||
| 98 | /* Structure used in mbre_match(): a union overlaying a pointer `word' with a pair of one-bit flags. */ | ||
| 99 | |||
| 100 | typedef union | ||
| 101 | { | ||
| 102 | unsigned char *word; /* views the union's storage as a single pointer value */ | ||
| 103 | struct { | ||
| 104 | unsigned is_active : 1; /* one-bit flag */ | ||
| 105 | unsigned matched_something : 1; /* one-bit flag */ | ||
| 106 | } bits; | ||
| 107 | } mbre_register_info_type; | ||
| 108 | |||
| 109 | /* This data structure is used to represent a compiled pattern. */ | ||
| 110 | |||
| 111 | struct mbre_pattern_buffer | ||
| 112 | { | ||
| 113 | char *buffer; /* Space holding the compiled pattern commands. */ | ||
| 114 | int allocated; /* Size of space that `buffer' points to. */ | ||
| 115 | int used; /* Length of portion of buffer actually occupied */ | ||
| 116 | char *fastmap; /* Pointer to fastmap, if any, or zero if none. */ | ||
| 117 | /* mbre_search uses the fastmap, if there is one, | ||
| 118 | to skip over totally implausible characters. */ | ||
| 119 | char *must; /* Pointer to exact pattern which strings should have | ||
| 120 | to be matched. */ | ||
| 121 | int *must_skip; /* Pointer to exact pattern skip table for bm_search */ | ||
| 122 | char *stclass; /* Pointer to character class list at top */ | ||
| 123 | long options; /* Flags for options such as extended_pattern. */ | ||
| 124 | long re_nsub; /* Number of subexpressions found by the compiler. */ | ||
| 125 | char fastmap_accurate; | ||
| 126 | /* Set to zero when a new pattern is stored, | ||
| 127 | set to one when the fastmap is updated from it. */ | ||
| 128 | char can_be_null; /* Set to one by compiling fastmap | ||
| 129 | if this pattern might match the null string. | ||
| 130 | It does not necessarily match the null string | ||
| 131 | in that case, but if this is zero, it cannot. | ||
| 132 | 2 as value means can match null string | ||
| 133 | but at end of range or before a character | ||
| 134 | listed in the fastmap. */ | ||
| 135 | |||
| 136 | /* stack & working area for mbre_match() */ | ||
| 137 | unsigned char **regstart; | ||
| 138 | unsigned char **regend; | ||
| 139 | unsigned char **old_regstart; | ||
| 140 | unsigned char **old_regend; | ||
| 141 | mbre_register_info_type *reg_info; | ||
| 142 | unsigned char **best_regstart; | ||
| 143 | unsigned char **best_regend; | ||
| 144 | |||
| 145 | int mbctype; /* presumably one of the MBCTYPE_* values defined earlier in this header -- TODO confirm */ | ||
| 146 | }; | ||
| 147 | |||
| 148 | typedef struct mbre_pattern_buffer mb_regex_t; | ||
| 149 | |||
| 150 | /* Structure to store register contents data in. | ||
| 151 | |||
| 152 | Pass the address of such a structure as an argument to mbre_match, etc., | ||
| 153 | if you want this information back. | ||
| 154 | |||
| 155 | For i from 1 to MBRE_NREGS - 1, beg[i] records the starting index in | ||
| 156 | the string of where the ith subexpression matched, and end[i] records | ||
| 157 | one after the ending index. beg[0] and end[0] are analogous, for | ||
| 158 | the entire pattern. */ | ||
| 159 | |||
| 160 | struct mbre_registers | ||
| 161 | { | ||
| 162 | int allocated; /* presumably the capacity of the beg/end arrays -- TODO confirm */ | ||
| 163 | int num_regs; /* presumably the number of entries currently in use -- TODO confirm */ | ||
| 164 | int *beg; /* beg[i]: starting index of the ith match */ | ||
| 165 | int *end; /* end[i]: one after the ending index of the ith match */ | ||
| 166 | }; | ||
| 167 | |||
| 168 | #if 0 | ||
| 169 | /* Type for byte offsets within the string. POSIX mandates this. */ | ||
| 170 | typedef size_t regoff_t; | ||
| 171 | |||
| 172 | /* POSIX specification for registers. Aside from the different names than | ||
| 173 | `mbre_registers', POSIX uses an array of structures, instead of a | ||
| 174 | structure of arrays. */ | ||
| 175 | typedef struct | ||
| 176 | { | ||
| 177 | regoff_t rm_so; /* Byte offset from string's start to substring's start. */ | ||
| 178 | regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ | ||
| 179 | } regmatch_t; | ||
| 180 | #endif | ||
| 181 | |||
| 182 | |||
| 183 | #ifdef __STDC__ /* prototyped declarations; the #else branch declares the same functions without parameter lists */ | ||
| 184 | |||
| 185 | extern char *mbre_compile_pattern (const char *, int, struct mbre_pattern_buffer *); | ||
| 186 | void mbre_free_pattern (struct mbre_pattern_buffer *); | ||
| 187 | /* Is this really advertised? */ | ||
| 188 | extern int mbre_adjust_startpos (struct mbre_pattern_buffer *, const char*, int, int, int); | ||
| 189 | extern void mbre_compile_fastmap (struct mbre_pattern_buffer *); | ||
| 190 | extern int mbre_search (struct mbre_pattern_buffer *, const char*, int, int, int, | ||
| 191 | struct mbre_registers *); | ||
| 192 | extern int mbre_match (struct mbre_pattern_buffer *, const char *, int, int, | ||
| 193 | struct mbre_registers *); | ||
| 194 | extern void mbre_set_casetable (const char *table); | ||
| 195 | extern void mbre_copy_registers (struct mbre_registers*, struct mbre_registers*); | ||
| 196 | extern void mbre_free_registers (struct mbre_registers*); | ||
| 197 | |||
| 198 | #else /* !__STDC__ */ | ||
| 199 | |||
| 200 | extern char *mbre_compile_pattern (); | ||
| 201 | void mbre_free_pattern (); | ||
| 202 | /* Is this really advertised? */ | ||
| 203 | extern int mbre_adjust_startpos (); | ||
| 204 | extern void mbre_compile_fastmap (); | ||
| 205 | extern int mbre_search (); | ||
| 206 | extern int mbre_match (); | ||
| 207 | extern void mbre_set_casetable (); | ||
| 208 | extern void mbre_copy_registers (); | ||
| 209 | extern void mbre_free_registers (); | ||
| 210 | |||
| 211 | #endif /* __STDC__ */ | ||
| 212 | |||
| 213 | #endif /* !__MB_REGEXP_LIBRARY */ | ||
