summaryrefslogtreecommitdiff
path: root/mbregex/mbregex.h
diff options
context:
space:
mode:
authorStefan Esser2010-02-21 11:44:54 +0100
committerStefan Esser2010-02-21 11:44:54 +0100
commit36dbfacbe64697d959f524e537b15b73c090d898 (patch)
treef1c7ce1409b0e7765fc72d550546967fcf0f9717 /mbregex/mbregex.h
Inital commit
Diffstat (limited to 'mbregex/mbregex.h')
-rw-r--r--mbregex/mbregex.h213
1 files changed, 213 insertions, 0 deletions
diff --git a/mbregex/mbregex.h b/mbregex/mbregex.h
new file mode 100644
index 0000000..03292bc
--- /dev/null
+++ b/mbregex/mbregex.h
@@ -0,0 +1,213 @@
1/* Definitions for data structures and routines for the regular
2 expression library, version 0.12.
3 Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.
4
5 This file is part of the GNU C Library. Its master source is NOT part of
6 the C library, however. The master source lives in /gd/gnu/lib.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public
19 License along with the GNU C Library; see the file COPYING.LIB. If not,
20 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
23 Last change: May 21, 1993 by t^2 */
24/* modified for Ruby by matz@netlab.co.jp */
25
26#ifndef __MB_REGEXP_LIBRARY
27#define __MB_REGEXP_LIBRARY
28
29#include <stddef.h>
30
31/* Define number of parens for which we record the beginnings and ends.
32 The registers type in this header is `struct mbre_registers'. NOTE(review): it holds `int *' arrays, so confirm whether this value still affects its size. */
33#ifndef MBRE_NREGS
34#define MBRE_NREGS 10
35#endif
36
37#define MBRE_BYTEWIDTH 8 /* Shift count used to derive MBRE_REG_MAX. */
38
39#define MBRE_REG_MAX ((1<<MBRE_BYTEWIDTH)-1) /* (1 << 8) - 1 == 255 */
40
41/* Maximum number of duplicates an interval can allow (default 32767). */
42#ifndef MBRE_DUP_MAX
43#define MBRE_DUP_MAX ((1 << 15) - 1)
44#endif
45
46
47/* If this bit is set, then character classes are supported; they are:
48 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
49 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
50 If not set, then character classes are not supported. NOTE(review): MBRE_OPTIMIZE_BMATCH below also evaluates to bit 9 -- confirm the two never share a flag word. */
51#define MBRE_CHAR_CLASSES (1L << 9)
52
53/* match will be done case-insensitively (bit 0) */
54#define MBRE_OPTION_IGNORECASE (1L)
55/* perl-style extended pattern available (bit 1) */
56#define MBRE_OPTION_EXTENDED (MBRE_OPTION_IGNORECASE<<1)
57/* newline will be included for . (bit 2) */
58#define MBRE_OPTION_MULTILINE (MBRE_OPTION_EXTENDED<<1)
59/* ^ and $ ignore newline (bit 3) */
60#define MBRE_OPTION_SINGLELINE (MBRE_OPTION_MULTILINE<<1)
61/* works like Perl's /s; it's called POSIX for the wrong reason (bits 2 and 3 combined) */
62#define MBRE_OPTION_POSIXLINE (MBRE_OPTION_MULTILINE|MBRE_OPTION_SINGLELINE)
63/* search for longest match, in accord with POSIX regexp (bit 4) */
64#define MBRE_OPTION_LONGEST (MBRE_OPTION_SINGLELINE<<1)
65/* The flags below continue the same shift chain (bits 5 through 9); their purpose is not documented in this header. */
66#define MBRE_MAY_IGNORECASE (MBRE_OPTION_LONGEST<<1)
67#define MBRE_OPTIMIZE_ANCHOR (MBRE_MAY_IGNORECASE<<1)
68#define MBRE_OPTIMIZE_EXACTN (MBRE_OPTIMIZE_ANCHOR<<1)
69#define MBRE_OPTIMIZE_NO_BM (MBRE_OPTIMIZE_EXACTN<<1)
70#define MBRE_OPTIMIZE_BMATCH (MBRE_OPTIMIZE_NO_BM<<1) /* == 1L << 9, same bit as MBRE_CHAR_CLASSES -- NOTE(review): confirm they never share a flag word */
71
72/* Multi-byte character type codes; presumably the values held by `mbctype' in struct mbre_pattern_buffer -- confirm. */
73#define MBCTYPE_ASCII 0
74#define MBCTYPE_EUC 1
75#define MBCTYPE_SJIS 2
76#define MBCTYPE_UTF8 3
77
78#if 0 /* Disabled: nothing up to the matching #endif is compiled. */
79#if defined IMPORT || defined USEIMPORTLIB
80extern __declspec(dllimport)
81#elif defined EXPORT
82extern __declspec(dllexport)
83#else
84extern
85#endif
86const unsigned char *re_mbctab;
87#if defined(__STDC__)
88void re_mbcinit (int);
89#else
90void re_mbcinit ();
91#endif
92#endif /* 0 */
93/* Both macros index re_mbctab by the byte value of c. The only declaration of re_mbctab in this header is compiled out (#if 0), so it must come from elsewhere. */
94#undef ismbchar /* drop any earlier definition */
95#define ismbchar(c) re_mbctab[(unsigned char)(c)] /* table entry for c; presumably nonzero for a multi-byte lead byte -- confirm */
96#define mbclen(c) (re_mbctab[(unsigned char)(c)]+1) /* table entry plus one; presumably the character's byte length -- confirm */
97
98/* Union used during matching (the matcher is declared below as mbre_match); `reg_info' in struct mbre_pattern_buffer points to this type. */
99
100typedef union
101{
102 unsigned char *word; /* pointer view; shares storage with `bits' */
103 struct {
104 unsigned is_active : 1; /* one-bit flag */
105 unsigned matched_something : 1; /* one-bit flag */
106 } bits;
107} mbre_register_info_type;
108
109/* This data structure is used to represent a compiled pattern. */
110
111struct mbre_pattern_buffer
112 {
113 char *buffer; /* Space holding the compiled pattern commands. */
114 int allocated; /* Size of space that `buffer' points to. */
115 int used; /* Length of the portion of `buffer' actually occupied. */
116 char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
117 /* re_search (declared below as mbre_search, presumably the same routine)
118 uses the fastmap, if there is one, to skip over totally implausible characters. */
119 char *must; /* Pointer to an exact pattern that a string has to contain
120 in order to match. */
121 int *must_skip; /* Pointer to exact pattern skip table for bm_search */
122 char *stclass; /* Pointer to character class list at top */
123 long options; /* Flags for options such as extended_pattern; presumably the MBRE_OPTION_* bits above -- confirm. */
124 long re_nsub; /* Number of subexpressions found by the compiler. */
125 char fastmap_accurate;
126 /* Set to zero when a new pattern is stored,
127 set to one when the fastmap is updated from it. */
128 char can_be_null; /* Set to one by compiling fastmap
129 if this pattern might match the null string.
130 It does not necessarily match the null string
131 in that case, but if this is zero, it cannot.
132 A value of 2 means it can match the null string,
133 but only at end of range or before a character
134 listed in the fastmap. */
135
136 /* stack & working area for re_match() (declared below as mbre_match) */
137 unsigned char **regstart;
138 unsigned char **regend;
139 unsigned char **old_regstart;
140 unsigned char **old_regend;
141 mbre_register_info_type *reg_info;
142 unsigned char **best_regstart;
143 unsigned char **best_regend;
144
145 int mbctype; /* presumably one of the MBCTYPE_* codes above -- confirm */
146 };
147
148typedef struct mbre_pattern_buffer mb_regex_t; /* alias for the struct type */
149
150/* Structure to store register contents data in.
151
152 Pass the address of such a structure as an argument to mbre_match, etc.,
153 if you want this information back.
154
155 For i from 1 to MBRE_NREGS - 1, beg[i] records the starting index in
156 the string of where the ith subexpression matched, and end[i] records
157 one after the ending index. beg[0] and end[0] are analogous, for
158 the entire pattern. NOTE(review): the upper bound may really be num_regs - 1 -- confirm. */
159
160struct mbre_registers
161 {
162 int allocated; /* presumably the capacity of beg/end -- confirm */
163 int num_regs; /* presumably the number of valid entries -- confirm */
164 int *beg; /* starting indices, one per register */
165 int *end; /* one-past-the-end indices, one per register */
166 };
167
168#if 0 /* Disabled: the POSIX-style types below are not compiled. */
169/* Type for byte offsets within the string. POSIX mandates this. NOTE(review): POSIX specifies regoff_t as a signed integer type, unlike size_t; harmless while disabled. */
170typedef size_t regoff_t;
171
172/* POSIX specification for registers. Besides using different names than
173 `mbre_registers', POSIX uses an array of structures, instead of a
174 structure of arrays. */
175typedef struct
176{
177 regoff_t rm_so; /* Byte offset from string's start to substring's start. */
178 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
179} regmatch_t;
180#endif /* 0 */
181
182
183#ifdef __STDC__ /* ANSI C: declare the entry points with full prototypes */
184/* Parameter meanings are not documented in this header. */
185extern char *mbre_compile_pattern (const char *, int, struct mbre_pattern_buffer *);
186void mbre_free_pattern (struct mbre_pattern_buffer *);
187/* Is this really advertised? */
188extern int mbre_adjust_startpos (struct mbre_pattern_buffer *, const char*, int, int, int);
189extern void mbre_compile_fastmap (struct mbre_pattern_buffer *);
190extern int mbre_search (struct mbre_pattern_buffer *, const char*, int, int, int,
191 struct mbre_registers *);
192extern int mbre_match (struct mbre_pattern_buffer *, const char *, int, int,
193 struct mbre_registers *);
194extern void mbre_set_casetable (const char *table);
195extern void mbre_copy_registers (struct mbre_registers*, struct mbre_registers*);
196extern void mbre_free_registers (struct mbre_registers*);
197
198#else /* !__STDC__ */
199/* Same functions, declared without parameter lists. */
200extern char *mbre_compile_pattern ();
201void mbre_free_pattern ();
202/* Is this really advertised? */
203extern int mbre_adjust_startpos ();
204extern void mbre_compile_fastmap ();
205extern int mbre_search ();
206extern int mbre_match ();
207extern void mbre_set_casetable ();
208extern void mbre_copy_registers ();
209extern void mbre_free_registers ();
210
211#endif /* __STDC__ */
212
213#endif /* !__MB_REGEXP_LIBRARY */