summaryrefslogtreecommitdiff
path: root/src/sp_config_scanner.re
diff options
context:
space:
mode:
author jvoisin 2023-04-29 16:46:02 +0200
committer jvoisin 2023-04-29 16:46:02 +0200
commit cee55351d6c2865447e72d9e3e8ba5922647162d (patch)
tree 8172322ef3cb9560611b00e3addf8ce89d51053a /src/sp_config_scanner.re
parent 8532f01c7f3c356fbb2dda593477d3902491de77 (diff)
Improve how the parser is generated
- use long variant of options for re2c in its makefile
- use `define` instead of magic numbers
- add some consts
- trailing `;` are now mandatory for conditions
- NULL bytes are no longer allowed in configuration file
- the parser shouldn't crash in the absence of trailing new line at the end of its configuration file
Diffstat (limited to 'src/sp_config_scanner.re')
-rw-r--r-- src/sp_config_scanner.re 139
1 files changed, 81 insertions, 58 deletions
diff --git a/src/sp_config_scanner.re b/src/sp_config_scanner.re
index 6a14a11..82359d6 100644
--- a/src/sp_config_scanner.re
+++ b/src/sp_config_scanner.re
@@ -8,15 +8,19 @@
8#define cs_log_info(fmt, ...) sp_log_msg("config", SP_LOG_INFO, fmt, ##__VA_ARGS__) 8#define cs_log_info(fmt, ...) sp_log_msg("config", SP_LOG_INFO, fmt, ##__VA_ARGS__)
9#define cs_log_warning(fmt, ...) sp_log_warn("config", fmt, ##__VA_ARGS__) 9#define cs_log_warning(fmt, ...) sp_log_warn("config", fmt, ##__VA_ARGS__)
10 10
11#define MAX_CONDITIONS 100
12#define MAX_KEYWORDS 16
13
11 14
12zend_string *sp_get_arg_string(sp_parsed_keyword const *const kw) { 15zend_string *sp_get_arg_string(sp_parsed_keyword const *const kw) {
13 if (!kw || !kw->arg) { 16 if (!kw || !kw->arg) {
14 return NULL; 17 return NULL;
15 } 18 }
19
16 zend_string *ret = zend_string_init(kw->arg, kw->arglen, 1); 20 zend_string *ret = zend_string_init(kw->arg, kw->arglen, 1);
17 char *pin, *pout; 21 char *pin, *pout;
18 pin = pout = ZSTR_VAL(ret); 22 pin = pout = ZSTR_VAL(ret);
19 char *pend = pin + ZSTR_LEN(ret); 23 char const *const pend = pin + ZSTR_LEN(ret);
20 24
21 while (pin < pend) { 25 while (pin < pend) {
22 if (*pin == '\\') { 26 if (*pin == '\\') {
@@ -37,31 +41,38 @@ zend_string *sp_get_arg_string(sp_parsed_keyword const *const kw) {
37 41
38zend_string *sp_get_textual_representation(sp_parsed_keyword const *const parsed_rule) { 42zend_string *sp_get_textual_representation(sp_parsed_keyword const *const parsed_rule) {
39 // a rule is "sp.keyword...keyword(arg);\0" 43 // a rule is "sp.keyword...keyword(arg);\0"
40 size_t len = 3; // sp + ; 44 size_t len = 3; // "sp" + ";"
41 for (const sp_parsed_keyword *kw = parsed_rule; kw->kw; kw++) { 45 for (const sp_parsed_keyword *kw = parsed_rule; kw->kw; kw++) {
42 len++; // . 46 len++; // .
43 len += kw->kwlen; 47 len += kw->kwlen;
44 if (kw->argtype == SP_ARGTYPE_EMPTY) { 48 if (kw->argtype == SP_ARGTYPE_EMPTY) {
45 len += 2; // () 49 len += 2; // ()
46 } else if (kw->argtype == SP_ARGTYPE_STR) { 50 } else if (kw->argtype == SP_ARGTYPE_STR) {
47 len += 4; 51 len += 2; // ("
48 len += kw->arglen; 52 len += kw->arglen;
53 len += 2; // ")
49 } 54 }
50 } 55 }
51 56
52 zend_string *ret = zend_string_alloc(len, 1); 57 zend_string *ret = zend_string_alloc(len, 1);
53 char *ptr = ZSTR_VAL(ret); 58 char *ptr = ZSTR_VAL(ret);
54 59
55 memcpy(ptr, "sp", 2); ptr += 2; 60 memcpy(ptr, "sp", 2);
61 ptr += 2;
62
56 for (const sp_parsed_keyword *kw = parsed_rule; kw->kw; kw++) { 63 for (const sp_parsed_keyword *kw = parsed_rule; kw->kw; kw++) {
57 *ptr++ = '.'; 64 *ptr++ = '.';
58 memcpy(ptr, kw->kw, kw->kwlen); ptr += kw->kwlen; 65
66 memcpy(ptr, kw->kw, kw->kwlen);
67 ptr += kw->kwlen;
68
59 if (kw->argtype == SP_ARGTYPE_EMPTY || kw->argtype == SP_ARGTYPE_STR || kw->argtype == SP_ARGTYPE_UNKNOWN) { 69 if (kw->argtype == SP_ARGTYPE_EMPTY || kw->argtype == SP_ARGTYPE_STR || kw->argtype == SP_ARGTYPE_UNKNOWN) {
60 *ptr++ = '('; 70 *ptr++ = '(';
61 } 71 }
62 if (kw->argtype == SP_ARGTYPE_STR && kw->arg) { 72 if (kw->argtype == SP_ARGTYPE_STR && kw->arg) {
63 *ptr++ = '"'; 73 *ptr++ = '"';
64 memcpy(ptr, kw->arg, kw->arglen); ptr += kw->arglen; 74 memcpy(ptr, kw->arg, kw->arglen);
75 ptr += kw->arglen;
65 *ptr++ = '"'; 76 *ptr++ = '"';
66 } 77 }
67 if (kw->argtype == SP_ARGTYPE_EMPTY || kw->argtype == SP_ARGTYPE_STR || kw->argtype == SP_ARGTYPE_UNKNOWN) { 78 if (kw->argtype == SP_ARGTYPE_EMPTY || kw->argtype == SP_ARGTYPE_STR || kw->argtype == SP_ARGTYPE_UNKNOWN) {
@@ -79,16 +90,16 @@ static void str_dtor(zval *zv) {
79 90
80// sy_ functions and macros are helpers for the shunting yard algorithm 91// sy_ functions and macros are helpers for the shunting yard algorithm
81#define sy_res_push(val) \ 92#define sy_res_push(val) \
82 if (cond_res_i >= 100) { cs_log_error("condition too complex on line %d", lineno); goto out; } \ 93 if (cond_res_i >= MAX_CONDITIONS) { cs_log_error("condition too complex on line %d", lineno); goto out; } \
83 cond_res[cond_res_i++] = val; 94 cond_res[cond_res_i++] = val;
84#define sy_res_pop() cond_res[--cond_res_i] 95#define sy_res_pop() cond_res[--cond_res_i]
85#define sy_op_push(op) \ 96#define sy_op_push(op) \
86 if (cond_op_i >= 100) { cs_log_error("condition too complex on line %d", lineno); goto out; } \ 97 if (cond_op_i >= MAX_CONDITIONS) { cs_log_error("condition too complex on line %d", lineno); goto out; } \
87 cond_op[cond_op_i++] = op; 98 cond_op[cond_op_i++] = op;
88#define sy_op_pop() cond_op[--cond_op_i] 99#define sy_op_pop() cond_op[--cond_op_i]
89#define sy_op_peek() cond_op[cond_op_i-1] 100#define sy_op_peek() cond_op[cond_op_i-1]
90 101
91static inline int sy_op_precedence(char op) { 102static inline int sy_op_precedence(const char op) {
92 switch (op) { 103 switch (op) {
93 case '!': return 120; 104 case '!': return 120;
94 case '<': 105 case '<':
@@ -102,13 +113,15 @@ static inline int sy_op_precedence(char op) {
102 } 113 }
103 return 0; 114 return 0;
104} 115}
105static inline int sy_op_is_left_assoc(char op) { 116
117static inline int sy_op_is_left_assoc(const char op) {
106 switch (op) { 118 switch (op) {
107 case '!': return 0; 119 case '!': return 0;
108 } 120 }
109 return 1; 121 return 1;
110} 122}
111static int sy_apply_op(char op, int a, int b) { 123
124static int sy_apply_op(const char op, const int a, const int b) {
112 switch (op) { 125 switch (op) {
113 case '!': return !a; 126 case '!': return !a;
114 case '&': return (b && a); 127 case '&': return (b && a);
@@ -138,79 +151,77 @@ static int sy_apply_op(char op, int a, int b) {
138 tmpstr[tmplen] = 0; 151 tmpstr[tmplen] = 0;
139 152
140 153
141zend_result sp_config_scan(char *data, zend_result (*process_rule)(sp_parsed_keyword*)) 154zend_result sp_config_scan(const char *data, zend_result (*process_rule)(sp_parsed_keyword*))
142{ 155{
143 const char *YYCURSOR = data;
144 const char *YYMARKER, *t1, *t2, *t3, *t4; 156 const char *YYMARKER, *t1, *t2, *t3, *t4;
145 /*!stags:re2c format = 'const char *@@;\n'; */
146 157
147 int ret = FAILURE; 158 int ret = FAILURE;
148 159 sp_parsed_keyword parsed_rule[MAX_KEYWORDS+1];
149 const int max_keywords = 16;
150 sp_parsed_keyword parsed_rule[max_keywords+1];
151 int kw_i = 0; 160 int kw_i = 0;
152 161
153 HashTable vars; 162 HashTable vars;
154 zend_hash_init(&vars, 10, NULL, str_dtor, 1); 163 zend_hash_init(&vars, 10, NULL, str_dtor, 1);
155 zend_hash_str_add_ptr(&vars, ZEND_STRL("PHP_VERSION_ID"), zend_string_init(ZEND_STRL(ZEND_TOSTR(PHP_VERSION_ID)), 1)); 164 zend_hash_str_add_ptr(&vars, ZEND_STRL("PHP_VERSION_ID"), zend_string_init(ZEND_STRL(ZEND_TOSTR(PHP_VERSION_ID)), 1));
156 165
157 166 int cond_res[MAX_CONDITIONS] = {1};
158 int cond_res[100] = {1};
159 int cond_res_i = 0; 167 int cond_res_i = 0;
160 char cond_op[100] = {0}; 168 char cond_op[MAX_CONDITIONS] = {0};
161 int cond_op_i = 0; 169 int cond_op_i = 0;
162 170
163 int cond = yycinit; 171 int cond = yycinit;
164 long lineno = 1; 172 long lineno = 1;
165 173
174 /*!stags:re2c format = 'const char *@@;\n'; */
166 /*!re2c 175 /*!re2c
167 re2c:define:YYCTYPE = "unsigned char"; 176 re2c:define:YYCTYPE = char;
168 // re2c:define:YYCURSOR = data; 177 re2c:define:YYCURSOR = data;
178 //re2c:sentinel = 0;
169 re2c:yyfill:enable = 0; 179 re2c:yyfill:enable = 0;
180 re2c:eof = -1;
170 re2c:flags:tags = 1; 181 re2c:flags:tags = 1;
171 re2c:api:style = free-form; 182 re2c:api:style = free-form;
172 re2c:define:YYGETCONDITION = "cond"; 183 re2c:define:YYGETCONDITION = "cond";
173 re2c:define:YYSETCONDITION = "cond = @@;"; 184 re2c:define:YYSETCONDITION = "cond = @@;";
174 185
175 end = "\x00"; 186 end = "\x00";
176 nl = "\r"?"\n"; 187 newline = "\r"?"\n";
177 ws = [ \t]; 188 whitespace = [ \t];
178 keyword = [a-zA-Z_][a-zA-Z0-9_]*; 189 keyword = [a-zA-Z][a-zA-Z0-9_]*;
179 string = "\"" ("\\\"" | [^"\r\n])* "\""; 190 string = ["] ("\\"["] | [^"\r\n\x00])* ["];
180 191
181 <init> * { cs_log_error("Parser error on line %d", lineno); goto out; } 192 <init> * { cs_log_error("parser error on line %d", lineno); goto out; }
182 <init> ws+ { goto yyc_init; } 193 <init> whitespace+ { goto yyc_init; }
183 <init> [;#] .* { goto yyc_init; } 194 <init> [;#] [^\r\n\x00]* { goto yyc_init; }
184 <init> nl { lineno++; goto yyc_init; } 195 <init> newline { lineno++; goto yyc_init; }
185 <init> "sp" { kw_i = 0; goto yyc_rule; } 196 <init> "sp" { kw_i = 0; goto yyc_rule; }
186 <init> end { ret = SUCCESS; goto out; } 197 <init> end { ret = SUCCESS; goto out; }
187 <init> "@"? "set" ws+ @t1 keyword @t2 ws+ @t3 string @t4 ws* ";"? { 198 <init> "@"? "set" whitespace+ @t1 keyword @t2 whitespace+ @t3 string @t4 whitespace* ";" {
188 if (!cond_res[0]) { goto yyc_init; } 199 if (!cond_res[0]) { goto yyc_init; }
189 char *key = (char*)t1; 200 char *key = (char*)t1;
190 int keylen = t2-t1; 201 int keylen = t2 - t1;
191 zend_string *tmp = zend_hash_str_find_ptr(&vars, key, keylen); 202 zend_string *tmp = zend_hash_str_find_ptr(&vars, key, keylen);
192 if (tmp) { 203 if (tmp) {
193 zend_hash_str_del(&vars, key, keylen); 204 zend_hash_str_del(&vars, key, keylen);
194 } 205 }
195 tmp = zend_string_init(t3+1, t4-t3-2, 1); 206 tmp = zend_string_init(t3+1, t4-t3-2, 1); // `-2` for the surrounding double quotes.
196 zend_hash_str_add_ptr(&vars, key, keylen, tmp); 207 zend_hash_str_add_ptr(&vars, key, keylen, tmp);
197 goto yyc_init; 208 goto yyc_init;
198 } 209 }
199 <init> "@condition" ws+ { cond_res_i = 0; goto yyc_cond; } 210 <init> "@condition" whitespace+ { cond_res_i = 0; goto yyc_cond; }
200 <init> "@end_condition" ws* ";" { cond_res[0] = 1; cond_res_i = 0; goto yyc_init; } 211 <init> "@end_condition" whitespace* ";" { cond_res[0] = 1; cond_res_i = 0; goto yyc_init; }
201 <init> ( "@log" | "@info" ) ws+ @t1 string @t2 ";"? { 212 <init> ( "@log" | "@info" ) whitespace+ @t1 string @t2 ";" {
202 if (!cond_res[0]) { goto yyc_init; } 213 if (!cond_res[0]) { goto yyc_init; }
203 TMPSTR(tmpstr, t2, t1); 214 TMPSTR(tmpstr, t2, t1);
204 cs_log_info("[line %d]: %s", lineno, tmpstr); 215 cs_log_info("[line %d]: %s", lineno, tmpstr);
205 goto yyc_init; 216 goto yyc_init;
206 } 217 }
207 <init> ( "@warn" | "@warning" ) ws+ @t1 string @t2 ";"? { 218 <init> ( "@warn" | "@warning" ) whitespace+ @t1 string @t2 ";" {
208 if (!cond_res[0]) { goto yyc_init; } 219 if (!cond_res[0]) { goto yyc_init; }
209 TMPSTR(tmpstr, t2, t1); 220 TMPSTR(tmpstr, t2, t1);
210 cs_log_warning("[line %d]: %s", lineno, tmpstr); 221 cs_log_warning("[line %d]: %s", lineno, tmpstr);
211 goto yyc_init; 222 goto yyc_init;
212 } 223 }
213 <init> ( "@err" | "@error" ) ws+ @t1 string @t2 ";"? { 224 <init> ( "@err" | "@error" ) whitespace+ @t1 string @t2 ";" {
214 if (!cond_res[0]) { goto yyc_init; } 225 if (!cond_res[0]) { goto yyc_init; }
215 TMPSTR(tmpstr, t2, t1); 226 TMPSTR(tmpstr, t2, t1);
216 cs_log_error("[line %d]: %s", lineno, tmpstr); 227 cs_log_error("[line %d]: %s", lineno, tmpstr);
@@ -218,8 +229,8 @@ zend_result sp_config_scan(char *data, zend_result (*process_rule)(sp_parsed_key
218 } 229 }
219 230
220 231
221 <cond> ws+ { goto yyc_cond; } 232 <cond> whitespace+ { goto yyc_cond; }
222 <cond> nl { lineno++; goto yyc_cond; } 233 <cond> newline { lineno++; goto yyc_cond; }
223 <cond> @t1 keyword @t2 "(" @t3 string? @t4 ")" { 234 <cond> @t1 keyword @t2 "(" @t3 string? @t4 ")" {
224 if (t4-t3 >= 2 && strlen("extension_loaded") == t2-t1 && strncmp("extension_loaded", t1, t2-t1) == 0) { 235 if (t4-t3 >= 2 && strlen("extension_loaded") == t2-t1 && strncmp("extension_loaded", t1, t2-t1) == 0) {
225 int is_loaded = (zend_hash_str_find_ptr(&module_registry, t3+1, t4-t3-2) != NULL); 236 int is_loaded = (zend_hash_str_find_ptr(&module_registry, t3+1, t4-t3-2) != NULL);
@@ -240,10 +251,10 @@ zend_result sp_config_scan(char *data, zend_result (*process_rule)(sp_parsed_key
240 goto yyc_cond_op; 251 goto yyc_cond_op;
241 } 252 }
242 <cond> @t1 [0-9]+ @t2 { sy_res_push(atoi(t1)); goto yyc_cond_op; } 253 <cond> @t1 [0-9]+ @t2 { sy_res_push(atoi(t1)); goto yyc_cond_op; }
243 <cond> @t1 "!" { sy_op_push(*t1); goto yyc_cond; } 254 <cond> @t1 "!" { sy_op_push(*t1); goto yyc_cond; }
244 <cond> @t1 "(" { sy_op_push(*t1); goto yyc_cond; } 255 <cond> @t1 "(" { sy_op_push(*t1); goto yyc_cond; }
245 <cond_op> ws+ { goto yyc_cond_op; } 256 <cond_op> whitespace+ { goto yyc_cond_op; }
246 <cond_op> nl { lineno++; goto yyc_cond_op; } 257 <cond_op> newline { lineno++; goto yyc_cond_op; }
247 <cond_op> @t1 ( "&&" | "||" | "<" | ">" | "==" | "<=" | ">=") @t2 { 258 <cond_op> @t1 ( "&&" | "||" | "<" | ">" | "==" | "<=" | ">=") @t2 {
248 char op1 = *t1; 259 char op1 = *t1;
249 if (t2-t1 == 2) { 260 if (t2-t1 == 2) {
@@ -252,7 +263,13 @@ zend_result sp_config_scan(char *data, zend_result (*process_rule)(sp_parsed_key
252 case '>': op1 = 'G'; break; // >= 263 case '>': op1 = 'G'; break; // >=
253 } 264 }
254 } 265 }
255 while (cond_op_i && sy_op_peek() != '(' && ((sy_op_precedence(sy_op_peek()) > sy_op_precedence(*t1)) || (sy_op_precedence(sy_op_peek()) == sy_op_precedence(*t1) && sy_op_is_left_assoc(*t1)))) { 266 while (cond_op_i &&
267 sy_op_peek() != '(' &&
268 (
269 (sy_op_precedence(sy_op_peek()) > sy_op_precedence(*t1)) ||
270 (sy_op_precedence(sy_op_peek()) == sy_op_precedence(*t1) && sy_op_is_left_assoc(*t1))
271 )
272 ) {
256 SY_APPLY_OP_FROM_STACK(); 273 SY_APPLY_OP_FROM_STACK();
257 } 274 }
258 sy_op_push(*t1); 275 sy_op_push(*t1);
@@ -263,30 +280,37 @@ zend_result sp_config_scan(char *data, zend_result (*process_rule)(sp_parsed_key
263 SY_APPLY_OP_FROM_STACK(); 280 SY_APPLY_OP_FROM_STACK();
264 } 281 }
265 if (cond_op_i == 0 || sy_op_peek() != '(') { 282 if (cond_op_i == 0 || sy_op_peek() != '(') {
266 cs_log_error("unbalanced parathesis on line %d", lineno); goto out; 283 cs_log_error("unbalanced parenthesis on line %d", lineno); goto out;
267 } 284 }
268 cond_op_i--; 285 cond_op_i--;
269 goto yyc_cond_op; 286 goto yyc_cond_op;
270 } 287 }
271 <cond_op> ";" { 288 <cond_op> ";" {
272 while (cond_op_i) { 289 while (cond_op_i) {
273 if (sy_op_peek() == '(') { cs_log_error("unbalanced parathesis on line %d", lineno); goto out; } 290 if (sy_op_peek() == '(') { cs_log_error("unbalanced parenthesis on line %d", lineno); goto out; }
274 SY_APPLY_OP_FROM_STACK(); 291 SY_APPLY_OP_FROM_STACK();
275 } 292 }
276 if (cond_res_i > 1) { cs_log_error("invalid condition on line %d", lineno); goto out; } 293 if (cond_res_i > 1) { cs_log_error("invalid condition on line %d", lineno); goto out; }
277 goto yyc_init; 294 goto yyc_init;
278 } 295 }
279 <cond, cond_op> * { cs_log_error("Syntax error in condition on line %d", lineno); goto out; } 296 <cond, cond_op> * { cs_log_error("syntax error in condition on line %d", lineno); goto out; }
280 297
281 <rule> ws+ { goto yyc_rule; } 298 <rule> whitespace+ { goto yyc_rule; }
282 <rule> nl / ( nl | ws )* "." { lineno++; goto yyc_rule; } 299 <rule> newline / ( newline | whitespace )* "." { lineno++; goto yyc_rule; }
283 <rule> "." @t1 keyword @t2 ( "(" @t3 ( string? | keyword ) @t4 ")" )? { 300 <rule> "." @t1 keyword @t2 ( "(" @t3 ( string? | keyword ) @t4 ")" )? {
284 if (!cond_res[0]) { goto yyc_rule; } 301 if (!cond_res[0]) { goto yyc_rule; }
285 if (kw_i == max_keywords) { 302 if (kw_i == MAX_KEYWORDS) {
286 cs_log_error("Too many keywords in rule (more than %d) on line %d", max_keywords, lineno); 303 cs_log_error("too many keywords in rule (more than %d) on line %d", MAX_KEYWORDS, lineno);
287 goto out; 304 goto out;
288 } 305 }
289 sp_parsed_keyword kw = {.kw = (char*)t1, .kwlen = t2-t1, .arg = (char*)t3, .arglen = t4-t3, .argtype = SP_ARGTYPE_UNKNOWN, .lineno = lineno}; 306 sp_parsed_keyword kw = {
307 .kw = (char*)t1,
308 .kwlen = t2-t1,
309 .arg = (char*)t3,
310 .arglen = t4-t3,
311 .argtype = SP_ARGTYPE_UNKNOWN,
312 .lineno = lineno
313 };
290 if (t3 && t4) { 314 if (t3 && t4) {
291 if (t3 == t4) { 315 if (t3 == t4) {
292 kw.argtype = SP_ARGTYPE_EMPTY; 316 kw.argtype = SP_ARGTYPE_EMPTY;
@@ -320,7 +344,6 @@ zend_result sp_config_scan(char *data, zend_result (*process_rule)(sp_parsed_key
320 goto yyc_init; 344 goto yyc_init;
321 } 345 }
322 <rule> * { goto end_of_rule; } 346 <rule> * { goto end_of_rule; }
323
324 */ 347 */
325out: 348out:
326 zend_hash_destroy(&vars); 349 zend_hash_destroy(&vars);