diff options
| author | xXx-caillou-xXx | 2017-12-20 18:09:53 +0100 |
|---|---|---|
| committer | jvoisin | 2017-12-20 18:09:53 +0100 |
| commit | e7f541396715ee2895abcf73044b91ae9b746201 (patch) | |
| tree | ba0e9765e7f14f04b92585df1f3fcd1830ab4b00 /src/sp_var_parser.c | |
| parent | 8d6cc4f2b63c3f0dc31fe6cecd34ac023ea1cccb (diff) | |
Better parsing of the rules
Thanks to this huge commit from @xXx-caillou-xXx, we can now write amazingly flexible rules.
Diffstat (limited to 'src/sp_var_parser.c')
| -rw-r--r-- | src/sp_var_parser.c | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/src/sp_var_parser.c b/src/sp_var_parser.c new file mode 100644 index 0000000..bc0a80e --- /dev/null +++ b/src/sp_var_parser.c | |||
| @@ -0,0 +1,247 @@ | |||
| 1 | #include "php_snuffleupagus.h" | ||
| 2 | |||
| 3 | static int parse_str_tokens(const char *str, const sp_token_t token, | ||
| 4 | sp_node_t *tokens_list) { | ||
| 5 | const char *cur_str = str; | ||
| 6 | |||
| 7 | while ((cur_str = strchr(cur_str, token.token[0]))) { | ||
| 8 | if (0 == strncmp(cur_str, token.token, strlen(token.token))) { | ||
| 9 | sp_token_t *token_elm = pecalloc(sizeof(sp_token_t), 1, 1); | ||
| 10 | token_elm->pos = cur_str - str; | ||
| 11 | token_elm->token = token.token; | ||
| 12 | token_elm->type = token.type; | ||
| 13 | sp_list_insert(tokens_list, token_elm); | ||
| 14 | cur_str += strlen(token.token); | ||
| 15 | } else { | ||
| 16 | cur_str++; | ||
| 17 | } | ||
| 18 | } | ||
| 19 | return 0; | ||
| 20 | } | ||
| 21 | |||
| 22 | static bool is_var_name_valid(const char *name) { | ||
| 23 | static pcre *regexp_const = NULL; | ||
| 24 | static pcre *regexp_var = NULL; | ||
| 25 | const char *pcre_error; | ||
| 26 | int pcre_error_offset; | ||
| 27 | |||
| 28 | if (!name) { | ||
| 29 | return false; | ||
| 30 | } | ||
| 31 | if (NULL == regexp_var || NULL == regexp_const) { | ||
| 32 | regexp_var = sp_pcre_compile(REGEXP_VAR, PCRE_CASELESS, &pcre_error, | ||
| 33 | &pcre_error_offset, NULL); | ||
| 34 | regexp_const = sp_pcre_compile(REGEXP_CONST, PCRE_CASELESS, &pcre_error, | ||
| 35 | &pcre_error_offset, NULL); | ||
| 36 | } | ||
| 37 | if (NULL == regexp_var || NULL == regexp_const) { | ||
| 38 | sp_log_err("config", "Could not compile regexp."); | ||
| 39 | return false; | ||
| 40 | } | ||
| 41 | if (0 > sp_pcre_exec(regexp_var, NULL, name, strlen(name), 0, 0, NULL, 0) | ||
| 42 | && 0 > sp_pcre_exec(regexp_const, NULL, name, strlen(name), 0, 0, NULL, 0)) { | ||
| 43 | return false; | ||
| 44 | } | ||
| 45 | return true; | ||
| 46 | } | ||
| 47 | |||
| 48 | static int create_var(sp_tree *tree, const char *restrict value, | ||
| 49 | size_t value_len, elem_type _type, const char *restrict idx) { | ||
| 50 | sp_tree *var_node = NULL; | ||
| 51 | |||
| 52 | if (!tree) { | ||
| 53 | return -1; | ||
| 54 | } | ||
| 55 | if (tree->next == NULL && tree->type == 0) { | ||
| 56 | var_node = tree; | ||
| 57 | } else { | ||
| 58 | var_node = pecalloc(sizeof(sp_tree), 1, 1); | ||
| 59 | } | ||
| 60 | |||
| 61 | var_node->value = NULL; | ||
| 62 | var_node->next = NULL; | ||
| 63 | var_node->idx = NULL; | ||
| 64 | var_node->type = _type; | ||
| 65 | // Check if a constant is a variable | ||
| 66 | if (value && value[0] == VARIABLE_TOKEN && _type == CONSTANT) { | ||
| 67 | var_node->type = VAR; | ||
| 68 | } | ||
| 69 | if (!(var_node->value = pestrndup(value, value_len, 1))) { | ||
| 70 | sp_log_err("config", "Can't allocate a strndup"); | ||
| 71 | return -1; | ||
| 72 | } | ||
| 73 | if (var_node->type != STRING_DELIMITER && !is_var_name_valid(var_node->value)) { | ||
| 74 | sp_log_err("config", "Invalid var name: %s.", var_node->value); | ||
| 75 | return -1; | ||
| 76 | } | ||
| 77 | var_node->idx = parse_var(idx); | ||
| 78 | |||
| 79 | if (tree != var_node) { | ||
| 80 | while (tree->next) { | ||
| 81 | tree = tree->next; | ||
| 82 | } | ||
| 83 | tree->next = var_node; | ||
| 84 | } | ||
| 85 | return 0; | ||
| 86 | } | ||
| 87 | |||
| 88 | int cmp_tokens(sp_node_t *list1, sp_node_t *list2) { | ||
| 89 | return (((sp_token_t *)list1->data)->pos | ||
| 90 | - ((sp_token_t *)list2->data)->pos); | ||
| 91 | } | ||
| 92 | |||
| 93 | static int is_next_token_empty(sp_token_t *token, sp_token_t *token_next, | ||
| 94 | const char * restrict str) { | ||
| 95 | if ((token_next && token_next->pos == token->pos + strlen(token->token)) | ||
| 96 | || (!token_next && token->pos == strlen(str) - strlen(token->token))) { | ||
| 97 | return -1; | ||
| 98 | } | ||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | static int is_token_valid(sp_node_t *tokens_list, elem_type ignore, | ||
| 103 | int array_count, const char * restrict str, | ||
| 104 | size_t pos) { | ||
| 105 | sp_token_t *token = (sp_token_t *)tokens_list->data; | ||
| 106 | sp_token_t *token_next = NULL; | ||
| 107 | |||
| 108 | if (tokens_list->next) { | ||
| 109 | token_next = (sp_token_t *)tokens_list->next->data; | ||
| 110 | } | ||
| 111 | switch (token->type) { | ||
| 112 | case ESC_STRING_DELIMITER: | ||
| 113 | case STRING_DELIMITER: | ||
| 114 | if (ignore == token->type) { | ||
| 115 | if (token_next) { | ||
| 116 | if (token_next->pos != token->pos + 1) { | ||
| 117 | return -1; | ||
| 118 | } | ||
| 119 | } else if (token->pos != strlen(str) - 1) { | ||
| 120 | return -1; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | break; | ||
| 124 | case ARRAY_END: | ||
| 125 | if (!ignore) { | ||
| 126 | if (array_count < 1) { | ||
| 127 | return -1; | ||
| 128 | } else if (token_next) { | ||
| 129 | if (token_next->type == STRING_DELIMITER | ||
| 130 | || token_next->type == ESC_STRING_DELIMITER) { | ||
| 131 | return -1; | ||
| 132 | } | ||
| 133 | } else if (token->pos != strlen(str) - strlen(token->token)) { | ||
| 134 | return -1; | ||
| 135 | } | ||
| 136 | } | ||
| 137 | break; | ||
| 138 | case OBJECT: | ||
| 139 | if (!ignore && -1 == is_next_token_empty(token, token_next, str)) { | ||
| 140 | return -1; | ||
| 141 | } | ||
| 142 | if (pos == 0 && *str != VARIABLE_TOKEN) { | ||
| 143 | return -1; | ||
| 144 | } | ||
| 145 | break; | ||
| 146 | case CLASS: | ||
| 147 | if (!ignore && -1 == is_next_token_empty(token, token_next, str)) { | ||
| 148 | return -1; | ||
| 149 | } | ||
| 150 | break; | ||
| 151 | default: | ||
| 152 | break; | ||
| 153 | } | ||
| 154 | return 0; | ||
| 155 | } | ||
| 156 | |||
| 157 | static sp_tree *parse_tokens(const char * restrict str, | ||
| 158 | sp_node_t *tokens_list) { | ||
| 159 | size_t pos = 0; | ||
| 160 | int array_count = 0, pos_idx_start = -1; | ||
| 161 | elem_type ignore = 0; | ||
| 162 | sp_tree *tree = sp_tree_new(); | ||
| 163 | |||
| 164 | for (; tokens_list && tokens_list->data; tokens_list = tokens_list->next) { | ||
| 165 | sp_token_t *token = (sp_token_t *)tokens_list->data; | ||
| 166 | size_t value_len; | ||
| 167 | char *idx = NULL; | ||
| 168 | |||
| 169 | if (-1 == is_token_valid(tokens_list, ignore, array_count, str, pos)) { | ||
| 170 | sp_log_err("config", "Invalid `%s` position.", token->token); | ||
| 171 | goto error; | ||
| 172 | } | ||
| 173 | if (token->type == STRING_DELIMITER || token->type == ESC_STRING_DELIMITER) { | ||
| 174 | pos = (!ignore && !array_count) ? pos + strlen(token->token) : pos; | ||
| 175 | ignore = (!ignore) ? token->type : (ignore == token->type) ? 0 : ignore; | ||
| 176 | token->type = STRING_DELIMITER; | ||
| 177 | } | ||
| 178 | if (ignore == 0) { | ||
| 179 | if (token->type == ARRAY) { | ||
| 180 | pos_idx_start = (array_count) ? pos_idx_start : | ||
| 181 | (int)(token->pos + strlen(token->token)); | ||
| 182 | array_count++; | ||
| 183 | } else if (token->type == ARRAY_END) { | ||
| 184 | array_count--; | ||
| 185 | token->type = ARRAY; | ||
| 186 | } | ||
| 187 | if (array_count == 0) { | ||
| 188 | value_len = token->pos - pos; | ||
| 189 | if (token->type == ARRAY) { | ||
| 190 | value_len -= strlen(token->token); | ||
| 191 | } | ||
| 192 | if (pos_idx_start > 0) { | ||
| 193 | idx = estrndup(&(str[pos_idx_start]), token->pos - pos_idx_start); | ||
| 194 | value_len -= token->pos - pos_idx_start; | ||
| 195 | } | ||
| 196 | if (create_var(tree, &str[pos], value_len, token->type, idx)) { | ||
| 197 | goto error; | ||
| 198 | } | ||
| 199 | efree(idx); | ||
| 200 | pos = token->pos + strlen(token->token); | ||
| 201 | pos_idx_start = -1; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | if (ignore != 0 || array_count != 0) { | ||
| 207 | error: | ||
| 208 | sp_tree_free(tree); | ||
| 209 | return NULL; | ||
| 210 | } | ||
| 211 | if (pos != strlen(str) | ||
| 212 | && create_var(tree, &str[pos], strlen(str) - pos, CONSTANT, NULL)) { | ||
| 213 | goto error; | ||
| 214 | } | ||
| 215 | return tree; | ||
| 216 | } | ||
| 217 | |||
| 218 | sp_tree *parse_var(const char *line) { | ||
| 219 | sp_node_t *tokens_list = NULL; | ||
| 220 | sp_tree *tree = NULL; | ||
| 221 | const sp_token_t delimiter_list[] = { | ||
| 222 | {.type=OBJECT, .token=OBJECT_TOKEN}, | ||
| 223 | {.type=ARRAY, .token=ARRAY_TOKEN}, | ||
| 224 | {.type=ARRAY_END, .token=ARRAY_END_TOKEN}, | ||
| 225 | {.type=STRING_DELIMITER, .token=STRING_TOKEN}, | ||
| 226 | {.type=ESC_STRING_DELIMITER, .token=ESC_STRING_TOKEN}, | ||
| 227 | {.type=CLASS, .token=CLASS_TOKEN} | ||
| 228 | }; | ||
| 229 | |||
| 230 | |||
| 231 | if (!line) { | ||
| 232 | return NULL; | ||
| 233 | } | ||
| 234 | tokens_list = sp_list_new(); | ||
| 235 | for (unsigned int i = 0; i < sizeof(delimiter_list) / sizeof(sp_token_t); i++) { | ||
| 236 | parse_str_tokens(line, delimiter_list[i], tokens_list); | ||
| 237 | } | ||
| 238 | tokens_list = sp_list_sort(tokens_list, cmp_tokens); | ||
| 239 | tree = parse_tokens(line, tokens_list); | ||
| 240 | sp_list_free(tokens_list); | ||
| 241 | // Check if tree is empty. | ||
| 242 | if (tree && tree->next == NULL && tree->type == 0) { | ||
| 243 | tree->type = CONSTANT; | ||
| 244 | tree->value = pestrdup("", 1); | ||
| 245 | } | ||
| 246 | return tree; | ||
| 247 | } | ||
