From dd270c094df080ff8438d29e14ec1bbffe0ca993 Mon Sep 17 00:00:00 2001 From: Ben Fuhrmannek Date: Fri, 11 Jul 2014 16:15:21 +0200 Subject: remove_binary and disallow_binary allow utf-8. +testcases --- .../suhosin_upload_disallow_binary_utf8.phpt | 43 ++++++++ tests/filter/suhosin_upload_remove_binary.phpt | Bin 0 -> 796 bytes .../filter/suhosin_upload_remove_binary_utf8.phpt | 31 ++++++ ufilter.c | 112 +++++++++++++++------ 4 files changed, 155 insertions(+), 31 deletions(-) create mode 100644 tests/filter/suhosin_upload_disallow_binary_utf8.phpt create mode 100644 tests/filter/suhosin_upload_remove_binary.phpt create mode 100644 tests/filter/suhosin_upload_remove_binary_utf8.phpt diff --git a/tests/filter/suhosin_upload_disallow_binary_utf8.phpt b/tests/filter/suhosin_upload_disallow_binary_utf8.phpt new file mode 100644 index 0000000..4661dc9 --- /dev/null +++ b/tests/filter/suhosin_upload_disallow_binary_utf8.phpt @@ -0,0 +1,43 @@ +--TEST-- +Testing: suhosin.upload.disallow_binary=On with UTF-8 +--INI-- +suhosin.log.syslog=0 +suhosin.log.sapi=0 +suhosin.log.stdout=255 +suhosin.log.script=0 +file_uploads=1 +suhosin.upload.disallow_binary=On +max_file_uploads=40 +suhosin.upload.max_uploads=40 +--SKIPIF-- + +--COOKIE-- +--GET-- +--POST_RAW-- +Content-Type: multipart/form-data; boundary=bound +--bound +Content-Disposition: form-data; name="test"; filename="test" + +Spaß am Gerät! + +--bound-- +--FILE-- + +--EXPECTF-- +array(1) { + ["test"]=> + array(5) { + ["name"]=> + string(4) "test" + ["type"]=> + string(0) "" + ["tmp_name"]=> + string(%d) "%s" + ["error"]=> + int(0) + ["size"]=> + int(17) + } +} diff --git a/tests/filter/suhosin_upload_remove_binary.phpt b/tests/filter/suhosin_upload_remove_binary.phpt new file mode 100644 index 0000000..f4337d9 Binary files /dev/null and b/tests/filter/suhosin_upload_remove_binary.phpt differ diff --git a/tests/filter/suhosin_upload_remove_binary_utf8.phpt b/tests/filter/suhosin_upload_remove_binary_utf8.phpt new file mode 100644 index 0000000..2d10eaa --- /dev/null +++ b/tests/filter/suhosin_upload_remove_binary_utf8.phpt @@ -0,0 +1,31 @@ +--TEST-- +Testing: suhosin.upload.remove_binary=On with UTF-8 +--INI-- +suhosin.log.syslog=0 +suhosin.log.sapi=0 +suhosin.log.stdout=255 +suhosin.log.script=0 +file_uploads=1 +suhosin.upload.disallow_binary=Off +suhosin.upload.remove_binary=On +max_file_uploads=40 +suhosin.upload.max_uploads=40 +--SKIPIF-- + +--COOKIE-- +--GET-- +--POST_RAW-- +Content-Type: multipart/form-data; boundary=bound +--bound +Content-Disposition: form-data; name="test"; filename="test" + +Spaß am Gerät! + +--bound-- +--FILE-- + +--EXPECTF-- +string(17) "Spaß am Gerät! +" \ No newline at end of file diff --git a/ufilter.c b/ufilter.c index 5a85b54..2eeed56 100644 --- a/ufilter.c +++ b/ufilter.c @@ -197,6 +197,29 @@ return_failure: } /* }}} */ +static inline int suhosin_validate_utf8_multibyte(const char* cp) +{ + if ((*cp & 0xe0) == 0xc0 && // 1st byte is 110xxxxx + (*(cp+1) & 0xc0) == 0x80 && // 2nd byte is 10xxxxxx + (*cp & 0x1e)) { // overlong check 110[xxxx]x 10xxxxxx + return 2; + } + if ((*cp & 0xf0) == 0xe0 && // 1st byte is 1110xxxx + (*(cp+1) & 0xc0) == 0x80 && // 2nd byte is 10xxxxxx + (*(cp+2) & 0xc0) == 0x80 && // 3rd byte is 10xxxxxx + ((*cp & 0x0f) | (*(cp+1) & 0x20))) { // 1110[xxxx] 10[x]xxxxx 10xxxxxx + return 3; + } + if ((*cp & 0xf8) == 0xf0 && // 1st byte is 11110xxx + (*(cp+1) & 0xc0) == 0x80 && // 2nd byte is 10xxxxxx + (*(cp+2) & 0xc0) == 0x80 && // 3rd byte is 10xxxxxx + (*(cp+3) & 0xc0) == 0x80 && // 4th byte is 10xxxxxx + ((*cp & 0x07) | (*(cp+1) & 0x30))) { // 11110[xxx] 10[xx]xxxx 10xxxxxx 10xxxxxx + return 4; + } + return 0; +} + int suhosin_rfc1867_filter(unsigned int event, void *event_data, void **extra TSRMLS_DC) { int retval = SUCCESS; @@ -250,38 +273,65 @@ int suhosin_rfc1867_filter(unsigned int event, void *event_data, void **extra TS } } - if (SUHOSIN_G(upload_disallow_binary)) { - - multipart_event_file_data *mefd = (multipart_event_file_data *) event_data; - size_t i; - - for (i=0; ilength; i++) { - if (mefd->data[i] < 32 && !isspace(mefd->data[i])) { - suhosin_log(S_FILES, "uploaded file contains binary data - file dropped"); - if (!SUHOSIN_G(simulation)) { - goto continue_with_failure; - } - } - } - } + if (SUHOSIN_G(upload_disallow_binary)) { + + multipart_event_file_data *mefd = (multipart_event_file_data *) event_data; - if (SUHOSIN_G(upload_remove_binary)) { - - multipart_event_file_data *mefd = (multipart_event_file_data *) event_data; - size_t i, j; - - for (i=0, j=0; ilength; i++) { - if (mefd->data[i] >= 32 || isspace(mefd->data[i])) { - mefd->data[j++] = mefd->data[i]; - } - } - SDEBUG("removing binary %u %u",i,j); - /* IMPORTANT FOR DAISY CHAINING */ - mefd->length = j; - if (mefd->newlength) { - *mefd->newlength = j; - } - } + char *cp, *cpend; + int n; + cpend = mefd->data + mefd->length; + for (char *cp = mefd->data; cp < cpend; cp++) { + if (*cp >= 32) { + continue; + } + if (*cp & 0x80) { + SDEBUG("checking char %x", *cp); + if ((n = suhosin_validate_utf8_multibyte(cp))) { // valid UTF8 multibyte character + cp += n - 1; + continue; + } + } + + if (!isspace(*cp)) { + suhosin_log(S_FILES, "uploaded file contains binary data - file dropped"); + if (!SUHOSIN_G(simulation)) { + goto continue_with_failure; + } + break; + } + + } + + } + + if (SUHOSIN_G(upload_remove_binary)) { + + multipart_event_file_data *mefd = (multipart_event_file_data *) event_data; + size_t i, j; + int n; + + for (i=0, j=0; ilength; i++) { + if (mefd->data[i] >= 32 || isspace(mefd->data[i])) { + mefd->data[j++] = mefd->data[i]; + } else if (mefd->data[i] & 0x80) { + n = suhosin_validate_utf8_multibyte(mefd->data + i); + if (!n) { continue; } + while (n) { + mefd->data[j++] = mefd->data[i++]; + n--; + } + i--; + } + } + mefd->data[j] = '\0'; + + SDEBUG("removing binary %zu %zu",i,j); + /* IMPORTANT FOR DAISY CHAINING */ + mefd->length = j; + if (mefd->newlength) { + *mefd->newlength = j; + } + } break; -- cgit v1.3