[PATCH] Escape strings produced by the `%V` format specification

oscaretu . oscaretu at gmail.com
Tue Jan 3 12:57:47 UTC 2017


Hello, Joshua.

What values take %%TEST_GLOBALS%% and %%TEST_GLOBALS_HTTP%%?

I've searched them in Google and I didn't find anything related. Nor I was
unable to find anything about them in the documentation for Test::Nginx (
http://search.cpan.org/~agent/Test-Nginx-0.25/lib/Test/Nginx.pm) neither in
https://openresty.gitbooks.io/programming-openresty/content/

Kind regards,
Oscar



On Tue, Jan 3, 2017 at 12:14 PM, Joshua Spence <josh at joshuaspence.com>
wrote:

> I tested this change with the following script:
>
> ```
> #!/usr/bin/perl
>
> ############################################################
> ###################
>
> use warnings;
> use strict;
>
> use Test::More;
>
> BEGIN { use FindBin; chdir($FindBin::Bin); }
>
> use lib 'lib';
> use Test::Nginx;
>
> ############################################################
> ###################
>
> select STDERR; $| = 1;
> select STDOUT; $| = 1;
>
> my $t = Test::Nginx->new()
>     ->has(qw/http limit_req/)
>     ->plan(2)
>     ->write_file_expand('nginx.conf', <<'EOF');
>
> %%TEST_GLOBALS%%
>
> daemon off;
>
> events {}
>
> http {
>     %%TEST_GLOBALS_HTTP%%
>
>     limit_req_zone $binary_remote_addr zone=one:1m rate=1r/m;
>     limit_req zone=one;
>
>     server {
>         listen       127.0.0.1:8080;
>         server_name  localhost;
>
>         location / {
>             error_log stderr;
>         }
>     }
> }
>
> EOF
>
> open OLDERR, '>&', \*STDERR;
> open STDERR, '>', $t->testdir() . '/stderr' or die "Can't reopen STDERR:
> $!";
> open my $stderr, '<', $t->testdir() . '/stderr' or die "Can't open stderr
> file: $!";
>
> $t->run();
>
> open STDERR, '>&', \*OLDERR;
>
> ############################################################
> ###################
>
> # charge limit_req
> http_get('/');
>
> http_get('/"foo bar"');
> is(lines($t, 'request: "GET /\"foo bar\" HTTP/1.0"'), 1);
>
> http_get_with_referrer('/', 'https://www.google.com.au/search?hl=en&q="BauGutachter"
> +41 2016 .ch&num=100&start=0');
> is(lines($t, 'referrer: "https://www.google.com.au/search?hl=en&q=\
> "BauGutachter\" +41 2016 .ch&num=100&start=0"'), 1);
>
> ############################################################
> ###################
>
> sub http_get_with_referrer {
>     my ($url, $referer) = @_;
>     return http(<<EOF);
> GET $url HTTP/1.0
> Host: localhost
> Referer: $referer
>
> EOF
> }
>
> sub lines {
>     my ($t, $pattern) = @_;
>     return map { $_ =~ /\Q$pattern\E/ } (<$stderr>);
> }
>
> ############################################################
> ###################
> ```
>
> *Joshua Spence*
>
> *mobile:* +61 422 990 263 <+61%20422%20990%20263>
> *email:* josh at joshuaspence.com
> *web:* http://www.joshuaspence.com
>
> On 3 January 2017 at 17:50, Joshua Spence <josh at joshuaspence.com> wrote:
>
>> # HG changeset patch
>> # User Joshua Spence <josh at joshuaspence.com>
>> # Date 1483420178 -39600
>> #      Tue Jan 03 16:09:38 2017 +1100
>> # Branch error_log_escape
>> # Node ID 30a9ce42aadfcf135f3ad95a296c95e7044d5f9d
>> # Parent  a42afc225e98afe1f7c3b428c81c8af7eba362c0
>> Escape strings produced by the `%V` format specification
>>
>> Currently nginx error logs loosely adhere to the following pattern:
>>
>> ```
>> %{TIMESTAMP} [%{SEVERITY}] %{PROCESS_ID}#%{THREAD_ID}:(
>> *%{CONNECTION_ID})?%{MESSAGE}(, %{KEY}: "%{VALUE}")*
>> ```
>>
>> In our environment we are using Filebeat to ship nginx error logs into
>> Logstash. Our Logstash filter for parsing nginx error logs is as follows:
>>
>> ```
>> filter {
>>   grok {
>>     match => {
>>       "message" => "(?<timestamp>%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}
>> %{TIME}) \[%{LOGLEVEL:severity}\] %{POSINT:process_id:int}#%{NONNEGINT:thread_id:int}:(?:
>> \*%{NONNEGINT:connection_id:int})? (?<message>[^,]*)(?:,
>> %{GREEDYDATA:nginx_keyvalue_data})?"
>>     }
>>     overwrite => ["message"]
>>   }
>>
>>   date {
>>     match => ["timestamp", "yyyy/MM/dd HH:mm:ss"]
>>     remove_field => ["timestamp"]
>>   }
>>
>>   kv {
>>     field_split => ", "
>>     remove_field => "nginx_keyvalue_data"
>>     source => "nginx_keyvalue_data"
>>     value_split => ": "
>>   }
>> }
>> ```
>>
>> The problem, however, is that it is difficult to unambigiously parse the
>> key-value data from the nginx error log format because of a lack of data
>> sanitization. Specifically, consider the following entry from our nginx
>> error logs:
>>
>> ```
>> 2016/11/15 03:45:51 [error] 23770#23770: *21535000 open()
>> "/REDACTED/+"BauGutachter"++41+2016+.ch&tbo=1" failed (2: No such file
>> or directory), client: REDACTED, server: REDACTED, request: "GET
>> /job/+%22BauGutachter%22++41+2016+.ch&tbo=1 HTTP/1.1", host: "REDACTED",
>> referrer: "https://www.google.com.au/search?hl=en&q="BauGutachter" +41
>> 2016 .ch&num=100&start=0"
>> ```
>>
>> Specifically, the problem in the above sample is the `referrer` field,
>> the value of which contains double quotes. I think that it makes sense to
>> escape `\` and `"` characters before they are written to the error log.
>>
>> diff -r a42afc225e98 -r 30a9ce42aadf src/core/ngx_string.c
>> --- a/src/core/ngx_string.c     Tue Dec 27 17:23:08 2016 +0300
>> +++ b/src/core/ngx_string.c     Tue Jan 03 16:09:38 2017 +1100
>> @@ -15,7 +15,7 @@
>>      const u_char *basis, ngx_uint_t padding);
>>  static ngx_int_t ngx_decode_base64_internal(ngx_str_t *dst, ngx_str_t
>> *src,
>>      const u_char *basis);
>> -
>> +static uintptr_t ngx_escape_string(u_char *dst, u_char *src, size_t
>> size);
>>
>>  void
>>  ngx_strlow(u_char *dst, u_char *src, size_t n)
>> @@ -235,7 +235,7 @@
>>                  v = va_arg(args, ngx_str_t *);
>>
>>                  len = ngx_min(((size_t) (last - buf)), v->len);
>> -                buf = ngx_cpymem(buf, v->data, len);
>> +                buf = (u_char *) ngx_escape_string(buf, v->data, len);
>>                  fmt++;
>>
>>                  continue;
>> @@ -1844,6 +1844,43 @@
>>  }
>>
>>
>> +static uintptr_t
>> +ngx_escape_string(u_char *dst, u_char *src, size_t size)
>> +{
>> +    u_char      ch;
>> +    ngx_uint_t  len;
>> +
>> +    if (dst == NULL) {
>> +        len = 0;
>> +
>> +        while (size) {
>> +            ch = *src++;
>> +
>> +            if (ch == '\\' || ch == '"') {
>> +                len++;
>> +            }
>> +
>> +            size--;
>> +        }
>> +
>> +        return (uintptr_t) len;
>> +    }
>> +
>> +    while (size) {
>> +        ch = *src++;
>> +
>> +        if (ch == '\\' || ch == '"') {
>> +            *dst++ = '\\';
>> +        }
>> +
>> +        *dst++ = ch;
>> +        size--;
>> +    }
>> +
>> +    return (uintptr_t) dst;
>> +}
>> +
>> +
>>  void
>>  ngx_str_rbtree_insert_value(ngx_rbtree_node_t *temp,
>>      ngx_rbtree_node_t *node, ngx_rbtree_node_t *sentinel)
>>
>
>
> _______________________________________________
> nginx-devel mailing list
> nginx-devel at nginx.org
> http://mailman.nginx.org/mailman/listinfo/nginx-devel
>



-- 
Oscar Fernandez Sierra
oscaretu at gmail.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20170103/52c9b072/attachment.html>


More information about the nginx-devel mailing list