[PATCH] Escape strings produced by the `%V` format specification

Joshua Spence josh at joshuaspence.com
Tue Jan 3 21:01:24 UTC 2017


%%TEST_GLOBALS%% and %%TEST_GLOBALS_HTTP%% are macros inserted by
Test::Nginx. I don't know if it is the same Test::Nginx from CPAN, it is
vendored in the nginx-tests repository.

*Joshua Spence*

*mobile:* +61 422 990 263
*email:* josh at joshuaspence.com
*web:* http://www.joshuaspence.com

On 3 January 2017 at 23:57, oscaretu . <oscaretu at gmail.com> wrote:

> Hello, Joshua.
>
> What values take %%TEST_GLOBALS%% and %%TEST_GLOBALS_HTTP%%?
>
> I've searched them in Google and I didn't find anything related. Nor I was
> unable to find anything about them in the documentation for Test::Nginx (
> http://search.cpan.org/~agent/Test-Nginx-0.25/lib/Test/Nginx.pm) neither
> in https://openresty.gitbooks.io/programming-openresty/content/
>
> Kind regards,
> Oscar
>
>
>
> On Tue, Jan 3, 2017 at 12:14 PM, Joshua Spence <josh at joshuaspence.com>
> wrote:
>
>> I tested this change with the following script:
>>
>> ```
>> #!/usr/bin/perl
>>
>> ############################################################
>> ###################
>>
>> use warnings;
>> use strict;
>>
>> use Test::More;
>>
>> BEGIN { use FindBin; chdir($FindBin::Bin); }
>>
>> use lib 'lib';
>> use Test::Nginx;
>>
>> ############################################################
>> ###################
>>
>> select STDERR; $| = 1;
>> select STDOUT; $| = 1;
>>
>> my $t = Test::Nginx->new()
>>     ->has(qw/http limit_req/)
>>     ->plan(2)
>>     ->write_file_expand('nginx.conf', <<'EOF');
>>
>> %%TEST_GLOBALS%%
>>
>> daemon off;
>>
>> events {}
>>
>> http {
>>     %%TEST_GLOBALS_HTTP%%
>>
>>     limit_req_zone $binary_remote_addr zone=one:1m rate=1r/m;
>>     limit_req zone=one;
>>
>>     server {
>>         listen       127.0.0.1:8080;
>>         server_name  localhost;
>>
>>         location / {
>>             error_log stderr;
>>         }
>>     }
>> }
>>
>> EOF
>>
>> open OLDERR, '>&', \*STDERR;
>> open STDERR, '>', $t->testdir() . '/stderr' or die "Can't reopen STDERR:
>> $!";
>> open my $stderr, '<', $t->testdir() . '/stderr' or die "Can't open stderr
>> file: $!";
>>
>> $t->run();
>>
>> open STDERR, '>&', \*OLDERR;
>>
>> ############################################################
>> ###################
>>
>> # charge limit_req
>> http_get('/');
>>
>> http_get('/"foo bar"');
>> is(lines($t, 'request: "GET /\"foo bar\" HTTP/1.0"'), 1);
>>
>> http_get_with_referrer('/', 'https://www.google.com.au/search?hl=en&q="BauGutachter"
>> +41 2016 .ch&num=100&start=0');
>> is(lines($t, 'referrer: "https://www.google.com.au/search?hl=en&q=\"BauGutachter\"
>> +41 2016 .ch&num=100&start=0"'), 1);
>>
>> ############################################################
>> ###################
>>
>> sub http_get_with_referrer {
>>     my ($url, $referer) = @_;
>>     return http(<<EOF);
>> GET $url HTTP/1.0
>> Host: localhost
>> Referer: $referer
>>
>> EOF
>> }
>>
>> sub lines {
>>     my ($t, $pattern) = @_;
>>     return map { $_ =~ /\Q$pattern\E/ } (<$stderr>);
>> }
>>
>> ############################################################
>> ###################
>> ```
>>
>> *Joshua Spence*
>>
>> *mobile:* +61 422 990 263 <+61%20422%20990%20263>
>> *email:* josh at joshuaspence.com
>> *web:* http://www.joshuaspence.com
>>
>> On 3 January 2017 at 17:50, Joshua Spence <josh at joshuaspence.com> wrote:
>>
>>> # HG changeset patch
>>> # User Joshua Spence <josh at joshuaspence.com>
>>> # Date 1483420178 -39600
>>> #      Tue Jan 03 16:09:38 2017 +1100
>>> # Branch error_log_escape
>>> # Node ID 30a9ce42aadfcf135f3ad95a296c95e7044d5f9d
>>> # Parent  a42afc225e98afe1f7c3b428c81c8af7eba362c0
>>> Escape strings produced by the `%V` format specification
>>>
>>> Currently nginx error logs loosely adhere to the following pattern:
>>>
>>> ```
>>> %{TIMESTAMP} [%{SEVERITY}] %{PROCESS_ID}#%{THREAD_ID}:(
>>> *%{CONNECTION_ID})?%{MESSAGE}(, %{KEY}: "%{VALUE}")*
>>> ```
>>>
>>> In our environment we are using Filebeat to ship nginx error logs into
>>> Logstash. Our Logstash filter for parsing nginx error logs is as follows:
>>>
>>> ```
>>> filter {
>>>   grok {
>>>     match => {
>>>       "message" => "(?<timestamp>%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}
>>> %{TIME}) \[%{LOGLEVEL:severity}\] %{POSINT:process_id:int}#%{NONNEGINT:thread_id:int}:(?:
>>> \*%{NONNEGINT:connection_id:int})? (?<message>[^,]*)(?:,
>>> %{GREEDYDATA:nginx_keyvalue_data})?"
>>>     }
>>>     overwrite => ["message"]
>>>   }
>>>
>>>   date {
>>>     match => ["timestamp", "yyyy/MM/dd HH:mm:ss"]
>>>     remove_field => ["timestamp"]
>>>   }
>>>
>>>   kv {
>>>     field_split => ", "
>>>     remove_field => "nginx_keyvalue_data"
>>>     source => "nginx_keyvalue_data"
>>>     value_split => ": "
>>>   }
>>> }
>>> ```
>>>
>>> The problem, however, is that it is difficult to unambigiously parse the
>>> key-value data from the nginx error log format because of a lack of data
>>> sanitization. Specifically, consider the following entry from our nginx
>>> error logs:
>>>
>>> ```
>>> 2016/11/15 03:45:51 [error] 23770#23770: *21535000 open()
>>> "/REDACTED/+"BauGutachter"++41+2016+.ch&tbo=1" failed (2: No such file
>>> or directory), client: REDACTED, server: REDACTED, request: "GET
>>> /job/+%22BauGutachter%22++41+2016+.ch&tbo=1 HTTP/1.1", host:
>>> "REDACTED", referrer: "https://www.google.com.au/search?hl=en&q="BauGutachter"
>>> +41 2016 .ch&num=100&start=0"
>>> ```
>>>
>>> Specifically, the problem in the above sample is the `referrer` field,
>>> the value of which contains double quotes. I think that it makes sense to
>>> escape `\` and `"` characters before they are written to the error log.
>>>
>>> diff -r a42afc225e98 -r 30a9ce42aadf src/core/ngx_string.c
>>> --- a/src/core/ngx_string.c     Tue Dec 27 17:23:08 2016 +0300
>>> +++ b/src/core/ngx_string.c     Tue Jan 03 16:09:38 2017 +1100
>>> @@ -15,7 +15,7 @@
>>>      const u_char *basis, ngx_uint_t padding);
>>>  static ngx_int_t ngx_decode_base64_internal(ngx_str_t *dst, ngx_str_t
>>> *src,
>>>      const u_char *basis);
>>> -
>>> +static uintptr_t ngx_escape_string(u_char *dst, u_char *src, size_t
>>> size);
>>>
>>>  void
>>>  ngx_strlow(u_char *dst, u_char *src, size_t n)
>>> @@ -235,7 +235,7 @@
>>>                  v = va_arg(args, ngx_str_t *);
>>>
>>>                  len = ngx_min(((size_t) (last - buf)), v->len);
>>> -                buf = ngx_cpymem(buf, v->data, len);
>>> +                buf = (u_char *) ngx_escape_string(buf, v->data, len);
>>>                  fmt++;
>>>
>>>                  continue;
>>> @@ -1844,6 +1844,43 @@
>>>  }
>>>
>>>
>>> +static uintptr_t
>>> +ngx_escape_string(u_char *dst, u_char *src, size_t size)
>>> +{
>>> +    u_char      ch;
>>> +    ngx_uint_t  len;
>>> +
>>> +    if (dst == NULL) {
>>> +        len = 0;
>>> +
>>> +        while (size) {
>>> +            ch = *src++;
>>> +
>>> +            if (ch == '\\' || ch == '"') {
>>> +                len++;
>>> +            }
>>> +
>>> +            size--;
>>> +        }
>>> +
>>> +        return (uintptr_t) len;
>>> +    }
>>> +
>>> +    while (size) {
>>> +        ch = *src++;
>>> +
>>> +        if (ch == '\\' || ch == '"') {
>>> +            *dst++ = '\\';
>>> +        }
>>> +
>>> +        *dst++ = ch;
>>> +        size--;
>>> +    }
>>> +
>>> +    return (uintptr_t) dst;
>>> +}
>>> +
>>> +
>>>  void
>>>  ngx_str_rbtree_insert_value(ngx_rbtree_node_t *temp,
>>>      ngx_rbtree_node_t *node, ngx_rbtree_node_t *sentinel)
>>>
>>
>>
>> _______________________________________________
>> nginx-devel mailing list
>> nginx-devel at nginx.org
>> http://mailman.nginx.org/mailman/listinfo/nginx-devel
>>
>
>
>
> --
> Oscar Fernandez Sierra
> oscaretu at gmail.com
>
> _______________________________________________
> nginx-devel mailing list
> nginx-devel at nginx.org
> http://mailman.nginx.org/mailman/listinfo/nginx-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20170104/ba2c3e06/attachment-0001.html>


More information about the nginx-devel mailing list