[PATCH] Escape strings produced by the `%V` format specification

Joshua Spence josh at joshuaspence.com
Tue Jan 3 11:14:44 UTC 2017


I tested this change with the following script:

```
#!/usr/bin/perl

###############################################################################

use warnings;
use strict;

use Test::More;

BEGIN { use FindBin; chdir($FindBin::Bin); }

use lib 'lib';
use Test::Nginx;

###############################################################################

select STDERR; $| = 1;
select STDOUT; $| = 1;

my $t = Test::Nginx->new()
    ->has(qw/http limit_req/)
    ->plan(2)
    ->write_file_expand('nginx.conf', <<'EOF');

%%TEST_GLOBALS%%

daemon off;

events {}

http {
    %%TEST_GLOBALS_HTTP%%

    limit_req_zone $binary_remote_addr zone=one:1m rate=1r/m;
    limit_req zone=one;

    server {
        listen       127.0.0.1:8080;
        server_name  localhost;

        location / {
            error_log stderr;
        }
    }
}

EOF

open OLDERR, '>&', \*STDERR;
open STDERR, '>', $t->testdir() . '/stderr' or die "Can't reopen STDERR:
$!";
open my $stderr, '<', $t->testdir() . '/stderr' or die "Can't open stderr
file: $!";

$t->run();

open STDERR, '>&', \*OLDERR;

###############################################################################

# charge limit_req
http_get('/');

http_get('/"foo bar"');
is(lines($t, 'request: "GET /\"foo bar\" HTTP/1.0"'), 1);

http_get_with_referrer('/',
'https://www.google.com.au/search?hl=en&q="BauGutachter"
+41 2016 .ch&num=100&start=0');
is(lines($t, 'referrer:
"https://www.google.com.au/search?hl=en&q=\"BauGutachter\"
+41 2016 .ch&num=100&start=0"'), 1);

###############################################################################

sub http_get_with_referrer {
    my ($url, $referer) = @_;
    return http(<<EOF);
GET $url HTTP/1.0
Host: localhost
Referer: $referer

EOF
}

sub lines {
    my ($t, $pattern) = @_;
    return map { $_ =~ /\Q$pattern\E/ } (<$stderr>);
}

###############################################################################
```

*Joshua Spence*

*mobile:* +61 422 990 263
*email:* josh at joshuaspence.com
*web:* http://www.joshuaspence.com

On 3 January 2017 at 17:50, Joshua Spence <josh at joshuaspence.com> wrote:

> # HG changeset patch
> # User Joshua Spence <josh at joshuaspence.com>
> # Date 1483420178 -39600
> #      Tue Jan 03 16:09:38 2017 +1100
> # Branch error_log_escape
> # Node ID 30a9ce42aadfcf135f3ad95a296c95e7044d5f9d
> # Parent  a42afc225e98afe1f7c3b428c81c8af7eba362c0
> Escape strings produced by the `%V` format specification
>
> Currently nginx error logs loosely adhere to the following pattern:
>
> ```
> %{TIMESTAMP} [%{SEVERITY}] %{PROCESS_ID}#%{THREAD_ID}:(
> *%{CONNECTION_ID})?%{MESSAGE}(, %{KEY}: "%{VALUE}")*
> ```
>
> In our environment we are using Filebeat to ship nginx error logs into
> Logstash. Our Logstash filter for parsing nginx error logs is as follows:
>
> ```
> filter {
>   grok {
>     match => {
>       "message" => "(?<timestamp>%{YEAR}/%{MONTHNUM2}/%{MONTHDAY}
> %{TIME}) \[%{LOGLEVEL:severity}\] %{POSINT:process_id:int}#%{NONNEGINT:thread_id:int}:(?:
> \*%{NONNEGINT:connection_id:int})? (?<message>[^,]*)(?:,
> %{GREEDYDATA:nginx_keyvalue_data})?"
>     }
>     overwrite => ["message"]
>   }
>
>   date {
>     match => ["timestamp", "yyyy/MM/dd HH:mm:ss"]
>     remove_field => ["timestamp"]
>   }
>
>   kv {
>     field_split => ", "
>     remove_field => "nginx_keyvalue_data"
>     source => "nginx_keyvalue_data"
>     value_split => ": "
>   }
> }
> ```
>
> The problem, however, is that it is difficult to unambigiously parse the
> key-value data from the nginx error log format because of a lack of data
> sanitization. Specifically, consider the following entry from our nginx
> error logs:
>
> ```
> 2016/11/15 03:45:51 [error] 23770#23770: *21535000 open()
> "/REDACTED/+"BauGutachter"++41+2016+.ch&tbo=1" failed (2: No such file or
> directory), client: REDACTED, server: REDACTED, request: "GET
> /job/+%22BauGutachter%22++41+2016+.ch&tbo=1 HTTP/1.1", host: "REDACTED",
> referrer: "https://www.google.com.au/search?hl=en&q="BauGutachter" +41
> 2016 .ch&num=100&start=0"
> ```
>
> Specifically, the problem in the above sample is the `referrer` field, the
> value of which contains double quotes. I think that it makes sense to
> escape `\` and `"` characters before they are written to the error log.
>
> diff -r a42afc225e98 -r 30a9ce42aadf src/core/ngx_string.c
> --- a/src/core/ngx_string.c     Tue Dec 27 17:23:08 2016 +0300
> +++ b/src/core/ngx_string.c     Tue Jan 03 16:09:38 2017 +1100
> @@ -15,7 +15,7 @@
>      const u_char *basis, ngx_uint_t padding);
>  static ngx_int_t ngx_decode_base64_internal(ngx_str_t *dst, ngx_str_t
> *src,
>      const u_char *basis);
> -
> +static uintptr_t ngx_escape_string(u_char *dst, u_char *src, size_t size);
>
>  void
>  ngx_strlow(u_char *dst, u_char *src, size_t n)
> @@ -235,7 +235,7 @@
>                  v = va_arg(args, ngx_str_t *);
>
>                  len = ngx_min(((size_t) (last - buf)), v->len);
> -                buf = ngx_cpymem(buf, v->data, len);
> +                buf = (u_char *) ngx_escape_string(buf, v->data, len);
>                  fmt++;
>
>                  continue;
> @@ -1844,6 +1844,43 @@
>  }
>
>
> +static uintptr_t
> +ngx_escape_string(u_char *dst, u_char *src, size_t size)
> +{
> +    u_char      ch;
> +    ngx_uint_t  len;
> +
> +    if (dst == NULL) {
> +        len = 0;
> +
> +        while (size) {
> +            ch = *src++;
> +
> +            if (ch == '\\' || ch == '"') {
> +                len++;
> +            }
> +
> +            size--;
> +        }
> +
> +        return (uintptr_t) len;
> +    }
> +
> +    while (size) {
> +        ch = *src++;
> +
> +        if (ch == '\\' || ch == '"') {
> +            *dst++ = '\\';
> +        }
> +
> +        *dst++ = ch;
> +        size--;
> +    }
> +
> +    return (uintptr_t) dst;
> +}
> +
> +
>  void
>  ngx_str_rbtree_insert_value(ngx_rbtree_node_t *temp,
>      ngx_rbtree_node_t *node, ngx_rbtree_node_t *sentinel)
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20170103/42305a42/attachment.html>


More information about the nginx-devel mailing list